# 07B: Working with NumPy



In [1]:
import time

import numpy as np

## Creating uniform sequences and random distributions of numbers

In [2]:
# building evenly spaced sequences with np.arange

# integers from 0 up to (but not including) 20, in steps of 1
my_integers = np.arange(0, 20)

# start, stop and step can all be given; the dtype is normally inferred from them,
# but here it is set explicitly - 'f' is NumPy's code for float32 (np.float32),
# which is why the last value prints as 0.90000004 rather than 0.9
my_float = np.arange(0, 1, 0.1, dtype=np.float32)

for sequence in (my_integers, my_float):
    print(sequence)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[0.         0.1        0.2        0.3        0.4        0.5
 0.6        0.7        0.8        0.90000004]


In [3]:
# drawing random numbers from NumPy's global generator
# (no seed is set, so the values change on every run)
uniform_vector = np.random.rand(4)                      # 4 floats drawn uniformly from [0, 1)
uniform_matrix = np.random.rand(4, 2)                   # same distribution, as 4 rows x 2 columns
integer_matrix = np.random.randint(100, size=(4, 2))    # 4 x 2 integers from 0 to 99

print(uniform_vector, uniform_matrix, integer_matrix, sep='\n')

[0.12409531 0.61024983 0.3343143  0.55654994]
[[0.43867845 0.61763733]
 [0.48964141 0.08033697]
 [0.77691225 0.72462557]
 [0.03844477 0.00758086]]
[[88 96]
 [82 42]
 [73 53]
 [40 83]]


In [4]:
# sampling from a custom discrete distribution

# each outcome is paired with its probability; the probabilities must sum to 1
outcomes = [3, 5, 7]
probabilities = [0.1, 0.3, 0.6]

# an integer size gives a 1D array holding that many draws
x = np.random.choice(outcomes, size=10, p=probabilities)
print(x)

# a tuple size gives an array of that shape - here 3 rows x 5 columns
x = np.random.choice(outcomes, size=(3, 5), p=probabilities)
print(x)

[5 5 5 5 3 7 5 7 7 7]
[[5 5 7 3 7]
 [5 7 5 7 7]
 [7 5 7 5 5]]


In [5]:
# two ways to randomise the order of an array

# shuffle reorders the array in place
my_array = np.arange(1, 6)
np.random.shuffle(my_array)
print(my_array)

# permutation leaves its argument untouched and returns a shuffled copy
my_array = np.arange(1, 6)
shuffled_copy = np.random.permutation(my_array)
print(shuffled_copy)

[1 5 3 4 2]
[1 3 5 2 4]


## Statistical distributions

In [6]:
# Array calculations and vectorisation (additional)

# e.g. adding two lists element by element
x = [1, 2, 3, 4]
y = [4, 5, 6, 7]

# traditional method - pair the elements up and add each pair in Python
z = [left + right for left, right in zip(x, y)]
print(z)

# NumPy universal function (ufunc): one vectorised call handles every pair
z = np.add(x, y)
print(z)

[5, 7, 9, 11]
[ 5  7  9 11]


In [7]:
# Array arithmetic (additional)

# element-wise calculations on one or two arrays
x = np.array([1, 2, 3, 4])
y = np.array([4, 5, 6, 7])

arithmetic_results = (
    np.add(x, y),                   # sum of the two arrays
    np.subtract(x, y),              # difference
    np.multiply(x, y),              # product
    np.divide(x, 2),                # true division of every element by 2 (float result)
    np.power(x, 4),                 # every element raised to the 4th power
    np.mod(y, 2),                   # remainder after dividing by 2
    np.divmod(y, 2),                # tuple of (integer quotient, remainder)
    np.absolute([1, -2, 3, -4]),    # absolute values
)
for result in arithmetic_results:
    print(result)

[ 5  7  9 11]
[-3 -3 -3 -3]
[ 4 10 18 28]
[0.5 1.  1.5 2. ]
[  1  16  81 256]
[0 1 0 1]
(array([2, 2, 3, 3]), array([0, 1, 0, 1]))
[1 2 3 4]


In [8]:
# Controlling number representations (additional)
x = np.array([1.52345, 2.65432, 3.46891234, 4.278123])

print(
    np.around(x),               # nearest whole number (an exact .5 tie goes to the even neighbour)
    np.around(x, decimals=3),   # nearest value with 3 decimal places
    np.floor(x),                # round down to the whole number below
    np.ceil(x),                 # round up to the whole number above
    sep='\n',
)

[2. 3. 3. 4.]
[1.523 2.654 3.469 4.278]
[1. 2. 3. 4.]
[2. 3. 4. 5.]


In [9]:
# More calculations
x = np.array([1, 2, 3, 4])
y = np.array([4, 5, 6, 7])
stacked = np.array([x, y])          # 2 x 4 array: x is row 0, y is row 1

# summation (contrasted with element-wise addition)
print(np.add(x, 2))                 # adds 2 to every element of x
print(np.add(x, y))                 # adds x and y element by element
print(np.sum(x))                    # adds all elements of x into a single total

print(np.sum(stacked, axis=0))      # totals down each column - same values as np.add(x, y)
print(np.sum(stacked, axis=1))      # totals along each row: sum of x, then sum of y

print(np.cumsum(x))                 # running total

# product and difference
print(np.prod(x))                   # 1 * 2 * 3 * 4
print(np.prod(stacked))             # every element of both arrays: (1 * 2 * 3 * 4) * (4 * 5 * 6 * 7)
print(np.prod(stacked, axis=0))     # product down each column
print(np.prod(stacked, axis=1))     # product along each row

print(np.cumprod(x))                # running product
print(np.dot(x, y))                 # dot product: 1*4 + 2*5 + 3*6 + 4*7

print(np.diff(x))                   # differences between neighbours: 2-1, 3-2, 4-3
print(np.diff(x, n=2))              # n is the number of times the differencing is repeated (default is 1)

[3 4 5 6]
[ 5  7  9 11]
10
[ 5  7  9 11]
[10 22]
[ 1  3  6 10]
24
20160
[ 4 10 18 28]
[ 24 840]
[ 1  2  6 24]
60
[1 1 1]
[0 0]


In [10]:
# set operations
x = np.array([1,2,2,3,4,5,6,6,7,8])
y = np.array([9,9,9,8,8,8,7,7,7])

# every result below is a sorted array with duplicates removed
print(np.unique(x))     # the distinct values of x

# intersect1d: values found in both x and y
# setdiff1d:   values of x that do not occur in y
# setxor1d:    values found in exactly one of the two arrays
for set_operation in (np.intersect1d, np.setdiff1d, np.setxor1d):
    print(set_operation(x, y))

[1 2 3 4 5 6 7 8]
[7 8]
[1 2 3 4 5 6]
[1 2 3 4 5 6 9]


In [11]:
# test performance of vectorisation versus iterating through a loop (additional)
# (the `time` module is imported in the notebook's import cell at the top)

N_ELEMENTS = 10_000_000     # length of each operand; lower this for a quicker run

# np.arange builds the integer sequence directly, without converting a Python range
x = np.arange(N_ELEMENTS)
y = x.copy()

print(f'Each list contains {len(x)} elements')
print('\nAdding each element in list x to each element in list y')

# iterating to add the two sequences one pair at a time
tic = time.perf_counter()
z = []
for i, j in zip(x, y):
    # append the sum itself (not a one-element list) so that z holds the same
    # values as np.add(x, y) and no extra list is allocated per element
    z.append(i + j)

toc = time.perf_counter()
print(f'- time to iterate:   {toc - tic:0.4f} seconds')

# using vectorisation: a single ufunc call adds every pair
tic = time.perf_counter()
z = np.add(x, y)

toc = time.perf_counter()
print(f'- time to vectorise: {toc - tic:0.4f} seconds')


Each list contains 10000000 elements

Adding each element in list x to each element in list y
- time to iterate:   15.4327 seconds
- time to vectorise: 1.6618 seconds
