# Numpy Intro

NumPy documentation: https://numpy.org/doc/stable/reference/

### Rank 1 numpy arrays

In [14]:
import numpy as np

# Build a rank-1 (one-dimensional) array from a plain Python list.
an_array = np.array([3, 33, 333])

print('type:', type(an_array))

# A rank-1 array reports a one-element shape tuple.
print("Shape (should only have one dimension): ", an_array.shape)

# Feed the three elements positionally into the template instead of
# indexing them one at a time.
print('Individual indexes: \n1(0): {}\n2(1):{}\n3(2): {}'.format(*an_array))

# ndarrays are mutable: assigning through an index overwrites the element.
an_array[0] = 888

print('Replacing first index with 888:', an_array)

type: <class 'numpy.ndarray'>
Shape (should only have one dimension):  (3,)
Individual indexes: 
1(0): 3
2(1):33
3(2): 333
Replacing first index with 888: [888  33 333]


### Rank 2 numpy arrays

In [22]:
# Rank-2 arrays are a natural fit for matrices: each inner list is one row.

another = np.array([
    [11, 12, 13],
    [21, 22, 23],
])
print(another)
print(f'Shape: {another.shape}')

# Index with [row, column]; both positions are zero-based.
r1c1, r1c2, r2c1 = another[0, 0], another[0, 1], another[1, 0]
print(f'Accessing certain elements: \nR1C1: {r1c1}\nR1C2: {r1c2}\nR2C1: {r2c1}')

[[11 12 13]
 [21 22 23]]
Shape: (2, 3)
Accessing certain elements: 
R1C1: 11
R1C2: 12
R2C1: 21


In [63]:
# Arrays created already filled with constant or random values.
shape = (2, 2)

ex1 = np.zeros(shape)           # every entry 0.0
ex2 = np.full(shape, 9.1)       # every entry set to the given constant
ex3 = np.eye(2)                 # 2x2 identity matrix
ex4 = np.ones(shape)            # every entry 1.0
ex5 = np.random.random(shape)   # random floats between 0 and 1

# Show the examples in the order they were built.
for example in (ex1, ex2, ex3, ex4, ex5):
    print(example)

[[0. 0.]
 [0. 0.]]
[[9.1 9.1]
 [9.1 9.1]]
[[1. 0.]
 [0. 1.]]
[[1. 1.]
 [1. 1.]]
[[0.74001739 0.35905411]
 [0.36249691 0.63652096]]


### Slice Indexing

In [88]:
an_array = np.array([[11,12,13,14],[21,22,23,24],[31,32,33,34]])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [89]:
a_slice = an_array[:2, 1:3]
print(a_slice)

[[12 13]
 [22 23]]


In [90]:
print("Before:", an_array[0, 1])   #inspect the element at 0, 1  
a_slice[0, 0] = 1000    # a_slice[0, 0] is the same piece of data as an_array[0, 1]
print("After:", an_array[0, 1])

Before: 12
After: 1000


In [91]:
# Create a Rank 2 array of shape (3, 4)
an_array = np.array([[11,12,13,14], [21,22,23,24], [31,32,33,34]])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [93]:
# Using both integer indexing & slicing generates an array of lower rank
row_rank1 = an_array[1, :]    # Rank 1 view 

print(row_rank1, row_rank1.shape)  # notice only a single []

[21 22 23 24] (4,)


In [94]:
# Slicing alone: generates an array of the same rank as the an_array
row_rank2 = an_array[1:2, :]  # Rank 2 view 

print(row_rank2, row_rank2.shape)   # Notice the [[ ]]

[[21 22 23 24]] (1, 4)


In [95]:
#We can do the same thing for columns of an array:

print()
col_rank1 = an_array[:, 1]
col_rank2 = an_array[:, 1:2]

print(col_rank1, col_rank1.shape)  # Rank 1
print()
print(col_rank2, col_rank2.shape)  # Rank 2


[12 22 32] (3,)

[[12]
 [22]
 [32]] (3, 1)


In [96]:
# Create a new array
an_array = np.array([[11,12,13], [21,22,23], [31,32,33], [41,42,43]])

print('Original Array:')
print(an_array)

Original Array:
[[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]


In [97]:
# Create an array of column indices: one column position per row to be picked
col_indices = np.array([0, 1, 2, 0])
print('\nCol indices picked : ', col_indices)

row_indices = np.arange(4) # ndarray holding 0, 1, 2, 3 (an array, not a Python list)
print('\nRows indices picked : ', row_indices)


Col indices picked :  [0 1 2 0]

Rows indices picked :  [0 1 2 3]


In [101]:
# Examine the pairings of row_indices and col_indices.  These are the elements we'll change next.
for row,col in zip(row_indices,col_indices):
    print(row, ", ",col)

0 ,  0
1 ,  1
2 ,  2
3 ,  0


In [102]:
print('Values in the array at those indices: ',an_array[row_indices, col_indices])

Values in the array at those indices:  [11 22 33 41]


In [104]:
# Change one element from each row using the indices selected.
# NOTE: += modifies an_array in place, so every re-run of this cell adds
# another 100000 to the same elements.
an_array[row_indices, col_indices] += 100000

print('Changed Array:')
print(an_array)

Changed Array:
[[100011     12     13]
 [    21 100022     23]
 [    31     32 100033]
 [100041     42     43]]


### Boolean Indexing

In [105]:
# create a 3x2 array
an_array = np.array([[11,12], [21, 22], [31, 32]])
print(an_array)

[[11 12]
 [21 22]
 [31 32]]


In [106]:
# create a filter which will be boolean values for whether each element meets this condition
# NOTE(review): the name `filter` shadows Python's built-in filter() for the
# rest of the session; the following cells read this name.
filter = (an_array > 15)
filter

array([[False, False],
       [ True,  True],
       [ True,  True]])

In [107]:
# we can now select just those elements which meet that criteria
print(an_array[filter])

[21 22 31 32]


In [109]:
# For short, we could have just used the approach below without the need for the separate filter array.

an_array[(an_array > 15)]

array([21, 22, 31, 32])

In [110]:
an_array[an_array > 15] +=100
print(an_array)

[[ 11  12]
 [121 122]
 [131 132]]


### Datatypes

In [119]:
ex1 = np.array([11, 12]) # NumPy infers an integer dtype from the values
print(ex1.dtype)
ex2 = np.array([11.0, 12.0]) # NumPy infers a float dtype from the values
print(ex2.dtype)
ex3 = np.array([11.1, 21], dtype=np.int64) # You can also request the dtype explicitly
print(ex3.dtype)
# an integer dtype forces floats into integers: the fractional part is
# dropped rather than rounded (12.7 becomes 12 in the output)
ex4 = np.array([11.1,12.7], dtype=np.int64)
print(ex4.dtype)
print(ex4)
# you can use this to force integers into floats if you anticipate
# the values may change to floats later
ex5 = np.array([11, 21], dtype=np.float64)
print(ex5.dtype)
print(ex5)

int64
float64
int64
int64
[11 12]
float64
[11. 21.]


### Arithmetic Array Operations

In [142]:
# Integer operand. The builtin `int` replaces the `np.int` alias, which was
# deprecated in NumPy 1.20 and removed in 1.24 (it raises AttributeError there).
x = np.array([[111,112],[121,122]], dtype=int)
# Float operand; mixing it with x promotes every result below to float64.
y = np.array([[211.1,212.1],[221.1,222.1]], dtype=np.float64)

print(x, '\n')
print(y, '\n')

# add (element-wise)
print(x + y, '\n')         # The plus sign works
print(np.add(x, y))  # so does the numpy function "add"
print()

# subtract (element-wise)
print(x - y)
print()
print(np.subtract(x, y))
print()

# multiply (element-wise, not a matrix product)
print(x * y)
print()
print(np.multiply(x, y))
print()

# divide (element-wise true division)
print(x / y)
print()
print(np.divide(x, y))
print()

# square root of each element
print(np.sqrt(x))
print()

# exponent (e ** x) of each element
print(np.exp(x))

[[111 112]
 [121 122]] 

[[211.1 212.1]
 [221.1 222.1]] 

[[322.1 324.1]
 [342.1 344.1]] 

[[322.1 324.1]
 [342.1 344.1]]

[[-100.1 -100.1]
 [-100.1 -100.1]]

[[-100.1 -100.1]
 [-100.1 -100.1]]

[[23432.1 23755.2]
 [26753.1 27096.2]]

[[23432.1 23755.2]
 [26753.1 27096.2]]

[[0.52581715 0.52805281]
 [0.54726368 0.54930212]]

[[0.52581715 0.52805281]
 [0.54726368 0.54930212]]

[[10.53565375 10.58300524]
 [11.         11.04536102]]

[[1.60948707e+48 4.37503945e+48]
 [3.54513118e+52 9.63666567e+52]]


### Basic Statistical Operations

In [177]:
# set up a random 2 x 5 matrix (np.random.randn samples scaled by 10)
arr = 10 * np.random.randn(2,5)
print(np.round(arr, 3))

[[  6.419  22.289   4.244   0.347  -4.52 ]
 [  9.793 -17.001 -19.159   5.248  -9.214]]


In [154]:
# compute the mean for all elements
print(arr.mean())

-2.0826279287642926


In [162]:

# compute the mean of each row: axis=1 averages across the columns,
# giving one value per row
print(arr.mean(axis = 1))

# compute the mean of each column: axis=0 averages down the rows,
# giving one value per column
print(arr.mean(axis = 0))

[-1.87499063 -2.29026523]
[-4.37776422 -3.04135786 -3.46005024 -9.64953561 10.11556829]


In [163]:
# sum all the elements
print(arr.sum())

-20.826279287642926


In [164]:
# compute the medians
print(np.median(arr, axis = 1))

[-3.76771857 -4.82260069]


### Sorting

In [188]:
# create a 10 element array of randoms
unsorted = np.random.randn(10)

print(unsorted)

[ 0.22910841  0.04731871  0.65184845  1.7399275  -1.03808534  0.67297404
  0.61089512 -0.84628786 -0.94469025  0.53891601]


In [189]:
# Create a sorted copy, leaving `unsorted` untouched.
# np.sort returns a new sorted array. The result is named `sorted_copy`
# (not `sorted`) so Python's built-in sorted() is not shadowed for the rest
# of the session.
sorted_copy = np.sort(unsorted)

print(sorted_copy)
print()
print(unsorted)

[-1.03808534 -0.94469025 -0.84628786  0.04731871  0.22910841  0.53891601
  0.61089512  0.65184845  0.67297404  1.7399275 ]

[ 0.22910841  0.04731871  0.65184845  1.7399275  -1.03808534  0.67297404
  0.61089512 -0.84628786 -0.94469025  0.53891601]


In [190]:
# inplace sorting
unsorted.sort() 

print(unsorted)

[-1.03808534 -0.94469025 -0.84628786  0.04731871  0.22910841  0.53891601
  0.61089512  0.65184845  0.67297404  1.7399275 ]


### Finding Unique Elements

In [191]:
array = np.array([1,2,1,4,2,1,4,2])

print(np.unique(array))

[1 2 4]


### Set operations with np.array data type

In [192]:
s1 = np.array(['desk','chair','bulb'])
s2 = np.array(['lamp','bulb','chair'])
print(s1, s2)

['desk' 'chair' 'bulb'] ['lamp' 'bulb' 'chair']


In [193]:
print( np.intersect1d(s1, s2) )

['bulb' 'chair']


In [194]:
print( np.union1d(s1, s2) )

['bulb' 'chair' 'desk' 'lamp']


In [195]:
print( np.setdiff1d(s1, s2) )# elements in s1 that are not in s2

['desk']


In [196]:
# Element-wise membership test: True where the element of s1 also occurs in s2.
# np.isin is used instead of np.in1d, which is deprecated as of NumPy 2.0.
print( np.isin(s1, s2) )

[False  True  True]


### Broadcasting
https://numpy.org/doc/stable/user/basics.broadcasting.html

In [197]:
import numpy as np

start = np.zeros((4,3))
print(start)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [199]:
# create a rank 1 ndarray with 3 values
add_rows = np.array([1, 0, 2])
print(add_rows)

[1 0 2]


In [200]:
y = start + add_rows  # add to each row of 'start' using broadcasting
print(y)

[[1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]]


In [201]:
# create an ndarray which is 4 x 1 to broadcast across columns
add_cols = np.array([[0,1,2,3]])
add_cols = add_cols.T

print(add_cols)

[[0]
 [1]
 [2]
 [3]]


In [202]:
# add to each column of 'start' using broadcasting
y = start + add_cols 
print(y)

[[0. 0. 0.]
 [1. 1. 1.]
 [2. 2. 2.]
 [3. 3. 3.]]


In [203]:

# this will just broadcast in both dimensions
add_scalar = np.array([1])  
print(start+add_scalar)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [205]:
# create our 3x4 matrix
arrA = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
print(arrA)
print()


# create a plain Python list of 4 values (not a 4x1 array); NumPy treats it
# as a length-4 row when it is combined with arrA
arrB = [0,1,0,2]
print(arrB)
print()

# add the two together using broadcasting: arrB is added to every row of arrA
print(arrA + arrB)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

[0, 1, 0, 2]

[[ 1  3  3  6]
 [ 5  7  7 10]
 [ 9 11 11 14]]


### Speedtest: ndarrays vs lists 

In [206]:
from numpy import arange
from timeit import Timer

size    = 1000000
timeits = 1000

In [207]:
# create the ndarray with values 0,1,2...,size-1
nd_array = arange(size)
print( type(nd_array) )

<class 'numpy.ndarray'>


In [208]:
# timer expects the operation as a parameter, 
# here we pass nd_array.sum()
timer_numpy = Timer("nd_array.sum()", "from __main__ import nd_array")

print("Time taken by numpy ndarray: %f seconds" % 
      (timer_numpy.timeit(timeits)/timeits))

Time taken by numpy ndarray: 0.000509 seconds


In [209]:
# create the list with values 0,1,2...,size-1
a_list = list(range(size))
print (type(a_list) )

<class 'list'>


In [210]:
# timer expects the operation as a parameter, here we pass sum(a_list)
timer_list = Timer("sum(a_list)", "from __main__ import a_list")

print("Time taken by list:  %f seconds" % 
      (timer_list.timeit(timeits)/timeits))

Time taken by list:  0.003769 seconds


### Read or write to the disk

Binary Format

In [211]:
x = np.array([ 23.23, 24.24] )
np.save('an_array', x)
np.load('an_array.npy')

array([23.23, 24.24])

Text Format:

In [213]:
np.savetxt('array.txt', X=x, delimiter=',')
!cat array.txt
np.loadtxt('array.txt', delimiter=',')

2.323000000000000043e+01
2.423999999999999844e+01


array([23.23, 24.24])

### Dot Product on Matrices and Inner Product on Vectors:

In [215]:
# Matrix product of two 2x2 arrays, computed in two equivalent ways.
x2d = np.array([[1, 1], [1, 1]])
y2d = np.array([[2, 2], [2, 2]])

# The ndarray method and the module-level function give the same result.
product_via_method = x2d.dot(y2d)
product_via_function = np.dot(x2d, y2d)

print(product_via_method)
print()
print(product_via_function)

[[4 4]
 [4 4]]

[[4 4]
 [4 4]]


In [216]:
# determine the inner product of two vectors
a1d = np.array([9 , 9 ])
b1d = np.array([10, 10])

print(a1d.dot(b1d))
print()
print(np.dot(a1d, b1d))

180

180


In [217]:
# dot product of a 2-D array and a 1-D vector (the result is a 1-D array)
print(x2d.dot(a1d))
print()
print(np.dot(x2d, a1d))

[18 18]

[18 18]


### Sum

In [218]:
# sum elements in the array
ex1 = np.array([[11,12],[21,22]])

print(np.sum(ex1))          # add all members

66


In [219]:
print(np.sum(ex1, axis=0))  # columnwise sum

[32 34]


In [220]:
print(np.sum(ex1, axis=1))  # rowwise sum

[23 43]


### Element-wise Functions:

In [221]:
# random array
x = np.random.randn(8)
x

array([ 1.08711486,  1.17900821, -1.80961327, -3.26108602,  0.53219582,
        0.00667719, -0.45284389,  0.32961155])

In [222]:
# another random array
y = np.random.randn(8)
y

array([-0.66989384, -0.73173349,  1.12448877,  1.27874157,  3.01831762,
        0.85099513, -0.93464641, -2.50994522])

In [223]:
# returns element wise maximum between two arrays

np.maximum(x, y)

array([ 1.08711486,  1.17900821,  1.12448877,  1.27874157,  3.01831762,
        0.85099513, -0.45284389,  0.32961155])

### Reshaping array:

In [224]:
# grab values from 0 through 19 in an array
arr = np.arange(20)
print(arr)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [225]:
# reshape to be a 4 x 5 matrix
arr.reshape(4,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

### Transpose

In [226]:
# transpose
ex1 = np.array([[11,12],[21,22]])

ex1.T

array([[11, 21],
       [12, 22]])

### Indexing using where():

In [227]:
# two value arrays of equal length to choose between
x_1 = np.array([1,2,3,4,5])

y_1 = np.array([11,22,33,44,55])

# boolean mask, one flag per position
# NOTE(review): the name `filter` shadows Python's built-in filter()
filter = np.array([True, False, True, False, True])

In [228]:
# take the element of x_1 where the mask is True, otherwise the element of y_1
out = np.where(filter, x_1, y_1)
print(out)

[ 1 22  3 44  5]


In [229]:
mat = np.random.rand(5,5)
mat

array([[0.59828039, 0.88159186, 0.32235379, 0.81367393, 0.12175605],
       [0.69142033, 0.30717851, 0.24915319, 0.75909126, 0.90038647],
       [0.92908153, 0.63862102, 0.66643821, 0.26563966, 0.34086696],
       [0.29228011, 0.72407479, 0.7041309 , 0.10954541, 0.68823131],
       [0.84796816, 0.23103941, 0.64566034, 0.63450325, 0.26046035]])

In [230]:
np.where( mat > 0.5, 1000, -1)

array([[1000, 1000,   -1, 1000,   -1],
       [1000,   -1,   -1, 1000, 1000],
       [1000, 1000, 1000,   -1,   -1],
       [  -1, 1000, 1000,   -1, 1000],
       [1000,   -1, 1000, 1000,   -1]])

### "any" or "all" conditionals:

In [240]:
arr_bools = np.array([ True, False, True, True, False ])

In [241]:
arr_bools.any()

True

In [239]:
arr_bools.all()

False

### Random Number Generation:

In [242]:
Y = np.random.normal(size = (1,5))[0]
print(Y)

[ 1.67946015  1.22910417  0.64665702 -0.55207976  1.88038097]


In [243]:
Z = np.random.randint(low=2,high=50,size=4)
print(Z)

[45 11 23  7]


In [246]:
np.random.permutation(Z) #return a new ordering of elements in Z

array([ 7, 45, 23, 11])

In [247]:
np.random.uniform(size=4) #uniform distribution

array([0.20586187, 0.24835418, 0.044317  , 0.09274489])

In [248]:
np.random.normal(size=4) #normal distribution

array([ 0.42040992,  0.88990533, -0.11053573, -1.06519292])

### Merging Data Sets

In [249]:
K = np.random.randint(low=2,high=50,size=(2,2))
print(K)

print()
M = np.random.randint(low=2,high=50,size=(2,2))
print(M)

[[25 39]
 [41 13]]

[[45 47]
 [16 46]]


In [250]:
np.vstack((K,M))

array([[25, 39],
       [41, 13],
       [45, 47],
       [16, 46]])

In [251]:
np.hstack((K,M))

array([[25, 39, 45, 47],
       [41, 13, 16, 46]])

In [252]:
np.concatenate([K, M], axis = 0)

array([[25, 39],
       [41, 13],
       [45, 47],
       [16, 46]])

In [253]:
np.concatenate([K, M.T], axis = 1)

array([[25, 39, 45, 16],
       [41, 13, 47, 46]])