In [150]:
import numpy as np
#Fix the state of the legacy global generator so the values below are repeatable
np.random.seed(0)
#4x4 matrix of uniform samples from the half-open interval [0, 1)
A2 = np.random.random(size=(4, 4))
A2

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ],
       [0.96366276, 0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606, 0.0871293 ]])

In [151]:
#take the top-left 2x2 block of A2 (first two rows, first two columns)
A3 = A2[:2, :2]
A3

array([[0.5488135 , 0.71518937],
       [0.4236548 , 0.64589411]])

In [152]:
print(A3*2)

[[1.09762701 1.43037873]
 [0.8473096  1.29178823]]


In [153]:
print(A3+10)

[[10.5488135  10.71518937]
 [10.4236548  10.64589411]]


In [154]:
#reshaping of arrays
np.arange(1,10)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [155]:
np.arange(1,10).reshape((3,3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [156]:
#A common reshaping pattern is the conversion of a one-dimensional 
#array into a two-dimensional row or column matrix
#A vector with only one column is called a column vector
#A vector with only one row is called a row vector
np.arange(1,10).reshape((1,9))

array([[1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [157]:
np.arange(1,10).reshape((9,1))

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [158]:
#Array concatenation

In [159]:
#Join two 1-D arrays end to end; the inputs are passed together as one sequence
A1 = np.array([1, 2, 3])
A2 = np.array([4, 5, 6])
A3 = np.concatenate((A1, A2))
A3

array([1, 2, 3, 4, 5, 6])

In [160]:
#Swapping the order of the inputs swaps the order of the joined result
A3 = np.concatenate((A2, A1))
A3

array([4, 5, 6, 1, 2, 3])

In [161]:
#Array splitting, the opposite of concatenation
#np.split takes the indices at which to cut the array
#N split points produce N + 1 sub-arrays
#Here the cuts at indices 2 and 4 give A3[:2], A3[2:4] and A3[4:]
a1, a2, a3 = np.split(A3, [2, 4])
print(a1, a2, a3)

[4 5] [6 1] [2 3]


In [162]:
#Append a column to a Numpy array
#Class example
#2x2 matrix that will receive an extra column
A1 = np.array([[1, 2], [4, 5]])
A1

array([[1, 2],
       [4, 5]])

In [163]:
#Column to add is a column vector
A2 = [[3],[6]]
A2

[[3], [6]]

In [164]:
newA1 = np.append(A1,A2,axis=1)
newA1

array([[1, 2, 3],
       [4, 5, 6]])

In [165]:
#Insert a column in a Numpy array
#Class example
#2x2 matrix that will receive a new column between its two columns
A1 = np.array([[1, 3], [4, 6]])
A1

array([[1, 3],
       [4, 6]])

In [166]:
#Values to insert into the Numpy array
A2 = [2,5]
A2

[2, 5]

In [167]:
#insert a new column at index 1
print(A1)
print(A2)
newA1 = np.insert(A1,1,A2,axis=1)
newA1

[[1 3]
 [4 6]]
[2, 5]


array([[1, 2, 3],
       [4, 5, 6]])

In [168]:
#Numpy provides an easy and flexible interface to optimize 
#computations with arrays of data
#To make computations faster, use vectorized operations implemented 
#through Numpy's universal functions (UFuncs)
#Types of ufuncs: unary ufuncs (one input) and binary ufuncs (two inputs)
#Numpy understands Python's built-in arithmetic operations


In [169]:
#The integers 0 through 9 as a 1-D ndarray
x = np.arange(10)
print(type(x), x)

<class 'numpy.ndarray'> [0 1 2 3 4 5 6 7 8 9]


In [170]:
#Each arithmetic operator is applied element by element
print(x + 1)  #addition
print(x * 2)  #multiplication
print(x - 10)  #subtraction
print(x / 2)  #true division, the result is a float array
print(-x)  #negation
print(x ** 2)  #exponentiation
print(x % 2)  #modulus
print(abs(x - 3))  #absolute value

[ 1  2  3  4  5  6  7  8  9 10]
[ 0  2  4  6  8 10 12 14 16 18]
[-10  -9  -8  -7  -6  -5  -4  -3  -2  -1]
[0.  0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5]
[ 0 -1 -2 -3 -4 -5 -6 -7 -8 -9]
[ 0  1  4  9 16 25 36 49 64 81]
[0 1 0 1 0 1 0 1 0 1]
[3 2 1 0 1 2 3 4 5 6]


In [171]:
#can string them together such as an expression
print(x*5+10)

[10 15 20 25 30 35 40 45 50 55]


In [172]:
#Numpy also provides ufuncs for many other mathematical operations,
#including but not limited to
#the absolute value, trigonometric functions, exponents and logarithms

In [173]:
#Draw one uniform sample from [0, 1) and use it to scale the integers 1 to 50
R1 = np.random.random()
print(R1)
A1 = R1 * np.arange(1, 51)
A1

0.02021839744032572


array([0.0202184 , 0.04043679, 0.06065519, 0.08087359, 0.10109199,
       0.12131038, 0.14152878, 0.16174718, 0.18196558, 0.20218397,
       0.22240237, 0.24262077, 0.26283917, 0.28305756, 0.30327596,
       0.32349436, 0.34371276, 0.36393115, 0.38414955, 0.40436795,
       0.42458635, 0.44480474, 0.46502314, 0.48524154, 0.50545994,
       0.52567833, 0.54589673, 0.56611513, 0.58633353, 0.60655192,
       0.62677032, 0.64698872, 0.66720712, 0.68742551, 0.70764391,
       0.72786231, 0.74808071, 0.7682991 , 0.7885175 , 0.8087359 ,
       0.8289543 , 0.84917269, 0.86939109, 0.88960949, 0.90982788,
       0.93004628, 0.95026468, 0.97048308, 0.99070147, 1.01091987])

In [174]:
#Arrange the 50 values as 10 rows by 5 columns
A2 = A1.reshape((10, 5))
A2

array([[0.0202184 , 0.04043679, 0.06065519, 0.08087359, 0.10109199],
       [0.12131038, 0.14152878, 0.16174718, 0.18196558, 0.20218397],
       [0.22240237, 0.24262077, 0.26283917, 0.28305756, 0.30327596],
       [0.32349436, 0.34371276, 0.36393115, 0.38414955, 0.40436795],
       [0.42458635, 0.44480474, 0.46502314, 0.48524154, 0.50545994],
       [0.52567833, 0.54589673, 0.56611513, 0.58633353, 0.60655192],
       [0.62677032, 0.64698872, 0.66720712, 0.68742551, 0.70764391],
       [0.72786231, 0.74808071, 0.7682991 , 0.7885175 , 0.8087359 ],
       [0.8289543 , 0.84917269, 0.86939109, 0.88960949, 0.90982788],
       [0.93004628, 0.95026468, 0.97048308, 0.99070147, 1.01091987]])

In [175]:
#Often when we are faced with a large amount of data, a first step is 
#to compute summary statistics (more to come)
#Using A2 and A3

In [176]:
A2.sum()

25.77845673641529

In [177]:
A2.sum(axis=0)

array([4.7513234 , 4.95350737, 5.15569135, 5.35787532, 5.5600593 ])

In [178]:
A2.sum(axis=1)

array([0.30327596, 0.8087359 , 1.31419583, 1.81965577, 2.32511571,
       2.83057564, 3.33603558, 3.84149551, 4.34695545, 4.85241539])

In [179]:
type(A2)

numpy.ndarray

In [180]:
A2.mean()

0.5155691347283058

In [181]:
#axis=0 collapses the rows: the values within each column are aggregated,
#giving one result per column
A2.mean(axis=0)

array([0.47513234, 0.49535074, 0.51556913, 0.53578753, 0.55600593])

In [182]:
#axis=1 collapses the columns: the values within each row are aggregated,
#giving one result per row
A2.mean(axis=1)

array([0.06065519, 0.16174718, 0.26283917, 0.36393115, 0.46502314,
       0.56611513, 0.66720712, 0.7682991 , 0.86939109, 0.97048308])

In [183]:
A2

array([[0.0202184 , 0.04043679, 0.06065519, 0.08087359, 0.10109199],
       [0.12131038, 0.14152878, 0.16174718, 0.18196558, 0.20218397],
       [0.22240237, 0.24262077, 0.26283917, 0.28305756, 0.30327596],
       [0.32349436, 0.34371276, 0.36393115, 0.38414955, 0.40436795],
       [0.42458635, 0.44480474, 0.46502314, 0.48524154, 0.50545994],
       [0.52567833, 0.54589673, 0.56611513, 0.58633353, 0.60655192],
       [0.62677032, 0.64698872, 0.66720712, 0.68742551, 0.70764391],
       [0.72786231, 0.74808071, 0.7682991 , 0.7885175 , 0.8087359 ],
       [0.8289543 , 0.84917269, 0.86939109, 0.88960949, 0.90982788],
       [0.93004628, 0.95026468, 0.97048308, 0.99070147, 1.01091987]])

In [184]:
A2.min(axis=0)

array([0.0202184 , 0.04043679, 0.06065519, 0.08087359, 0.10109199])

In [185]:
A2.max(axis=1)

array([0.10109199, 0.20218397, 0.30327596, 0.40436795, 0.50545994,
       0.60655192, 0.70764391, 0.8087359 , 0.90982788, 1.01091987])

In [186]:
#Draw a 100x5 array of samples from the normal distribution
#with mean 0 and standard deviation 1
A3 = np.random.normal(loc=0, scale=1, size=(100, 5))

In [187]:
A3.shape

(100, 5)

In [188]:
#Calculate the mean for each column
A3.mean(axis=0)

array([-0.07271607, -0.17837509,  0.00832367, -0.08173388,  0.00830538])

In [189]:
#Calculate the mean for each row
A3.mean(axis=1)

array([ 0.3835206 ,  0.08107537, -0.23820199,  0.7521679 ,  0.34210952,
       -0.41403917, -0.17193642, -0.52299173, -0.37823284,  0.13853756,
        0.14520607,  0.17248319, -0.49188373,  0.0116472 ,  0.19097146,
        0.19814695,  0.51026438,  0.59316247,  0.24986204, -0.16904396,
       -0.17872055,  1.04301037, -0.66721455,  0.19207304, -0.36405662,
        0.51859969, -0.26874386,  0.50240549,  0.85928003, -0.87021659,
       -0.33687839, -0.28653485,  0.06700472,  0.26175892,  0.08156695,
       -1.31494588,  0.14315737, -0.41108489, -0.33279697, -0.40886095,
        0.05441296,  0.20291789,  0.47646166, -0.56174592, -0.32411879,
       -0.13347887, -0.33978084, -0.04805136, -0.31533384,  0.2222016 ,
       -0.246102  , -0.75084616, -0.12649455,  0.6507652 ,  0.15292565,
        0.38570275,  0.25349723,  0.22615299,  0.6313353 , -0.22916892,
       -0.44015075, -0.89615229, -0.58774274,  0.01245938, -0.26178942,
        0.21327791, -0.48473164, -0.05270161, -0.14141927, -0.47

In [190]:
#Other functions include standard deviation, minimum, maximum, 
#finding the index of minimum value, finding the index of maximum value
#Without an axis argument, argmin returns a single index into the flattened array
np.argmin(A3)

217

In [191]:
np.argmax(A3)

266

In [192]:
#The book goes into broadcasting by providing rules to determine the
#interaction between two arrays of different shapes
#Arrays can be padded or stretched to match. This topic is out of scope for this course.

In [193]:
#Comparison operators (<, <=, >, >=, != and ==) are also implemented
#by Numpy as element-wise ufuncs.
#The result is always an array with a boolean data type.
A4 = np.arange(11, 16)
A4 < 13

array([ True,  True, False, False, False])

In [194]:
A4 > 11

array([False,  True,  True,  True,  True])

In [195]:
A4 >= 14

array([False, False, False,  True,  True])

In [196]:
A4 != 12

array([ True, False,  True,  True,  True])

In [197]:
#Working with boolean arrays
np.count_nonzero(A4<13)

2

In [198]:
np.sum(A4<13)

2

In [199]:
#Aggregates such as sum can be taken along the rows or the columns of a 2-D array
A5 = np.array([
    [21, 23, 25, 27],
    [22, 24, 26, 28],
    [23, 25, 27, 29],
])

#axis=0 adds down each column, giving one total per column
print(np.sum(A5, axis=0))

[66 72 78 84]


In [200]:
print(np.sum(A5,axis=1))

[ 96 100 104]


In [201]:
#Count the True entries in each row
#(axis=1 collapses the columns)
print(np.sum(A5<24,axis=1))

[2 1 1]


In [202]:
#Count the True entries in each column
#(axis=0 collapses the rows)
print(np.sum(A5<24,axis=0))

[3 1 0 0]


In [203]:
#Fancy indexing - pass an array of indices to access multiple
#array elements at once
#The shape of the result reflects the shape of the index array,
#not the shape of the array being indexed
A6 = np.arange(10, 21)

In [204]:
A6_ind = np.array([[4,6],[3,5]])

In [205]:
np.shape(A6)

(11,)

In [206]:
np.shape(A6_ind)

(2, 2)

In [207]:
A6[A6_ind]

array([[14, 16],
       [13, 15]])

In [208]:
#Combined indexing allows fancy indexing to be combined
#with other indexing schemes
A7 = np.array([list('abcd'), list('efgh'), list('ijkl')])

In [209]:
A7

array([['a', 'b', 'c', 'd'],
       ['e', 'f', 'g', 'h'],
       ['i', 'j', 'k', 'l']], dtype='<U1')

In [210]:
#Combine fancy and simple indices
A7[2,[2,0]]

array(['k', 'i'], dtype='<U1')

In [211]:
#Combine slicing and fancy indexing
A7[1:,[2,0]]

array([['g', 'e'],
       ['k', 'i']], dtype='<U1')

In [212]:
#Last topic - sorting using Numpy
A8 = np.array([list('klij'), list('badc'), list('hgfe')])
#Note - the default is axis=-1 (the last axis), which is axis=1 for a 2-D array
np.sort(A8)

array([['i', 'j', 'k', 'l'],
       ['a', 'b', 'c', 'd'],
       ['e', 'f', 'g', 'h']], dtype='<U1')

In [213]:
#Sorting the values within each row
np.sort(A8,axis=1)

array([['i', 'j', 'k', 'l'],
       ['a', 'b', 'c', 'd'],
       ['e', 'f', 'g', 'h']], dtype='<U1')

In [214]:
#Sorting the values within each column
print(A8)
np.sort(A8,axis=0)

[['k' 'l' 'i' 'j']
 ['b' 'a' 'd' 'c']
 ['h' 'g' 'f' 'e']]


array([['b', 'a', 'd', 'c'],
       ['h', 'g', 'f', 'e'],
       ['k', 'l', 'i', 'j']], dtype='<U1')

In [215]:
#Related method argsort returns the indices of the sorted elements
#Indices that would sort each row
np.argsort(A8,axis=1)

array([[2, 3, 0, 1],
       [1, 0, 3, 2],
       [3, 2, 1, 0]], dtype=int64)

In [216]:
#Indices that would sort each column
np.argsort(A8,axis=0)

array([[1, 1, 1, 1],
       [2, 2, 2, 2],
       [0, 0, 0, 0]], dtype=int64)