In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a simple one-dimensional ndarray from a plain Python list.
x = np.array([1, 2, 3, 4, 5])
print('x =', x)


x = [1 2 3 4 5]


In [3]:
# .shape -> tuple holding the size of every dimension
# .dtype -> the data type shared by all elements stored in the array
print('x has dimensions: ', x.shape)
print('x is an object of type ', type(x))
print('The elements in x are of type: ', x.dtype)

x has dimensions:  (5,)
x is an object of type  <class 'numpy.ndarray'>
The elements in x are of type:  int64


In [4]:
# An ndarray is not limited to numbers -- it can store text as well.
x = np.array(['Hello', 'World'])
print('x =', x)
print('x has dimensions: ', x.shape)
print('x is an object of type ', type(x))
print('The elements in x are of type: ', x.dtype)


x = ['Hello' 'World']
x has dimensions:  (2,)
x is an object of type  <class 'numpy.ndarray'>
The elements in x are of type:  <U5


In [5]:
# The dtype <U5 printed above stands for unicode strings of up to 5 characters.
# Unlike a Python list, an ndarray stores a single element type, so mixing
# integers with a string turns every element into a string.
x = np.array([1, 2, 'World'])
print('x =', x)
print('x has dimensions: ', x.shape)
print('x is an object of type ', type(x))
print('The elements in x are of type: ', x.dtype)

x = ['1' '2' 'World']
x has dimensions:  (3,)
x is an object of type  <class 'numpy.ndarray'>
The elements in x are of type:  <U21


In [6]:
# Build a rank-2 (two-dimensional) ndarray from a list of row lists.
y = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
print('y = ', y)
print('Y has dimensions:', y.shape)
print('Y has a total of', y.size, 'elements')
print('Y is an object of type:', type(y))
print('The elements in Y are of type:', y.dtype)

y =  [[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
Y has dimensions: (4, 3)
Y has a total of 12 elements
Y is an object of type: <class 'numpy.ndarray'>
The elements in Y are of type: int64


In [7]:
# The shape (4, 3) returned above describes a 2-dimensional array with 4 rows and 3 columns.
# Next: the dtype NumPy picks for integer, float and mixed inputs.
x = np.array([1, 2, 3])          # integers only
y = np.array([1.0, 2.0, 3.0])    # floats only
z = np.array([1, 2.5, 4])        # mixed -> everything is stored as float

print('The elements in x are of type:', x.dtype)
print('The elements in y are of type:', y.dtype)
print('The elements in z are of type:', z.dtype)

The elements in x are of type: int64
The elements in y are of type: float64
The elements in z are of type: float64


In [8]:
# Passing an explicit dtype converts the values while the array is built.
# Converting floats to an integer dtype drops the fractional part (truncation
# toward zero); for the positive values used here that equals rounding down.
x = np.array([1.5, 2.2, 3.7, 4.0, 5.9], dtype=np.int64)
print('x = ', x)
print('The elements in x are of type:', x.dtype)

x =  [1 2 3 4 5]
The elements in x are of type: int64


In [9]:
# Persist an ndarray to disk so it can be reloaded later.
x = np.array([1, 2, 3, 4, 5])
# The file is written relative to the current working directory; the next cell
# reads it back under the name 'my_array.npy'.
np.save('my_array', x)

In [10]:
# Read the array back from the file written by the previous cell.
y = np.load('my_array.npy')
print('y = ', y)
print('y is an object of type:', type(y))
print('The elements in y are of type:', y.dtype)

y =  [1 2 3 4 5]
y is an object of type: <class 'numpy.ndarray'>
The elements in y are of type: int64


In [11]:
# NumPy offers a variety of constructors for commonly needed arrays.
x = np.zeros((3, 4))                  # 3x4 array of zeros (float by default)
y = np.ones((4, 3), dtype=int)        # 4x3 array of integer ones
z = np.full((2, 3), 5)                # 2x3 array filled with the constant 5
sq = np.eye(5)                        # 5x5 identity matrix
diag = np.diag([10, 20, 30, 40, 50])  # square matrix with the given diagonal
print(x, y, z, sq, diag, sep='\n')

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1 1 1]
 [1 1 1]
 [1 1 1]
 [1 1 1]]
[[5 5 5]
 [5 5 5]]
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
[[10  0  0  0  0]
 [ 0 20  0  0  0]
 [ 0  0 30  0  0]
 [ 0  0  0 40  0]
 [ 0  0  0  0 50]]


In [12]:
# np.arange works like Python's range: (stop), (start, stop) or (start, stop, step);
# the stop value itself is never included.
arange1 = np.arange(10)
arange2 = np.arange(10, 20)
arange3 = np.arange(1, 14, 3)
print(arange1, arange2, arange3, sep='\n')

[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[ 1  4  7 10 13]


In [13]:
# np.linspace(start, stop, n) -> n evenly spaced numbers running from start to stop;
# as the printed result shows, the stop value is part of the output.
x = np.linspace(0, 25, num=10)
print(x)

[ 0.          2.77777778  5.55555556  8.33333333 11.11111111 13.88888889
 16.66666667 19.44444444 22.22222222 25.        ]


In [14]:
# endpoint=False leaves the stop value (25) out of the n samples
x = np.linspace(0, 25, num=10, endpoint=False)
print(x)

[ 0.   2.5  5.   7.5 10.  12.5 15.  17.5 20.  22.5]


In [15]:
# reshaping arrays
# the new shape has to hold exactly the same number of elements (4 * 5 == 20)
x = np.arange(20).reshape(4, 5)
print(x)

# equivalent function form -> np.reshape(np.arange(20), (4, 5))

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]


In [16]:
# 3x3 array of random floats drawn from the interval between 0 and 1
x = np.random.random(size=(3, 3))
print(x)

[[0.57478232 0.57883054 0.39333752]
 [0.01717357 0.39857239 0.62179298]
 [0.31717846 0.0922886  0.67720763]]


In [17]:
# 2x3 array of random integers: lower bound 4 is inclusive, upper bound 15 is exclusive
x = np.random.randint(low=4, high=15, size=(2, 3))
print(x)

[[14  6 12]
 [ 5  6 12]]


In [18]:
# 1000x1000 samples from a normal distribution with mean 0 and standard deviation 0.1
x = np.random.normal(loc=0, scale=0.1, size=(1000, 1000))
print(x)

[[-0.3292057  -0.25297152 -0.00623182 ...  0.0083041  -0.12762003
   0.03690127]
 [ 0.08096837  0.02892005 -0.01015336 ... -0.06321088 -0.07193552
   0.00926079]
 [ 0.06596912 -0.18493398 -0.07000571 ...  0.12779296 -0.0132492
   0.02914469]
 ...
 [-0.07541813  0.01941357 -0.03418289 ... -0.03811557  0.09393645
   0.13692379]
 [ 0.03040297 -0.04672669  0.00274861 ...  0.03500218  0.09963916
  -0.00588076]
 [ 0.0782926   0.02184741 -0.16976572 ...  0.06406519  0.03034477
   0.0553461 ]]


In [19]:
# Summary statistics of the sample drawn in the previous cell; mean and std
# should land close to the requested 0 and 0.1.
print('mean: ', x.mean())
print('std: ', x.std())
print('max: ', x.max())
print('min: ', x.min())
# summing a boolean mask counts the elements for which the condition holds
print('# pos: ', (x > 0).sum())
print('# neg: ', (x < 0).sum())

mean:  1.1397193180173055e-05
std:  0.0999839120257737
max:  0.46033989257381025
min:  -0.4672542026822278
# pos:  499862
# neg:  500138


In [20]:
# even numbers 2..32 arranged as a 4x4 matrix
X = np.reshape(np.arange(2, 34, 2), (4, 4))
X

array([[ 2,  4,  6,  8],
       [10, 12, 14, 16],
       [18, 20, 22, 24],
       [26, 28, 30, 32]])

In [25]:
# Elements are addressed by their [row, column] position, much like a nested
# list, and assigning through that index modifies the array in place.
X[3, 3] = 20
print(X)

[[ 2  4  6  8]
 [10 12 14 16]
 [18 20 22 24]
 [26 28 30 20]]


In [30]:
# deleting elements -- set up a 1-D and a 2-D array to delete from
x = np.arange(2, 22, 2)
print(x)
y = np.reshape(np.arange(2, 20, 2), (3, 3))
print(y)

[ 2  4  6  8 10 12 14 16 18 20]
[[ 2  4  6]
 [ 8 10 12]
 [14 16 18]]


In [31]:
# drop the elements at positions 0 and 4 of the 1-D array
# NOTE: x is rebound to the result, so running this cell a second time removes two more elements
x = np.delete(x, [0,4])
print(x)

[ 4  6  8 12 14 16 18 20]


In [34]:
# axis=0 removes whole rows, axis=1 removes whole columns
W = np.delete(y, 0, axis=0)        # y without its first row
V = np.delete(y, [0, 2], axis=1)   # y without its first and last column
print(W, '\n', V)

[[ 8 10 12]
 [14 16 18]] 
 [[ 4]
 [10]
 [16]]


In [38]:
# appending elements
# np.append returns a new array instead of modifying its argument. The result is
# stored under its own name (rather than rebinding x) so that running this cell
# more than once does not keep growing x with extra 7, 8 pairs.
x_appended = np.append(x, [7, 8])
print(x_appended)

y = np.arange(1, 10).reshape(3, 3)
# with axis=0 the appended values form a new row, so they need as many columns as y
W = np.append(y, [[10, 11, 12]], axis=0)
print(W)

[ 4  6  8 12 14 16 18 20  7  8]
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [39]:
# with axis=1 the appended values form a new column: one value per row of y
V = np.append(y, [[10], [11], [12]], axis=1)
print(V)

[[ 1  2  3 10]
 [ 4  5  6 11]
 [ 7  8  9 12]]


In [42]:
# inserting elements
# np.insert(array, index, values) places the values in front of the given index
x = np.insert(np.array([1, 2, 5, 6, 7]), 2, [3, 4])
print(x)
# with axis=0 the values become a new row at row index 1
y = np.insert(np.array([[1, 2, 3], [7, 8, 9]]), 1, [4, 5, 6], axis=0)
print(y)

[1 2 3 4 5 6 7]
[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [43]:
# insert the scalar 100 as a new column at column index 1 (the value is repeated for every row)
# NOTE: y is rebound to the result, so re-running this cell adds another column of 100s
y = np.insert(y,1,100,axis=1)

In [44]:
# show y after the column insert performed in the previous cell
print(y)

[[  1 100   2   3]
 [  4 100   5   6]
 [  7 100   8   9]]


In [49]:
# stacking arrays
x = np.arange(1, 4)
print(x)
y = np.arange(4, 13).reshape(3, 3)
# vertical stack: x becomes an extra row on top, so it needs as many elements as y has columns
z = np.vstack([x, y])
print(z)
# horizontal stack: x is first turned into a 3x1 column so it can sit next to y
w = np.hstack([y, x.reshape(-1, 1)])
print(w)

[1 2 3]
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[[ 4  5  6  1]
 [ 7  8  9  2]
 [10 11 12  3]]


In [53]:
# slicing
#   ndarray[start:end]  -> from start up to, but not including, end
#   ndarray[start:]     -> from start to the last element
#   ndarray[:end]       -> from the first element up to, but not including, end
# **NOTE** A slice is only a view onto the existing array: nothing is copied,
# so a slice stored in a variable shares its data with the original ndarray.

X = np.reshape(np.arange(1, 21), (4, 5))
z = X[1:4, 2:5]   # rows 1-3 and columns 2-4, with explicit end indices
zz = X[1:, 2:]    # the same region, letting both slices run to the end
print(X, z, zz, sep='\n\n')

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]

[[ 8  9 10]
 [13 14 15]
 [18 19 20]]

[[ 8  9 10]
 [13 14 15]
 [18 19 20]]


In [54]:
# first three rows, columns 2 onward
y = X[:3, 2:]
# a single column index returns that column as a 1-D array
yy = X[:, 2]
print(y, "\n\n", yy)

[[ 3  4  5]
 [ 8  9 10]
 [13 14 15]] 

 [ 3  8 13 18]


In [55]:
# Slicing never copies data, so an independent array has to be requested
# explicitly with the .copy() method.
x = np.reshape(np.arange(1, 21), (4, 5))
z = x[1:, 2:].copy()
z[2, 2] = 555   # changes only the copy; x still holds 20 in that position
print(x, z, sep='\n\n')

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]

[[  8   9  10]
 [ 13  14  15]
 [ 18  19 555]]


In [58]:
# selecting rows through an array of indices
indices = np.array([1, 3])
# picks the second and fourth rows (indices 1 and 3) with all of their columns
y = x[indices, :]
print(y)

[[ 6  7  8  9 10]
 [16 17 18 19 20]]


In [59]:
# np.unique returns the distinct values of the array as a sorted 1-D array
X = np.array([[1, 2, 3]] * 3)
print(np.unique(X))

[1 2 3]


In [60]:
# boolean indexing -- start from a 5x5 array holding 0..24
x = np.reshape(np.arange(25), (5, 5))
print(x)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


In [65]:
# a comparison on an array yields a boolean mask; indexing with the mask
# returns the matching elements as a 1-D array
print(x[x > 10])
print()
print(x[x <= 7])
print()
# conditions are combined element-wise with &, each one wrapped in parentheses
print(x[(x <= 17) & (x > 10)])

# can set these values too
# NOTE: the assignment below overwrites x in place, so re-running this cell
# prints different selections than the first run
print()
x[(x <= 17) & (x > 10)] = -1
print(x)

[11 12 13 14 15 16 17 18 19 20 21 22 23 24]

[0 1 2 3 4 5 6 7]

[11 12 13 14 15 16 17]

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 -1 -1 -1 -1]
 [-1 -1 -1 18 19]
 [20 21 22 23 24]]


In [67]:
# set operations -- one fixed array and one array of 5 random integers from 1 to 9
x = np.arange(1, 6)
y = np.random.randint(1, 10, size=5)
print(x, "\n\n", y)

[1 2 3 4 5] 

 [5 8 5 2 7]


In [68]:
# values present in both x and y
print(np.intersect1d(x, y))
# values of x that do not occur in y
print(np.setdiff1d(x, y))
# values present in x or y, without duplicates
print(np.union1d(x, y))

[2 5]
[1 3 4]
[1 2 3 4 5 7 8]


In [71]:
# sort arrays
#   np.sort(x) (function) -> returns a sorted copy, x itself is untouched
#   x.sort()   (method)   -> sorts x in place
x = np.random.randint(1, 11, size=10)
# the original order, then the sorted copy produced by the function
print(x, np.sort(x), sep='\n\n')
print()
# the method reorders x itself
x.sort()
print(x)

[ 9  1  6  3  5  6 10  8  2  6]

[ 1  2  3  5  6  6  6  8  9 10]

[ 1  2  3  5  6  6  6  8  9 10]


In [72]:
# sorting 2 dimensional arrays
# the axis keyword chooses the direction of the sort:
#   axis=0 -> sort down the rows, i.e. every column is ordered on its own
#   axis=1 -> sort across the columns, i.e. every row is ordered on its own
x = np.random.randint(1, 100, size=(5, 5))
print(x, np.sort(x, axis=0), np.sort(x, axis=1), sep='\n\n')

[[ 7 29 57 22  2]
 [37 88 76 99 72]
 [44 22 18 33 55]
 [91 65 80 89 29]
 [98 72 53 14 73]]

[[ 7 22 18 14  2]
 [37 29 53 22 29]
 [44 65 57 33 55]
 [91 72 76 89 72]
 [98 88 80 99 73]]

[[ 2  7 22 29 57]
 [37 72 76 88 99]
 [18 22 33 44 55]
 [29 65 80 89 91]
 [14 53 72 73 98]]


In [76]:
# arithmetic operations (applied element by element)
x = np.arange(1, 5)
y = np.arange(5, 9)
print(x, y, sep='\n')
print()
# The operator and the matching NumPy function give the same result;
# the function form additionally accepts optional keyword arguments.
print(x + y, np.add(x, y), sep='\n')
print()
print(x - y, np.subtract(x, y), sep='\n')
print()
print(x * y, np.multiply(x, y), sep='\n')
print()
print(x / y, np.divide(x, y), sep='\n')
print()

[1 2 3 4]
[5 6 7 8]

[ 6  8 10 12]
[ 6  8 10 12]

[-4 -4 -4 -4]
[-4 -4 -4 -4]

[ 5 12 21 32]
[ 5 12 21 32]

[0.2        0.33333333 0.42857143 0.5       ]
[0.2        0.33333333 0.42857143 0.5       ]



In [88]:
# arrays must be same shape or 'broadcastable'
# other examples of math functions
# The 2x2 input is built up-front (values 1..4, as in the arithmetic cell) rather
# than reshaped halfway through the cell. This keeps the cell self-contained and
# makes every run print the same thing; previously the first three prints showed
# a 1-D array on the first run and a 2x2 array on every later run.
x = np.arange(1, 5).reshape(2, 2)
print(np.sqrt(x))
print(np.exp(x))
print(np.power(x,2))
print()
# without an axis argument the statistic is taken over all elements
print('avg: ',x.mean())
print(x)
# axis=0 aggregates down each column, axis=1 across each row
print('avg per col: ',x.mean(axis=0))
print('avg per row: ',x.mean(axis=1))
print()
# sum and std
print('sum: ',x.sum())
print('sum per col: ',x.sum(axis=0))
print('sum per row: ',x.sum(axis=1))
print()
print('std: ',x.std())
print()
print('median: ',np.median(x))
print()
print('max: ',x.max())
print('min: ',x.min())

[[1.         1.41421356]
 [1.73205081 2.        ]]
[[ 2.71828183  7.3890561 ]
 [20.08553692 54.59815003]]
[[ 1  4]
 [ 9 16]]

avg:  2.5
[[1 2]
 [3 4]]
avg per col:  [2. 3.]
avg per row:  [1.5 3.5]

sum:  10
sum per col:  [4 6]
sum per row:  [3 7]

std:  1.118033988749895

median:  2.5

max:  4
min:  1


In [93]:
# broadcasting: the 1-D array x is added to every row of the 3x3 array y
x = np.arange(3)
y = np.reshape(np.arange(9), (3, 3))
print(x, y, y + x, sep='\n\n')

[0 1 2]

[[0 1 2]
 [3 4 5]
 [6 7 8]]

[[ 0  2  4]
 [ 3  5  7]
 [ 6  8 10]]


In [98]:
# quiz: adding the 1-D array 1..4 to a 4x4 array of zeros broadcasts it onto every row
x = np.zeros((4, 4))
y = np.arange(1, 5)
print(x + y)

[[1. 2. 3. 4.]
 [1. 2. 3. 4.]
 [1. 2. 3. 4.]
 [1. 2. 3. 4.]]


In [101]:
# quiz solution: multiplying a 4x4 array of ones by 1..4 broadcasts the values onto every row
X = np.multiply(np.ones((4, 4)), np.arange(1, 5))
print(X)

[[1. 2. 3. 4.]
 [1. 2. 3. 4.]
 [1. 2. 3. 4.]
 [1. 2. 3. 4.]]


In [102]:
# shape is a tuple: (number of rows, number of columns)
print(X.shape)

(4, 4)


In [103]:
# the first entry of the shape tuple is the number of rows
print(X.shape[0])

4
