
Most of the examples are based on Wes McKinney's book Python for Data Analysis (O'Reilly)

In [2]:
# Import NumPy under its conventional alias; the cells below refer to it as `np`.
import numpy as np
print(np.__version__) # installed NumPy version; upgrade with conda or pip if it is outdated

1.20.1


In [3]:
# A 2-D array built from nested lists: each inner list becomes one row.
m = np.array([
    [1, 2, 3, 4],
    [10, 11, 12, 13],
])
print(m)
print("Dimensions: ", np.shape(m))      # (rows, columns)
print("m's type: ", type(m))            # class of the array object itself
print("Type of data in m: ", m.dtype)   # element type shared by all entries
# The bare final expression is echoed as the cell's output value.
m.shape

[[ 1  2  3  4]
 [10 11 12 13]]
Dimensions:  (2, 4)
m's type:  <class 'numpy.ndarray'>
Type of data in m:  int64


(2, 4)

In [4]:
# Arithmetic on arrays is applied element by element.
print("m * 5 = \n", np.multiply(m, 5))  # every entry scaled by 5
print("m + m = \n", np.add(m, m))       # entries added position by position

m * 5 = 
 [[ 5 10 15 20]
 [50 55 60 65]]
m + m = 
 [[ 2  4  6  8]
 [20 22 24 26]]


In [5]:
# Number of axes of m; 2 means it has rows and columns.
print(np.ndim(m))

2


In [6]:
# Cast the integer entries to 64-bit floats.
# astype returns a new array; m itself keeps its integer dtype.
m.astype(np.float64)

array([[ 1.,  2.,  3.,  4.],
       [10., 11., 12., 13.]])

In [7]:
# Array-creation helpers
# Example 1: a 2 x 4 array filled with zeros (np.ones works the same way for ones)
zeros_matrix = np.zeros(shape=(2, 4))
print("Matrix of zeros: \n", zeros_matrix)

# Example 2: the ten sequential integers 0..9
v = np.arange(0, 10)
print("Array of 10 sequential numbers: \n",v)

# Example 3: a 3 x 3 identity matrix -- ones on the main diagonal, zeros elsewhere
i = np.identity(3)
print("Identity matrix:\n", i)

Matrix of zeros: 
 [[0. 0. 0. 0.]
 [0. 0. 0. 0.]]
Array of 10 sequential numbers: 
 [0 1 2 3 4 5 6 7 8 9]
Identity matrix:
 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [8]:
# More element-wise operations on m
print(m)  # the original matrix, for reference
print("Matrix times itself:\n", np.multiply(m, m))  # element-wise product, not a matrix product
print("Matrix raised to 0.5:\n", m ** 0.5)          # element-wise power, i.e. square roots

[[ 1  2  3  4]
 [10 11 12 13]]
Matrix times itself:
 [[  1   4   9  16]
 [100 121 144 169]]
Matrix raised to 0.5:
 [[1.         1.41421356 1.73205081 2.        ]
 [3.16227766 3.31662479 3.46410162 3.60555128]]


In [9]:
# One-dimensional arrays slice like Python lists: start is inclusive, stop is exclusive.
x = np.array((1, 2, 3, 4, 5))
print("1: gives:\n", x[1:])     # from index 1 to the end
print("2:4 gives:\n", x[2:4])   # indices 2 and 3

1: gives:
 [2 3 4 5]
2:4 gives:
 [3 4]


In [10]:
# Assigning to a slice modifies the array in place.
# Note: this cell is not idempotent -- every re-run doubles x[2:4] again
# unless x is re-created first.
x[2:4] = x[2:4] * 2   # double the elements at indices 2 and 3
print(x)
# Overwrite the first two elements with 0 (the scalar is broadcast over the slice).
x[0:2] = 0
print(x)

[1 2 6 8 5]
[0 0 6 8 5]


In [11]:
# Lists vs. arrays: a list slice is an independent copy, an array slice is a view.

# --- Python list ---
aList = [1, 2, 3, 4, 5]
print("Original list:\n", aList)
a_slice = aList[0:2]  # first two elements, copied into a new list
print("Slice:\n", a_slice)
a_slice[0] = 10       # modifies the copy only
print("Slice:\n", a_slice)
print("List after slice is changed:\n", aList)  # unchanged: the slice was a copy

# --- NumPy array ---
x = np.array((1, 2, 3, 4, 5))
print("Original numpy array:\n", x)
x_slice = x[0:2]      # a view that shares memory with x, not a copy
print("Original slice:\n", x_slice)
x_slice[0] = 10       # writes through to x
print("Changed slice:\n", x_slice)
print("x after change to slice:\n", x)  # first element is now 10; x[:2].copy() would avoid this

Original list:
 [1, 2, 3, 4, 5]
Slice:
 [1, 2]
Slice:
 [10, 2]
List after slice is changed:
 [1, 2, 3, 4, 5]
Original numpy array:
 [1 2 3 4 5]
Original slice:
 [1 2]
Changed slice:
 [10  2]
x after change to slice:
 [10  2  3  4  5]


In [12]:
# Re-importing is harmless here (np is already available from the first cell).
import numpy as np
# A 5 x 4 matrix of draws from the standard normal distribution.
# Seeding the legacy global generator makes the draws repeatable.
np.random.seed(1)
data = np.random.standard_normal(size=(5, 4))
print(data)

[[ 1.62434536 -0.61175641 -0.52817175 -1.07296862]
 [ 0.86540763 -2.3015387   1.74481176 -0.7612069 ]
 [ 0.3190391  -0.24937038  1.46210794 -2.06014071]
 [-0.3224172  -0.38405435  1.13376944 -1.09989127]
 [-0.17242821 -0.87785842  0.04221375  0.58281521]]


In [13]:
# Label the 5 rows of data with one name each (names[k] goes with data[k]).
names = np.array(("John", "Mary", "Peter", "Pollock", "Richards"))
# Boolean indexing: keep the rows whose name is Peter -- the third row (index 2).
data[(names == "Peter"), :]

array([[ 0.3190391 , -0.24937038,  1.46210794, -2.06014071]])

In [14]:
# Boolean masks can be combined element-wise; here we select the rows
# that belong to Mary or to Richards.
data[np.logical_or(names == 'Mary', names == "Richards")]

array([[ 0.86540763, -2.3015387 ,  1.74481176, -0.7612069 ],
       [-0.17242821, -0.87785842,  0.04221375,  0.58281521]])

In [15]:
# Clamp the data: every negative entry is overwritten with 0.
# This changes data in place, so the cells below see the clamped values.
data[np.less(data, 0)] = 0
data

array([[1.62434536, 0.        , 0.        , 0.        ],
       [0.86540763, 0.        , 1.74481176, 0.        ],
       [0.3190391 , 0.        , 1.46210794, 0.        ],
       [0.        , 0.        , 1.13376944, 0.        ],
       [0.        , 0.        , 0.04221375, 0.58281521]])

In [16]:
# 2-D slicing: rows 1 through 3 (stop index 4 is excluded), columns 2 to the end
data[1:4,2:]

array([[1.74481176, 0.        ],
       [1.46210794, 0.        ],
       [1.13376944, 0.        ]])

In [17]:
# Take the sub-block (rows 1-3, columns 2 onward), then swap its axes.
data[1:4, 2:].transpose()

array([[1.74481176, 1.46210794, 1.13376944],
       [0.        , 0.        , 0.        ]])

In [18]:
# Universal functions (ufuncs), unary flavour: one input array, applied element-wise.
x = np.array((2, 3, 4, 5, 6))
# Other unary ufuncs worth trying: exp, log, log2, abs, isnan, cos, ...
np.sqrt(x)

array([1.41421356, 1.73205081, 2.        , 2.23606798, 2.44948974])

In [19]:
# Binary ufuncs: two input arrays combined element by element.
a = np.array((10, 12, 5, 8, 11))
b = np.array((5, 18, 1, 7, 13))
# One print call, one result per line: maximum, sum, difference, product.
print(
    np.maximum(a, b),
    np.add(a, b),
    np.subtract(a, b),
    np.multiply(a, b),
    sep="\n",
)

[10 18  5  8 13]
[15 30  6 15 24]
[ 5 -6  4  1 -2]
[ 50 216   5  56 143]


In [20]:
# Vectorised conditional: np.where(condition, value_if_true, value_if_false)
a = np.array((2, 3, -4, 8, -3, -7))
# Map negative entries to -1 and everything else (zero or positive) to 1.
np.where(a < 0, -1, 1)

array([ 1,  1, -1,  1, -1, -1])

In [21]:
# Descriptive statistics, shown on the 1-D array "a".
a = np.array((1, 3, 5, 7, 9, 4))
print("Mean: ", np.mean(a))            # same as a.mean()
print("Sum: ", np.sum(a))              # same as a.sum()
print("Std. Deviation: ", np.std(a))   # population standard deviation; np.var gives the variance


Mean:  4.833333333333333
Sum:  29
Std. Deviation:  2.608745973749755


In [22]:
# Statistics on a 2-D array: the axis argument picks the direction that is collapsed.
a = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
print("Matrix: ", a)
print("Overall mean: ", np.mean(a))                              # all nine entries
print("Column averages: ", np.mean(a, axis=0))                   # axis=0 collapses the rows
print("Row averages: ", np.mean(a, axis=1))                      # axis=1 collapses the columns
print("Cumulative sum for each column: ", np.cumsum(a, axis=0))  # running totals down each column


Matrix:  [[1 2 3]
 [4 5 6]
 [7 8 9]]
Overall mean:  5.0
Column averages:  [4. 5. 6.]
Row averages:  [2. 5. 8.]
Cumulative sum for each column:  [[ 1  2  3]
 [ 5  7  9]
 [12 15 18]]


In [None]:
# Cumulative sum of a 1-D array: running totals from left to right.
arr = np.array((1, 2, 3))
print(arr.cumsum())

In [None]:
# Locating extremes: argmax gives the position of the largest element
# (argmin does the same for the smallest).
a = np.array((11, 3, 15, 7, 9, 4))
np.argmax(a)  # index of the maximum; here the value 15 sits at index 2

In [None]:
# Sort in place: a.sort() reorders a itself (ascending) and returns None,
# so the array is displayed on a separate line.
a.sort()
a

In [None]:
# Reverse the array with a step of -1. The result is in descending order
# only because the previous cell sorted a in ascending order.
a[::-1]

In [None]:
# argsort gives the integer indices that would put the array in ascending order.
a = np.array((11, 3, 15, 7, 9, 4))
np.argsort(a)

You may store your arrays on disk and retrieve them later using the save() and load() functions; save() writes the array to a .npy file. You may also save multiple arrays into a single zipped .npz archive using savez().

In [None]:
# Persist the array to disk and read it back.
# np.save appends ".npy" when the file name lacks that extension, so the data is
# written to "my_array.npy". np.load does NOT add the extension: it must be given
# the full file name, otherwise it fails with FileNotFoundError.
np.save('my_array', a)
np.load('my_array.npy')

Linear Algebra is arguably the most important foundation for data science. It typically involves manipulation of matrices - multiplication, eigenvalues, determinants, decompositions, and so forth. Numpy makes this fairly easy.

In [None]:
#Matrix multiplication
x = np.array([[1,2,3],[4,5,6]]) # 2 x 3 matrix
y = np.array([[10,11,12],[7,8,9],[1,2,3]]) # 3 x 3 matrix
#np.dot(x,y)#also, x.dot(y) or y.dot(x.T)
x.dot(y)

In [None]:
# Inverse of a matrix. The math notation (X^T X)^-1 is written inv(X.T.dot(X)) in NumPy;
# note that `** -1` on an array would be an element-wise power, not a matrix inverse.
from numpy.linalg import inv
# The 3 x 3 matrix y from the matrix-multiplication cell is singular
# (2*y[0] - 3*y[1] + y[2] is the zero vector, so its determinant is 0) and has no
# inverse: inv(y) either raises LinAlgError or returns numerically meaningless values.
# Demonstrate on an invertible matrix instead (determinant 2*3 - 1*5 = 1).
invertible = np.array([[2.0, 1.0], [5.0, 3.0]])
inv(invertible)  # [[ 3., -1.], [-5.,  2.]]

In [None]:
# np.diag on a 2-D array returns its main diagonal as a 1-D array; given a 1-D array it
# builds a diagonal matrix instead (handy for turning SVD's singular values into a matrix).
np.diag(x)

In [None]:
# Eigenvalues and eigenvectors of a square matrix.
# eig returns the eigenvalues together with an array whose columns are the matching eigenvectors.
from numpy.linalg import eig
eig(y)

In [None]:
# Singular value decomposition: an m x n matrix factors as U @ diag(s) @ Vh, where
# svd returns U (m x m), the singular values s (1-D, length min(m, n)) and Vh (n x n).
from numpy.linalg import svd
a = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
svd(a)

End of Numpy Tutorial. Check out the other methods under numpy.linalg.