In [1]:
import numpy
print("numpy version: ",numpy.__version__)
!python --version

numpy version:  1.11.3
Python 3.6.0 :: Anaconda custom (x86_64)


### Python lists

In [2]:
# python list holds many python objects
L = list(range(10))
L, type(L), type(L[0])

([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], list, int)

In [3]:
L2 = list(str(i) for i in L)
L2, type(L2), type(L2[0])

(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], list, str)

In [4]:
# because of Python's dynamic typing, a single list can hold heterogeneous items
L3 = [True, "2", 3.0, 4]
[type(item) for item in L3]

[bool, str, float, int]

In [5]:
# Python's built-in array module stores values of one fixed type,
# selected by the type code passed as first argument ('i' here)
import array
L  = list(range(10))
A = array.array('i', L)
A

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

#### Creating Numpy arrays

1. Use np.array to create a NumPy array from a Python list

In [6]:
import numpy as np
Pylist = [1, 2, 3]

In [7]:
NumpyList = np.array(Pylist)

In [8]:
NumpyList

array([1, 2, 3])

In [9]:
type(Pylist) , type(NumpyList)

(list, numpy.ndarray)

In [10]:
# explicitly set the data type of the array elements with dtype
np.array([1,2,3,4], dtype=float)

array([ 1.,  2.,  3.,  4.])

In [11]:
# multidimensional numpy array
np.array([range(i, i+3) for i in [2,4,6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

2. Create arrays from scratch

In [12]:
# create zeros
np.zeros(3, dtype=int)

array([0, 0, 0])

In [13]:
# create 3x5 array filled with 1s
np.ones((3,5), dtype=float)

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [14]:
# create an array of 5s 
np.full((2,4), 5, dtype=float)

array([[ 5.,  5.,  5.,  5.],
       [ 5.,  5.,  5.,  5.]])

In [15]:
# linear sequence starting at 4, stopping before 10, in steps of 2
np.arange(4, 10, 2)

array([4, 6, 8])

In [16]:
# array of evenly spaced values
np.linspace(0, 1, 5)

array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ])

In [17]:
# uniformly distributed random values between 0 and 1
np.random.random((3,4))

array([[ 0.37363602,  0.71411651,  0.99764371,  0.88760963],
       [ 0.01367229,  0.2909225 ,  0.4739144 ,  0.80871193],
       [ 0.23836706,  0.77654664,  0.07137405,  0.91556977]])

In [18]:
# normally distributed values mean=0, std= 1
np.random.normal(0,1, (2,3))

array([[ 1.01576128,  0.23490507, -1.1768427 ],
       [-0.46823236, -0.4439429 , -0.63813726]])

In [19]:
# random integer, interval [0, 10)
np.random.randint(0, 10, (2,5))

array([[0, 2, 2, 3, 6],
       [1, 5, 7, 8, 4]])

In [20]:
# identity matrix
np.eye(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [21]:
np.random.random_sample((3,4))

array([[ 0.84143663,  0.74794694,  0.02027866,  0.35198604],
       [ 0.90490387,  0.33101863,  0.01313256,  0.30986085],
       [ 0.40060986,  0.88748662,  0.95715117,  0.16355882]])

In [22]:
# create an uninitialized array of three values (floats, since no dtype is given);
# the contents are whatever already exists at that memory location
np.empty(3)

array([ 1.,  1.,  1.])

### Basics of Numpy Arrays

a. Attributes of arrays

In [23]:
import numpy as np
np.random.seed(0) #to generate the same random array each time this code run
x1 = np.random.randint(10, size=6) #one dimensional array
x2 = np.random.randint(10, size=(3,4)) #two dimensional array
x3 = np.random.randint(10, size=(3,4,5)) #three dimensional array

In [24]:
x1

array([5, 0, 3, 3, 7, 9])

In [25]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [26]:
x3

array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

In [27]:
print("x3 ndim:", x3.ndim)
print("x3 shape:", x3.shape)
print("x3 size:", x3.size)
print("x3 data:", x3.data)
print("x3 dtype:", x3.dtype)
print("x3 item:", x3.item)
print("x3 itemsize:", x3.itemsize, "bytes")
print("x3 nbytes:", x3.nbytes, "bytes")

x3 ndim: 3
x3 shape: (3, 4, 5)
x3 size: 60
x3 data: <memory at 0x107b8eb88>
x3 dtype: int64
x3 item: <built-in method item of numpy.ndarray object at 0x10cf10260>
x3 itemsize: 8 bytes
x3 nbytes: 480 bytes


b. Array indexing

In [28]:
x1

array([5, 0, 3, 3, 7, 9])

In [29]:
x1[4]

7

In [30]:
x1[-1]

9

In [31]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [32]:
x2[0, 0]

3

In [33]:
# colon is for slicing. see the difference above and below.
x2[0:1]

array([[3, 5, 2, 4]])

In [34]:
x2[1,1]

6

In [35]:
x2[2,-3]

6

In [36]:
# change the values in the array
x2[2,0] = 7
x2[2,1] = 7

In [37]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [7, 7, 7, 7]])

c. Array slicing: x[start : stop : step]; the defaults are start=0, stop=size of the dimension, step=1

In [38]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [39]:
x[:5] #first 5 elements

array([0, 1, 2, 3, 4])

In [40]:
x[::2] #every other element

array([0, 2, 4, 6, 8])

In [41]:
x[1::2] #every other element starting from 1

array([1, 3, 5, 7, 9])

In [42]:
np.arange(1,10, 2) #example of arange method

array([1, 3, 5, 7, 9])

In [43]:
x[::-1] #all elements, reversed

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [44]:
# Multidimensional subarrays
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [7, 7, 7, 7]])

In [45]:
x2[0] #first row

array([3, 5, 2, 4])

In [46]:
x2[2] #third row

array([7, 7, 7, 7])

In [47]:
x2[2:] == x2[2]

array([[ True,  True,  True,  True]], dtype=bool)

In [48]:
x2[1:2, 1:3] #second row intersection between second and third column

array([[6, 8]])

In [49]:
x2[::-1, ::-1] #can be reversed

array([[7, 7, 7, 7],
       [8, 8, 6, 7],
       [4, 2, 5, 3]])

In [50]:
x2[:,0] #first column

array([3, 7, 7])

In [51]:
# slicing returns a view of the original array, not a copy
x2_sub = x2[:2 , :2]
x2_sub

array([[3, 5],
       [7, 6]])

In [52]:
x2_sub[0,0] = 61
x2_sub

array([[61,  5],
       [ 7,  6]])

In [53]:
# modifying subarray affects the original array as well
x2

array([[61,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 7,  7,  7,  7]])

In [54]:
#use copy method to keep the original array not updated
x2_sub_copy = x2[:2, :2].copy()
x2_sub_copy

array([[61,  5],
       [ 7,  6]])

In [55]:
x2_sub_copy[0,0] = 99
x2_sub_copy

array([[99,  5],
       [ 7,  6]])

In [56]:
x2

array([[61,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 7,  7,  7,  7]])

d. Reshaping of arrays

In [57]:
# use reshape method 
grid = np.arange(1,10).reshape((3,3))
grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [58]:
# convert one dimensional array into two dimensional row or column matrix
y = np.array([1,2,3])
y

array([1, 2, 3])

In [59]:
# row vector via reshape
y.reshape((1,3))

array([[1, 2, 3]])

In [60]:
#row vector via newaxis  
y[np.newaxis, :]

array([[1, 2, 3]])

In [61]:
# column vector via reshape
y.reshape((3,1))

array([[1],
       [2],
       [3]])

In [62]:
#column vector via newaxis 
y[:, np.newaxis]

array([[1],
       [2],
       [3]])

e. Array concatenation and splitting

In [63]:
x = np.array([1,2,3])
y = np.array([1,2,3])
z = np.array([61,61,61])

In [64]:
# use np.concatenate to join arrays end to end
np.concatenate([x,y,z])

array([ 1,  2,  3,  1,  2,  3, 61, 61, 61])

In [65]:
# concat two dimensional arrays
grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [66]:
np.concatenate([grid, grid])

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9],
       [1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [67]:
np.concatenate([grid, grid], axis = 1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6],
       [7, 8, 9, 7, 8, 9]])

In [68]:
# vertical stack with vstack
np.vstack([x, grid])

array([[1, 2, 3],
       [1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [69]:
k = np.array([[99], [99], [99]])
k

array([[99],
       [99],
       [99]])

In [70]:
# horizontal stack with hstack
np.hstack([k, grid])

array([[99,  1,  2,  3],
       [99,  4,  5,  6],
       [99,  7,  8,  9]])

In [71]:
# splitting of arrays
sp = np.arange(10)
sp

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [72]:
x1, x2, x3 = np.split(sp, [1, 3])
x1 , x2, x3

(array([0]), array([1, 2]), array([3, 4, 5, 6, 7, 8, 9]))

In [73]:
four = np.arange(16).reshape((4,4))
four

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [74]:
f1, f2 = np.vsplit(four,[2] )
f1, f2

(array([[0, 1, 2, 3],
        [4, 5, 6, 7]]), array([[ 8,  9, 10, 11],
        [12, 13, 14, 15]]))

## Introducing Ufuncs

In [75]:
import numpy as np
np.random.seed(0)

def compute_reciprocal(values):
    """Return 1 / values[i] for every index i, computed with an explicit Python loop.

    This is intentionally the slow, element-by-element baseline that the
    vectorized expression ``1.0 / values`` is timed against.

    Parameters
    ----------
    values : indexable with a length
        Numbers to invert; ``values[i]`` is read for ``i`` in ``range(len(values))``.

    Returns
    -------
    numpy.ndarray
        A new float array of length ``len(values)`` holding the reciprocals.
    """
    count = len(values)
    reciprocals = np.empty(count)
    # fill the preallocated result one slot at a time
    for position in range(count):
        reciprocals[position] = 1.0 / values[position]
    return reciprocals

In [76]:
values = np.random.randint(1,10, size=5)
compute_reciprocal(values)

array([ 0.16666667,  1.        ,  0.25      ,  0.25      ,  0.125     ])

In [77]:
big_array = np.random.randint(1, 100 , size=1000000)
%timeit compute_reciprocal(big_array)

2.27 s ± 72.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [78]:
print(compute_reciprocal(values))
print(1.0 / values)

[ 0.16666667  1.          0.25        0.25        0.125     ]
[ 0.16666667  1.          0.25        0.25        0.125     ]


In [79]:
# NOTE(review): this times the 5-element `values` array, whereas compute_reciprocal
# was timed on the 1,000,000-element `big_array`, so the two timings are not a
# like-for-like comparison.
%timeit (1.0 / values)

1.74 µs ± 62.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [80]:
# array arithmetic: each operator is applied element-wise
x = np.arange(7)
print("x     = ", x)
print("x + 2 = ", x +2)
print("x * 2 = ", x * 2)
print("x / 2 = ", x / 2)
print("x //2 =", x // 2) # floor division

x     =  [0 1 2 3 4 5 6]
x + 2 =  [2 3 4 5 6 7 8]
x * 2 =  [ 0  2  4  6  8 10 12]
x / 2 =  [ 0.   0.5  1.   1.5  2.   2.5  3. ]
x //2 = [0 0 1 1 2 2 3]


In [81]:
# more operations
((x ** 2) + 2 ) * (-x)

array([   0,   -3,  -12,  -33,  -72, -135, -228])

In [82]:
# arithmetic operations implemented in numpy
np.multiply(np.add( np.power(x, 2), 2 ), np.negative(x))

array([   0,   -3,  -12,  -33,  -72, -135, -228])

In [83]:
# absolute value
z = np.array([-5, -2, 0, 1])
abs(z)

array([5, 2, 0, 1])

In [84]:
np.absolute(z)

array([5, 2, 0, 1])

In [85]:
np.abs(z)

array([5, 2, 0, 1])

In [86]:
np.absolute(z) == np.abs(z)

array([ True,  True,  True,  True], dtype=bool)

In [87]:
# trigonometric functions
theta = np.linspace(0, np.pi, 3)
theta

array([ 0.        ,  1.57079633,  3.14159265])

In [88]:
print(np.sin(theta))
print(np.tan(theta))

[  0.00000000e+00   1.00000000e+00   1.22464680e-16]
[  0.00000000e+00   1.63312394e+16  -1.22464680e-16]


In [89]:
print(x)
print(np.sin(x))
print(np.cos(x))

[0 1 2 3 4 5 6]
[ 0.          0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427
 -0.2794155 ]
[ 1.          0.54030231 -0.41614684 -0.9899925  -0.65364362  0.28366219
  0.96017029]


In [90]:
# Exponents and logarithms
print(x)
print(np.power(x, 2))
print(np.power(x, 3))

[0 1 2 3 4 5 6]
[ 0  1  4  9 16 25 36]
[  0   1   8  27  64 125 216]


In [91]:
from scipy import special
#gamma functions
x = [1, 5, 10]
print("gamma(x) = ", special.gamma(x))
print("ln|gamma(x) =", special.gammaln(x))

gamma(x) =  [  1.00000000e+00   2.40000000e+01   3.62880000e+05]
ln|gamma(x) = [  0.           3.17805383  12.80182748]


#### Advanced Ufunc features

In [92]:
x = np.arange(4)
np.multiply(x, 2, out=x)
x

array([0, 2, 4, 6])

In [93]:
# x and y must have the same number of elements: the result is written into y via out=
x = np.arange(5)
y = np.empty(5)
np.multiply(x, 2, out=y)
y

array([ 0.,  2.,  4.,  6.,  8.])

In [94]:
k = np.zeros(10)
np.power(2, x, out=k[::2])
k

array([  1.,   0.,   2.,   0.,   4.,   0.,   8.,   0.,  16.,   0.])

In [95]:
# aggregate
x = np.arange(1, 6)
print(np.add.reduce(x))
print(np.multiply.reduce(x))

15
120


In [96]:
np.add.accumulate(x)

array([ 1,  3,  6, 10, 15])

#### Aggregations: Min. Max. Std. Median. Mean

In [97]:
x = np.random.randint(1, 1000, size=10000000)

In [98]:
# summing all values in an array
%timeit sum(x)   #python code
%timeit np.sum(x)  #numpy code

907 ms ± 28.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
6.43 ms ± 300 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
# min and max
print(min(x), max(x) ) # python code
print(np.min(x) , np.max(x))

1 999
1 999


In [None]:
# again numpy operates much more quickly
%timeit (min(x), max(x))
%timeit (np.min(x) , np.max(x))

1.29 s ± 42.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
# Multidimensional aggregation
M = np.random.randint(1,5, size=(3,4))
M

In [None]:
# sum of all values
M.sum()

In [None]:
# min of each column
np.min(M, axis=0)

In [None]:
# max of each row (axis=1 collapses the columns)
np.max(M, axis=1)

In [None]:
# sum of each row
np.sum(M, axis=1)

#### Other aggregation functions

In [None]:
n = [1,3,4]

In [None]:
#product of elements
np.prod(n)

In [None]:
np.std(n)

In [None]:
np.min(n), np.max(n)

In [None]:
# index of min and max
np.argmin(n), np.argmax(n)

In [None]:
np.median(n), np.mean(n)

### Example

In [None]:
!head -4 data/president_heights.csv

In [None]:
import pandas as pd
data = pd.read_csv('data/president_heights.csv', index_col='order')
data.head()

In [None]:
data.info()

In [None]:
data.describe()

In [None]:
heights = np.array(data['height(cm)'])
len(heights)

In [None]:
# summary statistics of the presidents' heights
print("Mean height:", np.mean(heights))
# ddof=1 divides by N - 1 instead of N
print("Std of height:",np.std(heights, ddof=1))
print('min of height:', np.min(heights))
print('max of heights:', np.max(heights))
print('median of heights:', np.median(heights))

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

In [None]:
plt.figure(figsize=(10,5) )
plt.hist(heights)
plt.title('Height distribution of US Presidents')
plt.xlabel('height (cm)')
plt.ylabel('number')
plt.show()

## Broadcasting

In [None]:
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])
a + b

In [None]:
a + 5

In [None]:
M = np.ones((3, 3))
M

In [None]:
M + a

In [None]:
x = np.arange(3)
y = np.arange(3)[:,np.newaxis]

In [None]:
print(x)
print(y)

In [None]:
x + y

In [None]:
#Broadcasting example 1
M = np.ones((2,3))
a = np.arange(3)

print("Shape of M:", M.shape)
print(M)
print(' ')
print("shape of a:",a.shape) 
print(a)
print('')
print("shape of M + a:", (M+a).shape) 
print(M + a)

In [None]:
# Broadcasting example 2
a = np.arange(3).reshape((3,1))
print(a)
print('Shape of a:', a.shape)
print(' ')
b= np.arange(3)
print(b)
print('Shape of b:',b.shape)
print(' ')
print(a + b)
print('Shape of a + b:',(a+b).shape)

In [None]:
# Broadcasting example 3
M = np.ones((3,2))
print(M)
print('Shape of M:', M.shape)
print('')
a = np.arange(3)
print(a)
print('Shape of a:', a.shape)
print('')
print("M + a throws an error. These arrays are incompatible.")

In [None]:
# Broadcasting in practice
X = np.random.randint(0, 5,size=(3, 3))
X

In [None]:
# mean of each feature (column): axis=0 averages down the rows
Xmean = X.mean(axis=0)
Xmean

In [None]:
# center the data: subtract the per-column mean from every row
# (Xmean is broadcast across the rows of X)
Xcentered = X - Xmean
Xcentered

In [None]:
Xcentered.mean(0)

In [None]:
# plotting two dimensional function z = f(x, y)
x = np.linspace(0, 5, 50)
y = np.linspace(0, 5, 50)[:,np.newaxis]

z = np.sin(x) ** 10 + np.cos(10 + y * x) * np.cos(x)

#plot the function
plt.imshow(z, origin='lower', extent=[0,5,0,5], cmap='viridis')
plt.colorbar();

In [None]:
# load the PRCP column of the Seattle 2014 data
# NOTE(review): assumes PRCP is recorded in tenths of a millimetre — confirm against the data source
rainfall = pd.read_csv('data/Seattle2014.csv')['PRCP']
inches = rainfall / 254 # 254 tenths of a millimetre = 1 inch, so this converts 1/10 mm to inches
inches.shape  # 365 days in a year

In [None]:
plt.hist(inches, bins=40); 

In [None]:
#working with 2-dimensional array
f = np.random.randint(10, size=(3,4))
f

In [None]:
np.count_nonzero(f < 6)

In [None]:
# this code and the code above give the same result: in a sum, True counts as 1 and False as 0
np.sum(f < 6)

In [None]:
# how many values less than 3 for each row
np.sum( f < 3, axis = 1)

In [None]:
# np.any() , np.all()
np.any(f == 0 ) , np.all(f==0)

In [None]:
np.any(f > 5 , axis=1)

In [None]:
# go back to the Seattle rainy days data

# how many days had more than 0.5 inches and less than 1 inch of rain
np.sum((inches > 0.5) & (inches < 1)) 

In [None]:
# Other examples: count the days matching each boolean mask
print("Number of days without rain:", np.sum(inches == 0))
print("Number of days with rain:", np.sum(inches != 0))
print("Days with more than 0.5 inches:", np.sum(inches > 0.5) )
# `inches` is already expressed in inches, so the threshold below is 0.2 inches;
# the label states the same threshold that the mask applies
print("Rainy days with < 0.2 inches:", np.sum((inches < 0.2) & (inches != 0)))