# Introduction to NumPy

## ndarray: Creation 

In [1]:
import numpy as np                # NumPy is conventionally imported as np
# Build a one-dimensional ndarray from a plain Python list.
a1 = np.array([2, 55, 7])
a1

array([ 2, 55,  7])

In [2]:
print(a1.ndim)                    # number of dimensions
print(a1.shape)                   # size along each dimension, as a tuple
a1.dtype                          # element type of the array

1
(3,)


dtype('int64')

In [3]:
# A nested list of two equal-length rows gives a two-dimensional array.
a2 = np.array([[1, 2, 3], [4, 5, 6]])
print(a2)
print(a2.ndim)
a2.shape                          # (rows, columns)

[[1 2 3]
 [4 5 6]]
2


(2, 3)

In [6]:
# A three-dimensional array: 4 blocks, each holding 3 rows of 2 columns.
a3 = np.array([
    [[2, 2], [4, 4], [6, 6]],
    [[8, 8], [10, 10], [12, 12]],
    [[14, 14], [16, 16], [18, 18]],
    [[20, 20], [22, 22], [24, 24]],
])
print(a3)
print(a3.ndim)
print(a3.shape)

[[[ 2  2]
  [ 4  4]
  [ 6  6]]

 [[ 8  8]
  [10 10]
  [12 12]]

 [[14 14]
  [16 16]
  [18 18]]

 [[20 20]
  [22 22]
  [24 24]]]
3
(4, 3, 2)


In [10]:
print(np.arange(0, 10))       # the start value is included, the stop value is excluded
np.arange(0, 10, 2)           # an optional third argument sets the step size

[0 1 2 3 4 5 6 7 8 9]


array([0, 2, 4, 6, 8])

In [11]:
# Lay the 16 consecutive integers out row by row in a 4x4 grid.
np.arange(0, 16).reshape(4, 4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [12]:
print(np.zeros(2))            # a single integer gives a one-dimensional array
print(np.zeros((2, 4)))       # a tuple gives the shape of a multi-dimensional array
np.ones((3, 3))               # same idea, filled with ones

[ 0.  0.]
[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]


array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

## Indexing and slicing

In [14]:
a = np.arange(10)
print(a[:4])                      # first four elements
print(a[-2:])                     # negative indices count from the end: last two elements
a[3] = 77                         # overwrite a single position in place
a

[0 1 2 3]
[8 9]


array([ 0,  1,  2, 77,  4,  5,  6,  7,  8,  9])

In [15]:
a = np.arange(10)
a[5:] = 255                       # a scalar assigned to a slice is written to every element of the slice
a

array([  0,   1,   2,   3,   4, 255, 255, 255, 255, 255])

In [16]:
a = np.arange(6).reshape(3, 2)
print(a)
a[0, 1]                       # row 0, column 1 (one index per dimension, comma-separated)



[[0 1]
 [2 3]
 [4 5]]


1

In [17]:
a = np.arange(12).reshape(3, 2, 2)     # three-dimensional: 3 blocks of 2x2
print(a)
print(a[0])                            # one index removes one dimension: a 2-D block
print(a[0][1])                         # indexing that block again leaves a 1-D row

[[[ 0  1]
  [ 2  3]]

 [[ 4  5]
  [ 6  7]]

 [[ 8  9]
  [10 11]]]
[[0 1]
 [2 3]]
[2 3]


In [18]:
a = np.arange(15).reshape(3, 5)        # the integers 0..14 in 3 rows of 5
print(a)
a[:2, :1]                              # rows 0-1 and column 0, selected in a single indexing step

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]


array([[0],
       [5]])

In [19]:
# Two separate indexing steps, unlike a[:2,:1]: a[:2] keeps rows 0-1, then
# [:1] keeps row 0 of that intermediate result, so every column survives.
a[:2][:1]

array([[0, 1, 2, 3, 4]])

### Boolean Indexing

In [21]:
data = np.random.randn(3, 5)        # 3 rows x 5 columns of standard-normal samples
print(data)

subjects = np.array([1, 5, 1])      # one label per row of data
print(subjects)

print(subjects == 1)                # element-wise comparison yields a boolean array

print(data[subjects == 1])          # keep only the rows whose label equals 1

[[-1.00862218  0.1217579   1.27765805 -1.16219206  1.30082852]
 [ 1.47241423 -0.70257884  0.04741811  1.16638602 -0.45976957]
 [-1.27100299  0.2890124  -0.47289796  0.52276187  0.46169986]]
[1 5 1]
[ True False  True]
[[-1.00862218  0.1217579   1.27765805 -1.16219206  1.30082852]
 [-1.27100299  0.2890124  -0.47289796  0.52276187  0.46169986]]


In [22]:
topics = np.array([1, 1, 5, 3, 1])      # one label per column of data
print(topics)

print(topics == 1)
data[:, topics == 1]                    # ':' keeps every row; the boolean mask picks the columns

[1 1 5 3 1]
[ True  True False False  True]


array([[-1.00862218,  0.1217579 ,  1.30082852],
       [ 1.47241423, -0.70257884, -0.45976957],
       [-1.27100299,  0.2890124 ,  0.46169986]])

### Normal vs. Boolean Indexing

In [29]:
a = np.random.randn(3, 3)
print(a)

b = a[:2]       # basic slicing: b is a view onto rows 0-1 of a, not a copy
print(b)

b[1] += 2       # add 2 to every cell in b's row 1 ...
print(b)

a               # ... and the same row of a changes, because b shares a's data

[[ 0.89149421 -0.34873975  1.51897823]
 [ 0.16578387  1.81921544  0.54228857]
 [-0.413711   -0.64860044 -0.50664678]]
[[ 0.89149421 -0.34873975  1.51897823]
 [ 0.16578387  1.81921544  0.54228857]]
[[ 0.89149421 -0.34873975  1.51897823]
 [ 2.16578387  3.81921544  2.54228857]]


array([[ 0.89149421, -0.34873975,  1.51897823],
       [ 2.16578387,  3.81921544,  2.54228857],
       [-0.413711  , -0.64860044, -0.50664678]])

In [30]:
a = np.random.randn(4)
print(a)

b = a[a != 0]                     # boolean indexing returns a copy of the selected elements, not a view
print(b)

b[2] = 1000                       # changes only the copy ...
print(a)                          # ... so a is printed unchanged

[ 0.59782515  0.3382994  -0.66841437  1.71542678]
[ 0.59782515  0.3382994  -0.66841437  1.71542678]
[ 0.59782515  0.3382994  -0.66841437  1.71542678]


## Vectorization and broadcasting

In [36]:
# Arithmetic on ndarrays is applied to whole arrays at once, without explicit loops.
x = np.arange(3)
y = x + 1                              # y is created by adding 1 to every element of x
print(y)

print(x + y)                           # x and y are added element-wise

print((x + y)**2)                      # the sum of x and y is squared element-wise

[1 2 3]
[1 3 5]
[ 1  9 25]


### Broadcasting

In [33]:
print(x)                               # x is a one-dimensional array with shape (3,)

print(x + 1)                           # adding a scalar means the scalar is added to every element

z = np.arange(12).reshape(4,3)         # z is a two-dimensional array with shape (4,3)
print(z)

z + x                                  # x (shape (3,)) is broadcast: it is added to each of the 4 rows of z

[0 1 2]
[1 2 3]
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


array([[ 0,  2,  4],
       [ 3,  5,  7],
       [ 6,  8, 10],
       [ 9, 11, 13]])

### ufuncs

In [34]:
a = np.arange(5)
np.sqrt(a)                        # unary ufunc: square root of every element; the integer input gives a float result

array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ])

In [35]:
a = np.arange(5)
b = 1.3 * np.random.randn(5) + 2       # normal samples with mean 2 and standard deviation 1.3
np.maximum(a, b)                       # binary ufunc: element-wise larger of the two values


array([ 1.49259262,  1.        ,  2.        ,  3.        ,  4.        ])

### where: a vectorized ternary operator 

In [38]:
sentence = np.array(['Colorless', 'green', 'ideas', 'sleep', 'furiously'])

# np.where(condition, value_if_true, value_if_false), evaluated element-wise
np.where(sentence == 'Colorless', 'Empty', sentence)

array(['Empty', 'green', 'ideas', 'sleep', 'furiously'], 
      dtype='<U9')

In [39]:
a = np.random.randn(5)
np.where(a < 0, 0, a)             # replace negative entries with 0 and keep the others

array([ 0.        ,  0.        ,  0.75932772,  0.        ,  0.15289307])

## Set operations, deduplication and sorting


In [40]:
a = np.array([1, 2, 4, 2])
np.unique(a)                      # distinct values, returned in sorted order

array([1, 2, 4])

In [41]:
# When unique() is applied to a higher-dimensional array, the array is flattened first:
b = np.vstack((a,a))              # stack a on top of itself to get a two-row array
print(b)

np.unique(b)                      # the result is one-dimensional

[[1 2 4 2]
 [1 2 4 2]]


array([1, 2, 4])

In [42]:
a = np.arange(10)
b = np.arange(2, 7)

print(np.union1d(a, b))        # values present in a or in b
print(np.intersect1d(a, b))    # values present in both a and b
print(np.setdiff1d(a, b))      # values of a that do not occur in b



[0 1 2 3 4 5 6 7 8 9]
[2 3 4 5 6]
[0 1 7 8 9]


In [43]:
# Element-wise membership test: True where the element of a also occurs in b.
# np.isin supersedes np.in1d, which is deprecated as of NumPy 2.0; for
# one-dimensional input the two return the same boolean array.
np.isin(a, b)


array([False, False,  True,  True,  True,  True,  True, False, False, False], dtype=bool)

#### ndarray has a sort() method that, when given no parameters, sorts the array in-place along the last axis:


In [51]:
a = np.arange(12)
np.random.shuffle(a)                   # permutes the elements of a in place
print(a)

a = a.reshape(3, 4)
print(a)

a.sort()                               # no arguments: each row is sorted, in place
print(a)

[ 0  1  7  4  6  8 10 11  9  5  3  2]
[[ 0  1  7  4]
 [ 6  8 10 11]
 [ 9  5  3  2]]
[[ 0  1  4  7]
 [ 6  8 10 11]
 [ 2  3  5  9]]


#### You can also sort along a specific dimension by specifying the axis= parameter:

In [54]:
a = np.arange(12)
np.random.shuffle(a)
a = a.reshape(3, 4)
print(a)

a.sort(axis=1)                 # axis 1: order the values within each row
print(a)

a.sort(axis=0)                 # axis 0: now order the values within each column
print(a)

[[ 6  5  4  0]
 [11  9  1  2]
 [ 8  7  3 10]]
[[ 0  4  5  6]
 [ 1  2  9 11]
 [ 3  7  8 10]]
[[ 0  2  5  6]
 [ 1  4  8 10]
 [ 3  7  9 11]]


#### To get a sorted copy instead of sorting an array in-place, use the np.sort() function rather than the ndarray object’s sort() method:

In [55]:
a = np.random.randn(5)
print(a)

print(np.sort(a))                     # np.sort returns a sorted copy

print(a)                              # a is unchanged

[ 1.15085276  1.42111719 -1.84741484  0.0151491  -0.83219872]
[-1.84741484 -0.83219872  0.0151491   1.15085276  1.42111719]
[ 1.15085276  1.42111719 -1.84741484  0.0151491  -0.83219872]


## Statistics, random numbers, and linear algebra


In [56]:
a = np.arange(8).reshape(2, 4)
print(a)

a.mean()                               # no axis given: the mean of all eight elements

[[0 1 2 3]
 [4 5 6 7]]


3.5

In [58]:
a.mean(axis=0)                         # collapse axis 0 (the rows): one mean per column

array([ 2.,  3.,  4.,  5.])

In [59]:
a.mean(axis=1)                         # collapse axis 1 (the columns): one mean per row

array([ 1.5,  5.5])

In [60]:
# Summary statistics over all elements. std() and var() use the population
# formulas by default (divide by N), which is why var() is 5.25 here.
(a.mean(), a.std(), a.var(), a.min(), a.max())

(3.5, 2.2912878474779199, 5.25, 0, 7)

In [61]:
# Rebuild the array from scratch rather than writing `a = a + 1`, so that
# re-running this cell always yields the values 1..8 instead of incrementing
# `a` again on every execution. The +1 shift removes the 0 that would
# otherwise make every cumulative product 0.
a = np.arange(8).reshape(2, 4) + 1

# Without an axis argument, cumsum/cumprod run over the flattened array.
a.cumsum(), a.cumprod()



(array([ 1,  3,  6, 10, 15, 21, 28, 36]),
 array([    1,     2,     6,    24,   120,   720,  5040, 40320]))

#### Random numbers

In [65]:
mu, sigma = 100, 10                        # mean and standard deviation
s = np.random.normal(mu, sigma, 1000)      # draw 1000 samples
s.mean()                                   # sample mean; should land close to mu

99.436730815353712

In [66]:
shape, scale = 2.0, 2.0
s = np.random.gamma(shape, scale, 1000)    # draw 1000 gamma-distributed samples
s.mean()                                   # theoretical mean is shape * scale = 4

4.02305861145358

In [67]:
loc, scale = 10, 1
s = np.random.logistic(loc, scale, 1000)   # draw 1000 samples from a logistic distribution
s.mean()                                   # sample mean; comes out close to loc

9.9697690172596793

#### Linear algebra

In [68]:
values = np.arange(12).reshape(3, 4)
print(values)

weights = np.array([0, 1, 2, 3])
np.dot(values, weights)        # matrix-vector product: one weighted sum per row of values

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


array([14, 38, 62])

In [69]:
values_t = values.T            # transpose: the (3, 4) array becomes (4, 3)
print(values_t)

np.dot(values, values_t)       # matrix product of (3, 4) and (4, 3) gives a (3, 3) result

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


array([[ 14,  38,  62],
       [ 38, 126, 214],
       [ 62, 214, 366]])

In [70]:
x = np.array([[0, 1], [3, 4]])
np.linalg.inv(x)               # matrix inverse: multiplying it with x gives the identity matrix

array([[-1.33333333,  0.33333333],
       [ 1.        ,  0.        ]])