## Create Numpy Arrays

In [1]:
import numpy as np

In [10]:
data = [1, 2, 3, 4]
arr = np.array(data)

print(arr)
print(arr.shape)
print(arr.ndim)

[1 2 3 4]
(4,)
1


In [11]:
data = [[1, 2, 3, 4],
        [5, 6, 7, 8]]

arr = np.array(data)

print(arr)
print(arr.shape)
print(arr.ndim)
print(arr.dtype)
print(arr.size)

[[1 2 3 4]
 [5 6 7 8]]
(2, 4)
2
int64
8


In [33]:
data = [[1, 2, 3, 4],
        [5, 6, 7, 8]]
arr = np.array(data)

# note argmax returns the "index"
print(np.argmax(arr)) # index of all elements, which is 7
print(np.max(arr, axis=1))
print(np.argmax(arr, axis=0)) # along row direction, the bigger one is at index 1 out of [0, 1]
print(np.argmax(arr, axis=1)) # along col direction, the bigger one is at index 3 out of [0, 1, 2, 3]

7
[4 8]
[1 1 1 1]
[3 3]


In [5]:
print(np.zeros(10))
print(np.zeros((2, 4)))

print(np.ones(10))
print(np.ones_like([1, 2, 3]))

print(np.full((2, 2), 7))
print(np.eye(2))
print(np.random.random((2, 2)))

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
[1 1 1]
[[ 7.  7.]
 [ 7.  7.]]
[[ 1.  0.]
 [ 0.  1.]]
[[ 0.89228048  0.72492309]
 [ 0.37731349  0.19564932]]




In [14]:
np.empty((2,3,2))

array([[[  2.53023665e-316,   0.00000000e+000],
        [  0.00000000e+000,   0.00000000e+000],
        [  0.00000000e+000,   0.00000000e+000]],

       [[  0.00000000e+000,   0.00000000e+000],
        [  0.00000000e+000,   0.00000000e+000],
        [  0.00000000e+000,   0.00000000e+000]]])

In [15]:
# just like range in python, but return ndarray
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

## Array Indexing

In [12]:
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])

print(a)
print(a[:2, 1:3])
print(a[0, 1])

b = a[:2, 1:3]
b[0, 0] = 99
print(a)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[[2 3]
 [6 7]]
2
[[ 1 99  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [41]:
arr_2d = [[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]

arr_2d = np.array(arr_2d)
print(arr_2d[2])
print(arr_2d[1, 1])
print(arr_2d[1][1])

print(arr_2d[1].copy())
print(arr_2d[:2, 1:])

[7 8 9]
5
5
[4 5 6]
[[2 3]
 [5 6]]


### Mix integer indexing with slice indexing

In [16]:
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])

print(a)

row_r1 = a[1, :]    # Rank 1 view of the second row of a
row_r2 = a[1:2, :]  # Rank 2 view of the second row of a

print(row_r1, row_r1.shape)
print(row_r2, row_r2.shape)

col_r1 = a[:, 1]
col_r2 = a[:, 1:2]

print(col_r1, col_r1.shape)
print(col_r2, col_r2.shape)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[5 6 7 8] (4,)
[[5 6 7 8]] (1, 4)
[ 2  6 10] (3,)
[[ 2]
 [ 6]
 [10]] (3, 1)


### Integer array indexing

In [20]:
a = np.array([[1, 2], 
              [3, 4], 
              [5, 6]])

print(a)

print(a[[0, 1, 2], [0, 1, 0]])  # (0, 0), (1, 1), (2, 0)
print(np.array([a[0, 0], a[1, 1], a[2, 0]])) 

# reuse element
print(a[[0, 0], [1, 1]])
print(np.array([a[0, 1], a[0, 1]]))

[[1 2]
 [3 4]
 [5 6]]
[1 4 5]
[1 4 5]
[2 2]
[2 2]


### Mutating element

In [24]:
a = np.array([[1,2,3], 
              [4,5,6], 
              [7,8,9], 
              [10, 11, 12]])
print(a)

b = np.array([0, 2, 0, 1])
print(b)

# Select one element from each row of a using the indices in b
print(a[np.arange(4), b])

# Mutate one element from each row of a using the indices in b
a[np.arange(4), b] += 10
print(a)


[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[0 2 0 1]
[ 1  6  7 11]
[[11  2  3]
 [ 4  5 16]
 [17  8  9]
 [10 21 12]]


### Boolean array indexing

In [27]:
a = np.array([[1,2], 
              [3, 4], 
              [5, 6]])

print(a)

bool_idx = (a > 2) 
print(bool_idx)

# We use boolean array indexing to construct a rank 1 array
# consisting of the elements of a corresponding to the True values
# of bool_idx
print(a[bool_idx])

# 
print(a[a > 2])

[[1 2]
 [3 4]
 [5 6]]
[[False False]
 [ True  True]
 [ True  True]]
[3 4 5 6]
[3 4 5 6]


## Array manipulation

### Transpose

In [43]:
x = np.array([[1,2], 
              [3,4]])

print(x)    # Prints "[[1 2]
            #          [3 4]]"
print(x.T)  # Prints "[[1 3]
            #          [2 4]]"
    
# Note that taking the transpose of a rank 1 array does nothing:
v = np.array([1,2,3])
print(v)    # Prints "[1 2 3]"
print(v.T)  # Prints "[1 2 3]"

[[1 2]
 [3 4]]
[[1 3]
 [2 4]]
[1 2 3]
[1 2 3]


In [16]:
arr = np.arange(12).reshape(3, 4)
print(arr)
print(arr.flatten())

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [17]:
np.linspace(1, 10, 20) # start, end, num_points

array([  1.        ,   1.47368421,   1.94736842,   2.42105263,
         2.89473684,   3.36842105,   3.84210526,   4.31578947,
         4.78947368,   5.26315789,   5.73684211,   6.21052632,
         6.68421053,   7.15789474,   7.63157895,   8.10526316,
         8.57894737,   9.05263158,   9.52631579,  10.        ])

#### Array Merge

In [18]:
arr_1 = np.arange(5)
arr_2 = np.arange(5)

print(arr_1)
print(arr_2)

print(np.vstack((arr_1, arr_2)))
print(np.hstack((arr_1, arr_2)))

[0 1 2 3 4]
[0 1 2 3 4]
[[0 1 2 3 4]
 [0 1 2 3 4]]
[0 1 2 3 4 0 1 2 3 4]


#### Array Split

In [19]:
arr = np.arange(12).reshape(3, 4)
print(arr)

# vertical split
split_1, split_2 = np.split(arr, 2, axis=1) # array, num_splits, axis
print(split_1)
print(split_2)

# horizontal split
split_1, split_2, split_3 = np.split(arr, 3, axis=0)
print(split_1)
print(split_2)
print(split_3)


[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[0 1]
 [4 5]
 [8 9]]
[[ 2  3]
 [ 6  7]
 [10 11]]
[[0 1 2 3]]
[[4 5 6 7]]
[[ 8  9 10 11]]


### Datatype

In [31]:
arr = np.array([1.1, 2.2, 3.3])
arr_1 = arr.astype(np.int32)
print(arr.dtype)
print(arr_1.dtype)

x = np.array([1, 2], dtype=np.int64)   # Force a particular datatype
print(x.dtype)                         # Prints "int64"

float64
int32
int64


### Computation

In [21]:
arr_1 = np.array([1, 2, 3])
arr_2 = np.array([1, 2, 3])

print(arr_1 * arr_2)
print(arr_1 / 2)
print(arr_1 ** 0.5)

print(arr_1[1:])

[1 4 9]
[ 0.5  1.   1.5]
[ 1.          1.41421356  1.73205081]
[2 3]


In [22]:
arr_1 = np.array([1, 2, 3])
arr_2 = np.array([1, 2, 3])

print("average: {}".format(np.average(arr_1)))
print("mean: {}".format(np.mean(arr_1)))
print("dot: {}".format(np.dot(arr_1, arr_2)))

average: 2.0
mean: 2.0
dot: 14


In [23]:
arr = np.arange(12).reshape(3, 4)
print(arr)
print(np.diff(arr)) # each col diff
print(np.diff(arr.T))

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[1 1 1]
 [1 1 1]
 [1 1 1]]
[[4 4]
 [4 4]
 [4 4]
 [4 4]]


![image.png](attachment:image.png)

### Array Math

In [36]:
x = np.array([[1,2],[3,4]], dtype=np.float64)
y = np.array([[5,6],[7,8]], dtype=np.float64)

print(x + y)
print(np.add(x, y))

print(x - y)
print(np.subtract(x, y))

print(x * y)
print(np.multiply(x, y))

print(x / y)
print(np.divide(x, y))

print(np.sqrt(x))

[[  6.   8.]
 [ 10.  12.]]
[[  6.   8.]
 [ 10.  12.]]
[[-4. -4.]
 [-4. -4.]]
[[-4. -4.]
 [-4. -4.]]
[[  5.  12.]
 [ 21.  32.]]
[[  5.  12.]
 [ 21.  32.]]
[[ 0.2         0.33333333]
 [ 0.42857143  0.5       ]]
[[ 0.2         0.33333333]
 [ 0.42857143  0.5       ]]
[[ 1.          1.41421356]
 [ 1.73205081  2.        ]]


In [39]:
x = np.array([[1, 2],
              [3, 4]])

y = np.array([[5, 6],
              [7, 8]])

v = np.array([9, 10])
w = np.array([11, 12])

# Inner product of vectors; both produce 219
print(v.dot(w))
print(np.dot(v, w))

# Matrix / vector product; both produce the rank 1 array [29 67]
print(x.dot(v))
print(np.dot(x, v))

# Matrix / matrix product; both produce the rank 2 array
# [[19 22]
#  [43 50]]
print(x.dot(y))
print(np.dot(x, y))

219
219
[29 67]
[29 67]
[[19 22]
 [43 50]]
[[19 22]
 [43 50]]


In [40]:
x = np.array([[1, 2],
              [3, 4]])

print(np.sum(x))  # Compute sum of all elements; prints "10"
print(np.sum(x, axis=0))  # Compute sum of each column; prints "[4 6]"
print(np.sum(x, axis=1))  # Compute sum of each row; prints "[3 7]"

10
[4 6]
[3 7]


### Mask / Filter

In [25]:
names = np.array(['Bob', 'Joe', 'Will'])

print(names == 'Joe')

data = np.random.randn(3, 2)
print(data)
print(data[names == 'Joe'])


mask = (names == 'Will') | (names == 'Bob')
data[mask]

data[data < 0] = 0
print(data)

[False  True False]
[[ 0.61373291  0.19480468]
 [ 0.42173094 -1.4281563 ]
 [-0.40509271 -0.69101936]]
[[ 0.42173094 -1.4281563 ]]
[[ 0.61373291  0.19480468]
 [ 0.42173094  0.        ]
 [ 0.          0.        ]]


In [26]:
arr = np.arange(15).reshape(3, 5)
print(arr)
print(arr.T)
print(arr.mean(axis=1))
print(arr.mean(axis=0))
print(arr.mean(axis=None))

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
[[ 0  5 10]
 [ 1  6 11]
 [ 2  7 12]
 [ 3  8 13]
 [ 4  9 14]]
[  2.   7.  12.]
[ 5.  6.  7.  8.  9.]
7.0


In [27]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Bob'])
np.unique(names)

array(['Bob', 'Joe', 'Will'], 
      dtype='<U4')

### newaxis

Simply put, the newaxis is used to increase the dimension of the existing array by one more dimension, when used once. Thus,

1D array will become 2D array

2D array will become 3D array

3D array will become 4D array and so on..

In [28]:
x1 = np.array([1, 2, 3, 4, 5])
x2 = np.array([1, 2, 3])

print(x1[:])
print(x1[:, np.newaxis])


[1 2 3 4 5]
[[1]
 [2]
 [3]
 [4]
 [5]]


## Data Transformation

In [7]:
arr = np.arange(45).reshape(1, 3, 3, 5)
arr

array([[[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14]],

        [[15, 16, 17, 18, 19],
         [20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29]],

        [[30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39],
         [40, 41, 42, 43, 44]]]])

In [12]:
tarr = np.transpose(arr, [3, 1, 2, 0]).squeeze()
print(tarr.shape)
tarr

(5, 3, 3)


array([[[ 0,  5, 10],
        [15, 20, 25],
        [30, 35, 40]],

       [[ 1,  6, 11],
        [16, 21, 26],
        [31, 36, 41]],

       [[ 2,  7, 12],
        [17, 22, 27],
        [32, 37, 42]],

       [[ 3,  8, 13],
        [18, 23, 28],
        [33, 38, 43]],

       [[ 4,  9, 14],
        [19, 24, 29],
        [34, 39, 44]]])

## Broadcasting

In [49]:
x = np.array([[1,2,3], 
              [4,5,6], 
              [7,8,9], 
              [10, 11, 12]])

print(x)

# We will add the vector v to each row of the matrix x,
# storing the result in the matrix y
v = np.array([1, 0, 1])
y = np.empty_like(x)   # Create an empty matrix with the same shape as x

print(y)

## Slow
for i in range(4):
    y[i, :] = x[i, :] + v

print(y)

## Better
vv = np.tile(v, (4, 1))   # Stack 4 copies of v on top of each other
y = x + vv  # Add x and vv elementwise
print(y) 

# leverage broadcasting, do not need to make tile copies of v
y = x + v  # Add v to each row of x using broadcasting
print(y)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[[0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]]
[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]
[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]
[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


#### Broadcasting two arrays together follows these rules:

1. If the arrays do not have the same rank, prepend the shape of the lower rank array with 1s until both shapes have the same length.
2. The two arrays are said to be compatible in a dimension if they have the same size in the dimension, or if one of the arrays has size 1 in that dimension.
3. The arrays can be broadcast together if they are compatible in all dimensions.
4. After broadcasting, each array behaves as if it had shape equal to the elementwise maximum of shapes of the two input arrays.
5. In any dimension where one array had size 1 and the other array had size greater than 1, the first array behaves as if it were copied along that dimension
