### Create Numpy Arrays

In [1]:
import numpy as np

In [None]:
data = [1, 2, 3, 4]
arr = np.array(data)

print(arr)
print(arr.shape)
print(arr.ndim)

In [3]:
data = [[1, 2, 3, 4],
        [5, 6, 7, 8]]

arr = np.array(data)

print(arr)
print(arr.shape)
print(arr.ndim)
print(arr.dtype)
print(arr.size)

[[1 2 3 4]
 [5 6 7 8]]
(2, 4)
2
int64
8


In [16]:
print(np.zeros(10))
print(np.zeros((2, 4)))

print(np.ones(10))
print(np.ones_like([1, 2, 3]))

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
[1 1 1]


In [13]:
np.empty((2,3,2))

array([[[  2.28758560e-316,   0.00000000e+000],
        [  0.00000000e+000,   0.00000000e+000],
        [  0.00000000e+000,   0.00000000e+000]],

       [[  0.00000000e+000,   0.00000000e+000],
        [  0.00000000e+000,   0.00000000e+000],
        [  0.00000000e+000,   0.00000000e+000]]])

In [14]:
# just like range in python
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [15]:
arr = np.arange(12).reshape(3, 4)
print(arr)
print(arr.flatten())

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [5]:
np.linspace(1, 10, 20) # start, end, num_points

array([  1.        ,   1.47368421,   1.94736842,   2.42105263,
         2.89473684,   3.36842105,   3.84210526,   4.31578947,
         4.78947368,   5.26315789,   5.73684211,   6.21052632,
         6.68421053,   7.15789474,   7.63157895,   8.10526316,
         8.57894737,   9.05263158,   9.52631579,  10.        ])

#### Array Merge

In [21]:
arr_1 = np.arange(5)
arr_2 = np.arange(5)

print(arr_1)
print(arr_2)

print(np.vstack((arr_1, arr_2)))
print(np.hstack((arr_1, arr_2)))

[0 1 2 3 4]
[0 1 2 3 4]
[[0 1 2 3 4]
 [0 1 2 3 4]]
[0 1 2 3 4 0 1 2 3 4]


#### Array Split

In [30]:
arr = np.arange(12).reshape(3, 4)
print(arr)

# vertical split
split_1, split_2 = np.split(arr, 2, axis=1) # array, num_splits, axis
print(split_1)
print(split_2)

# horizontal split
split_1, split_2, split_3 = np.split(arr, 3, axis=0)
print(split_1)
print(split_2)
print(split_3)


[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[0 1]
 [4 5]
 [8 9]]
[[ 2  3]
 [ 6  7]
 [10 11]]
[[0 1 2 3]]
[[4 5 6 7]]
[[ 8  9 10 11]]


### Datatype

In [18]:
arr = np.array([1.1, 2.2, 3.3])
arr.astype(np.int32)

array([1, 2, 3], dtype=int32)

### Computation

In [23]:
arr_1 = np.array([1, 2, 3])
arr_2 = np.array([1, 2, 3])

print(arr_1 * arr_2)
print(arr_1 / 2)
print(arr_1 ** 0.5)

print(arr_1[1:])

[1 4 9]
[ 0.5  1.   1.5]
[ 1.          1.41421356  1.73205081]
[2 3]


In [9]:
arr_1 = np.array([1, 2, 3])
arr_2 = np.array([1, 2, 3])

print("average: {}".format(np.average(arr_1)))
print("mean: {}".format(np.mean(arr_1)))
print("dot: {}".format(np.dot(arr_1, arr_2)))

average: 2.0
mean: 2.0
dot: 14


In [12]:
arr = np.arange(12).reshape(3, 4)
print(arr)
print(np.diff(arr)) # each col diff
print(np.diff(arr.T))

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[1 1 1]
 [1 1 1]
 [1 1 1]]
[[4 4]
 [4 4]
 [4 4]
 [4 4]]


![image.png](attachment:image.png)

### Index

In [33]:
arr_2d = [[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]

arr_2d = np.array(arr_2d)
print(arr_2d[2])
print(arr_2d[1, 1])
print(arr_2d[1][1])

print(arr_2d[1].copy())
print(arr_2d[:2, 1:])

[7 8 9]
5
5
[4 5 6]
[[2 3]
 [5 6]]


### Mask / Filter

In [49]:
names = np.array(['Bob', 'Joe', 'Will'])

print(names == 'Joe')

data = np.random.randn(3, 2)
print(data)
print(data[names == 'Joe'])


mask = (names == 'Will') | (names == 'Bob')
data[mask]

data[data < 0] = 0
print(data)

[False  True False]
[[ 0.56854337  1.78260165]
 [-2.33506369 -1.19630752]
 [ 1.09453257  0.90387366]]
[[-2.33506369 -1.19630752]]
[[ 0.56854337  1.78260165]
 [ 0.          0.        ]
 [ 1.09453257  0.90387366]]


In [54]:
arr = np.arange(15).reshape(3, 5)
print(arr)
print(arr.T)
print(arr.mean(axis=1))
print(arr.mean(axis=0))
print(arr.mean(axis=None))

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
[[ 0  5 10]
 [ 1  6 11]
 [ 2  7 12]
 [ 3  8 13]
 [ 4  9 14]]
[  2.   7.  12.]
[ 5.  6.  7.  8.  9.]
7.0


In [55]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Bob'])
np.unique(names)

array(['Bob', 'Joe', 'Will'], 
      dtype='<U4')

### newaxis

Simply put, the newaxis is used to increase the dimension of the existing array by one more dimension, when used once. Thus,

1D array will become 2D array

2D array will become 3D array

3D array will become 4D array and so on..

In [4]:
x1 = np.array([1, 2, 3, 4, 5])
x2 = np.array([1, 2, 3])

print(x1[:])
print(x1[:, np.newaxis])


[1 2 3 4 5]
[[1]
 [2]
 [3]
 [4]
 [5]]


## Data Transformation