In [3]:
# NumPy arrays use one fixed element type (faster than lists): e.g. an int32 element takes only 4 bytes, and no per-element type checking is needed when iterating
# Arrays use contiguous memory >> SIMD (single instruction, multiple data) vector processing and effective cache utilisation
# More built-in computation options than lists
# Uses: mathematics (MATLAB replacement), plotting (matplotlib), backend of pandas, ML (tensors are similar to NumPy arrays)

In [24]:
import numpy as np

## Basics

In [18]:
# 1-D array with an explicit 16-bit integer element type
a = np.array([1, 2, 3], dtype=np.int16)
print(a)

[1 2 3]


In [9]:
# 2-D array built from nested lists of floats (one inner list per row)
b = np.array([
    [9.0, 8.0, 7.0],
    [6.0, 5.0, 4.0],
])
print(b)

[[9. 8. 7.]
 [6. 5. 4.]]


In [15]:
# ndim: number of dimensions (axes) of the array
a.ndim

1

In [16]:
# shape: tuple with the length of each dimension -- (rows, columns) for a 2-D array
b.shape

(2, 3)

In [19]:
# dtype: the single element type shared by every item in the array
a.dtype

dtype('int16')

In [20]:
# itemsize: size of ONE element in bytes (not of the whole array)
a.itemsize

2

In [21]:
# Same values stored as 32-bit integers: each element now takes 4 bytes
a = np.array([1, 2, 3], dtype=np.int32)
a.itemsize

4

In [22]:
# size: total number of elements in the array
a.size

3

In [23]:
# b was built from floats without an explicit dtype; each element takes 8 bytes (see output)
b.itemsize

8

## Accessing/Changing specific elements, rows, columns, etc.

In [25]:
# 2 x 7 integer matrix used by the indexing examples that follow
a = np.array([
    [1, 2, 3, 4, 5, 6, 7],
    [8, 9, 10, 11, 12, 13, 14],
])
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


In [26]:
# (rows, columns) of the matrix
a.shape

(2, 7)

In [29]:
# get a specific element: a[row, column], both indices zero-based
a[1,5]

13

In [30]:
# negative indices count from the end: -2 is the second-to-last column
a[1,-2]

13

In [32]:
# get a specific row (a[0,:]) or column (a[:,2]); ':' selects everything along that axis
# NOTE: a notebook cell displays only its LAST expression, so the row
# selection below is evaluated but never shown -- only the column appears.
a[0,:]
a[:,2]

array([ 3, 10])

In [33]:
# slice syntax is startindex:endindex:stepsize (colons, not commas); endindex is exclusive
a[0,1:6:2]

array([2, 4, 6])

In [34]:
# an end index of -1 stops before the last element; on this 7-column row it matches 1:6
a[0,1:-1:2]

array([2, 4, 6])

In [35]:
# assign to a single element; this modifies a in place
a[1,5] = 30
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 30 14]]


In [36]:
# assigning a scalar to a whole column writes that value into every row
a[:,2] = 5
print(a)

[[ 1  2  5  4  5  6  7]
 [ 8  9  5 11 12 30 14]]


In [37]:
# assigning a sequence to a column: one value per row, so its length must match the row count
a[:,2] = [1,2]
print(a)

[[ 1  2  1  4  5  6  7]
 [ 8  9  2 11 12 30 14]]


In [41]:
# 3-D array: 2 blocks, each holding 2 rows of 3 values
b = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])

In [44]:
# shape is listed from the outermost dimension to the innermost
b.shape

(2, 2, 3)

In [45]:
# the printed form separates the outermost blocks with a blank line
print(b)

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]


In [46]:
# get a specific element; indices run outermost to innermost: block 0, row 1, column 1
b[0,1,1]

5

In [47]:
# block 0, every row, column 1
b[0,:,1]

array([2, 5])

In [53]:
# replace row 1 of every block; b[:,1,:] has shape (2, 3), so the
# right-hand side supplies one 3-element row per block
b[:,1,:] = [[9,9,9],[8,8,8]]
print(b)

[[[1 2 3]
  [9 9 9]]

 [[7 8 9]
  [8 8 8]]]


## Initialising Different Types of Arrays

In [60]:
# all-zeros array; the tuple is the shape (4-D here) and the elements are floats (see output)
np.zeros((2,2,2,2))

array([[[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]]])

In [61]:
# all-ones array of the given shape, with an explicit integer dtype
np.ones((4,2,2), dtype = 'int32')

array([[[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]]], dtype=int32)

In [63]:
# array of the given shape filled with any chosen value (99 here)
np.full((2,2),99, dtype = 'int32')

array([[99, 99],
       [99, 99]], dtype=int32)

In [65]:
# full_like: fill value 4, with shape and dtype taken from the existing array a
np.full_like(a,4)

array([[4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4]])

In [69]:
# random floats drawn uniformly from [0, 1); rand takes the dimensions as separate arguments
# NOTE: no seed is set in the visible cells, so the values differ on every run
np.random.rand(4,2,3)

array([[[0.44111335, 0.45871365, 0.62592959],
        [0.50232301, 0.07593868, 0.34773514]],

       [[0.462211  , 0.83226045, 0.62038398],
        [0.78768837, 0.89865081, 0.20455811]],

       [[0.17795736, 0.92653448, 0.05621976],
        [0.4787566 , 0.81210249, 0.20076198]],

       [[0.03114619, 0.90568979, 0.66837773],
        [0.73889221, 0.80177111, 0.19558266]]])

In [72]:
# random_sample takes the shape as a single tuple (unlike rand), so a.shape can be passed directly
np.random.random_sample(a.shape)

array([[0.10115564, 0.6715188 , 0.11631616, 0.28873247, 0.23076306,
        0.43285662, 0.54343983],
       [0.75973205, 0.51669847, 0.66456336, 0.82879913, 0.97175843,
        0.05473348, 0.6873618 ]])

In [87]:
# random integers from -4 (inclusive) up to 4 (exclusive); size gives the output shape
# NOTE: unseeded, so the recorded output is not reproducible
np.random.randint(-4,4, size = (4,2))

array([[-1,  2],
       [-2, -2],
       [ 1,  3],
       [-3, -3]])

In [88]:
# n x n identity matrix: ones on the main diagonal, zeros elsewhere
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [93]:
# repeat: duplicate the contents of an array along a chosen axis
arr = np.array([[1, 2, 3]])

# axis=0 repeats the single row three times
r1 = arr.repeat(3, axis=0)
print(r1)

# axis=1 repeats each element three times within the row
r1 = arr.repeat(3, axis=1)
print(r1)

[[1 2 3]
 [1 2 3]
 [1 2 3]]
[[1 1 1 2 2 2 3 3 3]]


In [105]:
# Build a 5x5 frame of ones and drop a 3x3 block (zeros with a 9 in the
# centre) into its interior.
output = np.ones(shape=(5, 5))
print(output)

z = np.zeros(shape=(3, 3))
z[1][1] = 9
print(z)

# rows 1..3 and columns 1..3 are the interior of the 5x5 frame
output[1:4, 1:4] = z
print(output)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[[0. 0. 0.]
 [0. 9. 0.]
 [0. 0. 0.]]
[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


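#### Be careful when copying arrays: `b = a` only creates a second name for the same data, so use `a.copy()` to get an independent array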

In [112]:
# .copy() gives b its own data, so changing b leaves a untouched (see output).
# With a plain `b = a`, both names would refer to the same array.
a = np.array([1,2,3])
b = a.copy()
b[0] = 100
print(b)
print(a)

[100   2   3]
[1 2 3]


## Mathematics

In [113]:
# base vector for the element-wise arithmetic examples that follow
a = np.array([1,2,3,4])
print(a)

[1 2 3 4]


In [114]:
# the scalar is applied to every element; a new array is returned and a is unchanged
a+2

array([3, 4, 5, 6])

In [115]:
# element-wise subtraction of a scalar
a-2

array([-1,  0,  1,  2])

In [116]:
# element-wise multiplication by a scalar
a*2

array([2, 4, 6, 8])

In [117]:
# true division: the result is a float array even though a holds integers
a/2

array([0.5, 1. , 1.5, 2. ])

In [119]:
# In-place add: modifies a itself, so every re-run of this cell adds another 2.
# NOTE(review): the recorded output [5 6 7 8] is 4 more than the
# a = [1 2 3 4] defined above, which suggests this cell was executed twice.
# A fresh Restart & Run All would show [3 4 5 6] here and change the
# results of the cells that follow.
a+=2
a

array([5, 6, 7, 8])

In [120]:
# arrays of the same shape are combined element by element
b = np.array([1,0,1,0])
a+b

array([6, 6, 8, 8])

In [121]:
# element-wise power: every element is squared
a**2

array([25, 36, 49, 64])

In [125]:
# apply each trigonometric function element-wise to a (values are taken as radians)
for trig in (np.sin, np.cos, np.tan):
    print(trig(a))

[-0.95892427 -0.2794155   0.6569866   0.98935825]
[ 0.28366219  0.96017029  0.75390225 -0.14550003]
[-3.38051501 -0.29100619  0.87144798 -6.79971146]


#### Linear algebra

In [130]:
# For a matrix product a x b, the number of columns of a must equal the
# number of rows of b: here (2, 3) x (3, 2).
a = np.ones(shape=(2, 3))
print(a)

b = np.full(shape=(3, 2), fill_value=2)
print(b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]


In [131]:
# `*` is ELEMENT-WISE multiplication, not a matrix product. Shapes (2, 3)
# and (3, 2) cannot be broadcast together, so NumPy raises ValueError.
# The error is the point of this cell, so it is caught and printed:
# an uncaught exception would abort "Restart & Run All" at this cell.
try:
    a * b
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (2,3) (3,2) 

In [133]:
# true matrix multiplication: (2, 3) x (3, 2) gives a (2, 2) result
np.matmul(a,b)

array([[6., 6.],
       [6., 6.]])

In [134]:
# find the determinant; for the identity matrix it is 1.0
c = np.identity(3)
np.linalg.det(c)

1.0

#### Statistics

In [135]:
# 2 x 3 sample matrix for the min / max / sum examples
stats = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
stats

array([[1, 2, 3],
       [4, 5, 6]])

In [136]:
# with no axis given, the minimum is taken over the whole array
np.min(stats)

1

In [137]:
# with no axis given, the maximum is taken over the whole array
np.max(stats)

6

In [139]:
# axis=1 reduces across the columns, giving one minimum per row
np.min(stats, axis=1)

array([1, 4])

In [140]:
# axis=1 reduces across the columns, giving one maximum per row
np.max(stats, axis=1)

array([3, 6])

In [141]:
# sum of every element in the array
np.sum(stats)

21

In [142]:
# axis=1 sums across the columns, giving one total per row
np.sum(stats,axis=1)

array([ 6, 15])

#### Reorganising Arrays

In [147]:
# reshape rearranges the same 8 values into a new shape; the element
# count of the target shape must match the original
before = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(before)

# (2, 4) -> (4, 2)
after = np.reshape(before, (4, 2))
print(after)

# (2, 4) -> (2, 2, 2)
after = np.reshape(before, (2, 2, 2))
print(after)

[[1 2 3 4]
 [5 6 7 8]]
[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [150]:
# vertically stacking vectors: each 1-D vector becomes one row of the result
v1 = np.array([1,2,3,4])
v2 = np.array([5,6,7,8])

# NOTE: this is not the last expression in the cell, so its 2-row result
# is computed but never displayed
np.vstack([v1,v2])

# any number of equal-length vectors can be stacked; only this result is shown
np.vstack([v1,v2,v2,v2])

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [5, 6, 7, 8],
       [5, 6, 7, 8]])

In [155]:
# horizontal stacking: the inputs are joined side by side, so they need
# the same number of rows (2 here)
h1 = np.ones(shape=(2, 4))
h2 = np.zeros(shape=(2, 2))

np.hstack([h1, h2])

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

## Miscellaneous

#### Load data from file

In [157]:
# parse comma-separated numbers from data.txt (a path relative to the kernel's
# working directory); the values are loaded as floats, as the displayed array shows
text_data = np.genfromtxt('data.txt', delimiter = ',')

In [158]:
# display the loaded array (the trailing '.' on each value marks it as a float)
text_data

array([[  1.,  13.,  21.,  11., 196.,  75.,   4.,   3.,  34.,   6.,   7.,
          8.,   0.,   1.,   2.,   3.,   4.,   5.],
       [  3.,  42.,  12.,  33., 766.,  75.,   4.,  55.,   6.,   4.,   3.,
          4.,   5.,   6.,   7.,   0.,  11.,  12.],
       [  1.,  22.,  33.,  11., 999.,  11.,   2.,   1.,  78.,   0.,   1.,
          2.,   9.,   8.,   7.,   1.,  76.,  88.]])

In [159]:
# cast the parsed floats to 32-bit integers; astype returns a new array
text_data_int = text_data.astype(np.int32)
text_data_int

array([[  1,  13,  21,  11, 196,  75,   4,   3,  34,   6,   7,   8,   0,
          1,   2,   3,   4,   5],
       [  3,  42,  12,  33, 766,  75,   4,  55,   6,   4,   3,   4,   5,
          6,   7,   0,  11,  12],
       [  1,  22,  33,  11, 999,  11,   2,   1,  78,   0,   1,   2,   9,
          8,   7,   1,  76,  88]], dtype=int32)

In [160]:
# boolean masking: a comparison is applied element-wise and yields a
# True/False array of the same shape
text_data_int > 50

array([[False, False, False, False,  True,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [161]:
# indexing with a boolean mask keeps only the True positions, returned as a flat 1-D array
text_data_int[text_data_int > 50]

array([196,  75, 766,  75,  55, 999,  78,  76,  88], dtype=int32)

In [165]:
# for each column (axis = 0): is AT LEAST ONE value greater than 50?
np.any(text_data_int > 50, axis = 0)

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [166]:
# for each column (axis = 0): is EVERY value greater than 50?
np.all(text_data_int > 50, axis = 0)

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False])

In [172]:
# combine masks with & (element-wise AND); each comparison needs its own
# parentheses because & binds more tightly than > and <
(text_data_int > 50) & (text_data_int < 100)

array([[False, False, False, False, False,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [174]:
# ~ inverts the mask: True wherever the value is NOT strictly between 50 and 100
~((text_data_int > 50) & (text_data_int < 100))

array([[ True,  True,  True,  True,  True, False,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True, False,  True, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True, False, False]])

In [181]:
# Plain-Python 6x6 table where entry [d1][d2] holds d1 + d2 + 2.
# NOTE(review): judging by the names d1/d2 this looks like a two-dice sum
# table (zero-based faces, hence the +2) -- confirm the intent.
table = [[0] * 6 for _ in range(6)]
print(table)
for d1, row in enumerate(table):
    for d2 in range(len(row)):
        row[d2] = d1 + d2 + 2
print(table)

[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[2, 3, 4, 5, 6, 7], [3, 4, 5, 6, 7, 8], [4, 5, 6, 7, 8, 9], [5, 6, 7, 8, 9, 10], [6, 7, 8, 9, 10, 11], [7, 8, 9, 10, 11, 12]]


In [192]:
# the integers 1..30 laid out row by row in a 5 x 6 grid
table = np.reshape(np.arange(1, 31), (5, 6))
table

array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12],
       [13, 14, 15, 16, 17, 18],
       [19, 20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29, 30]])