# Numpy beginners tutorial

This notebook was created by following the YouTube tutorial by Keith Galli:

https://youtu.be/QUT1VHiLmmI

## Load the numpy package

In [61]:
# Load the numpy package
import numpy as np
import sys

- NumPy is a multidimensional array library
- Lists vs NumPy arrays
    - NumPy arrays are faster than lists
        - NumPy stores values using fixed-size types rather than generic Python objects. Every Python object carries extra data along with its value (such as a reference count and size), so a list takes more memory and is slower to read and write.
        - No type checking is required when iterating through the elements
        - NumPy stores its data in contiguous memory, which gives faster access and better caching, and makes SIMD vector processing possible


## The basics

In [62]:
# Build a one-dimensional array of 16-bit integers from a plain Python list
a = np.array([1, 2, 3], dtype=np.int16)
print(a)

[1 2 3]


In [63]:
# 2D array init: every inner list becomes one row of the array
b = np.array([
    [1.0, 3.0, 6.0],
    [2.5, 4.2, 5.7],
])
print(b)

[[1.  3.  6. ]
 [2.5 4.2 5.7]]


In [64]:
#Get dimensions
b.ndim

2

In [65]:
#Get shape
a.shape

(3,)

In [66]:
#Get type
a.dtype

dtype('int16')

In [67]:
# Get element size
a.itemsize

2

In [68]:
# Get total size in bytes (same value as b.size * b.itemsize)
b.nbytes

48

## Accessing/Changing specific elements, rows, columns ...

In [69]:
# Two rows of seven integers each, used for the indexing examples
a = np.array([
    [1, 2, 3, 4, 5, 6, 7],
    [8, 9, 10, 11, 12, 13, 14],
])
print(a)
# Print the (rows, columns) shape
print("shape: {} ".format(np.shape(a)))

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]
shape: (2, 7) 


In [70]:
# Get specific element, indexing is done [r, c]
print(a[1,4])

# We can use negative indexing just like lists
print(a[1,-3])


12
12


In [71]:
# Accessing a specific row
print(a[0, :])

[1 2 3 4 5 6 7]


In [72]:
# Specific column
print(a[:, 4])

[ 5 12]


In [73]:
# Another way of getting elements, start:stop:step
print(a[0, 1:-1:2])

[2 4 6]


In [74]:
# Changing elements
a[1,5] = 99
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 99 14]]


In [75]:
# Changing an entire column: column index 2 is overwritten in every row
print(a)
a[:,2] = [9,9]
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 99 14]]
[[ 1  2  9  4  5  6  7]
 [ 8  9  9 11 12 99 14]]


In [76]:
# 3D example: two 2x2 blocks stacked along the first axis
b = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
])
print(b)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [77]:
# Accessing elements
print(b[1,0,1])

print(b[:,0,:])

print(b[1,:,:])

6
[[1 2]
 [5 6]]
[[5 6]
 [7 8]]


In [78]:
# Replace a sub-array in place. The replacement should have the same shape as
# the selected slice (here 2x2): first row of each block becomes [11,11] / [22,22]
print(b[:,0,:])
b[:,0,:] = [[11,11], [22,22]]
print(b)

[[1 2]
 [5 6]]
[[[11 11]
  [ 3  4]]

 [[22 22]
  [ 7  8]]]


## Initializing different types of arrays

In [79]:
# All 0s matrix

np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [80]:
# All 1s matrix
np.ones((4,2,2), dtype='int32')

array([[[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]]])

In [81]:
# Init with any other number
np.full((2,3), 99, dtype='float32')

array([[99., 99., 99.],
       [99., 99., 99.]], dtype=float32)

In [82]:
# Any other number (full like)
np.full_like(b, 3)

array([[[3, 3],
        [3, 3]],

       [[3, 3],
        [3, 3]]])

In [83]:
# Random decimal numbers drawn uniformly from [0, 1), with shape (4, 2, 3)
# NOTE(review): no seed is set in the visible cells, so the values change on every run

np.random.rand(4,2,3)

array([[[0.2885818 , 0.14697537, 0.44613913],
        [0.28356315, 0.03655906, 0.54854183]],

       [[0.71561663, 0.87854325, 0.4393036 ],
        [0.61256566, 0.17823232, 0.60686837]],

       [[0.92397686, 0.74031328, 0.65162736],
        [0.5189969 , 0.61145161, 0.01080006]],

       [[0.1499726 , 0.54464515, 0.94978724],
        [0.5016932 , 0.09507344, 0.3243512 ]]])

In [84]:
# Random integer numbers from 0 up to (but not including) 7, in a 3x2 array
np.random.randint(7, size=(3,2))

array([[3, 4],
       [2, 3],
       [4, 1]])

In [85]:
# start, stop, size: start (4) is inclusive, stop (7) is exclusive, so values are 4, 5 or 6
np.random.randint(4, 7, size=(3,2))

array([[5, 5],
       [5, 4],
       [6, 5]])

In [86]:
# The identity matrix
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [87]:
# Repeat every element of a 2x3 array three times along a chosen axis
arr = np.array([[1, 2, 3], [4, 5, 6]])

# axis=1: each value is tripled within its row (result is 2x9)
r1 = arr.repeat(3, axis=1)
print(r1)

# axis=0: each row is tripled (result is 6x3)
r2 = arr.repeat(3, axis=0)
print(r2)

[[1 1 1 2 2 2 3 3 3]
 [4 4 4 5 5 5 6 6 6]]
[[1 2 3]
 [1 2 3]
 [1 2 3]
 [4 5 6]
 [4 5 6]
 [4 5 6]]


In [88]:
# Build a 5x5 pattern: a border of ones, a zero interior and a 9 in the centre
op = np.ones(shape=(5, 5), dtype=np.int32)
op[1:4, 1:4] = 0   # interior = everything except the first/last row and column
op[2, 2] = 9       # centre cell

print(op)


[[1 1 1 1 1]
 [1 0 0 0 1]
 [1 0 9 0 1]
 [1 0 0 0 1]
 [1 1 1 1 1]]


### Be careful while copying
- Assignment (`b = a`) does not create a new array; it just creates another reference (pointer) to the original, hence any change made through one name will be reflected in the other

In [89]:
# Plain assignment does not copy: b is a second name for the same array as a
a = np.array([1,2,3])
print(a)
b = a
print(b)
# Writing through b ...
b[0] = 10
print(b)
# ... also shows up in a, because both names refer to one array
print(a)

[1 2 3]
[1 2 3]
[10  2  3]
[10  2  3]


In [90]:
# To get an independent copy use np.copy(a): editing c leaves a unchanged
c = np.copy(a)
print(c)
c[0] = 1
print(c)
print(a)

[10  2  3]
[1 2 3]
[10  2  3]


## Math on arrays

In [91]:
# Sample integer array reused by the arithmetic examples that follow
a = np.array([1, 2, 3, 4])
print(a)


[1 2 3 4]


In [92]:
# Element wise add
a+2

array([3, 4, 5, 6])

In [93]:
# Elementwise minus
a-2

array([-1,  0,  1,  2])

In [94]:
# Multiply
a*2

array([2, 4, 6, 8])

In [95]:
# Divide
a/2


array([0.5, 1. , 1.5, 2. ])

In [96]:
# In-place add: unlike a+2, this modifies a itself
# (re-running this cell adds 2 again, so later outputs depend on running it exactly once)
a += 2
a

array([3, 4, 5, 6])

In [97]:
# Element-wise addition of two arrays of the same length
b = np.array([1,0,1,0])
a+b

array([4, 4, 6, 6])

In [98]:
# Element-wise power: square every element
a ** 2

array([ 9, 16, 25, 36], dtype=int32)

In [99]:
# Element-wise cosine (the values are interpreted as radians)
np.cos(a)

array([-0.9899925 , -0.65364362,  0.28366219,  0.96017029])

In [100]:
# Element-wise sine (the values are interpreted as radians)
np.sin(a)

array([ 0.14112001, -0.7568025 , -0.95892427, -0.2794155 ])

### For a lot more (https://docs.scipy.org/doc/numpy/reference/routines.math.html)

## Linear Algebra

In [101]:
# Matrix product of a 2x3 matrix of ones with a 3x2 matrix of twos
a = np.ones(shape=(2, 3))
print(a)

b = np.full(shape=(3, 2), fill_value=2)
print(b)

# The inner dimensions (3 and 3) must match; the result is 2x2
np.matmul(a, b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]


array([[6., 6.],
       [6., 6.]])

In [102]:
# Determinant of the 3x3 identity matrix
c = np.eye(3)
np.linalg.det(c)

1.0

### Reference docs (https://docs.scipy.org/doc/numpy/reference/routines.linalg.html)

- Determinant
- Trace
- Singular Value Decomposition
- Eigenvalues
- Matrix Norm
- Inverse
- Etc...


## Statistics

In [104]:
# Small 2x3 integer array used by the statistics examples
stats = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
stats

array([[1, 2, 3],
       [4, 5, 6]])

In [105]:
# Smallest value in the whole array
np.min(stats)

1

In [106]:
# Largest value in the whole array
np.max(stats)

6

In [107]:
# Sum of all elements
np.sum(stats)

21

In [109]:
# Max across axis 1: one maximum per row
np.max(stats, axis=1)

array([3, 6])

In [110]:
# Sum across axis 0: one total per column
np.sum(stats, axis=0)

array([5, 7, 9])

## Reorganizing Arrays

In [112]:
# Reshape a 2x4 array into 2x2x2; the total number of elements (8) stays the same
before = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(before)

after = np.reshape(before, (2, 2, 2))
print(after)

[[1 2 3 4]
 [5 6 7 8]]
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [113]:
# Vertically stacking arrays
# Two 1D vectors of equal length; vstack turns each one into a row of the result
v1 = np.array([1, 2, 3, 4])
v2 = np.array([5, 6, 7, 8])

np.vstack([v1, v2])

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [114]:
# Any number of arrays can be stacked, in any order (and repeated)
np.vstack((v1,v2,v2,v1))

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [5, 6, 7, 8],
       [1, 2, 3, 4]])

In [116]:
# Horizontal stacking (appending)
# Both blocks have two rows; hstack joins them side by side, column-wise
h1 = np.ones(shape=(2, 4))
h2 = np.zeros(shape=(2, 2))

np.hstack([h1, h2])

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

In [117]:
# Several blocks can be appended in one call; all must have the same number of rows
np.hstack((h1,h2,h1,h2))

array([[1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0.]])

## Misc
### Load data from file

In [122]:

# Load comma-separated numbers from data.txt; the values are read as floats by default
filedata = np.genfromtxt('data.txt', delimiter=',')
# End the cell with the variable so the loaded array is actually displayed
filedata


array([[  1.,  13.,  21.,  11., 196.,  75.,   4.,   3.,  34.,   6.,   7.,
          8.,   0.,   1.,   2.,   3.,   4.,   5.],
       [  3.,  42.,  12.,  33., 766.,  75.,   4.,  55.,   6.,   4.,   3.,
          4.,   5.,   6.,   7.,   0.,  11.,  12.],
       [  1.,  22.,  33.,  11., 999.,  11.,   2.,   1.,  78.,   0.,   1.,
          2.,   9.,   8.,   7.,   1.,  76.,  88.]])

In [123]:
# astype returns a converted copy; the result is only displayed here, not stored
filedata.astype(np.int32)

array([[  1,  13,  21,  11, 196,  75,   4,   3,  34,   6,   7,   8,   0,
          1,   2,   3,   4,   5],
       [  3,  42,  12,  33, 766,  75,   4,  55,   6,   4,   3,   4,   5,
          6,   7,   0,  11,  12],
       [  1,  22,  33,  11, 999,  11,   2,   1,  78,   0,   1,   2,   9,
          8,   7,   1,  76,  88]])

In [124]:
# filedata still holds floats: calling astype without assigning does not change it
filedata

array([[  1.,  13.,  21.,  11., 196.,  75.,   4.,   3.,  34.,   6.,   7.,
          8.,   0.,   1.,   2.,   3.,   4.,   5.],
       [  3.,  42.,  12.,  33., 766.,  75.,   4.,  55.,   6.,   4.,   3.,
          4.,   5.,   6.,   7.,   0.,  11.,  12.],
       [  1.,  22.,  33.,  11., 999.,  11.,   2.,   1.,  78.,   0.,   1.,
          2.,   9.,   8.,   7.,   1.,  76.,  88.]])

In [125]:
# Rebind filedata to the int32 copy so the conversion is kept
filedata = filedata.astype(np.int32)
filedata

array([[  1,  13,  21,  11, 196,  75,   4,   3,  34,   6,   7,   8,   0,
          1,   2,   3,   4,   5],
       [  3,  42,  12,  33, 766,  75,   4,  55,   6,   4,   3,   4,   5,
          6,   7,   0,  11,  12],
       [  1,  22,  33,  11, 999,  11,   2,   1,  78,   0,   1,   2,   9,
          8,   7,   1,  76,  88]])

### Boolean masking and advanced indexing

In [126]:
# Element-wise comparison: gives a boolean mask with the same shape as filedata
filedata > 50

array([[False, False, False, False,  True,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [127]:
# get the values that meet the given condition
filedata[filedata > 50]

array([196,  75, 766,  75,  55, 999,  78,  76,  88])

In [128]:
# NumPy arrays can be indexed with a list of positions; here positions 1, 3 and 7
a = np.arange(1, 9)
a[[1, 3, 7]]

array([2, 4, 8])

In [129]:
# If any value in the col meets the condition
np.any(filedata > 50, axis = 0)

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [130]:
# If all value in the col meets the condition
np.all(filedata > 50, axis = 0)

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False])

In [131]:
# Combine two masks element-wise with &: True where the value is above 50 and below 100
((filedata > 50) & (filedata < 100))

array([[False, False, False, False, False,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [132]:
# ~ negates the mask: True where the value is NOT strictly between 50 and 100
(~((filedata > 50) & (filedata < 100)))

array([[ True,  True,  True,  True,  True, False,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True, False,  True, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True, False, False]])

In [133]:
# 6x5 grid holding the integers 1..30 row by row, used for the indexing exercises
a = np.arange(1, 31).reshape(6, 5)
a

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25],
       [26, 27, 28, 29, 30]])

In [134]:
# Rows 2-3 and columns 0-1 (slice stop indices are exclusive)
a[2:4,0:2]

array([[11, 12],
       [16, 17]])

In [135]:
# fetch 2, 8, 14, 20
a[[0,1,2,3], [1,2,3,4]]

array([ 2,  8, 14, 20])

In [136]:
# fetch 4,5, 24, 25, 29,30
a[[0,4,5], 3:]

array([[ 4,  5],
       [24, 25],
       [29, 30]])