# Numpy

- numpy is a popular linear algebra library for python

- all the libraries in the PyData ecosystem rely or depend on numpy

# Advantages of Numpy

- numpy is very fast as it has bindings with C libraries. Behind the scenes the code has been optimized to run in C. Numpy internally stores data in a contiguous block of memory, independent of other python built-in objects. Numpy's library of algorithms written in the C language can operate on this memory without any type checking or other overhead.

- numpy is very fast also due to VECTORIZATION. A vectorized function takes whole numpy arrays (or sequences that can be converted to arrays) as input and applies the operation to every element inside a pre-compiled C loop, instead of evaluating a python function on each element one at a time.

- vectorization performs mathematical functions for fast operation on entire arrays of data without writing or using loops.

# Important aspects of Numpy

- Arrays
- Vectors (1-D Arrays)
- Matrices (2-D Arrays)
- Broadcasting 
- Number Generation

# Numpy Arrays

- ndarray is an efficient multidimensional array which provides fast array-oriented arithmetic operations and flexible broadcasting abilities.
- Numpy arrays are of two types:
    - Vectors which are strictly 1-D arrays
    - matrices which are multi-dimensional arrays
    
# Advantages of Numpy Arrays

- memory efficient as compared to list
- expands to n-dimensional objects
- fast array-oriented arithmetic operations
- broadcasting abilities

In [1]:
import numpy as np

In [2]:
lst = [1, 2, 3]
arry = np.array([1, 2, 3])

In [3]:

print(type(lst))
print(type(arry))

<class 'list'>
<class 'numpy.ndarray'>


# Numpy Array vs List

In [4]:
arry = np.arange(1000)
lst = list(range(1000))

In [5]:
# Arithmetic Operation on List
%time for _ in range(1, len(lst)): [x * 2 for x in lst]

CPU times: user 41.9 ms, sys: 1.47 ms, total: 43.3 ms
Wall time: 42.5 ms


In [6]:
# Arithmetic Operation on Numpy Array
%time for _ in range(1, len(arry)): [arry * 2]

CPU times: user 2.15 ms, sys: 1.74 ms, total: 3.88 ms
Wall time: 2.72 ms


# Built-in Methods to create Numpy arrays from Objects

In [7]:
# array()
lst = [1, 2, 3]
lst

[1, 2, 3]

In [8]:
np.array(lst)

array([1, 2, 3])

In [9]:
lst_matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
lst_matrix

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [10]:
np.array(lst_matrix)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [11]:
lst_matrix = np.array([[[1, 2, 3],
                       [4, 5, 6],
                       [7, 8, 9]],
                      [[10, 11, 12],
                      [13, 14, 15],
                      [16, 17, 18]]])
lst_matrix

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [12]:
lst_matrix.shape

(2, 3, 3)

In [13]:
lst_matrix.ndim

3

In [14]:
lst_matrix.dtype

dtype('int64')

In [15]:
lst_matrix.size

18

In [16]:
type(lst_matrix)

numpy.ndarray

In [17]:
# arange() -> returns evenly spaced values within a given interval
# arange is an array-valued version of the built in python range function
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [18]:
np.arange(0, 11, 2)

array([ 0,  2,  4,  6,  8, 10])

In [19]:
np.arange(0, 11, 2).dtype

dtype('int64')

In [20]:
# zeros() and ones() -> return arrays of the given shape filled with zeros and ones respectively
np.zeros(3)

array([0., 0., 0.])

In [21]:
np.zeros(3).dtype

dtype('float64')

In [22]:
np.zeros(3).astype(int)

array([0, 0, 0])

In [23]:
np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [24]:
np.ones((5,5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [3]:
np.ones((3,3), 'bool')

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [25]:
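# empty() -> returns a new array of the given shape without initializing its entries.
# The contents are whatever happened to be in memory (arbitrary garbage values), not guaranteed zeros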
np.empty((1, 2))

array([[2.05833592e-312, 2.33419537e-312]])

In [26]:
# linspace -> returns evenly spaced numbers over a specified interval
np.linspace(0, 10, 3)

array([ 0.,  5., 10.])

In [27]:
np.linspace(0, 5, 20)

array([0.        , 0.26315789, 0.52631579, 0.78947368, 1.05263158,
       1.31578947, 1.57894737, 1.84210526, 2.10526316, 2.36842105,
       2.63157895, 2.89473684, 3.15789474, 3.42105263, 3.68421053,
       3.94736842, 4.21052632, 4.47368421, 4.73684211, 5.        ])

In [28]:
# eye -> returns identity matrix
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [2]:
# full() -> returns an array of given shape and fill_value

np.full((3,3), True)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

# Built-in Methods to create random Numpy arrays

In [29]:
import random

In [30]:
# rand() -> creates an array of the given shape and populates it with random samples
# from a uniform distribution over the half-open interval [0, 1)
np.random.rand(2)

array([0.90130992, 0.886221  ])

In [31]:
np.random.rand(5, 5)

array([[0.49941238, 0.16144026, 0.83680281, 0.5518223 , 0.20390286],
       [0.36810051, 0.66704004, 0.41758806, 0.23551496, 0.75881755],
       [0.80636721, 0.78896959, 0.10991262, 0.44357653, 0.00699897],
       [0.27361351, 0.34866424, 0.78356063, 0.9488563 , 0.27731903],
       [0.94337096, 0.98114695, 0.87968457, 0.69813424, 0.10762934]])

In [32]:
# randn() -> returns an array of given shape and dimension and populates it with random samples 
# from a standard normal distribution
np.random.randn(2)

array([-0.53908418, -0.15314952])

In [33]:
np.random.randn(5, 5)

array([[ 0.66615919, -1.26238073,  0.02559207, -0.7849717 ,  0.69144664],
       [ 0.51530637,  0.48505353, -1.5198697 , -0.46863399, -0.07007905],
       [-1.97407904,  1.00270919,  0.09224277,  0.39995101,  1.98647054],
       [ 0.37250273, -1.10063221,  0.48129448,  0.47922808, -0.56165205],
       [-0.92904689, -0.27133918,  1.39324024, -0.41815585,  0.68849359]])

In [34]:
# randint() -> returns random integers from low (inclusive) to high (exclusive)
np.random.randint(1, 100)

67

In [35]:
np.random.randint(1, 100, 10)

array([15, 47, 31, 81, 84, 85, 29, 81,  2, 85])

In [36]:
np.random.randint(1, 100, (2, 2))

array([[ 8, 53],
       [91, 76]])

# Array Attributes and Methods

In [9]:
arry = np.arange(25)
ranarr = np.random.randint(0, 50, 10)

In [38]:
arry

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [39]:
ranarr

array([ 9, 24,  5,  0, 33, 16, 32,  2, 35, 24])

In [40]:
# reshape() -> returns an array with a new shape
arry.reshape(5, 5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [41]:
ranarr.reshape(2, 5)

array([[ 9, 24,  5,  0, 33],
       [16, 32,  2, 35, 24]])

In [42]:
ranarr.reshape(5, 2)

array([[ 9, 24],
       [ 5,  0],
       [33, 16],
       [32,  2],
       [35, 24]])

In [7]:
ranarr.reshape(2, -1)

array([[17, 29, 42, 14, 13],
       [ 4, 15, 27, 34, 47]])

In [11]:
a = np.arange(10).reshape(2, -1)
b = np.repeat(1, 10).reshape(2, -1)

In [12]:
a

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [13]:
b

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [15]:
# vstack() -> stacks arrays vertically (row-wise)
np.vstack([a, b])

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [16]:
# hstack() -> stacks arrays horizontally (column-wise)
np.hstack([a, b])

array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

In [17]:
# concatenate() -> joins a sequence of arrays along an existing axis (axis=0 by default)
np.concatenate([a, b])

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [18]:
# intersect1d() -> find intersection between 2 arrays
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

In [19]:
np.intersect1d(a, b)

array([2, 4])

In [20]:
a = np.array([1, 2, 3, 4, 5])
b = np.array([5, 6, 7, 8, 9])

In [21]:
# np.setdiff1d() -> finds the set difference between 2 arrays
np.setdiff1d(a, b)

array([1, 2, 3, 4])

In [22]:
# np.set_printoptions() -> These options determine the way floating point numbers, arrays and
# other NumPy objects are displayed.
rand_arr = np.random.random((5, 3))
rand_arr

array([[0.60741354, 0.54022332, 0.28119599],
       [0.15994845, 0.23180773, 0.86184729],
       [0.8133138 , 0.73653844, 0.24390965],
       [0.91691072, 0.13335228, 0.83221809],
       [0.05046745, 0.57230781, 0.10384568]])

In [23]:
np.set_printoptions(precision = 3)

In [24]:
rand_arr

array([[0.607, 0.54 , 0.281],
       [0.16 , 0.232, 0.862],
       [0.813, 0.737, 0.244],
       [0.917, 0.133, 0.832],
       [0.05 , 0.572, 0.104]])

In [27]:
# np.genfromtxt() -> Load data from a text file, with missing values handled as specified.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype ='object')
iris[:3]

array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa']], dtype=object)

In [43]:
# max(), min(), argmax(), argmin()
ranarr.max()

35

In [44]:
ranarr.min()

0

In [45]:
ranarr.argmax()

8

In [46]:
ranarr.argmin()

3

In [47]:
# shape -> a tuple giving the size of the array along each dimension
# (metadata, i.e. data about the data)
arry.shape

(25,)

In [48]:
ranarr.shape

(10,)

In [49]:
arry.reshape(5, 5).shape

(5, 5)

In [50]:
# dtype -> the data type of the array's elements. It is a special object containing the information
# (metadata, i.e. data about the data) needed to interpret the array's memory as that type
arry.dtype

dtype('int64')

In [51]:
arry.astype('float').dtype

dtype('float64')

# Numpy Indexing and Selection

In [52]:
# indexing a 1-D array
arr = np.arange(0, 11)

In [53]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [54]:
arr[8]

8

In [55]:
arr[1:5]

array([1, 2, 3, 4])

In [56]:
arr[0:5]

array([0, 1, 2, 3, 4])

# Broadcasting
- numpy arrays differ from python lists because of their ability to broadcast
- we can broadcast a single value across a larger set of values

In [57]:
arr[0:5] = 100

In [58]:
arr

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [59]:
arr = np.arange(0, 11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [60]:
slice_of_arr = arr[0:6]
slice_of_arr

array([0, 1, 2, 3, 4, 5])

In [61]:
slice_of_arr[:] = 99
slice_of_arr

array([99, 99, 99, 99, 99, 99])

In [63]:
# the change will also occur in the original array because the data is not copied; the slice is
# a view of the original array
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [64]:
# to get a copy, we use copy() method

arr_copy = arr.copy()
arr_copy

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [66]:
# indexing a 2-D array

arr_2d = np.array([[5,10,15],[20,25,30],[35,40,45]])
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [67]:
arr_2d[1]

array([20, 25, 30])

In [70]:
# Format is arr_2d[row][col] or arr_2d[row,col]
arr_2d[1][0]

20

In [71]:
arr_2d[1, 0]

20

In [72]:
arr_2d[:2,1:]

array([[10, 15],
       [25, 30]])

In [73]:
arr_2d[2]

array([35, 40, 45])

In [74]:
arr_2d[2,:]

array([35, 40, 45])

# Conditional Selection

In [75]:
arr = np.arange(1, 11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [76]:
arr > 4

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [77]:
arr[arr > 4]

array([ 5,  6,  7,  8,  9, 10])

In [78]:
condition = arr > 4

In [79]:
arr[condition]

array([ 5,  6,  7,  8,  9, 10])

In [80]:
names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
data = np.random.randn(7, 4)

In [81]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [82]:
data

array([[-0.11593497,  0.58652696,  0.05006732,  0.61084445],
       [ 0.65435283, -0.3789361 ,  1.00418395, -0.41082464],
       [ 2.15127529, -0.40566031, -0.1543157 ,  0.85065924],
       [ 1.21177337, -1.05014845, -0.15819114, -0.45013114],
       [ 0.1814126 , -0.13664082,  0.24491298,  0.17595545],
       [-0.40154273,  0.73613057, -0.58119308,  1.15384588],
       [-0.23952117,  1.02867553,  0.29163872, -0.27291675]])

In [83]:
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [84]:
names[names == 'Bob']

array(['Bob', 'Bob'], dtype='<U4')

In [85]:
data[names == 'Bob']

array([[-0.11593497,  0.58652696,  0.05006732,  0.61084445],
       [ 1.21177337, -1.05014845, -0.15819114, -0.45013114]])

In [86]:
data[names == 'Bob', 2:]

array([[ 0.05006732,  0.61084445],
       [-0.15819114, -0.45013114]])

# Numpy Operations

- numpy arrays enable batch operations on data without writing loops. This is called vectorization.
- numpy arrays provide advanced mathematical operations which execute more efficiently than built-in python functions.

In [87]:
arr = np.arange(0, 10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [88]:
arr + arr

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [89]:
arr * arr

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

In [90]:
arr - arr

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [91]:
1/arr

  1/arr


array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ,
       0.2       , 0.16666667, 0.14285714, 0.125     , 0.11111111])

# Universal array functions

- universal array functions (ufuncs) are mathematical operations that are applied across the array in an element-by-element fashion, supporting array broadcasting and type casting.
- each of these functions is a vectorized wrapper around a function that operates on individual elements.

In [92]:
arr_neg = np.random.randn(5,5)
arr_neg

array([[ 0.29164248,  0.64903887,  0.02798285,  0.73928387,  1.51762867],
       [-0.71442704,  0.59672552, -1.80016032,  1.28431798, -0.03514873],
       [-0.17659382,  0.00275764, -0.60073731,  0.73584012,  2.49140358],
       [ 0.23859363, -0.67409582,  0.1374963 , -0.64531688, -1.03407603],
       [ 0.91596199, -1.49259918, -0.46409768,  0.07282181,  1.98932667]])

In [93]:
np.abs(arr_neg)

array([[0.29164248, 0.64903887, 0.02798285, 0.73928387, 1.51762867],
       [0.71442704, 0.59672552, 1.80016032, 1.28431798, 0.03514873],
       [0.17659382, 0.00275764, 0.60073731, 0.73584012, 2.49140358],
       [0.23859363, 0.67409582, 0.1374963 , 0.64531688, 1.03407603],
       [0.91596199, 1.49259918, 0.46409768, 0.07282181, 1.98932667]])

In [94]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [95]:
arr_sq = np.arange(10)
arr_sq

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [96]:
np.square(arr_sq)

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

In [97]:
np.exp(arr_sq)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [99]:
np.log(arr)

  np.log(arr)


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436,
       1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458])

In [100]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [102]:
np.ceil(arr_neg)

array([[ 1.,  1.,  1.,  1.,  2.],
       [-0.,  1., -1.,  2., -0.],
       [-0.,  1., -0.,  1.,  3.],
       [ 1., -0.,  1., -0., -1.],
       [ 1., -1., -0.,  1.,  2.]])

In [103]:
np.floor(arr_neg)

array([[ 0.,  0.,  0.,  0.,  1.],
       [-1.,  0., -2.,  1., -1.],
       [-1.,  0., -1.,  0.,  2.],
       [ 0., -1.,  0., -1., -2.],
       [ 0., -2., -1.,  0.,  1.]])

In [104]:
np.add(arr_neg, np.abs(arr_neg))

array([[0.58328495, 1.29807774, 0.05596571, 1.47856773, 3.03525733],
       [0.        , 1.19345103, 0.        , 2.56863595, 0.        ],
       [0.        , 0.00551528, 0.        , 1.47168024, 4.98280717],
       [0.47718726, 0.        , 0.27499259, 0.        , 0.        ],
       [1.83192397, 0.        , 0.        , 0.14564362, 3.97865335]])

In [105]:
np.dot(arr, arr)

285

# Summary Statistics 

In [106]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [107]:
arr.sum()

45

In [109]:
arr = np.arange(0, 25).reshape(5, 5)

In [110]:
arr.sum(axis = 1)

array([ 10,  35,  60,  85, 110])

In [111]:
arr.sum(axis = 0)

array([50, 55, 60, 65, 70])

In [117]:
arr.mean(axis=1)

array([ 2.,  7., 12., 17., 22.])

In [118]:
arr.mean()

12.0

In [113]:
arr.max()

24

In [114]:
arr.min()

0

In [115]:
arr.var()

52.0

In [116]:
arr.std()

7.211102550927978