In [1]:
import numpy as np

## What is Numpy ?

NumPy is a Python package for scientific computing. It provides a fast N-dimensional array type and makes element-wise (vectorized) operations convenient.

## Why Numpy is more powerful than other packages?
**Efficiency**: 
NumPy is implemented in C and Fortran, which makes it highly efficient for numerical operations. The underlying array operations are performed in compiled code, resulting in faster execution compared to pure Python implementations or some other numerical computing libraries.

**Multidimensional Arrays**: NumPy provides a powerful array object called ndarray that supports multidimensional arrays and matrices. 

**Broadcasting**: NumPy's broadcasting feature allows for implicit element-wise operations on arrays with different shapes and sizes. This simplifies code and eliminates the need for explicit loops, making it more convenient for users.

**Vectorization**: NumPy encourages vectorized operations, where mathematical operations are performed on entire arrays rather than individual elements. This approach is much faster than using traditional Python loops.

#### Initialization of a Numpy array

In [3]:
# Two equivalent ways to build an ndarray from a Python list:
# pass the list literal directly, or pass a variable that holds the list.
varList = [1, 2, 3, 4, 5]
arr1 = np.array([1, 2, 3, 4, 5])
arr2 = np.array(varList)
print("direct: ", arr1)
print("thru List Variable: ", arr2)

direct:  [1 2 3 4 5]
thru List Variable:  [1 2 3 4 5]


In [4]:
type(arr1)

numpy.ndarray

In [5]:
type(arr2)

numpy.ndarray

In [6]:
# A list of equal-length lists becomes a 2D array: each inner list is one row.
arr2d = np.array(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
    ]
)
arr2d

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [7]:
# init using python range
arr3 = np.arange(5)
arr3

array([0, 1, 2, 3, 4])

In [8]:
# arange also accepts a fractional step, in which case the result is a float array.
# Values begin at `start` and advance by `step`; `stop` itself is not included
# (the last value generated here is 9.5).
arr4 = np.arange(start=2,stop=10,step=1.5)
arr4

array([2. , 3.5, 5. , 6.5, 8. , 9.5])

In [9]:
# creating a zeros array
arr5 = np.zeros(3)
arr5

array([0., 0., 0.])

In [10]:
# In above zeros array we are seeing float point values by default, but we can change them into int also
arr5 = np.zeros(3, dtype='int')
arr5

array([0, 0, 0])

In [11]:
# creating ones array
arr6 = np.ones(3, dtype='int')
arr6

array([1, 1, 1])

In [12]:
# creating a 2d zeros array
arr7 = np.zeros([3,4], dtype='int')
arr7

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

In [13]:
# linspace: given a start and a stop point, generate `num` evenly spaced values
# (both endpoints appear in the result shown below).
# Because dtype='int' is requested here, the fractional points 2.5 and 7.5 show up
# as 2 and 7 in the output, so the integer result is no longer exactly evenly spaced.
np.linspace(start=0,stop=10,num=5, dtype='int')

array([ 0,  2,  5,  7, 10])

In [14]:
np.linspace(0,100,5,dtype='int')

array([  0,  25,  50,  75, 100])

In [15]:
# creating a diagonal matrix: the values in `val` are placed on the main diagonal
# and every other entry is 0
val = [1,2,3]
np.diag(val)

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

In [16]:
# creating an identity matrix (1 on the main diagonal, 0 elsewhere) with integer entries
np.identity(3,dtype='int')

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])

#### Operation + Vectorization

In [17]:
arr = np.array([1,2,3,4]) # initializing an array in numpy
arr

array([1, 2, 3, 4])

In [18]:
arr*2

array([2, 4, 6, 8])

In [19]:
arr**2

array([ 1,  4,  9, 16])

#### Vectorized operations run faster than an equivalent Python loop

In [20]:
%timeit [i**2 for i in arr]

494 ns ± 1.5 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [21]:
%timeit arr**2

238 ns ± 0.142 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


#### Properties

In [22]:
# number of dimensions of the array
arr2 = np.array([[1,2,3,4],[5,6,7,8]])
arr2.ndim # returns the dimension of the array

2

In [23]:
# finding shape of the array
arr2.shape # returns the shape of the matrix in a tuple

(2, 4)

In [24]:
# finding the data type (dtype) of the array's elements
arrNum = np.arange(10)
arrNum.dtype

dtype('int64')

In [25]:
arrFloat = np.array([1.21,234.34,453.1,11.433])
arrFloat.dtype

dtype('float64')

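* A NumPy array is homogeneous: every element shares a single `dtype` (for example `int64` or `float64`). Numeric dtypes are the most common, but boolean, string and `object` dtypes are supported as well.
* Heterogeneous input is not stored as-is: NumPy converts all elements to one common dtype (e.g. int + float becomes float, numbers + strings become strings) or falls back to the generic `object` dtype, which gives up most of the speed benefit.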

#### Generating random numbers (uniform, normal, integers, and sampling from an array)

In [26]:
# generates random values from a uniform distribution over the interval 0 and 1
np.random.rand(2,3) # 2x3 matrix over the interval of 0 and 1 is generated

array([[0.55662899, 0.87803754, 0.34921193],
       [0.6409749 , 0.62019671, 0.98273802]])

In [27]:
# generating random values from a normal distribution of mean=0 and standard deviation=1
np.random.randn(2,3)

array([[-0.27902443,  0.58457585, -1.18505028],
       [-0.02850682, -0.64148375,  0.83394818]])

In [28]:
# generating random integers between given values and the size of the required matrix
np.random.randint(0,10,(2,3))

array([[9, 6, 2],
       [0, 6, 1]])

In [29]:
# generating random floats in the half-open interval [0.0, 1.0)
np.random.random_sample((2,3))

array([[0.77149877, 0.02037318, 0.56902678],
       [0.66350273, 0.74067256, 0.6513505 ]])

In [30]:
# generating random numbers from the given numpy array dataset
npDataset = np.arange(10)
print("npDataset: ", npDataset)
print(np.random.choice(npDataset,(2,3)))

npDataset:  [0 1 2 3 4 5 6 7 8 9]
[[1 2 3]
 [9 1 0]]


#### Generating random samples from a uniform distribution

In [31]:
np.random.uniform(0,1,size=10)

array([0.56859629, 0.21620332, 0.24128988, 0.59662738, 0.16606304,
       0.15825219, 0.66850065, 0.3718572 , 0.51245073, 0.82518857])

#### Generating random samples from a normal distribution

In [32]:
np.random.normal(0,1,size=10)

array([-0.64992158, -1.25701072,  0.9165948 ,  1.79793756,  0.99598813,
        0.21107302, -0.05864414, -0.92612769,  1.32148288,  0.74205489])

#### Accessing Elements

In [33]:
# Accessing single element or Indexing
arr1 = np.array([1,2,3,4,5])
arr1[1] # works similar to general array indexing

2

In [34]:
# Negative indexing works like Python list indexing: -1 is the last element.
# NOTE: this cell indexes `arr` (the array [1, 2, 3, 4] from the "Operation + Vectorization"
# section), not `arr1` from the previous cell, which is why the result is 4.
arr[-1]

4

In [35]:
# Accessing 2D array elements
arr2d = np.array([[1,2,3,4,5],[6,7,8,9,0]])
arr2d[1,3] # arr2d[row, col]: row index 1 (second row), column index 3 (fourth column)

9

#### Sequencing or Slicing

In [36]:
# Similar to slicing in normal python list, numpy gives the option to slice its multi-dimensional arrays
arr1

array([1, 2, 3, 4, 5])

In [37]:
arr1[3:] # from position 3 to the end

array([4, 5])

In [38]:
arr1[:3] # upto position 3, but pos 3 not included

array([1, 2, 3])

In [39]:
arr2d # the 2D array that the next cells slice

array([[1, 2, 3, 4, 5],
       [6, 7, 8, 9, 0]])

In [40]:
arr2d[:1,:] # rows up to (not including) index 1, all columns -> only the first row, still 2D

array([[1, 2, 3, 4, 5]])

In [41]:
arr2d[:,:2] # all rows, columns up to (not including) index 2 -> the first two columns

array([[1, 2],
       [6, 7]])

In [42]:
arr2d[:,2:3]

array([[3],
       [8]])

#### Masking

In [43]:
# Boolean masking: comparing the array with a scalar yields a same-shaped True/False array,
# and indexing with it keeps only the elements at True positions (returned as a 1D array).
arr = np.array(
    [
        [23, 45, 56, 34, 65, 23],
        [56, 3, 89, 67, 48, 89],
    ]
)
arr[arr > 50]

array([56, 65, 56, 89, 67, 89])

In [44]:
arr[(arr>50)&(arr<80)]

array([56, 65, 56, 67])

#### Element-wise Operation

In [45]:
a = np.array([1,2,3,4,5])
b = np.array([1,1,2,3,2])

In [46]:
# Element-wise addition
a + b

array([2, 3, 5, 7, 7])

In [47]:
# Element-wise subtraction
a - b

array([0, 1, 1, 1, 3])

In [48]:
# Element-wise multiplication
a * b

array([ 1,  2,  6, 12, 10])

In [49]:
# Element-wise division
a / b

array([1.        , 2.        , 1.5       , 1.33333333, 2.5       ])

#### Element-wise Comparison

In [50]:
a == b

array([ True, False, False, False, False])

In [51]:
a >= b

array([ True,  True,  True,  True,  True])

In [52]:
a <= b

array([ True, False, False, False, False])

In [53]:
np.array_equal(a,b)

False

In [54]:
c = np.array([1,2,3,4])
d = np.array([1,2,3,4])
np.array_equal(c,d)

True

#### Matrix Multiplication

In [55]:
# Matrix product of a (2, 1) column and a (1, 2) row: the inner sizes (1 and 1) match,
# and the result takes the outer sizes, giving a 2x2 matrix.
mat1 = np.array([[2],[1]]) # 2x1 matrix
mat2 = np.array([[2,4]]) # 1x2 matrix
np.matmul(mat1,mat2)

array([[4, 8],
       [2, 4]])

In [56]:
# same can be achieved using @ operator
mat1 @ mat2

array([[4, 8],
       [2, 4]])

In [57]:
np.dot(mat1,mat2)

array([[4, 8],
       [2, 4]])

In [58]:
# np.dot with a scalar and an array multiplies every element of the array by the scalar
# NOTE: this cell rebinds `a` and `b`, which earlier cells used as the two arrays
# in the element-wise operation examples.
a = 2
b = np.array([1,2,3])
np.dot(a,b)

array([2, 4, 6])

In [59]:
# np.dot performs simple multiplication if both inputs are scalars
np.dot(2,3)

6

#### How AXIS works in Numpy ?

 - A 1D array has a single axis, and that axis is numbered 0.
 - A 2D array has two axes: axis 0 runs down the rows (the vertical, y direction) and axis 1 runs across the columns (the horizontal, x direction).
 - A 3D array adds a new leading axis: axis 0 is the depth (z direction), axis 1 the rows (y direction) and axis 2 the columns (x direction).
 - Passing `axis=...` to a NumPy function selects the direction along which the operation (sum, sort, split, concatenate, ...) is applied.

In [60]:
from IPython.display import Image
Image(url="https://fgnt.github.io/python_crashkurs_doc/_images/numpy_array_t.png")

#### BroadCasting

Compare the two shapes dimension by dimension, starting from the rightmost one. A pair of sizes is compatible when:
 - the two sizes are the same, OR
 - one of the two sizes is 1

**Rule 1**: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.

**Rule 2**: If the shapes of the two arrays do not match in some dimension, the array whose size is 1 in that dimension is stretched to match the other shape, i.e. it is broadcast.

In [61]:
Image(url="https://jakevdp.github.io/PythonDataScienceHandbook/figures/02.05-broadcasting.png")

In [62]:
arr1 = np.array([[1,2,3,4],
               [2,3,4,5]])
arr2 = np.array([[1,2,3,4],
               [2,3,4,5],
               [3,4,5,6],
               [4,5,6,7]])

In [63]:
print(arr1.shape)
print(arr2.shape)

(2, 4)
(4, 4)


**Rule 3**: If in any dimension the sizes disagree and neither of them is equal to 1, an error is raised.

In [64]:
# Shapes (2, 4) and (4, 4) disagree in the first dimension and neither size is 1,
# so the addition cannot be broadcast and NumPy raises ValueError.
# The error is caught and printed so that "Restart & Run All" does not stop at this cell.
try:
    arr1 + arr2
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (2,4) (4,4) 

#### Array Manipulation

**Reshaping An Array**

In [65]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [66]:
arr.reshape(2,5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [67]:
arr.reshape(5,2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [68]:
arr.reshape(2,-1)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [69]:
arr.reshape(-1,2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

**Transposing an Array**

In [70]:
arr = np.arange(10).reshape(5,2)
arr

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [71]:
arr.T

array([[0, 2, 4, 6, 8],
       [1, 3, 5, 7, 9]])

#### View vs Copy

**View**
 - A view is a new array that refers to the same data as the original array but with a different shape or strides.
Modifying the view will also modify the original array. Memory Efficient.

**Copy**
 - A copy is a new array that is a completely independent entity from the original array.
Modifying the copy does not affect the original array, and vice versa. Memory overhead due to data redundancy.

In [72]:
arr1 = np.array([[1,2,3,4],
               [2,3,4,5]])

In [73]:
copy = arr1.copy()
copy

array([[1, 2, 3, 4],
       [2, 3, 4, 5]])

In [74]:
copy[1]=10
copy

array([[ 1,  2,  3,  4],
       [10, 10, 10, 10]])

In [75]:
arr1 # Original array is unchanged

array([[1, 2, 3, 4],
       [2, 3, 4, 5]])

In [76]:
arr2 = np.array([[1,2,3,4],
               [2,3,4,5],
               [3,4,5,6],
               [4,5,6,7]])

In [77]:
view = arr2.view()
view

array([[1, 2, 3, 4],
       [2, 3, 4, 5],
       [3, 4, 5, 6],
       [4, 5, 6, 7]])

In [78]:
view[1,0]=10
view

array([[ 1,  2,  3,  4],
       [10,  3,  4,  5],
       [ 3,  4,  5,  6],
       [ 4,  5,  6,  7]])

In [79]:
arr2 #Original array is also changed

array([[ 1,  2,  3,  4],
       [10,  3,  4,  5],
       [ 3,  4,  5,  6],
       [ 4,  5,  6,  7]])

In [80]:
arr1 = np.array([[1,2,3,4],
               [2,3,4,5]])
arr2 = np.array([[1,2,3,4],
               [2,3,4,5],
               [3,4,5,6],
               [4,5,6,7]])

In [81]:
arr1

array([[1, 2, 3, 4],
       [2, 3, 4, 5]])

In [82]:
flat = arr1.flatten()
flat

array([1, 2, 3, 4, 2, 3, 4, 5])

In [83]:
flat[1]=10
flat

array([ 1, 10,  3,  4,  2,  3,  4,  5])

In [84]:
arr1

array([[1, 2, 3, 4],
       [2, 3, 4, 5]])

**`flatten` returns a copy, hence the values in the original array are not changed**

In [85]:
arr2

array([[1, 2, 3, 4],
       [2, 3, 4, 5],
       [3, 4, 5, 6],
       [4, 5, 6, 7]])

In [86]:
rav = arr2.ravel()
rav

array([1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7])

In [87]:
rav[1]=10
rav

array([ 1, 10,  3,  4,  2,  3,  4,  5,  3,  4,  5,  6,  4,  5,  6,  7])

In [88]:
arr2

array([[ 1, 10,  3,  4],
       [ 2,  3,  4,  5],
       [ 3,  4,  5,  6],
       [ 4,  5,  6,  7]])

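**`ravel` returns a view whenever possible (as it does here), hence the change shows up in the original array; when a view cannot be made, it returns a copy instead**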

#### Sorting

In [89]:
arr = np.array([5,3,4,6,9,8,7,2])
arr

array([5, 3, 4, 6, 9, 8, 7, 2])

In [90]:
arr.sort() # this makes changes to original
arr

array([2, 3, 4, 5, 6, 7, 8, 9])

In [91]:
arr = np.array([5,3,4,6,9,8,7,2]).reshape(4,2)
arr

array([[5, 3],
       [4, 6],
       [9, 8],
       [7, 2]])

In [92]:
np.sort(arr) # returns a sorted copy (the original is untouched); with no axis given, each row is sorted along its columns

array([[3, 5],
       [4, 6],
       [8, 9],
       [2, 7]])

In [93]:
np.sort(arr,axis=1) #similar to above

array([[3, 5],
       [4, 6],
       [8, 9],
       [2, 7]])

In [94]:
np.sort(arr,axis=0) # axis=0: each column is sorted independently, from top to bottom

array([[4, 2],
       [5, 3],
       [7, 6],
       [9, 8]])

#### Argument based Functions

In [95]:
arr = np.array([5,3,4,6,9,8,7,2])
arr

array([5, 3, 4, 6, 9, 8, 7, 2])

In [96]:
# argsort returns the indices that would put the array in ascending order
# (e.g. the first entry, 7, is the position of the smallest value, 2)
np.argsort(arr)

array([7, 1, 2, 0, 3, 6, 5, 4])

In [97]:
arr = np.array([5,3,4,6,0,9,8,7,2])
arr

array([5, 3, 4, 6, 0, 9, 8, 7, 2])

In [98]:
# returns indices of non-zero element
np.argwhere(arr)

array([[0],
       [1],
       [2],
       [3],
       [5],
       [6],
       [7],
       [8]])

In [99]:
# returns index of the minimum values in an array
np.argmin(arr)

4

In [100]:
# returns index of the maximum value in an array
np.argmax(arr)

5

#### Change the datatype of an array

In [101]:
arr = np.arange(10,dtype='float')
arr

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [102]:
arr.astype(int)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

#### Aggregate Functions

In [103]:
arr = np.arange(5)
arr

array([0, 1, 2, 3, 4])

In [104]:
# Sum of all the elements of an array
np.sum(arr)

10

In [105]:
arr = np.arange(12).reshape(3,4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [106]:
# sum of elements along a given axis
np.sum(arr,axis=1) # axis=1: add across the columns, giving one total per row

array([ 6, 22, 38])

In [107]:
np.sum(arr,axis=0)

array([12, 15, 18, 21])

In [108]:
# mean of all elements
np.mean(arr)

5.5

In [109]:
# mean of elements along an axis (axis=1 gives one mean per row)
np.mean(arr,axis=1)

array([1.5, 5.5, 9.5])

In [110]:
# find the minimum value in the array
np.min(arr)

0

In [111]:
# find maximum value in the array
np.max(arr)

11

#### Logical Functions

In [112]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [113]:
arr1 = np.array([[3,4,6,4],
                  [8,3,6,2],
                  [7,4,9,3]])
arr1

array([[3, 4, 6, 4],
       [8, 3, 6, 2],
       [7, 4, 9, 3]])

In [114]:
# Returns True if any of the corresponding elements in array satisfy the condition
np.any(arr<arr1)

True

In [115]:
np.all(arr<arr1)

False

In [116]:
# if else condition is vectorized here, if the value is less than 4 then -1 else 1 should be replaced.
np.where(arr1<4,-1,1) 

array([[-1,  1,  1,  1],
       [ 1, -1,  1, -1],
       [ 1,  1,  1, -1]])

#### Numpy Split

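Example, in a 2D array:
 - if `axis=1` is given, the array is cut at column indices, so each piece keeps all the rows and a subset of the columns
 - if `axis=0` is given, the array is cut at row indices, so each piece keeps all the columns and a subset of the rows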

In [117]:
arr = np.array([[1, 2, 3, 4],
                   [5, 6, 7, 8],
                   [9, 10, 11, 12]])
arr

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [118]:
# Split along axis=1 (columns) at column index 4. `arr` has only 4 columns (indices 0-3),
# so everything lands in the first piece and the second piece is an empty (3, 0) array.
np.split(arr,indices_or_sections=[4],axis=1)

[array([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]]),
 array([], shape=(3, 0), dtype=int64)]

In [119]:
arr = np.arange(25).reshape(5,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [120]:
# similar to slicing: columns 0-1 form the first array, column 2 (index 3 excluded) the second,
# and the remaining columns 3-4 the third
np.split(arr,[2,3],axis=1)

[array([[ 0,  1],
        [ 5,  6],
        [10, 11],
        [15, 16],
        [20, 21]]),
 array([[ 2],
        [ 7],
        [12],
        [17],
        [22]]),
 array([[ 3,  4],
        [ 8,  9],
        [13, 14],
        [18, 19],
        [23, 24]])]

In [121]:
np.split(arr,[1,4],axis=0)

[array([[0, 1, 2, 3, 4]]),
 array([[ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]]),
 array([[20, 21, 22, 23, 24]])]

#### Horizontal & Vertical Split functions

In [122]:
arr = np.arange(25).reshape(5,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [123]:
# hsplit cuts the array at the given column indices (same result as np.split(..., axis=1) above);
# vsplit does the same at row indices (same result as np.split(..., axis=0))
np.hsplit(arr,[2,3])

[array([[ 0,  1],
        [ 5,  6],
        [10, 11],
        [15, 16],
        [20, 21]]),
 array([[ 2],
        [ 7],
        [12],
        [17],
        [22]]),
 array([[ 3,  4],
        [ 8,  9],
        [13, 14],
        [18, 19],
        [23, 24]])]

In [124]:
np.vsplit(arr,[1,4])

[array([[0, 1, 2, 3, 4]]),
 array([[ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]]),
 array([[20, 21, 22, 23, 24]])]

#### Stacking in Numpy

In [125]:
arr = np.arange(4)
arr

array([0, 1, 2, 3])

 - Vertical Stacking

In [126]:
np.vstack((arr,arr))

array([[0, 1, 2, 3],
       [0, 1, 2, 3]])

 - Horizontal Stacking

In [127]:
np.hstack((arr,arr))

array([0, 1, 2, 3, 0, 1, 2, 3])

In [128]:
arr = np.arange(4).reshape(4,1)
arr

array([[0],
       [1],
       [2],
       [3]])

In [129]:
np.hstack((arr,arr,arr))

array([[0, 0, 0],
       [1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]])

#### Concatenation

 - Concatenation joins whole arrays along an existing axis; it generalizes vertical/horizontal stacking by letting you choose the axis

In [130]:
arr = np.arange(8).reshape(2,4)
arr

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [131]:
arr.reshape(1,-1)

array([[0, 1, 2, 3, 4, 5, 6, 7]])

In [132]:
arr

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [133]:
np.concatenate([arr,arr]) # axis=0 is the default, so the rows of the second array are appended below the first (like vertical stacking)

array([[0, 1, 2, 3],
       [4, 5, 6, 7],
       [0, 1, 2, 3],
       [4, 5, 6, 7]])

In [134]:
np.concatenate([arr,arr],axis=1)

array([[0, 1, 2, 3, 0, 1, 2, 3],
       [4, 5, 6, 7, 4, 5, 6, 7]])