# What is NumPy?
Numpy is a python scientific computing libary that provides high-performance multidimensional array object, and tools for working with these arrays.

In [1]:
#Importing numpy
import numpy as np

In [2]:
#Creating numpy array
my_numpy_array=np.array([10,20,30])
print(my_numpy_array)
#There are various efficient and convinent ways for creating numpy array we will see all these things later

[10 20 30]


# NumPy vs Python List
So, you may think we already have datastructure(list) to store multidimensional array,why do we need NumPy?
Well, the reason is computation speed and memory usuage. NumPy’s main object is the homogeneous(usually numbers) multidimensional array but Python’s lists are general-purpose containers which supports heterogenous items and offers various operations like insertion, deletion, appending, and concatenation etc and all these features comes with the cost of high memory and computation usage. And another very important thing is Numpy support vectorization but python list does not. We will see later what is vectorization.

In [3]:
#Lets see the memory difference
import sys
my_list=[0 for i in range(20000000)] 
my_numpy_array=np.zeros(2000000) 
print("List memory consumption:",sys.getsizeof(my_list)/1000000,'MB')
print("Numpy memory consumption:",sys.getsizeof(my_numpy_array)/1000000,'MB')

List memory consumption: 165.281104 MB
Numpy memory consumption: 16.000096 MB


In [4]:
#Lets see the speed difference
import time
first_list=list(range(20000000))
second_list=list(range(20000000))
start=time.time()
final_list=[first_list[i]+second_list[i] for i in range(len(first_list))]
end=time.time()
print("Time taken by python list to sum two vectors: ",end-start,'s',sep='')
first_numpy_array=np.array(range(20000000))
second_numpy_array=np.array(range(20000000))
start=time.time()
# '+' does elementwise addition this is an example of vectorization we will see this in detail later
final_numpy_array=first_numpy_array+second_numpy_array 
end=time.time()
print("Time taken by numpy to sum two vectors: ",end-start,'s',sep='')

Time taken by python list to sum two vectors: 6.0249621868133545s
Time taken by numpy to sum two vectors: 0.1078028678894043s


# What is Vectorization?
Vectorization is an ability to express operations as occurring on entire arrays rather than their individual elements.
It is practice of replacing explicit loops with array expressions. In general, vectorized array operations will often be one or two (or more) orders of magnitude faster than their pure Python equivalents, with the biggest impact seen in numerical computations. When looping over an array or any data structure in Python, there’s a lot of overhead involved. Vectorized operations in NumPy delegate the looping internally to highly optimized C and Fortran functions, making for cleaner and faster Python code.

In [5]:
#Lets see the speed difference between vectorized code and non-vectorized code
#Finding sum of squares of numbers of array
my_numpy_array=np.random.rand(20000000)#Return a vector of numbers chosen uniformly randomly from [0,1)
#vectorized code
start=time.time()
#Remember dot product of two same vector return sum of squares of numbers of vector
#or you can also do total=np.linalg.norm(my_numpy_array)**2 
total=np.dot(my_numpy_array,my_numpy_array)
end=time.time()
print('Time taken by vectorized code: ',end-start,'s',sep='')
#non-vectorized code
total=0
start=time.time()
for i in range(len(my_numpy_array)):
    total+=my_numpy_array[i]**2
end=time.time()
print('Time taken by non-vectorized code: ',end-start,'s',sep='')

Time taken by vectorized code: 0.01560354232788086s
Time taken by non-vectorized code: 24.189085483551025s


In [6]:
#Adding scaler to vector
my_numpy_array=np.array(range(20000000))
#vectorized code
start=time.time()
final_numpy_array=my_numpy_array+15
end=time.time()
print('Time taken by vectorized code: ',end-start,'s',sep='')
#non-vectorized code
my_numpy_array=np.array(range(20000000))
start=time.time()
for i in range(len(my_numpy_array)):
    my_numpy_array[i]+=15
end=time.time()
print('Time taken by non-vectorized code: ',end-start,'s',sep='')

Time taken by vectorized code: 0.09861969947814941s
Time taken by non-vectorized code: 12.780759811401367s


# Basic Numpy

In [7]:
#NumPy’s array class is called ndarray.
my_numpy_array=np.array([1,2,3])
print(type(my_numpy_array))

<class 'numpy.ndarray'>


### Attributes of ndarray

In [8]:
#Lets see some attributes of ndarray
my_numpy_array=np.random.rand(3,3)
print(my_numpy_array)
print('\nShape:',my_numpy_array.shape)
print('Number of dimenstion:',my_numpy_array.ndim)
print('Total number of elements:',my_numpy_array.size)
print('Type of element in array:',my_numpy_array.dtype)
print('Size of each element in array:',my_numpy_array.itemsize,'Bytes')

[[0.12954976 0.20200795 0.74385929]
 [0.99828691 0.94380366 0.32363152]
 [0.95671806 0.56907706 0.62578758]]

Shape: (3, 3)
Number of dimenstion: 2
Total number of elements: 9
Type of element in array: float64
Size of each element in array: 8 Bytes


### Array Creation

In [9]:
#creating from list
my_list=[1,2,3]
my_numpy_array=np.array(my_list)# or my_numpy_array=np.array([1,2,3])
print(my_numpy_array)

[1 2 3]


In [10]:
#Creating 2 dimension array
my_numpy_array=np.array([[1,2,3],[4,5,6],[7,8,9]],dtype='float64')#Sepcifing data type on creation
print(my_numpy_array)
print('\n Datatype:',my_numpy_array.dtype)

[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]

 Datatype: float64


In [11]:
#creating array with all zerso
my_numpy_array=np.zeros((3,3,3)) #Three dimension array of size 3*3*3
print(my_numpy_array)

[[[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]]


In [12]:
#creating array with all ones
my_numpy_array=np.ones((3,3,3)) #Three dimension array of size 3*3*3
print(my_numpy_array)

[[[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]]


In [13]:
#creating array with all five
my_numpy_array=np.ones((3,3,3))*5 #Three dimension array of size 3*3*3
print(my_numpy_array)

[[[5. 5. 5.]
  [5. 5. 5.]
  [5. 5. 5.]]

 [[5. 5. 5.]
  [5. 5. 5.]
  [5. 5. 5.]]

 [[5. 5. 5.]
  [5. 5. 5.]
  [5. 5. 5.]]]


In [14]:
#creating uninitilized array(if you need uninitilized array then this apporach will be faster than np.zeros and np.ones)
my_numpy_array=np.empty((2,2))
print(my_numpy_array)

[[9.31264501e-312 6.52020697e+252]
 [1.12038868e+219 2.31881454e-152]]


In [15]:
#creating random matrix
# it samples from the “standard normal” distribution.
my_numpy_array=np.random.randn(3,3)#Here notice instead of single tuple individual size are passed seperatly
print(my_numpy_array)

[[-0.0816964   0.96652373  0.83321813]
 [-1.55198112  0.48245812 -1.23229087]
 [-0.18656395 -0.30993326 -0.56221336]]


In [16]:
# it samples from uniform distribution over [0, 1).
my_numpy_array=np.random.rand(3,3)
print(my_numpy_array)

[[0.10658688 0.09987778 0.74388485]
 [0.24058399 0.63813776 0.18859228]
 [0.84414429 0.34878291 0.58766002]]


### Basic Operations

In [17]:
#Arithmetic operators on arrays are apply elementwise
first_array=np.arange(4*4).reshape((4,4)) #Reshape modify the shape of array we will see this later in detail
print('First array:\n',first_array)
second_array=np.arange(4*4,4*4+4*4).reshape((4,4))
print('\nSecond array:\n',second_array)
#Addition
print('\nElementwise Addition:\n',first_array+second_array)
#Subtraction
print('\nElementwise Subtractoin:\n',first_array-second_array)
#Multiplication
print('\nElementwise Multiplication:\n',first_array*second_array)
#Division
print('\nElementwise Division:\n',first_array/second_array)

#Matrix multiplication
print('\nMultiplied matrix:\n',first_array@second_array)#For this matrix need to be compatible for multiplication

First array:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]

Second array:
 [[16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]

Elementwise Addition:
 [[16 18 20 22]
 [24 26 28 30]
 [32 34 36 38]
 [40 42 44 46]]

Elementwise Subtractoin:
 [[-16 -16 -16 -16]
 [-16 -16 -16 -16]
 [-16 -16 -16 -16]
 [-16 -16 -16 -16]]

Elementwise Multiplication:
 [[  0  17  36  57]
 [ 80 105 132 161]
 [192 225 260 297]
 [336 377 420 465]]

Elementwise Division:
 [[0.         0.05882353 0.11111111 0.15789474]
 [0.2        0.23809524 0.27272727 0.30434783]
 [0.33333333 0.36       0.38461538 0.40740741]
 [0.42857143 0.44827586 0.46666667 0.48387097]]

Multiplied matrix:
 [[ 152  158  164  170]
 [ 504  526  548  570]
 [ 856  894  932  970]
 [1208 1262 1316 1370]]


### Indexing

In [18]:
#Here you can index in similary way as in list
my_numpy_array=np.random.rand(5)
print(my_numpy_array)
print('First item:',my_numpy_array[0])
print('Third item:',my_numpy_array[2])
print('Last item:',my_numpy_array[-1])
#in 1D array you and also slice and iterage in same as as in list
print('\nLoop:')
for i in my_numpy_array:
    print(i)
print('\nFirst three items:',my_numpy_array[0:3])

[0.42000881 0.22816928 0.78655212 0.98435557 0.55760378]
First item: 0.4200088079607067
Third item: 0.786552117205408
Last item: 0.5576037778125758

Loop:
0.4200088079607067
0.22816927940716636
0.786552117205408
0.9843555669916589
0.5576037778125758

First three items: [0.42000881 0.22816928 0.78655212]


In [19]:
# In Multidimensional arrays indices for each dimension is given
# For each dimension index range form 0 to size_of_dimension-1
my_numpy_array=np.arange(3*3).reshape((3,3))
print(my_numpy_array)
print('\nIndex [2,0]:',my_numpy_array[2,0])
print('Index [2,2]:',my_numpy_array[2,2])
#you can think indexing [a,b,c...] as ath item of first dimension which returns another array of dimension n-1
#in that array bth item of first dimension and so no

[[0 1 2]
 [3 4 5]
 [6 7 8]]

Index [2,0]: 6
Index [2,2]: 8


### Slicing

In [20]:
# you can slice ndarray in similary as in list
my_numpy_array=np.arange(3*3).reshape((3,3))
print(my_numpy_array)
# : represents all 
print('\nSlicing index [1,:]:\n',my_numpy_array[1,:])
print('\nSlicing index [:,2]:\n',my_numpy_array[:,2])
#This can be generalized to higher dimension
#One very important thing to note here is that this type of slicing only gives you the view of array not a new copy
#So changes made in this obtained array by slicing will be reflected in original array

[[0 1 2]
 [3 4 5]
 [6 7 8]]

Slicing index [1,:]:
 [3 4 5]

Slicing index [:,2]:
 [2 5 8]


### Shape Manipulation

In [21]:
#We can manuplate the shape of array using reshape() method this does not change the item and thier number
#New given shape must have same number of items as in original array
my_numpy_array=np.arange(5*5)
print(my_numpy_array.shape)
#Reshaping into (5,5)
my_numpy_array=my_numpy_array.reshape((5,5))
#.reshape((4,4)) is not allowed as it can only accomodate 16 items but original array has 25 items
print(my_numpy_array.shape)
#Reshaping into((-1,25)).Here size of -1 is infered automatically
my_numpy_array=my_numpy_array.reshape((-1,25))
print(my_numpy_array.shape)

(25,)
(5, 5)
(1, 25)


### Concatenation

In [22]:
# you can concatenate two or more arrays using np.concatenate 
first_array=np.random.randn(3,3)
second_array=np.random.randn(3,3)                             #First parameter is tuple containing arrays to be 
final_array=np.concatenate((first_array,second_array),axis=0) #concatenated and second parameter is axis alongwhich
print(final_array)                                            #arrays are concatenated

[[-0.01829333 -0.88038229 -1.1576798 ]
 [ 1.01126599 -0.31891456  1.34637763]
 [ 0.28154868 -0.40821971 -0.44949206]
 [-1.68022231 -1.98976211 -0.52008002]
 [ 0.75089959 -1.18605639 -0.58172088]
 [ 0.54867215  0.60287491 -1.07778287]]


### Deep Copy

In [23]:
first_array=np.arange(4*4).reshape((4,4))
second_array=first_array
# a is b check if a and b refers to same object.
if second_array is first_array: 
    print("Both refers to same object.\n")
second_array[0,0]=50
print(second_array)
print('\n')
print(first_array)
#Change is reflected in both array as they point to the same object
#This is also true for basic indexing
#So how to make copy of array?

Both refers to same object.

[[50  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


[[50  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [24]:
#np.copy() or ndarray.copy() makes deep copy
first_array=np.arange(4*4).reshape((4,4))
second_array=first_array.copy()
if second_array is not first_array:
    print("They refers to different objects.\n")
second_array[0,0]=50
print(second_array)
print('\n')
print(first_array)
#Notice change made in second_array is not reflected in first_array

They refers to different objects.

[[50  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [25]:
second_array[second_array>10]

array([50, 11, 12, 13, 14, 15])

### Broadcasting
The term broadcasting describes how numpy treats arrays with different shapes during arithmetic operations. Subject to certain constraints, the smaller array is “broadcast” across the larger array so that they have compatible shapes.

The first rule of broadcasting is that if all input arrays do not have the same number of dimensions, a “1” will be repeatedly prepended to the shapes of the smaller arrays until all the arrays have the same number of dimensions.


The second rule of broadcasting ensures that arrays with a size of 1 along a particular dimension act as if they had the size of the array with the largest shape along that dimension. The value of the array element is assumed to be the same along that dimension for the “broadcast” array.

After application of the broadcasting rules, the sizes of all arrays must match.

In [26]:
#simples example of broadcasting is scaler arthmatic
my_numpy_array=np.array([1,2,3])
my_numpy_array=my_numpy_array+[2] # or simply +2
print(my_numpy_array)

[3 4 5]


In [27]:
#Elementwies multiplication of matrix by vector
first_array=np.arange(4*4).reshape((4,4))
second_array=[1,2,3,4]
final_array=first_array*second_array
print('\nfirst_array:\n',first_array)
print('\nsecond_array:\n',second_array)
print('\nfinal_array:\n',final_array)


first_array:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]

second_array:
 [1, 2, 3, 4]

final_array:
 [[ 0  2  6 12]
 [ 4 10 18 28]
 [ 8 18 30 44]
 [12 26 42 60]]


### Advance Indexing
In addition to indexing by integers and slices,arrays can be indexed by arrays of integers and arrays of booleans. The returned array is copy of original unlike basic indexing it doesnot point original array.

In [28]:
#Indexing with Arrays of Indices
first_array=np.arange(4*4)
indexing_array=np.array([1,1,5,6,8])
final_array=first_array[indexing_array]
print('first_array:\n',first_array)
print('\nfinal_array:\n',final_array)

first_array:
 [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]

final_array:
 [1 1 5 6 8]


In [29]:
#When the array a is multidimensional, a single array of indices refers to the first dimension of array
first_array=np.arange(4*4).reshape((4,4))
indexing_array=np.array([1,3,1])
final_array=first_array[indexing_array]
print('first_array:\n',first_array)
print('\nfinal_array:\n',final_array)

first_array:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]

final_array:
 [[ 4  5  6  7]
 [12 13 14 15]
 [ 4  5  6  7]]


In [30]:
#We can also give indexes for more than one dimension. The arrays of indices for each dimension must have the same shape.
first_array=np.arange(4*4).reshape((4,4))
i=np.array([1,3,1]) # i and j must have same shape
j=np.array([1,0,3])
final_array=first_array[i,j]
print('first_array:\n',first_array)
print('\nfinal_array:\n',final_array)

first_array:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]

final_array:
 [ 5 12  7]


In [31]:
#Indexing with Boolean Arrays
#Collecting all items in array which is greater than 10
first_array=np.arange(4*4).reshape((4,4))
boolean_array=first_array>10
final_array=first_array[boolean_array]
print('first_array:\n',first_array)
print('\nboolean_array:\n',boolean_array)
print('\nfinal_array:\n',final_array)

first_array:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]

boolean_array:
 [[False False False False]
 [False False False False]
 [False False False  True]
 [ True  True  True  True]]

final_array:
 [11 12 13 14 15]
