In [6]:
import numpy as np
import time # For use time prop
import sys # For knowing bytes

In [7]:
# ndarray creation
arr = np.array([1,2,3,4,5])
print(arr, type(arr))

[1 2 3 4 5] <class 'numpy.ndarray'>


In [8]:
size = 1_000_000

# Python list
# calculate time by doing square operation
py_list = list(range(size))
start = time.time()
sq_list = [x**2 for x in py_list]
end = time.time()
print(f"python list operation time = {end - start} seconds")

# Numpy array
# calculate time by doing square operation
np_arr = np.array(py_list)
start = time.time()
sq_list = np_arr ** 2
# By vectorization we do above statement, here no need to run loop. vectorization do the operation in the whole array
end = time.time()
print(f"numpy array operation time = {end - start} seconds")

python list operation time = 0.20096707344055176 seconds
numpy array operation time = 0.0724945068359375 seconds


In [9]:
# Check memory(bytes) occupied by python list and Numpy array

# Python list
print(f"Python list size = {sys.getsizeof(py_list) * len(py_list)} bytes")

# Numpy array
print(f"Numpy array size = {np_arr.nbytes} bytes") # In built proporties to get bytes
# print(f"Numpy array size = {sys.getsizeof(np_arr) * len(np_arr)} bytes")

# Python list store extra meta data so it occupy more memory. (Due to multiple data - Which data is store where all that things stored )

Python list size = 8000056000000 bytes
Numpy array size = 8000000 bytes


In [10]:
# Check Data type
arr2 = np.array([1,2,3,4,"Pritam"])
print(arr2, type(arr2), arr2.dtype, arr2.shape) 
# dtype is use for know datatype
# shape is use for no of elements present in array

# As np arrays are homogenous it convert all data into one type. here U21 (Unicode String)

['1' '2' '3' '4' 'Pritam'] <class 'numpy.ndarray'> <U21 (5,)


In [11]:
# 2D Array
arr2D = np.array([[1,2,3], [4,5,6], [7,8,9]])
# [[1,2,3], [4,5,6], [7,8,9,10]] ----> It shows error due to non-homogenous
print(arr2D, arr2D.shape)

[[1 2 3]
 [4 5 6]
 [7 8 9]] (3, 3)


In [12]:
# Array create using functions

arr3 = np.zeros((3,4), dtype = "int64") #It pre fill with zero
print(arr3)

arr4 = np.ones((3,4), dtype = "int64") #It pre fill with one
print(arr4)

arr5 = np.full((3,4), 100) #It pre fill with 100
print(arr5)

# Identity matrix
arr6 = np.eye((3)) 
print(arr6)

# Range
arr7 = np.arange(0, 11, 2) # (Start, end, step)
print(arr7)

# linspace ( Evenly Spaced array )
arr8 = np.linspace(1, 100, 4) # (Start, end, Total no of element in array)
print(arr8)

[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]
[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]
[[100 100 100 100]
 [100 100 100 100]
 [100 100 100 100]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[ 0  2  4  6  8 10]
[  1.  34.  67. 100.]


In [13]:
# proporties of numpy arrays

arr9 = np.array([[1,2,3], [6,7,8]])

# shape : dimensions m * n
print(arr9.shape)

# size : Total elements
print(arr9.size)

# dtype : datatype
print(arr9.dtype)

# ndim : No of dimension
print(arr9.ndim) # for 1D array 1, 2D array 2, 3D array 3

# typecasting
float_arr = arr.astype(np.float64)
print(float_arr, float_arr.dtype)

(2, 3)
6
int64
2
[1. 2. 3. 4. 5.] float64


In [14]:
# Operations on numpy array
# 1 : Reshaping
# 2 : indexing
# 3 : Fancy indexing
# 4 : Boolean indexing
# 5 : Slicing

In [16]:
# 1 : Reshaping
arr = np.array([[1,2,3], [4,5,6]])
print(arr, arr.shape)

reshaped_arr = arr.reshape((3, 2))
print(reshaped_arr, reshaped_arr.shape)

# Make sure equal no of elements in both the array. 

# Flatten : Convert 2D array to 1D array
convert1DA = arr.flatten()
print(convert1DA, convert1DA.shape)

[[1 2 3]
 [4 5 6]] (2, 3)
[[1 2]
 [3 4]
 [5 6]] (3, 2)
[1 2 3 4 5 6] (6,)


In [27]:
# 2 : indexing

# For 1D Array
arr = np.array([1,2,3,4,5,6,7])
print(arr[0]) # 1
print(arr[2]) # 3
print(arr[6]) # 7

# For 2D Array
arr = np.array([[1,2,3], [4,5,6]])
print(arr[1][1])
print(arr[0][1])

1
3
7
5
2


In [29]:
# 3 : Fancy indexing : Access a set of value from array
arr = np.array([1,2,3,4,5,6,7])

idx = [0,2,4]
print(arr[idx])

[1 3 5]


In [31]:
# 4 : Boolean indexing : Based on condition we access a particular value.
arr = np.array([1,2,3,4,5,6,7])
# condition - print all even values
print(arr[arr % 2 == 0])

[2 4 6]


In [34]:
# 5 : Slicing
arr = np.array([1,2,3,4,5,6,7])

print(arr[1:6:2]) # [start:end:step]

[2 3 4 5 6 7]


In [42]:
# Python list (copy) vs Numpy array (view)

# python list
nums = [1,2,3,4,5,6]
# sub list
sub_list = nums[1:3]
print(sub_list)
sub_list[0] = 111

print(sub_list)
print(nums)

print("See the diff betn above and below")
print("In above there is a copy created and original array unchanged")
print("In below there is no copy created and original array is changed, It donot take any extra memory so that it is faster.")

# Numpy array
arr = np.array([1,2,3,4,5,6,7])
sub_arr = arr[1:3] #In case we want a copy then we use .copy function : sub_arr = arr[1:3].copy
print(sub_arr)
sub_arr[0] = 121

print(sub_arr)
print(arr)


[2, 3]
[111, 3]
[1, 2, 3, 4, 5, 6]
See the diff betn above and below
In above there is a copy created and original array unchanged
In below there is no copy created and original array is changed, It donot take any extra memory so that it is faster.
[2 3]
[121   3]
[  1 121   3   4   5   6   7]


In [53]:
# Data types

# 1 int
arr = np.array([1,2,3,4,5,6,7])
print(arr.dtype)

# 2 float
arr = np.array([1.5,2,3.7,4,5,6,7])
print(arr.dtype)

# We can also downcast the value
# example, when we store the age of a person in that case mximum ge is 120, then we no need big datatype like 32,64, 128
# example2 , For storing an image, we need 0 to 255, in this case also no need big data type. so we downcast it for make it faster.

# 3 Complex number (complex64, comples128)
# A special types of number which is made up of 2 different number.
# One real part and other is imiganery part
# When we show a complex number then add both (real part + imiganery part) so, the size is also diubble
# eg : 2 + 3i (i : imiginary) : i : under root -1, which is not a real value
# 2 + 3i
# +
# 3 + 4i
# ========
# 5 + 7i
arr = np.array([3 + 5j])
print(arr.dtype)

arr1 = np.array([3 + 5j])
arr2 = np.array([5 + 5j])
print((arr1 + arr2))

# 4 string : S --> byte-str , U --> Unicode-str
arr = np.array(["Pritam","Kumar","Patel"])
print(arr, arr.dtype)
# It is not recomended to store string or object in numpy array bcz numpy array is mainly for mathematical opr.

# Boolean : bool
arr = np.array([True])
print(arr.dtype)

# object : generic python objects - object
arr = np.array([{1,2,4}, {5,7,6}])
print(arr.dtype) # Object type



int64
float64
complex128
[8.+10.j]
['Pritam' 'Kumar' 'Patel'] <U6
bool
object


In [61]:
# Multi-Dimensional Array
# 1 : 2D array

arr2D = np.array([[1,2,3],[4,5,6],[7,8,9]])
print(arr2D)

# print sum of all element
print(np.sum(arr2D)) # 45

# Sum of columns
sum_of_columns = np.sum(arr2D, axis = 0)
print(sum_of_columns)

# sum of rows
sum_of_rows = np.sum(arr2D, axis = 1)
print(sum_of_rows)

# using slicing
# Suppose we want 2 and 5
print(arr2D[0:2, 1:2])



[[1 2 3]
 [4 5 6]
 [7 8 9]]
45
[12 15 18]
[ 6 15 24]
[[2]
 [5]]


In [72]:
# Multi-Dimensional Array
# 2 : 3D array

arr3D = np.array([[[1,2], [3,4], [5,6]], [[7,8], [9,10], [11,12]]]) #2 * 3* 2
print(arr3D, arr3D.shape)

# indexing
# Suppose we want 4
print(arr3D[0,1,1])
# Suppose we want 9
print(arr3D[1,1,0])

# Slicing
# print first row from both layers
print(arr3D[:, 0, :]) # : indicates all
# print first column from both layers
print(arr3D[:, :, 0])

# Replace first line from both layer by 20
arr3D[:, 0, :] = 20
print(arr3D)


[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]] (2, 3, 2)
4
9
[[1 2]
 [7 8]]
[[ 1  3  5]
 [ 7  9 11]]
[[[20 20]
  [ 3  4]
  [ 5  6]]

 [[20 20]
  [ 9 10]
  [11 12]]]


In [80]:
# vectorization
arr1 = np.array([1,2,3,4,5,6,7])
arr2 = np.array([4,5,6,7,8,9,10])

# Caalculate square of every element.
print(arr1 **2)
# Add 5 to every element of array
print(arr1 + 5)
# Add element of both array
print(arr1 + arr2)

# Due to vectorization no need of loop like python list.


[ 1  4  9 16 25 36 49]
[ 6  7  8  9 10 11 12]
[ 5  7  9 11 13 15 17]


In [83]:
# Broadcasting
# Comparesion start from right
# Two dimensions are compatible for broadcasting when
# 1 : They are equal
# 2 : One of them is 1

arr1D = np.array([1,2,3,4,5])
arr2D = np.array([[1,2,3,4,5], [1,2,3,4,5]])

print(arr1D.shape)
print(arr2D.shape)
print(arr1D + arr2D)


(5,)
(2, 5)
[[ 2  4  6  8 10]
 [ 2  4  6  8 10]]


In [88]:
# Vector normalization
# Formula : (arr - mean)/standard deviation

arr2D = np.array([[1,2], [3,4]])

# calculate mean value by mean()
mean_val = np.mean(arr2D)
# calculate the std deviation by std()
std_dev_val = np.std(arr2D)

normalized_arr = (arr2D - mean_val)/std_dev_val
print(normalized_arr)




[[-1.34164079 -0.4472136 ]
 [ 0.4472136   1.34164079]]


In [None]:
# Formula

# mean = Sum of vals / N

# Standard deviation = root of, sum(each val - mean)**2 / N 

# Standard variation = sum(each val - mean)**2 / N

In [99]:
# Mathematical function

# Agggrigate function

arr = np.array([1,2,3,4,5])

print(f"sum = {np.sum(arr)}")
print(f"product = {np.prod(arr)}")
print(f"Minimum val is = {np.min(arr)}")
print(f"Maximum val is = {np.max(arr)}")
print(f"The minimum value's index = {np.argmin(arr)}")
print(f"The maximum value's index = {np.argmax(arr)}")
print(f"Mean val = {np.mean(arr)}")
print(f"Standard deviation val = {np.std(arr)}")
print(f"Median val = {np.median(arr)}")
print(f"Standard variance val = {np.var(arr)}")



sum = 15
product = 120
Minimum val is = 1
Maximum val is = 5
The minimum value's index = 0
The maximum value's index = 4
Mean val = 3.0
Standard deviation val = 1.4142135623730951
Median val = 3.0
Standard variance val = 2.0


In [102]:
# Power functions
arr = np.array([1,2,3,4,5])

print(f"Square = {np.square(arr)}")
print(f"square root = {np.sqrt(arr)}")
print(f"Power of 3 = {np.pow(arr, 3)}")


Square = [ 1  4  9 16 25]
square root = [1.         1.41421356 1.73205081 2.         2.23606798]
Power of 3 = [  1   8  27  64 125]


In [103]:
# log functions
arr = np.array([1,2,3,4,5])

print(f"natural log = {np.log(arr)}")
print(f"log10 = {np.log10(arr)}")
print(f"log2 = {np.log2(arr)}")


natural log = [0.         0.69314718 1.09861229 1.38629436 1.60943791]
log10 = [0.         0.30103    0.47712125 0.60205999 0.69897   ]
log2 = [0.         1.         1.5849625  2.         2.32192809]


In [106]:
# Exponential
arr = np.array([1,2,3,4,5])

print(f"Exponential val = {np.exp(arr)}")
print(f"Exponential 2 val = {np.exp2(arr)}")



Exponential val = [  2.71828183   7.3890561   20.08553692  54.59815003 148.4131591 ]
Exponential 2 val = [ 2.  4.  8. 16. 32.]


In [114]:
# Rounding
arr = np.array([1.8,2.2,3.1,4,5.9])

print(f"Roundoff val = {np.round(arr)}")
print(f"ceil val = {np.ceil(arr)}") #up
print(f"floor val = {np.floor(arr)}") #down
print(f"Cut val after decimal = {np.ceil(arr)}") #up




Roundoff val = [2. 2. 3. 4. 6.]
ceil val = [2. 3. 4. 4. 6.]
floor val = [1. 2. 3. 4. 5.]


In [115]:
# unique
arr = np.array([1,2,3,4,5,5,4])

print(f"unique val in array = {np.unique(arr)}")



unique val in array = [1 2 3 4 5]


In [116]:
# sort
arr = np.array([3,4,2,5,1])

print(f"sort val in array = {np.sort(arr)}")


sort val in array = [1 2 3 4 5]


In [117]:
# absolute value
arr = np.array([1,-2,-3,4,5])

print(f"All value in posetive = {np.abs(arr)}")



All value in posetive = [1 2 3 4 5]


In [118]:
# Many more....