# Numpy - numerical python

In [1]:
import numpy as np
import time
import sys

In [2]:
# execution performance

size = 100_000_000

# python list

py_list = list(range(size))

start = time.time()
sq_list = [x**2 for x in py_list]
end = time.time()
print(f"py_time = {end-start}")

# numpy arrays

np_arr = np.array(py_list)
start = time.time()
sq_array = np_arr **2
end = time.time()

print(f"num_time = {end-start} ")



py_time = 2.7230520248413086
num_time = 0.19707250595092773 


In [3]:
# memory

print(f"python list size = {sys.getsizeof(py_list) * len(py_list)} bytes")
print(f"numpy array size = {np_arr.nbytes} bytes")


python list size = 80000005600000000 bytes
numpy array size = 800000000 bytes


***

## Numpy arrays

In [5]:
# creating arrays -->> from lists

arr1 = np.array([1,2,3,4,5,6])
print(arr1,type(arr1))

arr2 = np.array([1,1,21,3,4,5,"prash"])
print(arr2,type(arr2))

# 2d arrays - Matrix

arr3=np.array([[1,2,3] , [4,5,6]])
print(arr3,arr3.shape)

[1 2 3 4 5 6] <class 'numpy.ndarray'>
['1' '1' '21' '3' '4' '5' 'prash'] <class 'numpy.ndarray'>
[[1 2 3]
 [4 5 6]] (2, 3)


In [7]:
# numpy array from scratch
arr1 = np.zeros((3,4)) # pre filled with zeroes
print(arr1)

arr2 = np.ones((3,3)) # pre-filled with 1's
print(arr2)

arr3 = np.full((2,3),8) #pre-filled with a num
print(arr3)

arr4 = np.eye(3) #create an identity matrix
print(arr4)

arr5 = np.arange(1,20,2) # elements in range 
print(arr5)

arr6 = np.linspace(0,10,5) #evenly spaced array
print(arr6)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[[8 8 8]
 [8 8 8]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[ 1  3  5  7  9 11 13 15 17 19]
[ 0.   2.5  5.   7.5 10. ]


In [10]:
# Properties of arrays::

arr = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])

print(arr.shape) # m X n
print(arr.size) # total = m*n
print(arr.ndim) # dimension
print(arr.dtype) # data_type of the object

str_arr =np.array([1,2,3] , dtype="U") # string array
print(str_arr, str_arr.dtype)

float_arr = np.array([1,2,3], dtype ="float64")
print(float_arr,float_arr.dtype)

# IMPORTANT

int_arr =float_arr.astype(np.int64)  # convert to another data type
print(int_arr , int_arr.dtype)

(4, 3)
12
2
int64
['1' '2' '3'] <U1
[1. 2. 3.] float64
[1 2 3] int64


In [11]:
#INDEXING

arr = np.array([1,2,3,4,5]) #  1D array
print(arr[0])

arr  = np.array([[1,2,3] , [4,5,6] , [7,8,9],[10,11,12]])
print(arr[0][1])
print(arr[1][2])

# fancy indexing

arr= np.array([1,2,3,4,5])
idx=[0,1,4]

print(arr[idx])


# BOOLEAN INDEXING
print(arr[arr>2]) # nums greater than 2
print(arr[arr%2==0]) # even nums

1
2
6
[1 2 5]
[3 4 5]
[2 4]


In [12]:
# SLICING

arr = np.array([1,2,3,4,5,6,7])

print(arr[2:6])
print(arr[:6]) # idx 0 to 5
print(arr[3:]) # idx 3 to END
print(arr[::2]) # idx START to END at a GAP of 2


[3 4 5 6]
[1 2 3 4 5 6]
[4 5 6 7]
[1 3 5 7]


***
## Note : Sliced List is a copy
## Note : Sliced Array is a View
***

In [19]:
# Copy v/s View by slicing

# sliced list is a copy
py_list = [1,2,3,4,5]
copy_list = py_list[1:4] #[2,3,4]
copy_list[1] =333

print(copy_list)
print(py_list)

print("---------------------")

# Sliced array is a view 

np_arr = np.array([1,2,3,4,5])
view_arr = np_arr[1:4] #[2,3,4]
view_arr[1] = 999

print(view_arr)
print(np_arr)

print("---------------------")

# Creating a copy  in array
copy_arr = np_arr[1:4].copy()
copy_arr[2]= 444
print(copy_arr)
print(np_arr)

[2, 333, 4]
[1, 2, 3, 4, 5]
---------------------
[  2 999   4]
[  1   2 999   4   5]
---------------------
[  2 999 444]
[  1   2 999   4   5]


***

### Axes in Multi-dimensional Arrays
- 1D array has 1 axis (axis0).
- 2D array has 2 axes (axis0 = rows, axis1 = columns)
- 3D array has 3 axes (axis0 = depth/layer, axis1 = rows in each layer, axis2 = columns in each layer)

In [24]:
#Operations along Axes:

arr2D = np.array([[1,2,4],[4,5,6],[7,8,9]])

print(arr2D)
print("sum : ",np.sum(arr2D))# sum of entire array

sum_of_col = np.sum(arr2D, axis=0)
print(sum_of_col)

sum_of_rows = np.sum(arr2D,axis=1)
print(sum_of_col)

#Slicing
print(arr2D[0:3,1:3])# row-(0,1,2) & col-(1,2)


[[1 2 4]
 [4 5 6]
 [7 8 9]]
sum :  46
[12 15 19]
[12 15 19]
[[2 4]
 [5 6]
 [8 9]]


In [30]:
# IMP
#3D array

arr3D= np.array([[[1,2],[3,4],[5,6]],[[7,8],[9,10],[11,12]]])

print(arr3D)
print(arr3D.shape)

print("---------------------")

#Indexing
print(arr3D[0][1][1]) #4
print(arr3D[1][2][1]) 

print(arr3D[:,:,0]) # 1st col from both layers
print(arr3D[:,0,:]) # 1st row from both layers

print("---------------------")

#Manipulating data

arr3D[:,0,:]  = 99# change first row to store 99
print(arr3D)

[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]]
(2, 3, 2)
---------------------
4
12
[[ 1  3  5]
 [ 7  9 11]]
[[1 2]
 [7 8]]
---------------------
[[[99 99]
  [ 3  4]
  [ 5  6]]

 [[99 99]
  [ 9 10]
  [11 12]]]


***

### Common Data Types in Numpy
- Integers: int32, int64
- Floating point nums: float32, float64
- Boolean: bool
- Complex nums: complex64, complex128
- String: S (byte-str) & U (unicode-str)
- Object: generic python objects - object
***

In [38]:
# dtype - data Type
arr = np.array([1,2,3,4,5])
arr2 = np.array([1.0, 2.0, 3.0])
arr3 = np.array(["hello", "world", "prime", "ai/ml"])

print(arr.dtype)
print(arr2.dtype)
print(arr3.dtype)

print(arr.nbytes)        # 5elements * 8bytes each => 40
print("---------------------")

new_arr= arr.astype("float64") # cahnging type to flaot
print(new_arr,new_arr.dtype)

new_arr = np.array([1,2,3,4,5], dtype= "float64")
print(new_arr , new_arr.dtype)

#Complex numbers 

arr1=np.array([2+3j])
arr2=np.array([8+5j])

print(arr1,arr1.dtype)
print(arr1+arr2)


# Objects
arr = np.array(["hello", {1,2,3},3.14])
print(arr,arr.dtype)

int64
float64
<U5
40
---------------------
[1. 2. 3. 4. 5.] float64
[1. 2. 3. 4. 5.] float64
[2.+3.j] complex128
[10.+8.j]
['hello' {1, 2, 3} 3.14] object


***

### Vectorization & Broadcasting

<b>1. Vectorization</b>: It is the process of applying operations to entire arrays or sequences of data at once, rather than iterating through individual elements using explicit loops. 

<b>2. Broadcasting</b>: It is a mechanism that allows NumPy to perform operations on arrays of different shapes without explicitly reshaping them (creating copies of the smaller array to match the larger array's shape). 

<b> Broadcasting Condition <b>

For broadcasting to happen the dimensions should be compatible. Numpy compares shape element-wise. It starts dimension comparison with the trailing (i.e. rightmost) dimension and works its way left. 
Two dimensions are compatible when <ol> <li>they are equal, or</li> <li>one of them is 1. </li>

***

In [39]:
# Vectorization & Broadcasting

#Vectorization
arr = np.array([1, 2, 3, 4, 5])

sq_arr = arr ** 2       #square of all nums
print(sq_arr)

arr2 = np.array([6, 7, 8, 9, 10])
print(arr + arr2)      #sum of 2 arrays

#Broadcasting
arr_mul10 = arr * 10    #multiply by 10 to all nums
print(arr_mul10)

arr1D = np.array([1, 2, 3])
arr2D = np.array([[1, 2, 3], [4, 5, 6]])
print(arr1D + arr2D)

[ 1  4  9 16 25]
[ 7  9 11 13 15]
[10 20 30 40 50]
[[2 4 6]
 [5 7 9]]


In [40]:
# Vector Normalization
arr = np.array([[1, 2], [3, 4]])
mean = np.mean(arr)
std_dev = np.std(arr)

print((arr - mean) / std_dev)

#column wise
arr = np.array([[1, 2], [3, 4], [5, 6]])
mean = np.mean(arr, axis = 0)
std_dev = np.std(arr, axis = 0)
print((arr-mean) / std_dev)

[[-1.34164079 -0.4472136 ]
 [ 0.4472136   1.34164079]]
[[-1.22474487 -1.22474487]
 [ 0.          0.        ]
 [ 1.22474487  1.22474487]]


***

### Mathematical Functions in NumPy
There are lots of important & commonly used mathematical functions in NumPy.
1. Aggregation functions
   - sum()
   - prod()
   - min() & argmin() - min val & idx of min val
   - max() & argmax() - max val & idx of max val
   - mean()
   - median()
   - std() - standard deviation
   - var() - variance

***

In [42]:
# Mathematical Functions

arr = np.array([1, 2, 3, 4, 5])

print(np.sum(arr))     # 15
print(np.prod(arr))    # 120
print(np.min(arr))     # 1
print(np.argmin(arr))  # 0
print(np.max(arr))     # 5
print(np.argmax(arr))  # 4
print(np.mean(arr))    # 3.0
print(np.median(arr))  # 3.0
print(np.std(arr))     # 1.41
print(np.var(arr))     # 2.0

print(np.square(arr))    # [1, 4, 9, 16, 25]
print(np.sqrt(arr))      # [1, 1.41, 1.73, 2, 2.23]
print(np.pow(arr, 3))    # [1, 8, 27, 64, 125]

15
120
1
0
5
4
3.0
3.0
1.4142135623730951
2.0
[ 1  4  9 16 25]
[1.         1.41421356 1.73205081 2.         2.23606798]
[  1   8  27  64 125]


***
3. Log & Exponential functions
   - log() - natual log
   - log10() - log base 10
   - log2() - log base 2
   - exp() - exponentional i.e. e^x
***

In [43]:
print(np.log(arr))
print(np.log10(arr))
print(np.log2(arr))
print(np.exp(arr))

[0.         0.69314718 1.09861229 1.38629436 1.60943791]
[0.         0.30103    0.47712125 0.60205999 0.69897   ]
[0.         1.         1.5849625  2.         2.32192809]
[  2.71828183   7.3890561   20.08553692  54.59815003 148.4131591 ]


***

4. Rounding functions
   - round() - rounds to nearest value
   - floor() - rounds down
   - ceil() - rounds up
   - trunc() - truncates decimal part
***

In [44]:
print(np.round(2.678))    # 3.0
print(np.floor(2.678))    # 2.0
print(np.ceil(2.678))     # 3.0
print(np.trunc(2.678))    # 2.0

3.0
2.0
3.0
2.0


In [45]:
arr = np.array([1, 2, -5, 3, 8, -4, 2, 5])
print(np.abs(arr))     # [1 2 5 3 8 4 2 5]
print(np.sort(arr))    # [-5 -4  1  2  2  3  5  8]
print(np.unique(arr))  # [-5 -4  1  2  3  5  8]

[1 2 5 3 8 4 2 5]
[-5 -4  1  2  2  3  5  8]
[-5 -4  1  2  3  5  8]
