In [1]:
# Import NumPy and print the installed version so the outputs below can be tied to it
import numpy as np
print(np.__version__)

1.24.3


# NumPy
- Numerical Python
- Arrays and Matrices

## Table of Contents
- **1** NumPy Operations
- **2** arange()
- **3** Attributes of NumPy arrays
- **4** NumPy Vectorized Operations
- **5** Universal Functions
- **6** Array Slicing and Indexing
- **7** Statistical Concepts 
- **8** Logical Operations
- **9** Finding the Jupyter Notebook Path

## NumPy Operations
- Make an array
- Type of the array
- Shape of the array
- Reshape the array
- Create a 1-D array
- Create a 2-D array
- arange(x, y, z)
- Create a matrix of 1s : np.ones((a, b))
- Create an Identity matrix : np.eye(3)

In [4]:
# 1-D array
arr1 = np.array([1,2,3,4,5])
print(arr1)
print(type(arr1))  # type
print(arr1.shape)  # shape of array: (5,) - 1-D array with 5 elements

# reshape() returns a new array and leaves the shape of the original unchanged,
# so the result has to be assigned to a name to be usable
arr2 = np.array([1,2,3,4,5])
reshaped = arr2.reshape(1,5)  # 1 row, 5 columns -> shape (1, 5)

# 2-D array with a single row - the nested brackets give shape (1, 5),
# the same shape as the reshaped array above
arr2 = np.array([[1,2,3,4,5]])
print(arr2)
arr2.shape

[1 2 3 4 5]
<class 'numpy.ndarray'>
(5,)
[[1 2 3 4 5]]


(1, 5)

In [5]:
# 2-D array: two rows with five columns each
row_a = [1, 2, 3, 4, 5]
row_b = [2, 3, 4, 5, 6]
arr2 = np.array([row_a, row_b])
print(arr2)
print(arr2.shape)

[[1 2 3 4 5]
 [2 3 4 5 6]]
(2, 5)


## np.arange(x, y, z) - Start, Stop, Step
- (x) = Start
- (y) = Stop (exclusive - values are generated up to, but not including, y)
- (z) = Interval between elements

In [6]:
np.arange(start=0, stop=10, step=1)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [7]:
np.arange(start=0, stop=10, step=2)

array([0, 2, 4, 6, 8])

In [8]:
# Turn the 1-D range into a 2-D column vector: 5 rows, 1 column
evens = np.arange(0, 10, 2)
evens.reshape(5, 1)

array([[0],
       [2],
       [4],
       [6],
       [8]])

In [9]:
# Matrix filled with 1s - the shape is passed as a single (rows, columns) tuple
np.ones(shape=(2, 3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [10]:
# Identity matrix (always square): 1s on the main diagonal, 0s everywhere else
np.eye(N=4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

## Some other NumPy functions - Attributes of NumPy arrays

In [11]:
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

print("Array:\n", arr)
print("Shape: ", arr.shape)
print("Number of dimensions: ", arr.ndim)
print("Size (Number of elements): ", arr.size) 
print("Data type: ", arr.dtype)                  # default integer type is platform-dependent: int32 in the output below, int64 on other systems
print("Item Size (in bytes): ", arr.itemsize)    # bytes per element: 4 for int32 (8 for int64)
print("Total memory used (bytes): ", arr.nbytes) # size * itemsize: 9 * 4 bytes = 36 bytes here (72 bytes with int64)

Array:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]
Shape:  (3, 3)
Number of dimensions:  2
Size (Number of elements):  9
Data type:  int32
Item Size (in bytes):  4
Total memory used (bytes):  36


## NumPy Vectorized Operations
- Element-wise (same indices) Addition, Subtraction
- Element-wise (same indices) Multiplication (not Matrix Multiplication)
- Element-wise (same indices) Division
- Outputs in the form of Arrays

In [12]:
arr1 = np.array([1, 2, 3, 4, 5])
arr2 = np.array([10, 20, 30, 40, 50])

# Arithmetic operators work element by element (elements at the same index
# are combined) and every result is itself an array.
total = arr1 + arr2
difference = arr1 - arr2
product = arr1 * arr2    # element-wise product - not matrix multiplication
quotient = arr1 / arr2   # element-wise division

print("Addition: ", total)
print("Subtraction: ", difference)
print("Multiplication: ", product)
print("Division: ", quotient)

Addition:  [11 22 33 44 55]
Subtraction:  [ -9 -18 -27 -36 -45]
Multiplication:  [ 10  40  90 160 250]
Division:  [0.1 0.1 0.1 0.1 0.1]


## Universal Functions
- Square Root
- Exponential
- Sine
- Natural Log
- Outputs in the form of Arrays

In [14]:
arr = np.array([2, 3, 4, 5, 6])

# Each universal function is applied element-wise and returns a new array.
# Printed in this order: square root, exponential, sine, natural log.
for ufunc in (np.sqrt, np.exp, np.sin, np.log):
    print(ufunc(arr))

[1.41421356 1.73205081 2.         2.23606798 2.44948974]
[  7.3890561   20.08553692  54.59815003 148.4131591  403.42879349]
[ 0.90929743  0.14112001 -0.7568025  -0.95892427 -0.2794155 ]
[0.69314718 1.09861229 1.38629436 1.60943791 1.79175947]


## Array Slicing and Indexing
- Row Index
- Row and Column Index
- arr[a:] = Picks from the (a+1)th row till the last row
- arr[:a] = Picks from the 1st row (0+1 th) till (a)th row 
- arr[: , :b] = Picks from the 1st column (0+1 th) till the (b)th column
- arr[: , b:] = Picks from the (b+1)th column till the last column
- arr[1: , 2:] = Picks from the 2nd row till the end and from the 3rd column till the end
- arr[0:2 , 2:4] or arr[:2 , 2:] = Picks from the 1st row till the 2nd row and from the 3rd column till the 4th (last) column
- Modify array elements through indices or through rows and columns with the same principle as above

In [15]:
# 3 rows x 4 columns holding the values 1..12 in row order
arr = np.arange(1, 13).reshape(3, 4)
print("Array : \n", arr)

Array : 
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [16]:
# A single index selects one whole row - here the first row (index 0)
first_row = arr[0]
print(first_row)

[1 2 3 4]


In [17]:
# Row and Column Index
# arr[row, column] looks the element up in one step; arr[0][0] gives the same
# value but first builds an intermediate row and then indexes into it

print(arr[0, 0])

1


In [18]:
# arr[a:] keeps every row from index a onwards, i.e. it skips the first a rows
last_two_rows = arr[1:]   # rows at index 1 and 2 -> the last 2 rows
print(last_two_rows)

last_row = arr[2:]        # only the row at index 2 -> the last row (result stays 2-D)
print(last_row)

[[ 5  6  7  8]
 [ 9 10 11 12]]
[[ 9 10 11 12]]


In [19]:
# arr[:, b:] keeps all rows and every column from index b onwards
# -> here the last 2 columns
last_two_columns = arr[:, 2:]
print(last_two_columns)

[[ 3  4]
 [ 7  8]
 [11 12]]


In [20]:
# Rows 2-3 (index 1 onwards) combined with columns 3-4 (index 2 onwards)
lower_right = arr[1:, 2:]
print(lower_right)

[[ 7  8]
 [11 12]]


In [21]:
# a:b covers indices a .. b-1, i.e. the (a+1)th through the (b)th row/column
middle_block = arr[1:3, 1:3]
print(middle_block)

[[ 6  7]
 [10 11]]


In [22]:
# Two spellings of the same selection (first 2 rows, last 2 columns):
# an omitted bound defaults to the start / end of that axis
for selection in (arr[:2, 2:], arr[0:2, 2:4]):
    print(selection)

[[3 4]
 [7 8]]
[[3 4]
 [7 8]]


In [23]:
# Modify Array elements
# Assigning through an index changes arr in place, so every later cell sees the new value

arr[0,0] = 100
print(arr)

[[100   2   3   4]
 [  5   6   7   8]
 [  9  10  11  12]]


In [24]:
# Slice assignment: the single value 100 is written (in place) to every element of the rows from index 1 onwards
arr[1:] = 100
print(arr)

[[100   2   3   4]
 [100 100 100 100]
 [100 100 100 100]]


## Statistical Concepts
- **Aim of normalisation is to rescale the data to a Mean of 0 and a Standard Deviation of 1**
- Mean
- Median
- Standard Deviation
- Normalisation
- Variance
- **Mode is not present in NumPy - it is available in the SciPy library (`scipy.stats.mode`)**

In [25]:
data = np.array([1, 2, 3, 4, 5])

# Summary statistics, computed up front (median is only available as np.median)
mean = data.mean()
median = np.median(data)
std_dev = data.std()     # default ddof=0 -> population standard deviation
variance = data.var()    # default ddof=0 -> population variance

# Normalize the data: subtract the mean, then divide by the standard deviation
normalized_data = (data - mean) / std_dev

# Report the values in the order: mean, median, standard deviation, normalized data, variance
print(mean)
print("Median: ", median)
print(std_dev)
print("Normalized data:\n", normalized_data)
print("Variance: ", variance)

3.0
Median:  3.0
1.4142135623730951
Normalized data:
 [-1.41421356 -0.70710678  0.          0.70710678  1.41421356]
Variance:  2.0


## Logical Operations - Important for using with Pandas

In [29]:
data = np.arange(1, 11)
print(data)

# A comparison is evaluated for every element and yields a boolean mask
above_five = data > 5
print(above_five)

# Indexing with the mask keeps only the elements where the mask is True
print(data[above_five])

# 2 conditions: combine the masks with & (each comparison in its own parentheses)
between_five_and_eight = (data >= 5) & (data <= 8)
print(data[between_five_and_eight])

[ 1  2  3  4  5  6  7  8  9 10]
[False False False False False  True  True  True  True  True]
[ 6  7  8  9 10]
[5 6 7 8]


## Finding the Jupyter Notebook Path

In [32]:
import os

# Get current working directory
cwd = os.getcwd()
# NOTE(review): the printed absolute path includes the local user name - consider clearing this output before sharing the notebook
print("Notebook is running from:", cwd)

Notebook is running from: C:\Users\shubh\OneDrive\Desktop\CODING JULY, 2023\data_analysis_projects
