<a href="https://colab.research.google.com/github/muslimuddin2002/Python-practice/blob/main/Copy_of_SkillMorph_4_NumPy_and_Pandas_for_Machine_Learning_(2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **NumPy and Pandas for Machine Learning**

**Instructor:** Md. Samiul Islam - Skill Morph Research Lab

**Date:** Dec 30, 2025, 8:00 PM

---

# 1. Introduction to NumPy and Pandas
This section introduces the basics of NumPy and Pandas for Machine Learning.

## 1.1 NumPy Basics
Learn how to create arrays and perform basic array operations.


In [2]:
# Install required packages
!pip install numpy pandas matplotlib

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


print("NumPy version:", np.__version__)
print("Pandas version:", pd.__version__)

NumPy version: 2.0.2
Pandas version: 2.2.2


In [3]:
# Build arrays from plain Python lists: a flat list gives a 1-D array,
# a list of equal-length lists gives a 2-D array.
arr1 = np.array([1, 2, 3, 4, 5])
arr2 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# Show the contents, then the shape and element type of the 1-D array.
print(f"1D Array: {arr1}")
print(f"2D Array:\n {arr2}")
print(f"Array shape: {arr1.shape}")
print(f"Array dtype: {arr1.dtype}")

1D Array: [1 2 3 4 5]
2D Array:
 [[1 2 3]
 [4 5 6]]
Array shape: (5,)
Array dtype: int64


## 1.2 Creating NumPy Arrays


In [4]:
# Array constructors that do not need an existing Python list.
zeros_array = np.zeros(shape=(3, 4), dtype=int)   # 3x4 array filled with 0
ones_array = np.ones(shape=(2, 3), dtype=int)     # 2x3 array filled with 1
range_array = np.arange(0, 10, 2)                 # 0, 2, ..., 8 (stop is excluded)
linspace_array = np.linspace(0, 1, num=5)         # 5 evenly spaced points, 0 and 1 included

print(f"Zeros array:\n {zeros_array}")
print(f"Ones array:\n {ones_array}")
print(f"Range array: {range_array}")
print(f"Linspace array: {linspace_array}")

Zeros array:
 [[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]
Ones array:
 [[1 1 1]
 [1 1 1]]
Range array: [0 2 4 6 8]
Linspace array: [0.   0.25 0.5  0.75 1.  ]


## 1.3 Element-wise Arithmetic, Statistics, and Broadcasting


In [None]:
# Two equal-length vectors used for the element-wise examples.
a = np.array([1, 2, 3, 4, 5])
b = np.array([2, 3, 4, 5, 6])

# Element-wise operations: every result has the same length as `a`.
elementwise = [
    ("Addition:", a + b),
    ("Multiplication:", a * b),
    ("Power:", a ** 2),
    ("Square root:", np.sqrt(a)),
]
for label, values in elementwise:
    print(label, values)

# Statistical operations: each one reduces `a` to a single number.
statistics = [
    ("Mean:", np.mean(a)),
    ("Standard deviation:", np.std(a)),
    ("Min:", np.min(a)),
    ("Max:", np.max(a)),
]
for label, value in statistics:
    print(label, value)

# Broadcasting example: the length-3 vector is added to each row of the
# 2x3 matrix without writing an explicit loop.
matrix = np.array([[1, 2, 3], [4, 5, 6]])
vector = np.array([10, 20, 30])
result = matrix + vector
print(f"Broadcasting result:\n {result}")

Mean: 3.0
Standard deviation: 1.4142135623730951
Min: 1
Max: 5
Broadcasting result:
 [[11 22 33]
 [14 25 36]]


## 1.4 Indexing and Slicing Arrays


In [None]:
arr = np.array([10, 20, 30, 40, 50])

# Indexing: positions start at 0, so index 2 is the third element.
third_element = arr[2]
print(f"Element at index 2: {third_element}")

# Slicing: the stop index is exclusive, so 1:4 selects indices 1, 2 and 3.
middle_part = arr[1:4]
print(f"Sliced array (from index 1 to 3): {middle_part}")

# 2D array slicing: a slice on the first axis selects whole rows.
arr_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
first_two_rows = arr_2d[:2]
print(f"Sliced 2D array (first two rows):\n {first_two_rows}")

Element at index 2: 30
Sliced array (from index 1 to 3): [20 30 40]
Sliced 2D array (first two rows):
 [[1 2 3]
 [4 5 6]]


# 2. Introduction to Pandas

## 2.1 Creating DataFrames


In [None]:
# Each dictionary key becomes a column name; each list holds that column's
# values, one entry per row.
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [24, 27, 22],
    'City': ['New York', 'Los Angeles', 'Chicago'],
}

df = pd.DataFrame(data)
print(f"DataFrame:\n {df}")

DataFrame:
       Name  Age         City
0    Alice   24     New York
1      Bob   27  Los Angeles
2  Charlie   22      Chicago


In [None]:
# Basic DataFrame information
print("\nDataFrame Info:")
print(df.info())
print("\nDataFrame Description:")
print(df.describe())



DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   City    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 204.0+ bytes
None

DataFrame Description:
             Age
count   3.000000
mean   24.333333
std     2.516611
min    22.000000
25%    23.000000
50%    24.000000
75%    25.500000
max    27.000000


## 2.2 Selecting Columns


In [None]:
# A single column label selects one column as a Series.
name_column = df['Name']
print("Names column:\n", name_column)

# A list of labels selects several columns as a new DataFrame.
name_and_age = df[['Name', 'Age']]
print("\nName and Age columns:\n", name_and_age)

Names column:
 0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object

Name and Age columns:
       Name  Age
0    Alice   24
1      Bob   27
2  Charlie   22


## 2.3 Converting NumPy Array to DataFrame


In [None]:
# Convert NumPy array to DataFrame
np_array = np.array([1, 2, 3])
df_from_array = pd.DataFrame(np_array)
print("DataFrame from NumPy array:")
print(df_from_array)

DataFrame from NumPy array:
   0
0  1
1  2
2  3


# 3. Loading Datasets

## 3.1 Loading from Google Drive


In [None]:

# Colab-only cell: makes Google Drive available under /content/drive.
from google.colab import drive
import warnings

# Silence every Python warning for the rest of the session.
warnings.filterwarnings(action='ignore')

drive.mount('/content/drive')



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Location of the CSV inside the mounted Drive folder.
file_path = '/content/drive/MyDrive/datasets/diabetes.csv'

# Read the CSV into a DataFrame and report its (rows, columns) shape.
df = pd.read_csv(filepath_or_buffer=file_path)
print(f"Dataset loaded successfully. Shape: {df.shape}")


Dataset loaded successfully. Shape: (768, 9)


In [None]:
# NOTE(review): this cell repeats the Drive mount already performed earlier in
# this section; its recorded output only reports "Drive already mounted".
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 3.2 Uploading Files to Colab


In [None]:
import io

from google.colab import files

# File name the original cell expected; still preferred when it is present.
EXPECTED_NAME = 'diabete.csv'

# Opens the Colab upload dialog; the result is indexed by uploaded file name.
uploaded = files.upload()

# Fail with a clear message if the dialog was dismissed without a file,
# instead of a KeyError further down.
if not uploaded:
    raise ValueError("No file was uploaded; run this cell again and choose a CSV file.")

# Load the uploaded file. Fall back to the first uploaded file when it is
# not named exactly EXPECTED_NAME, so any CSV file name works.
uploaded_name = EXPECTED_NAME if EXPECTED_NAME in uploaded else next(iter(uploaded))
df1 = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
print("Dataset loaded successfully. Shape:", df1.shape)

Saving diabete.csv to diabete.csv
Dataset loaded successfully. Shape: (768, 9)


## 3.3 Converting DataFrame to NumPy Array


In [None]:
# Convert the DataFrame to a NumPy array. to_numpy() is the accessor the
# pandas documentation recommends over the older `.values` attribute.
data_array = df.to_numpy()
print("NumPy array shape:", data_array.shape)
print("Data type:", data_array.dtype)


NumPy array shape: (768, 9)
Data type: float64


# 4. Data Exploration

## 4.1 Quick Overview


In [None]:
# Quick overview
print(df.head())
print(df.tail())
print("\nDataset info:")
print(df.info())

# Basic statistics
print("\nDescriptive statistics:")
print(df.describe())

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  
     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
763           10      101             76             48      180  32.9   
764            2      122             70             27        0  36.8   
765            5      121             72             23      112  26.2   
766    

## 4.2 Checking Missing Values


In [None]:
#missing values
print("Missing values in each column:")
print(df.isnull().sum())

Missing values in each column:
Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


# 5. Practice Exercises

## 5.1 NumPy Exercise


In [5]:
# Import necessary library
import numpy as np

# Create a NumPy array with 20 random integers between 1 and 100.
# randint excludes its upper bound, so 101 is passed to make 100 possible.
arr = np.random.randint(1, 101, 20)

# Display the NumPy array
print("NumPy Array:")
print(arr)

# 1. Find the sum of all elements
total_sum = np.sum(arr)
print("\nSum of all elements:", total_sum)

# 2. Find the maximum and minimum values in the array
max_value = np.max(arr)
min_value = np.min(arr)
print("Maximum value:", max_value)
print("Minimum value:", min_value)


NumPy Array:
[57 29 63 70 37 15 50 90 83 46 41 93 68 21 18 63 64 33 89  6]

Sum of all elements: 1036
Maximum value: 93
Minimum value: 6


## 5.2 Pandas Exercise


In [6]:
# Import necessary libraries
import pandas as pd
import numpy as np

# 1. Create a DataFrame with 5 rows and 3 columns of random numbers (1 to 100).
# The DataFrame is built from a 5x3 matrix of random integers; randint
# excludes its upper bound, so 101 is passed to make 100 possible.
data = np.random.randint(1, 101, (5, 3))
df = pd.DataFrame(data, columns=['Column A', 'Column B', 'Column C'])

# Display the DataFrame
print("DataFrame:")
print(df)

# 2. Compute the sum of each column
# .sum() adds up the values of each column and returns one total per column.
column_sum = df.sum()
print("\nSum of each column:")
print(column_sum)

# 3. Compute the mean of each column
# .mean() returns the average of each column.
column_mean = df.mean()
print("\nMean of each column:")
print(column_mean)



DataFrame:
   Column A  Column B  Column C
0        17        84        87
1        38        18        66
2        53        96         6
3        65        33         8
4         2        38        30

Sum of each column:
Column A    175
Column B    269
Column C    197
dtype: int64

Mean of each column:
Column A    35.0
Column B    53.8
Column C    39.4
dtype: float64
