<a href="https://colab.research.google.com/github/satyam1354/Deep-Learning/blob/main/pyTorch_training_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np  # numpy → Fast number crunching, arrays.
import pandas as pd  # pandas → DataFrames for structured data.
import torch         # torch → PyTorch, deep learning.
from sklearn.model_selection import train_test_split   # train_test_split → Splits data for training/testing.
from sklearn.preprocessing import StandardScaler     # StandardScaler → Normalizes data.
from sklearn.preprocessing import LabelEncoder       # LabelEncoder → Encodes categories into numbers.

In [3]:
# train_test_split: A function from Scikit-learn used to split data into training and test sets.
#  Why we use it:
#       Helps with model evaluation by testing on unseen data.
#       Makes sure your model doesn’t overfit.

from sklearn.model_selection import train_test_split
X = np.array([[1, 2], [3, 4], [5, 6]])
y = np.array([0, 1, 0])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
print(X_train)  # Randomized portion of the data (always)
print(X_test)
print(y_train)
print(y_test)

[[5 6]
 [1 2]]
[[3 4]]
[0 0]
[1]


In [4]:
# StandardScaler: Scales data so it has a mean of 0 and standard deviation of 1.
#   Why we use it:
#        Helps models converge faster and perform better.
#        Removes scale sensitivity — great for models like logistic regression and neural networks.
#----------------------------------------------------------------

# 1. What is StandardScaler()? StandardScaler is a part of the sklearn.preprocessing module and it’s used for feature scaling. It standardizes your data — which basically means it transforms the features so they all have:

# Mean = 0
# Standard deviation = 1
# This is called z-score normalization and it makes sure all features contribute equally to the model’s performance.

# Formula:
# 𝑧 = (𝑥−𝜇)/ 𝜎

# Where:
# x = individual data point
# μ = mean of the feature
# σ = standard deviation of the feature
#-------------------------------------------------------------
# 2. Why do we need to scale data? Many machine learning models — like logistic regression, SVMs, and neural networks
#  — are sensitive to the scale of the features. If one feature has values between 0 and 1, and another between 1000 and 10000,
#  the model might favor the feature with larger values. Standardizing fixes that.

# This does two things:
# scaler.fit_transform() is a convenience method in Scikit-learn that combines two steps into one:
     # fit(X) → Calculates the mean and standard deviation for each feature.
     # transform(X) → Uses those learned parameters to scale the data.

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = np.array([[1, 2], [3, 4], [5, 6]])
X_scaled = scaler.fit_transform(X)
print(X_scaled)

# Let’s calculate it manually! ✏️
# Means:
#    Feature 1: 𝜇 = (1+3+5)/3  = 3
#    Feature 2: 𝜇 = (2+4+6)/3  = 4

# Standard Deviations:
# Feature 1: σ1 = rootOf(((feature1.item-feature) ka square + ...) )/3 = 1.63
# Feature 2: σ2 = rootOf(((feature1.item-feature) ka square + ...) )/3 =  1.63

# Applying the transformation: For [1,2]:
#    z1 = (1-3) /1.63 = -1.22
#    z2 = (2-4) /1.63 = -1.22

# Applying the transformation: For [3,4]:
#     z1 = (3-3) /1.63 = 0
#     z2 = (4-4) /1.63 =  0
# Applying the transformation: For [5,6]:
#     z1 = (5-3) /1.63 = 1.22
#    z2 = (6-4) /1.63 =  1.22


[[-1.22474487 -1.22474487]
 [ 0.          0.        ]
 [ 1.22474487  1.22474487]]


In [5]:
#  LabelEncoder: Converts categorical labels into numbers.
#   Why we use it:
#         Machine learning models usually need numeric input.
#         Simplifies categorical data like classes or categories.

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
labels = ['cat', 'dog', 'cat', 'bird']
encoded_labels = le.fit_transform(labels)
print(encoded_labels)  # [1 2 1 0]

#  This does two things:
   # fit(labels) → Learns the unique categories and assigns them numbers.
     # It sorts the labels alphabetically and assigns a number starting from 0:

  #  transform(labels) → Replaces each label in the list with its assigned number:



[1 2 1 0]


In [6]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [32]:
#df.columns # Check column names:
#df.info() # Check info about data types and missing values:
#df.describe()  # Check summary statistics:

df.shape

(569, 31)

In [8]:
 # df.drop() is a Pandas method used to remove rows or columns from a DataFrame.
# inplace=True → Changes the DataFrame directly without creating a new copy.

df.drop(columns = ['id','Unnamed: 32'], inplace = True)

In [9]:
df.head()  # shows the first 5 rows of the DataFrame by default.

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


# train test **split**

In [10]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, 1:], df.iloc[:, 0], test_size = 0.2)

# df.iloc[:, 1:]
# → Selects all rows and all columns starting from index 1
# → This means it's selecting all feature columns (excluding the first column, which is likely the target variable).
# → Stored in X (features)

# df.iloc[:, 0]
# → Selects all rows and only the first column (column at index 0)
# → This means it's selecting the target variable (labels like 'Malignant' or 'Benign').
# → Stored in y (target variable)

# test_size=0.2
# → 20% of the dataset will be used for testing, while 80% will be used for training.

# X_train, X_test, y_train, y_test
# → train_test_split() returns four sets of data:

# X_train: 80% of the features for training
# X_test: 20% of the features for testing
# y_train: 80% of the target values for training
# y_test: 20% of the target values for testing

# Scaling

In [11]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # # Fit and transform on training data
X_test = scaler.transform(X_test)   # Only transform using the same scaler


In [12]:
X_train

array([[-0.50364043, -1.47831067, -0.57248706, ..., -1.37521227,
        -0.99555773, -0.78424154],
       [ 0.24287504, -0.59098894,  0.23859204, ...,  0.10497906,
         0.82080623,  0.69008472],
       [-0.71412412, -0.47344762, -0.74120777, ..., -0.12517264,
        -0.5282641 , -0.74673103],
       ...,
       [-0.22299552,  2.06636678, -0.28545856, ..., -0.9087414 ,
        -0.33302497, -0.76630173],
       [ 1.87061555,  1.34729047,  2.03638241, ...,  2.0454738 ,
         0.04305038,  0.76238759],
       [-0.64957579,  0.34934162, -0.61151643, ..., -0.04093411,
        -0.69949841,  0.8210997 ]])

In [13]:
y_train

Unnamed: 0,diagnosis
493,B
205,M
187,B
459,B
486,B
...,...
83,M
299,B
377,B
563,M


# **Label Encoding**  

In [14]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train) # Fit and transform on training data
y_test = encoder.transform(y_test) # Only transform using the same encoder


In [15]:
print(y_train.shape)
print(y_train)

(455,)
[0 1 0 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1 1 0 0 1 0 0 1 0 0 0 1
 0 0 0 1 1 0 1 0 0 1 0 0 1 1 0 0 0 0 0 1 1 0 1 0 0 1 1 0 0 1 1 0 1 0 1 1 1
 0 0 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0
 0 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 1 1 0 0 1 1 0 1 0 1 1 0 0 0 1 0 1 1 1 1 0
 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 0 1 1 1 0
 0 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 0
 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 0 0 0
 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0
 0 0 0 1 0 1 1 0 1 1 0 1 1 1 1 0 0 1 1 0 1 0 0 0 1 1 1 1 0 1 1 0 0 0 0 0 0
 0 0 0 0 1 1 0 0 1 0 1 0 1 0 1 1 0 0 0 1 0 1 0 0 0 0 1 0 0 1 0 1 0 0 1 0 1
 1 0 1 0 0 0 1 1 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 1 1 1 1 0 0 1 0 0 1 1 1 0 0
 0 1 1 0 0 0 1 1 0 0 0 1 1 0 1 0 1 0 0 1 0 1 1 0 0 0 1 0 0 0 1 1 0 1 0 1 0
 0 1 1 0 0 0 1 0 0 1 0]


# NUmpy arrays to PyTorch tensors

In [16]:
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [17]:
X_train_tensor.shape

torch.Size([455, 30])

In [18]:
y_train_tensor.shape

torch.Size([455])

# **Defining the model**

In [19]:
class MySimpleNN():
  def __init__(self, X):

    self.weights = torch.rand(X.shape[1], 1, dtype = torch.float64, requires_grad = True)
    self.bias = torch.zeros(1, dtype=torch.float64, requires_grad = True)

  # X = Input feature matrix ((samples × features)) , Weights ((features × 1))(30 * 1) = Har feature ka ek weight hota hai, Bias ((1,)) = Ek scalar value hoti hai jo har prediction me add hoti hai
  def forward(self, X):
     z = torch.matmul(X, self.weights) + self.bias  # Multiplication (X @ weights + bias) ka output shape → (samples × 1)
     y_pred = torch.sigmoid(z)
     return y_pred

  def loss_function(self, y_pred, y):
    # Clamp prediction to avoid log(0)
    epsilon = 1e-7
    y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)

    # Calculate loss
    loss =  -(y_train_tensor * torch.log(y_pred) + (1 - y_train_tensor) * torch.log(1 - y_pred)).mean()
    return loss


# **Important Parameters**

In [20]:
learning_rate = 0.1
epochs = 25  # no of runs on the dataset

# **Training Pipeline**

In [21]:
# create model
model = MySimpleNN(X_train_tensor)
# print(model.weights)
# print(model.bias)

#define loop
for epoch in range(epochs):

#    with torch.no_grad():
#      y_pred = model.forward(X_test_tensor)

   # forward pass
   y_pred = model.forward(X_train_tensor)
   #print(y_pred)
   print(y_pred.shape)

   # loss calculate
   loss = model.loss_function(y_pred, y_train_tensor)
    ## print(f'Epoch: {epoch+1}, Loss: {loss.item()}')

   # backward Pass
   loss.backward()


# torch.no_grad(): ka use inference aur parameter updates ke waqt hona chahiye taaki gradients compute na ho aur model fast chale:
# Ye ensure karta hai ki PyTorch computational graph maintain na kare, aur extra memory allocate na ho.
# Isse PyTorch extra memory allocate nahi karega, aur model training faster aur memory-efficient ho jayegi.

   # parameters update                                      # Gradient Descent Algorithm:
   with torch.no_grad():# don't track gradient  # torch.no_grad(): Yeh ensure karega ki gradient calculation off rahe (varna PyTorch unnecessary memory reserve karega).
    model.weights -= learning_rate * model.weights.grad    # W new= W old − α⋅ ∂W/∂L
    model.bias -= learning_rate * model.bias.grad          # b new = b old − α⋅ ∂b/∂L

   # zero gradients
    model.weights.grad.zero_()  # PyTorch me gradients accumulate hote hain, isliye har update ke baad gradients reset karna zaroori hai.
    model.bias.grad.zero_()      # Agar zero_() nahi lagayenge, to pichle gradients naye gradients me add ho jayenge, jo galat hoga.

    # print loss in each epoch
    print(f'Epoch: {epoch + 1}, Loss: {loss.item()}')


torch.Size([455, 1])
Epoch: 1, Loss: 3.340539208379527
torch.Size([455, 1])
Epoch: 2, Loss: 3.200772713211368
torch.Size([455, 1])
Epoch: 3, Loss: 3.0574942785996377
torch.Size([455, 1])
Epoch: 4, Loss: 2.903276578484999
torch.Size([455, 1])
Epoch: 5, Loss: 2.7469019506405803
torch.Size([455, 1])
Epoch: 6, Loss: 2.585381010698003
torch.Size([455, 1])
Epoch: 7, Loss: 2.4175132864410074
torch.Size([455, 1])
Epoch: 8, Loss: 2.2498768475263438
torch.Size([455, 1])
Epoch: 9, Loss: 2.083584711980005
torch.Size([455, 1])
Epoch: 10, Loss: 1.9237095558970936
torch.Size([455, 1])
Epoch: 11, Loss: 1.77144976110263
torch.Size([455, 1])
Epoch: 12, Loss: 1.624413803424339
torch.Size([455, 1])
Epoch: 13, Loss: 1.4803757127777666
torch.Size([455, 1])
Epoch: 14, Loss: 1.34562604322632
torch.Size([455, 1])
Epoch: 15, Loss: 1.2261851814513596
torch.Size([455, 1])
Epoch: 16, Loss: 1.1235251051069441
torch.Size([455, 1])
Epoch: 17, Loss: 1.0384384020473874
torch.Size([455, 1])
Epoch: 18, Loss: 0.9707025866

In [22]:
model.weights

tensor([[ 0.1423],
        [-0.2057],
        [ 0.2126],
        [ 0.0585],
        [-0.2317],
        [-0.5173],
        [ 0.1748],
        [-0.2649],
        [ 0.0688],
        [ 0.5032],
        [-0.0576],
        [-0.0812],
        [ 0.2175],
        [ 0.1904],
        [ 0.7096],
        [ 0.0262],
        [-0.1085],
        [-0.4005],
        [ 0.4525],
        [-0.0763],
        [-0.3065],
        [ 0.5711],
        [ 0.4268],
        [-0.0806],
        [ 0.2898],
        [-0.1965],
        [ 0.2461],
        [-0.1130],
        [ 0.0263],
        [-0.0324]], dtype=torch.float64, requires_grad=True)

In [23]:
model.bias

tensor([-0.1205], dtype=torch.float64, requires_grad=True)

# **Evaluation**

In [36]:
# model evaluation
with torch.no_grad():
  y_pred = model.forward(X_test_tensor)
  y_pred = (y_pred > 0.5).float()   # Binary classification (0/1)  # .float() => y_pred tensor float type me rahe.
  accuracy = (y_pred == y_test_tensor).float().mean()   # .mean() total correct predictions ka average
  print(f'Accuracy: {accuracy.item()}')           # .item() se accuracy ko scalar value me convert kar diya.
# Agar accuracy kam hai, to hyperparameter tuning (learning rate, epochs, batch size) karo ya data preprocessing improve karo!

# print(y_train_tensor)
# print(y_pred)

Accuracy: 0.509695291519165
