# PyTorch Training Pipeline

# Import Libraries

In [46]:
import pandas as pd
import numpy as np

In [47]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

In [48]:
import torch
import torch.nn as nn

In [49]:
import kagglehub
import os

# Loading Dataset ...

In [50]:
path = kagglehub.dataset_download("ahmedesso/brest-cancer")
print("Path to dataset files:", path)

Using Colab cache for faster access to the 'brest-cancer' dataset.
Path to dataset files: /kaggle/input/brest-cancer


In [51]:
df = pd.read_csv('/root/.cache/kagglehub/datasets/ahmedesso/brest-cancer/versions/1/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


# Data Analysis

In [52]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 33 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       569 non-null    int64  
 1   diagnosis                569 non-null    object 
 2   radius_mean              569 non-null    float64
 3   texture_mean             569 non-null    float64
 4   perimeter_mean           569 non-null    float64
 5   area_mean                569 non-null    float64
 6   smoothness_mean          569 non-null    float64
 7   compactness_mean         569 non-null    float64
 8   concavity_mean           569 non-null    float64
 9   concave points_mean      569 non-null    float64
 10  symmetry_mean            569 non-null    float64
 11  fractal_dimension_mean   569 non-null    float64
 12  radius_se                569 non-null    float64
 13  texture_se               569 non-null    float64
 14  perimeter_se             5

In [53]:
df.isnull().sum()

Unnamed: 0,0
id,0
diagnosis,0
radius_mean,0
texture_mean,0
perimeter_mean,0
area_mean,0
smoothness_mean,0
compactness_mean,0
concavity_mean,0
concave points_mean,0


In [54]:
df.duplicated().sum()

np.int64(0)

## Drop columns

In [55]:
df.drop(labels=['id', 'Unnamed: 32'], axis=1, inplace=True, errors='ignore')
df.isnull().sum().sum()

np.int64(0)

In [56]:
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


# Train Test Split

In [57]:
X = df.drop('diagnosis', axis=1)
y = df['diagnosis']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=17)

X_train.shape, X_test.shape

((455, 30), (114, 30))

## Label Encoder

In [58]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

## Scale Data

In [59]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_scaled.shape, X_test_scaled.shape

((455, 30), (114, 30))

In [60]:
X_train_scaled

array([[-6.15618727e-01,  1.29001280e+00, -5.99538702e-01, ...,
         8.78575670e-02, -5.02422353e-01, -7.49410125e-02],
       [-6.75751228e-02, -1.90713736e-04, -1.35161089e-01, ...,
        -4.99517635e-01, -8.99275604e-01, -4.30484541e-01],
       [-4.77890491e-01, -4.40915101e-01, -4.80938664e-01, ...,
        -1.38289475e-01,  2.09627101e-01, -3.98661201e-01],
       ...,
       [-1.11201424e+00, -1.60324004e+00, -1.09481914e+00, ...,
        -7.20200722e-01,  4.95792878e-02, -3.65191825e-01],
       [-3.54508947e-01, -7.58328001e-01, -3.43546367e-01, ...,
        -2.00517855e-01,  1.04906155e+00, -1.47915224e-01],
       [-4.92237183e-01, -9.59280053e-01, -5.57778125e-01, ...,
        -1.12149789e+00, -9.21378641e-03, -8.80400735e-01]])

# Convert Numpy to Tensor

In [61]:
# Features
X_train_tensor = torch.from_numpy(X_train_scaled).float()
X_test_tensor = torch.from_numpy(X_test_scaled).float()

# Label
y_train_tensor = torch.from_numpy(y_train).float()
y_test_tensor = torch.from_numpy(y_test).float()

In [62]:
X_train_tensor

tensor([[-6.1562e-01,  1.2900e+00, -5.9954e-01,  ...,  8.7858e-02,
         -5.0242e-01, -7.4941e-02],
        [-6.7575e-02, -1.9071e-04, -1.3516e-01,  ..., -4.9952e-01,
         -8.9928e-01, -4.3048e-01],
        [-4.7789e-01, -4.4092e-01, -4.8094e-01,  ..., -1.3829e-01,
          2.0963e-01, -3.9866e-01],
        ...,
        [-1.1120e+00, -1.6032e+00, -1.0948e+00,  ..., -7.2020e-01,
          4.9579e-02, -3.6519e-01],
        [-3.5451e-01, -7.5833e-01, -3.4355e-01,  ..., -2.0052e-01,
          1.0491e+00, -1.4792e-01],
        [-4.9224e-01, -9.5928e-01, -5.5778e-01,  ..., -1.1215e+00,
         -9.2138e-03, -8.8040e-01]])

# Custom Model

In [63]:
class MySimpleNN(nn.Module):

  def __init__(self, num_features):

    super().__init__()
    self.linear = nn.Linear(in_features=num_features, out_features=1)
    self.sigmoid = nn.Sigmoid()


  def forward(self, X):

    out = self.linear(X)
    out = self.sigmoid(out)

    return out

In [64]:
# Learning Rate and Epochs
epochs = 50
learning_rate = 0.01

# Loss Fucntion
loss_function = nn.BCELoss()

# Create Model
model = MySimpleNN(X_train_tensor.shape[1])

# Optimizer
optimizer = torch.optim.SGD(params = model.parameters(), lr=learning_rate)

# Loop for Epochs
for i in range(epochs):

      # Calculate Y_pred
      y_pred = model(X_train_tensor)

      # Calculate Loss
      loss = loss_function(y_pred, y_train_tensor.view(-1, 1))

      # Clear Gradient
      optimizer.zero_grad()

      # Backward Pass
      loss.backward()

      # Update Parameters
      optimizer.step()

      # Print Loss
      print(f'Epochs: {i+1}, Loss: {loss.item()}')

Epochs: 1, Loss: 0.5668601393699646
Epochs: 2, Loss: 0.5551890730857849
Epochs: 3, Loss: 0.5441182851791382
Epochs: 4, Loss: 0.5336041450500488
Epochs: 5, Loss: 0.5236071348190308
Epochs: 6, Loss: 0.5140907764434814
Epochs: 7, Loss: 0.505021870136261
Epochs: 8, Loss: 0.49636998772621155
Epochs: 9, Loss: 0.4881073236465454
Epochs: 10, Loss: 0.48020827770233154
Epochs: 11, Loss: 0.4726494550704956
Epochs: 12, Loss: 0.4654093086719513
Epochs: 13, Loss: 0.45846793055534363
Epochs: 14, Loss: 0.45180708169937134
Epochs: 15, Loss: 0.44541001319885254
Epochs: 16, Loss: 0.4392611086368561
Epochs: 17, Loss: 0.4333459138870239
Epochs: 18, Loss: 0.4276511073112488
Epochs: 19, Loss: 0.4221644699573517
Epochs: 20, Loss: 0.4168744683265686
Epochs: 21, Loss: 0.4117704927921295
Epochs: 22, Loss: 0.40684258937835693
Epochs: 23, Loss: 0.40208160877227783
Epochs: 24, Loss: 0.3974789083003998
Epochs: 25, Loss: 0.3930264711380005
Epochs: 26, Loss: 0.3887168765068054
Epochs: 27, Loss: 0.38454297184944153
Epo

# Check Weights and Bias

In [65]:
model.linear.weight

Parameter containing:
tensor([[ 0.1434,  0.1431,  0.1938,  0.1275,  0.1970,  0.0264, -0.0115, -0.0184,
         -0.0514,  0.0340,  0.1637, -0.0055,  0.0015,  0.2172, -0.0872,  0.0133,
         -0.0747, -0.0390,  0.0994,  0.1173,  0.0463,  0.0624,  0.1263,  0.1092,
          0.0599,  0.2370,  0.2612,  0.1155,  0.1081, -0.0278]],
       requires_grad=True)

In [66]:
model.linear.bias

Parameter containing:
tensor([0.0691], requires_grad=True)

# Evalation

In [67]:
with torch.no_grad():
  y_pred = model.forward(X_test_tensor)
  y_pred = (y_pred > 0.5).float()
  accuracy = (y_pred == y_test_tensor).float().mean()
  print(f'Accuracy: {accuracy.item()}')

Accuracy: 0.5409356951713562
