---

# 0. Deep Learning with PyTorch 1 (Project 1, Cancer Diagnosis)

# Goal: 

## Diagnose breast cancer from the input feature values using logistic regression.

# _00. Import Main Libraries_

# _01. Load the Data and Do the Preprocessing Step_

# _02. Modelling_

# _03. Loss Function_

# _04. Optimizer_

# _05. Training Loop_

# _06. Success Evaluation (Accuracy)_

---
---
---

# _00. Import Main Libraries_

In [1]:
import torch as th
import torch.nn as nn
import numpy as np
import pandas as pd

from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# _01. Load the Data and Do the Preprocessing Step_

In [2]:
# Load scikit-learn's bundled breast-cancer dataset; the returned object
# exposes .data, .target and .feature_names, which the cells below read.

bc = datasets.load_breast_cancer()

In [3]:
# Wrap the feature matrix in a DataFrame (one named column per feature)
# and print its per-column summary

bcDf = pd.DataFrame(data=bc.data, columns=bc.feature_names)

bcDf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 30 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   mean radius              569 non-null    float64
 1   mean texture             569 non-null    float64
 2   mean perimeter           569 non-null    float64
 3   mean area                569 non-null    float64
 4   mean smoothness          569 non-null    float64
 5   mean compactness         569 non-null    float64
 6   mean concavity           569 non-null    float64
 7   mean concave points      569 non-null    float64
 8   mean symmetry            569 non-null    float64
 9   mean fractal dimension   569 non-null    float64
 10  radius error             569 non-null    float64
 11  texture error            569 non-null    float64
 12  perimeter error          569 non-null    float64
 13  area error               569 non-null    float64
 14  smoothness error         5

In [4]:
# Features (inputs) and labels (targets) of the dataset

X = bc.data
y = bc.target

In [5]:
# Sample (observation) and feature counts, unpacked from the 2-D shape of X.
# nFeatures later sets the model's input dimension.

nSamples, nFeatures = X.shape

print(X.shape)

(569, 30)


In [6]:
# Hold out 20% of the samples for testing; random_state fixes the shuffle
# so the split is reproducible

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [7]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then reused for the test split.

scaler1 = StandardScaler()
scaler1.fit(X_train)

X_train1 = scaler1.transform(X_train)

X_test1 = scaler1.transform(X_test)

In [8]:
# Convert the NumPy arrays to float32 torch tensors (features and targets alike)

X_train1_tso, X_test1_tso, y_train_tso, y_test_tso = (
    th.from_numpy(array.astype(np.float32))
    for array in (X_train1, X_test1, y_train, y_test)
)

In [9]:
# Reshape the target tensors to shape (N, 1); -1 lets PyTorch infer N.
# The model is created with output_dim=1, so its predictions are (N, 1) and
# nn.BCELoss expects input and target to have the same shape.
y_train_tso = y_train_tso.view(-1,1)

y_test_tso = y_test_tso.view(-1,1)

# _02. Modelling (Logistic Regression)_

In [10]:
# prepare the logistic reg class

class LogisticReg(nn.Module):
    """Logistic-regression model: a single linear layer followed by a sigmoid.

    Args:
        input_dim: number of input features fed to the linear layer.
        output_dim: number of outputs produced by the linear layer.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, X):
        """Return the element-wise sigmoid of the linear layer's output for X."""
        logits = self.linear(X)
        return th.sigmoid(logits)

In [11]:
# Seed PyTorch's RNG so the randomly initialized weights of nn.Linear (and
# therefore the training curve and final accuracy) are the same on every run.
th.manual_seed(0)

model = LogisticReg(input_dim=nFeatures, output_dim=1)

# _03. Loss Function_

In [12]:
# Binary cross-entropy on probabilities: the model's forward() already applies
# the sigmoid, so nn.BCELoss receives values that are already probabilities.
fLoss = nn.BCELoss()

# _04. Optimizer_

In [13]:
# Step size for the SGD optimizer created below.
learning_rate = 0.005

In [14]:
# Plain stochastic gradient descent over all model parameters

optimizer = th.optim.SGD(params=model.parameters(), lr=learning_rate)

# _05. Training Loop_

In [18]:
# Number of passes over the full training set performed by the loop below.
nEpochs = 200

In [19]:
for epoch in range(nEpochs):

    # Clear gradients first: if a previous run of this cell was interrupted
    # after backward() but before the reset, stale gradients would otherwise
    # be added to this iteration's gradients.
    optimizer.zero_grad()

    # forward pass on the whole training set (full-batch update)
    yPredicted = model(X_train1_tso)

    # loss between the predicted probabilities and the targets
    loss = fLoss(yPredicted, y_train_tso)

    # backward pass: populate .grad on the model parameters
    loss.backward()

    # update the weights
    optimizer.step()

    # report the loss every 5th epoch to watch convergence
    if (epoch + 1) % 5 == 0:
        print(f"epoch: {epoch + 1}, loss = {loss.item():.5f}")

epoch: 5, loss = 0.33187
epoch: 10, loss = 0.32642
epoch: 15, loss = 0.32124
epoch: 20, loss = 0.31629
epoch: 25, loss = 0.31157
epoch: 30, loss = 0.30705
epoch: 35, loss = 0.30273
epoch: 40, loss = 0.29858
epoch: 45, loss = 0.29461
epoch: 50, loss = 0.29078
epoch: 55, loss = 0.28711
epoch: 60, loss = 0.28357
epoch: 65, loss = 0.28016
epoch: 70, loss = 0.27688
epoch: 75, loss = 0.27371
epoch: 80, loss = 0.27064
epoch: 85, loss = 0.26768
epoch: 90, loss = 0.26482
epoch: 95, loss = 0.26205
epoch: 100, loss = 0.25936
epoch: 105, loss = 0.25676
epoch: 110, loss = 0.25423
epoch: 115, loss = 0.25178
epoch: 120, loss = 0.24941
epoch: 125, loss = 0.24710
epoch: 130, loss = 0.24485
epoch: 135, loss = 0.24267
epoch: 140, loss = 0.24054
epoch: 145, loss = 0.23847
epoch: 150, loss = 0.23646
epoch: 155, loss = 0.23450
epoch: 160, loss = 0.23259
epoch: 165, loss = 0.23072
epoch: 170, loss = 0.22890
epoch: 175, loss = 0.22713
epoch: 180, loss = 0.22540
epoch: 185, loss = 0.22371
epoch: 190, loss = 0.

# _06. Success Evaluation (Accuracy)_

In [20]:
# Evaluate on the held-out test set; gradient tracking is disabled because
# nothing is being trained here.

with th.no_grad():
    yPredicted = model(X_test1_tso)
    # round() maps each predicted probability to a hard 0/1 class label
    yPredictedClasses = th.round(yPredicted)
    # fraction of test samples whose predicted class equals the true label
    accEval = th.eq(yPredictedClasses, y_test_tso).sum() / len(y_test_tso)
    print(f"accuracy = {accEval:.5f}")

accuracy = 0.92105
