## Logistic Regression - a linear model with sigmoid
    
(based on a tutorial by Python Engineer on YouTube)

In [12]:
import torch
import torch.nn as nn
import numpy as np            #for data transformation
from sklearn import datasets  #to load binary classification dataset
from sklearn.preprocessing import StandardScaler #to scale our features
from sklearn.model_selection import train_test_split #to separate training and testing data
#import matplotlib.pyplot as plt

Steps:

    0. prepare the data
    1. setup a model
    2. loss and optimizer
    3. training loop

## Step 0 - Data Preparation

In [13]:
# Load the scikit-learn breast cancer dataset (binary classification).
bc = datasets.load_breast_cancer()
X = bc.data      # feature matrix, one row per sample
y = bc.target    # class label per sample

# Keep the dataset dimensions around; n_features sizes the model's input layer later.
n_samples = X.shape[0]
n_features = X.shape[1]
print(n_samples, n_features)

569 30


In [26]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

# Scale the features to zero mean and unit variance (always recommended for
# logistic regression).
# The scaler is fitted on the training split ONLY. The test split is transformed
# with the statistics learned from the training data, so the model sees test
# features on the same scale it was trained on and no test-set information
# leaks into preprocessing.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Convert the NumPy arrays to float32 torch tensors.
X_train = torch.from_numpy(X_train.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))

# Reshape the targets from a flat vector into a column vector (one value per row)
# so their shape matches the model output of shape (n_samples, 1).
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)
X_train.shape

(455, 30)


torch.Size([455, 30])

## Step 1 - Model

Here the model is a linear layer followed by a sigmoid function: $f = \sigma(wx + b)$.


In [28]:
#model

class LogisticRegression(nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid.

    Args:
        n_input_features: number of input features per sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # One output unit: a single score per sample for the binary class label.
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return sigmoid(linear(x)), i.e. one value between 0 and 1 per sample.

        The method must be called `forward`: calling the module instance
        dispatches to it.
        """
        logits = self.linear(x)
        return torch.sigmoid(logits)

# Build the model with one input per feature column and a single output;
# the trailing bare expression displays the module structure as the cell output.
model = LogisticRegression(n_features)  # 30 inputs and 1 output for this dataset
model

LogisticRegression(
  (linear): Linear(in_features=30, out_features=1, bias=True)
)

## Step 2 - Loss and Optimizer

In [29]:
# Binary cross-entropy loss: expects probabilities in [0, 1], which the model's
# sigmoid output provides.
criterion = nn.BCELoss()

# Plain stochastic gradient descent over all model parameters.
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

## Step 3 - Training Loop

In [30]:
num_epoch = 100

# Each epoch performs one gradient step computed on the whole training set.
for epoch in range(num_epoch):

    # Forward pass: predicted probabilities for every training sample.
    y_pred = model(X_train)

    # Binary cross-entropy between predictions and targets.
    loss = criterion(y_pred, y_train)

    # Backward pass: accumulate gradients into each parameter's .grad attribute.
    loss.backward()

    # Apply the update, then clear the gradients; backward() adds to .grad,
    # so stale values would otherwise carry over into the next epoch.
    optimizer.step()
    optimizer.zero_grad()

    # Report the loss only on every 10th epoch.
    if (epoch + 1) % 10 != 0:
        continue
    print(f'epoch:{epoch+1}, loss = {loss.item(): .4f}')

epoch:10, loss =  0.5379
epoch:20, loss =  0.4411
epoch:30, loss =  0.3811
epoch:40, loss =  0.3403
epoch:50, loss =  0.3107
epoch:60, loss =  0.2880
epoch:70, loss =  0.2699
epoch:80, loss =  0.2551
epoch:90, loss =  0.2427
epoch:100, loss =  0.2321


## Evaluate the model 

Evaluation should not be tracked in the computational graph, so it is wrapped in `with torch.no_grad():`.

In [36]:
with torch.no_grad():
    # The sigmoid output lies between 0 and 1; rounding turns it into a
    # 0/1 class label.
    test_probabilities = model(X_test)
    y_predicted_class = test_probabilities.round()

    # Accuracy = number of predictions that equal y_test, divided by the
    # number of test samples.
    n_correct = (y_predicted_class == y_test).sum()
    acc = n_correct / float(len(y_test))

    print(f'The accuracy is = {acc*100:.2f}')

The accuracy is = 92.11
