## Implement logistic regression from scratch using torch
## 3 steps for any model
1) Design a model (input size, output size, forward pass)
2) Construct loss and optimizer
3) Implement actual training
    1) forward pass - compute prediction and loss
    2) backward pass - compute gradients of the loss wrt the model parameters
    3) update weights

In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

## prepare data

In [2]:
## load scikit-learn's bundled breast cancer dataset (a binary classification problem)
bc = datasets.load_breast_cancer()

In [3]:
## X is the feature matrix, y the class labels
X, y = bc['data'], bc['target']

In [4]:
## X is 2-D: first axis counts samples, second axis counts features
n_samples, n_features = X.shape

In [None]:
n_samples

In [None]:
n_features

In [None]:
## train test splitting

In [5]:
## hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

In [None]:
## Standardise the features to zero mean and unit variance with StandardScaler.
## Call fit_transform on the training set: it learns the per-feature mean and variance
## and scales the training data in one step. Call only transform on the test set so it
## is scaled with the statistics learned from the training data (no test-set leakage).

In [6]:
## scaler that standardises each feature to zero mean and unit variance
sc = StandardScaler()

In [None]:
sc.

In [7]:
## fit the scaler on the training split and scale that split in the same call
X_train = sc.fit_transform(X_train)


In [8]:
## scale the test split with the already-fitted scaler (no refit on test data)
X_test = sc.transform(X_test)

In [None]:
## First have to convert numpy arrays into pytorch tensors
## cast double to float 32

In [9]:
## cast every split to float32 and wrap it as a torch tensor
X_train, X_test, y_train, y_test = (
    torch.from_numpy(split.astype(np.float32))
    for split in (X_train, X_test, y_train, y_test)
)

In [None]:
## BCELoss expects targets with the same shape as the model output (n, 1), so reshape y from (n,) to (n, 1)

In [10]:
## turn each label vector into a single-column matrix; -1 lets torch infer the row count
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)

## Design model (input size, output size, forward pass)

In [None]:
## two parts: first the linear map f(x) = wx + b, then a sigmoid on top. A single output node, no hidden layer

In [11]:
class LogisticRegression(nn.Module):
    """Binary logistic regression: one linear layer followed by a sigmoid."""

    def __init__(self, n_input_features):
        """Create the linear layer mapping `n_input_features` inputs to one output."""
        super().__init__()
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for `x`."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

In [12]:
## the linear layer takes one input per feature
model = LogisticRegression(n_features)

## Define loss and optimizer

In [13]:
## binary cross-entropy on the model's sigmoid outputs, minimised with plain SGD
learning_rate = 0.01
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

## training loop

In [14]:
num_epochs = 100
for epoch in range(num_epochs):
    ## forward pass: predictions for the whole training set and their loss
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)

    ## backward pass: backpropagate the loss to get the parameter gradients
    loss.backward()

    ## apply the update, then clear the gradients so they do not
    ## accumulate into the next epoch
    optimizer.step()
    optimizer.zero_grad()

    ## report progress on every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f'epoch : {epoch + 1}, loss : {loss.item():.4f}')
        

epoch : 10, loss : 0.6830
epoch : 20, loss : 0.5280
epoch : 30, loss : 0.4392
epoch : 40, loss : 0.3825
epoch : 50, loss : 0.3429
epoch : 60, loss : 0.3136
epoch : 70, loss : 0.2908
epoch : 80, loss : 0.2724
epoch : 90, loss : 0.2573
epoch : 100, loss : 0.2446


In [None]:
torch.tensor(0.7)>0.5

## inference/evaluation

In [15]:
## evaluate on the test split with a 0.5 decision threshold;
## no_grad skips gradient tracking since nothing is trained here
with torch.no_grad():
    y_predicted = model(X_test)
    ## round the outputs to hard 0/1 class labels
    y_predicted_cls = y_predicted.round()
    ## accuracy = number of matching labels / number of test samples
    acc = (y_predicted_cls == y_test).sum() / float(len(y_test))
    print(f'accuracy = {acc:.4f}')
    

accuracy = 0.8860


In [19]:
(y_predicted_cls==y_test).sum()

tensor(101)