<a href="https://colab.research.google.com/github/reeda23/Deep-Learning-With-Pytorch/blob/main/7_Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Logistic Regression

1) Design model (input, output size, forward pass) <br>
2) Construct loss and optimizer <br>
3) Training loop<br>
> -forward pass: compute prediction and loss <br>
  -backward pass: compute gradients <br>
  -update weights



**Import libraries**

In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


# Data Preprocessing
## Train-Test Split


In [2]:
# Step 0: prepare data
# Binary classification problem where we predict cancer based on the
# input features.
bc = datasets.load_breast_cancer()
X = bc.data
y = bc.target

n_samples, n_features = X.shape
print(n_samples, n_features)   #569 samples and 30 features

# Hold out 20% of the samples as the test set.
# random_state seeds the random generator so the train-test split is
# deterministic; without a seed the split is different on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)



569 30


## Standardization of Data
It is always recommended to scale features to zero mean and unit variance when dealing with logistic regression.

In [3]:
# Scale the features.
# The scaler is fitted on the training split only and the same statistics are
# then applied to the test split. Calling fit on the test data as well would
# compute a new mean and variance for each feature and let the model learn
# about the test data, so the test score would no longer be a good estimate of
# how the model performs on unseen data.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Convert every split to a float32 tensor; the data is originally double
# precision, which would create errors later on.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(split.astype(np.float32))
    for split in (X_train, X_test, y_train, y_test)
)

# Reshape the labels from a row into a column vector
# (e.g. torch.Size([455]) -> torch.Size([455, 1]) for y_train).
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)

# Model
**Function** -- a linear combination of weights and bias: <br>
$f = wx + b$ <br>
with a sigmoid applied at the end: $\hat{y} = \sigma(f)$

In [4]:
#1) model
class LogisticRegression(nn.Module):
    """Logistic-regression classifier: one linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of features in each input sample.
    """

    def __init__(self, n_input_features):
        super().__init__()

        # A single built-in linear layer is all that is needed: it maps the
        # input features to one output value per sample.
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid.

        Returns:
            A tensor with one value per sample, each between 0 and 1.
        """
        linear_out = self.linear(x)
        return torch.sigmoid(linear_out)

# Seed PyTorch's random number generator before the model is built so that the
# randomly initialised weights -- and therefore the whole training run -- are
# the same on every execution, mirroring random_state in the train-test split.
torch.manual_seed(1234)
model = LogisticRegression(n_features)

# Loss Function and Optimizer


In [6]:
#2) loss and optimizer
# Binary cross-entropy loss, applied to the model's sigmoid output.
criterion = nn.BCELoss()

# Stochastic gradient descent over all of the model's parameters.
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [7]:
# The model's only parameters are the weight and bias of its linear layer.
w = model.linear.weight
b = model.linear.bias

In [8]:
# Display the weight Parameter of the model's linear layer.
w

Parameter containing:
tensor([[-0.0043,  0.1369, -0.0485, -0.0528,  0.0683,  0.1412,  0.1063,  0.0006,
          0.1716,  0.1273, -0.0572, -0.0027,  0.1664,  0.0651,  0.1590,  0.1653,
          0.0289,  0.1070,  0.0254, -0.0226, -0.1584, -0.0718, -0.0638, -0.1323,
          0.0438,  0.1572, -0.0098,  0.0239, -0.0869,  0.1671]],
       requires_grad=True)

In [9]:
# Display the bias Parameter of the model's linear layer.
b

Parameter containing:
tensor([-0.1517], requires_grad=True)

# Training Loop

In [10]:
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass over the whole training set, then the loss against the labels.
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)

    # Backward pass: compute the gradients of the loss.
    loss.backward()

    # Update the parameters, then reset the gradients so they do not
    # accumulate into the next epoch's backward pass.
    optimizer.step()
    optimizer.zero_grad()

    # Report the loss on every 10th epoch (10, 20, ..., 100).
    is_report_epoch = epoch % 10 == 9
    if is_report_epoch:
        print(f'epoch = {epoch+1}, loss = {loss.item():.4f}')



epoch = 10, loss = 0.7189
epoch = 20, loss = 0.5533
epoch = 30, loss = 0.4532
epoch = 40, loss = 0.3887
epoch = 50, loss = 0.3445
epoch = 60, loss = 0.3124
epoch = 70, loss = 0.2881
epoch = 80, loss = 0.2689
epoch = 90, loss = 0.2534
epoch = 100, loss = 0.2405


# Evaluation



In [12]:
# Evaluation should not be part of the computational graph, so gradient
# tracking is switched off while testing.
with torch.no_grad():
    y_predicted = model(X_test)

    # The sigmoid returns a value between 0 and 1; rounding turns values
    # larger than 0.5 into class 1 and everything else into class 0.
    y_predicted_cls = y_predicted.round()

    # accuracy = number of correct predictions / total number of predictions
    # eq() marks each correct prediction and sum() counts them.
    n_correct = y_predicted_cls.eq(y_test).sum()
    acc = n_correct / float(y_test.shape[0])

    print(f'accuracy = {acc:.4f}')

accuracy = 0.9474


In [26]:
# Shape of the rounded class predictions for the test set.
y_predicted_cls.shape

torch.Size([114, 1])

In [28]:
# Shape of the test labels, for comparison with the predictions' shape.
y_test.shape

torch.Size([114, 1])

In [29]:
# Element-wise mask: True wherever the predicted class equals the test label.
z = torch.eq(y_predicted_cls, y_test)

In [30]:
# Shape of the boolean correctness mask.
z.shape

torch.Size([114, 1])

In [38]:
# Show the correctness mask as a NumPy array.
# NOTE(review): this displays the entire 114-row mask; consider showing only a
# slice to keep the notebook output short.
z.numpy()

array([[ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [

In [39]:
# Number of correct predictions: summing the boolean mask counts its True
# entries directly, with no `== True` comparison or conversion to NumPy.
int(z.sum())

108