# Implementing Logistic Regression

1. Design model (input, output size, forward pass)
2. Construct loss and optimizer
3. Training loop
    - forward pass: compute prediction
    - backward pass: gradients (pytorch does this automatically)
    - update weights


In [6]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# 0. prepare data
# Load the breast-cancer dataset bundled with scikit-learn and pull out the
# feature matrix and the label vector.
bc = datasets.load_breast_cancer()
X = bc.data
y = bc.target

n_samples, n_features = X.shape
print(n_samples, n_features)

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=1234
)

# Standardize the features to zero mean and unit variance (recommended for
# logistic regression). The scaler is fitted on the training split only and
# the same transformation is then applied to the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Convert every NumPy array to a float32 torch tensor.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(array.astype(np.float32))
    for array in (X_train, X_test, y_train, y_test)
)

# Reshape the 1-D label tensors into column vectors of shape (n, 1).
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)

# 1. model
# linear combination of weights and bias:
# f = wx + b, sigmoid at the end
class LogisticRegression(nn.Module):
    """Binary logistic-regression model: one linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of features in each input sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # A single output unit: computes w·x + b for every input row.
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)
    
# Instantiate the model with one input per dataset feature.
model = LogisticRegression(n_features)


# 2. loss and optimizer
# Binary cross-entropy is computed on the model's sigmoid outputs; plain SGD
# updates the model parameters with a fixed step size.
learning_rate = 0.01
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)


# 3. training loop
# Full-batch gradient descent: every epoch runs the whole training set through
# the model once.
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass: predictions and their loss against the training labels.
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)

    # Backward pass, parameter update, then clear the gradients so they do
    # not accumulate into the next epoch.
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Report progress only on every 10th epoch.
    if (epoch + 1) % 10 != 0:
        continue
    print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')

# Evaluate on the held-out test split. Gradient tracking is switched off so
# these forward computations are not recorded in the autograd history.
with torch.no_grad():
    y_predicted = model(X_test)
    # Round each sigmoid output to get a binary class prediction.
    y_predicted_cls = y_predicted.round()
    # Accuracy: number of predictions equal to the label, divided by the
    # number of test samples.
    n_correct = (y_predicted_cls == y_test).sum()
    acc = n_correct / float(y_test.size(0))
    print(f'accuracy = {acc:.4f}')

569 30
epoch: 10, loss = 0.7018
epoch: 20, loss = 0.5487
epoch: 30, loss = 0.4582
epoch: 40, loss = 0.3993
epoch: 50, loss = 0.3578
epoch: 60, loss = 0.3269
epoch: 70, loss = 0.3028
epoch: 80, loss = 0.2834
epoch: 90, loss = 0.2673
epoch: 100, loss = 0.2538
accuracy = 0.9035
