### Time to build a simple logistic regression model

1) Design model (input size, output size, forward)  
2) Loss and Optimizer  
3) Training:  
  - forward pass: prediction and loss
  - backward pass: gradients
  - update weights

In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

0) Prepare data

In [2]:
# breast-cancer dataset bundled with scikit-learn
bc = datasets.load_breast_cancer()

# feature matrix and target labels
X = bc.data
y = bc.target

# rows are samples, columns are features
n_samples, n_features = X.shape
(n_samples, n_features)

(569, 30)

In [3]:
# hold out 20% of the samples as a test set; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
?train_test_split

1) model

2) loss and optimizer

3) training loop 

In [5]:
X_train

array([[9.029e+00, 1.733e+01, 5.879e+01, ..., 1.750e-01, 4.228e-01,
        1.175e-01],
       [2.109e+01, 2.657e+01, 1.427e+02, ..., 2.903e-01, 4.098e-01,
        1.284e-01],
       [9.173e+00, 1.386e+01, 5.920e+01, ..., 5.087e-02, 3.282e-01,
        8.490e-02],
       ...,
       [1.429e+01, 1.682e+01, 9.030e+01, ..., 3.333e-02, 2.458e-01,
        6.120e-02],
       [1.398e+01, 1.962e+01, 9.112e+01, ..., 1.827e-01, 3.179e-01,
        1.055e-01],
       [1.218e+01, 2.052e+01, 7.722e+01, ..., 7.431e-02, 2.694e-01,
        6.878e-02]])

In [6]:
sc = StandardScaler()

In [7]:
# learn the scaling statistics from the training split, then apply them to it
sc.fit(X_train)
X_train = sc.transform(X_train)
X_train

array([[-1.44075296, -0.43531947, -1.36208497, ...,  0.9320124 ,
         2.09724217,  1.88645014],
       [ 1.97409619,  1.73302577,  2.09167167, ...,  2.6989469 ,
         1.89116053,  2.49783848],
       [-1.39998202, -1.24962228, -1.34520926, ..., -0.97023893,
         0.59760192,  0.0578942 ],
       ...,
       [ 0.04880192, -0.55500086, -0.06512547, ..., -1.23903365,
        -0.70863864, -1.27145475],
       [-0.03896885,  0.10207345, -0.03137406, ...,  1.05001236,
         0.43432185,  1.21336207],
       [-0.54860557,  0.31327591, -0.60350155, ..., -0.61102866,
        -0.3345212 , -0.84628745]])

In [8]:
# scale the test set with the scaler already fitted on the training data (no refit here)
X_test = sc.transform(X_test)

In [9]:
?sc.transform

In [10]:
?sc.fit_transform

In [11]:
# convert every split from a NumPy array to a float32 torch tensor
X_train, X_test, y_train, y_test = (
    torch.from_numpy(split.astype(np.float32))
    for split in (X_train, X_test, y_train, y_test)
)

In [12]:
y_train.shape, y_test.shape

(torch.Size([455]), torch.Size([114]))

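Reshape `y_train` and `y_test` from 1-D vectors into column vectors of shape `(n_samples, 1)` so they match the shape of the model's output, which the loss function requires.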

In [13]:
# one row per sample, a single column holding the label
y_train = y_train.view(len(y_train), 1)
y_test = y_test.view(len(y_test), 1)
(y_train.shape, y_test.shape)

(torch.Size([455, 1]), torch.Size([114, 1]))

Write a simple class for logistic regression:  
a linear layer $f(x) = w^\top x + b$ followed by a sigmoid, so the model outputs $\hat{y} = \sigma(w^\top x + b) \in (0, 1)$.

#### 1) model

In [14]:
class MyLogisticRegression(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Args:
        n_input_features: number of features in each input sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # single output unit: one value per input sample
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return sigmoid(linear(x)) for the input batch `x`."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

In [15]:
# seed PyTorch's RNG so the random weight initialisation is the same on
# every Restart & Run All
torch.manual_seed(42)
model = MyLogisticRegression(n_features)

#### 2) loss and optimizer

In [16]:
learning_rate = 0.01  # step size passed to the SGD optimizer below

# binary cross entropy between the model's sigmoid outputs and the targets
criterion = nn.BCELoss()
# plain SGD over all of the model's parameters
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [17]:
?nn.BCELoss

#### 3) train

In [18]:
def train(n_epochs, log_every=10):
    """Run full-batch gradient descent on the training set.

    Uses the notebook-level `model`, `criterion`, `optimizer`, `X_train`
    and `y_train`. Each epoch is one forward/backward pass over the whole
    training set followed by a single optimizer step.

    Args:
        n_epochs: number of passes over the training data.
        log_every: print the loss every `log_every` epochs; pass 0 or
            None to disable logging. Defaults to 10.
    """
    for e in range(n_epochs):
        # clear gradients first so anything left over from an earlier
        # backward() call cannot leak into this update
        optimizer.zero_grad()

        # forward pass and loss
        y_preds = model(X_train)
        loss = criterion(y_preds, y_train)

        # backward pass
        loss.backward()

        # update weights
        optimizer.step()

        # simple logging
        if log_every and (e + 1) % log_every == 0:
            print(f'epoch: {e + 1}, loss = {loss.item():.4f}')

In [19]:
epochs = 100
train(epochs)

epoch: 10, loss = 0.5468
epoch: 20, loss = 0.4639
epoch: 30, loss = 0.4089
epoch: 40, loss = 0.3697
epoch: 50, loss = 0.3403
epoch: 60, loss = 0.3172
epoch: 70, loss = 0.2987
epoch: 80, loss = 0.2833
epoch: 90, loss = 0.2703
epoch: 100, loss = 0.2591


Let's compute the accuracy on the test set. Remember to wrap evaluation code in `torch.no_grad()` so that these extra calculations are not tracked by autograd and do not become part of the gradients.

In [20]:
# evaluation only: no autograd tracking inside this block
with torch.no_grad():
    y_predictions = model(X_test)
    # round the predicted probabilities to hard 0/1 class labels
    y_prediction_classes = torch.round(y_predictions)
    n_correct = (y_prediction_classes == y_test).sum()
    acc = n_correct / float(len(y_test))
    print(f'accuracy: {acc:.4f}')

accuracy: 0.9561
