In [1]:
# https://github.com/patrickloeber/pytorchTutorial/blob/master/08_logistic_regression.py

import numpy as np
import torch
import torch.nn as nn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# 0. prepare data
# Load the breast-cancer dataset that ships with scikit-learn.
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

# X is 2-D: one row per sample, one column per feature (569, 30 here).
n_samples, n_features = X.shape
n_smaples = n_samples  # misspelled legacy name, kept so any cell still using it keeps working
print(n_samples, n_features)

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

# Standardize the features: StandardScaler removes the mean and scales each
# feature to unit variance. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics are used.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Convert every NumPy array into a float32 torch tensor. This happens only
# after the scikit-learn steps above, which work on NumPy arrays.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(array.astype(np.float32))
    for array in (X_train, X_test, y_train, y_test)
)

# Reshape the label tensors from flat vectors into column vectors
# (one row per sample, a single column).
y_train = y_train.view(-1, 1)  # 455, 1
y_test = y_test.view(-1, 1)  # 114, 1

569 30


In [3]:
# 1. model
# linear model, f = wx + b, sigmoid at the end
class Model(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid."""

    def __init__(self, n_input_features):
        """Build the linear layer mapping ``n_input_features`` inputs to one output."""
        super().__init__()
        # A single output unit: forward() squashes it into one value per sample.
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for the input batch ``x``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

# Seed PyTorch's RNG before building the model so the randomly initialized
# weights -- and therefore the losses and accuracy printed later -- are the
# same on every Restart & Run All.
torch.manual_seed(1234)

# `model` is an nn.Module (not a tensor); calling model(x) runs forward().
model = Model(n_features)  # 30 input features -> 1 output

In [4]:
# 2. loss and optimizer
# Hyperparameters: number of passes over the training data and the SGD step size.
num_epochs = 100
lr = 0.01

# Binary cross-entropy between the model's sigmoid outputs and the labels.
criterion = nn.BCELoss()

# Plain SGD over all of the model's parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)

In [5]:
# 3. training loop
# Each epoch runs one forward/backward pass over the whole training set.
for epoch in range(num_epochs):
    # Clear gradients at the start of every iteration. Doing it here (rather
    # than after the step) means gradients left behind by an interrupted run
    # or an earlier backward() call cannot be accumulated into this update.
    optimizer.zero_grad()

    # forward pass and loss
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # backward pass and parameter update
    loss.backward()
    optimizer.step()

    # report the loss every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f"epoch {epoch + 1}, loss = {loss.item():.4f}")

# evaluation
# Runs once after training, outside the loop. no_grad() turns off gradient
# tracking because nothing here is back-propagated.
with torch.no_grad():
    y_predicted = model(X_test)  # sigmoid outputs, values between 0 and 1
    y_predicted_cls = torch.round(y_predicted)  # rounded to hard 0/1 class labels
    # fraction of test samples whose predicted class equals the true label
    acc = (y_predicted_cls == y_test).sum() / float(len(y_test))
    print(f"accuracy: {acc.item():.4f}")

epoch 10, loss = 0.6251
epoch 20, loss = 0.5085
epoch 30, loss = 0.4363
epoch 40, loss = 0.3871
epoch 50, loss = 0.3511
epoch 60, loss = 0.3235
epoch 70, loss = 0.3015
epoch 80, loss = 0.2834
epoch 90, loss = 0.2683
epoch 100, loss = 0.2553
accuracy: 0.9035
