We will use the breast cancer dataset that ships with scikit-learn and train a logistic regression model in PyTorch to perform binary classification for breast cancer.

In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# 1) Prepare Data
# Fetch the breast cancer dataset through sklearn's `datasets` module
bc = datasets.load_breast_cancer()
X = bc.data
y = bc.target

# Rows of X are samples, columns are features
n_samples = X.shape[0]
n_features = X.shape[1]
print('total samples:', n_samples, '\ntotal features:', n_features)

total samples: 569 
total features: 30


In [3]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

# Standardize to zero mean / unit variance.
# The scaler is fit on the training split only and then reused on the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# NumPy arrays -> float32 torch tensors
X_train, X_test, y_train, y_test = (
    torch.from_numpy(arr.astype(np.float32))
    for arr in (X_train, X_test, y_train, y_test)
)

# Reshape the label vectors into columns of shape (n, 1)
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

torch.Size([455, 30]) torch.Size([114, 30])
torch.Size([455, 1]) torch.Size([114, 1])


In [4]:
# 2) Model
class LogisticRegression(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of features in each input sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # Single output unit, since there is only one class label to predict
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Apply the linear layer to `x` and squash the result with a sigmoid."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

# Build the model with one input per dataset feature (n_features is taken from X.shape)
model = LogisticRegression(n_features)

In [5]:
# 3) Loss and Optimizer
# Binary cross-entropy loss on the model's outputs
criterion = nn.BCELoss()

# Stochastic gradient descent over all model parameters
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [6]:
# 3) Training Loop
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass over the whole training set, then compute the loss
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)

    # Backpropagate, apply the optimizer update, then clear the gradients
    # so they do not accumulate into the next epoch
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Report the loss on every 10th epoch
    if not (epoch + 1) % 10:
        print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')

epoch: 10, loss = 0.4871
epoch: 20, loss = 0.4189
epoch: 30, loss = 0.3724
epoch: 40, loss = 0.3384
epoch: 50, loss = 0.3122
epoch: 60, loss = 0.2914
epoch: 70, loss = 0.2743
epoch: 80, loss = 0.2599
epoch: 90, loss = 0.2476
epoch: 100, loss = 0.2370


In [7]:
# 4) Evaluate model
# Gradients are not needed for evaluation
with torch.no_grad():
    y_predicted = model(X_test)
    # Round the sigmoid outputs to hard 0/1 class predictions
    y_predicted_cls = y_predicted.round()
    # Fraction of test samples whose predicted class matches the label
    acc = (y_predicted_cls == y_test).sum() / float(len(y_test))
    print(f'accuracy = {acc:.4f}')

accuracy = 0.9035


Even with a very simple logistic regression model, we are able to achieve about 90% accuracy on the held-out test set for the binary classification of breast cancer. scikit-learn makes it very easy to load an already available dataset, and PyTorch makes it very easy to define the model and run the training loop.