In [2]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler    # To scale features
from sklearn.model_selection import train_test_split    # To seperate train/test data

In [7]:
# 0) Prepare data
# Binary classification: predict cancer from the input features.
bc = datasets.load_breast_cancer()
X = bc.data
y = bc.target

n_samples, n_features = X.shape  # 569 samples / 30 features

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

# Scale the features to zero mean and unit variance (recommended whenever
# logistic regression is used). The scaler is fitted on the training split
# only and then applied unchanged to the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# The feature arrays are double precision at this point; cast them to float32
# before wrapping them as tensors to avoid a dtype error later on.
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))

# Targets: cast to float32 and reshape from (n,) to a column vector (n, 1).
y_train = torch.from_numpy(y_train.astype(np.float32)).view(-1, 1)
y_test = torch.from_numpy(y_test.astype(np.float32)).view(-1, 1)

# 1) Model
# f = wx + b, sigmoid function at the end (logistic regression)
class LogisticRegression(nn.Module):
    """Logistic-regression model: a single linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of features in each input sample; used as the
            input size of the linear layer, whose output size is 1.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # f = wx + b with n_input_features inputs and one output.
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

# Seed PyTorch's RNG before the model is built: nn.Linear initialises its
# weights randomly, so without a fixed seed the losses and accuracy printed
# below would differ on every Restart & Run All.
torch.manual_seed(1234)
model = LogisticRegression(n_features)

# 2) loss and optimizer
learning_rate = 0.01
criterion = nn.BCELoss()    # Binary Cross Entropy Loss, applied to the model's sigmoid output
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# 3) training loop
# Each epoch is one full-batch step: the whole training set goes through the
# model at once.
n_epochs = 200
for epoch in range(n_epochs):
    # Forward pass: predictions and loss on the training set
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # Backward pass: compute gradients of the loss w.r.t. the parameters
    loss.backward()

    # Update weights, then reset the gradients so they do not accumulate
    # into the next epoch's backward pass
    optimizer.step()
    optimizer.zero_grad()

    # Log every 10th epoch (the loss shown was computed before this epoch's update)
    if (epoch+1) % 10 == 0:
        print(f'epoch: {epoch+1} | loss: {loss.item():.4f}')

# 4) evaluation on the held-out test set
with torch.no_grad():
    y_pred = model(X_test)
    # Round the outputs (values between 0 and 1) to class labels 0/1.
    # Without torch.no_grad() the rounding would be tracked by autograd.
    y_pred_cls = y_pred.round()
    # Fraction of test samples whose predicted class equals the label
    accuracy = y_pred_cls.eq(y_test).sum() / float(y_test.shape[0])
    print(f'Accuracy: {accuracy:.4f}')

epoch: 10 | loss: 0.5421
epoch: 20 | loss: 0.4567
epoch: 30 | loss: 0.3998
epoch: 40 | loss: 0.3592
epoch: 50 | loss: 0.3284
epoch: 60 | loss: 0.3043
epoch: 70 | loss: 0.2847
epoch: 80 | loss: 0.2685
epoch: 90 | loss: 0.2547
epoch: 100 | loss: 0.2429
epoch: 110 | loss: 0.2325
epoch: 120 | loss: 0.2235
epoch: 130 | loss: 0.2154
epoch: 140 | loss: 0.2081
epoch: 150 | loss: 0.2016
epoch: 160 | loss: 0.1957
epoch: 170 | loss: 0.1903
epoch: 180 | loss: 0.1853
epoch: 190 | loss: 0.1807
epoch: 200 | loss: 0.1765
Accuracy: 0.9035
