# Binary Classification Problem

In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [13]:
# Load the breast-cancer dataset that ships with scikit-learn.
bc = datasets.load_breast_cancer()
x = bc.data
y = bc.target

# x is 2-D: the first axis counts samples, the second counts features.
n_samples = x.shape[0]
n_features = x.shape[1]

print(n_samples, n_features)
print(y)

569 30
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0
 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1
 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0
 1 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1
 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 0 0 0 1 1
 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 0
 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1
 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 1
 0 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1
 1 1 1 1 1 1 0 1 0

In [5]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1234,
)

In [7]:
# Standardize the features.
sc = StandardScaler()

# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both splits so the test data never influences the fit.
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [8]:
# Convert every split (features and labels alike) from a NumPy array
# to a float32 torch tensor.
x_train, x_test, y_train, y_test = (
    torch.from_numpy(array.astype(np.float32))
    for array in (x_train, x_test, y_train, y_test)
)

In [9]:
# Inspect the label tensor's shape before reshaping it.
print(y_train.size())

torch.Size([455])


In [10]:
# Reshape the labels from a flat vector of length N into an (N, 1) column:
# one row per sample, a single column. -1 lets torch infer N.
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)
print(y_train.shape)

torch.Size([455, 1])


In [15]:
# the model
# f = wx + b with a sigmoid activation function

class LogisticRegression(nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of features in each input sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # One output unit: the model emits a single value per sample.
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        logits = self.linear(x)
        return torch.sigmoid(logits)


In [17]:
# Seed PyTorch's RNG before building the model: nn.Linear initializes its
# weights randomly, so without a seed the losses and final accuracy change
# from one run of the notebook to the next.
torch.manual_seed(1234)

model = LogisticRegression(n_features)

learning_rate = 0.01
# Binary cross-entropy computed on the probabilities the model returns.
criterion = nn.BCELoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
# Training loop: full-batch gradient descent over the training split.
num_epochs = 100
log_interval = 10  # report the loss once every this many epochs

for epoch in range(num_epochs):
    # Forward pass: predicted probabilities, then the loss against the labels.
    y_pred = model(x_train)
    loss = criterion(y_pred, y_train)

    # Backward pass: compute gradients of the loss w.r.t. the parameters.
    loss.backward()

    # Apply one optimizer update using those gradients.
    optimizer.step()

    # Clear the gradients so they do not accumulate into the next epoch.
    optimizer.zero_grad()

    # epoch is zero-based, so report it one-based.
    if (epoch + 1) % log_interval == 0:
        print(f"epoch={epoch+1} loss={loss.item():.4f}")

In [23]:
# Evaluate on the held-out test split.

with torch.no_grad():  # inference only, so skip gradient tracking
    y_predicted = model(x_test)
    # Turn probabilities into class labels with an explicit threshold:
    # 1 if the probability is 0.5 or more, else 0.
    # (torch.round() rounds halves to the nearest even integer, so it would
    # send a probability of exactly 0.5 to class 0 rather than class 1.)
    y_pred_class = (y_predicted >= 0.5).float()
    # Fraction of test samples whose predicted class equals the true label.
    accuracy = y_pred_class.eq(y_test).sum() / float(y_test.shape[0])
    print(f"accuracy: {accuracy:.4f}")

accuracy: 0.9035
