In [1]:
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import MinMaxScaler

In [2]:
cancer = pd.read_csv("C:/Training/Academy/Statistics (Python)/Cases/Wisconsin/BreastCancer.csv")
dum_cancer = pd.get_dummies(cancer,drop_first=True)
dum_cancer.head()

Unnamed: 0,Code,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class_Malignant
0,61634,5,4,3,1,2,2,2,3,1,0
1,63375,9,1,2,6,4,10,7,7,2,1
2,76389,10,4,7,2,2,8,6,1,1,1
3,95719,6,10,10,10,8,10,7,10,7,1
4,128059,1,1,1,1,2,5,5,1,1,0


In [3]:
X = dum_cancer.iloc[:,1:-1]

scaler = MinMaxScaler()

y = dum_cancer.iloc[:,-1].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, 
                                                    random_state=23,stratify=y)
X_scl_trn = scaler.fit_transform(X_train) 
X_scl_tst = scaler.transform(X_test) 


In [4]:
X_torch = torch.from_numpy(X_scl_trn)
y_torch = torch.from_numpy(y_train)
print(X_torch.size())
print(y_torch.size())

torch.Size([489, 9])
torch.Size([489])


In [5]:
type(X_torch)

torch.Tensor

In [6]:
X_scl_trn.shape[1]

9

In [7]:
torch.manual_seed(23)
# Create a model
model = nn.Sequential(nn.Linear(in_features=X_scl_trn.shape[1], out_features=5),
                      nn.ReLU(),
                      nn.Linear(5, 3),
                      nn.ReLU(),
                      nn.Linear(3,1))

In [8]:
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)

In [9]:
y_torch.size()

torch.Size([489])

In [10]:
y_pred = model(X_torch.float())
y_torch = y_torch.unsqueeze(1)
print(y_torch.shape)
print(y_pred.shape)

torch.Size([489, 1])
torch.Size([489, 1])


In [11]:
y_pred[:5]

tensor([[-0.1439],
        [-0.1665],
        [-0.1133],
        [-0.1675],
        [-0.1674]], grad_fn=<SliceBackward0>)

### Initial Log Loss

In [12]:
loss = criterion(y_pred, y_torch.float())
loss

tensor(0.6721, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [13]:
X_torch_test = torch.from_numpy(X_scl_tst)
type(X_torch_test)

torch.Tensor

### Training Loop

In [14]:
for epoch in np.arange(0,1000):
       # Forward pass: Compute predicted y by passing x to the model
       y_pred_prob = model(X_torch.float())
       y_pred_prob_test = model(X_torch_test.float())
        
       # Compute and print loss
       loss = criterion(y_pred_prob, y_torch.float())
       tst_loss = log_loss(y_test, y_pred_prob_test.detach().numpy())
       if epoch%100 == 0:
          print('epoch: ', epoch+1,' train loss: ', loss.item(), " test loss:", tst_loss)

       # Zero gradients, perform a backward pass, and update the weights.
       optimizer.zero_grad()

       # perform a backward pass (backpropagation)
       loss.backward()

       # Update the parameters
       optimizer.step()
#print('epoch: ', epoch+1,' loss: ', loss.item())

epoch:  1  train loss:  0.6720623970031738  test loss: 5.465960702181678
epoch:  101  train loss:  0.6343051791191101  test loss: 5.465960702181678
epoch:  201  train loss:  0.5309345722198486  test loss: 4.188766431803632
epoch:  301  train loss:  0.3515016436576843  test loss: 0.897615006854744
epoch:  401  train loss:  0.21151600778102875  test loss: 0.73037209159864
epoch:  501  train loss:  0.1479852944612503  test loss: 0.776975717540855
epoch:  601  train loss:  0.11719004809856415  test loss: 0.8384654504832422
epoch:  701  train loss:  0.09987969696521759  test loss: 0.7711101772137161
epoch:  801  train loss:  0.08916476368904114  test loss: 0.7657362466807085
epoch:  901  train loss:  0.08208033442497253  test loss: 0.7646388123174692


### Training Set Log Loss after training loop execution

In [15]:
loss

tensor(0.0769, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [32]:
y_wt_sum = model(X_torch_test.float()) # Equivalent predict_proba / predict
type(y_wt_sum)

torch.Tensor

In [33]:
y_wt_sum[:5]

tensor([[ 5.3576],
        [-4.5207],
        [-4.5996],
        [-4.2323],
        [-4.6090]], grad_fn=<SliceBackward0>)

In [34]:
y_pred_prob = nn.Sigmoid()(y_wt_sum)
y_pred_prob[:5]

tensor([[0.9953],
        [0.0108],
        [0.0100],
        [0.0143],
        [0.0099]], grad_fn=<SliceBackward0>)

In [35]:
y_pred_prob = y_pred_prob.detach().numpy()
type(y_pred_prob)

numpy.ndarray

In [36]:
y_pred_prob.shape

(210, 1)

In [37]:
y_pred_prob[:5]

array([[0.99530977],
       [0.01076472],
       [0.00995564],
       [0.01431128],
       [0.00986382]], dtype=float32)

In [38]:
y_pred_prob = y_pred_prob.reshape(y_test.shape[0],)
y_pred_prob.shape

(210,)

In [39]:
y_pred = np.where(y_pred_prob >= 0.5,1,0)

y_pred[:5]

array([1, 0, 0, 0, 0])

### Test Set Accuracy Score

In [40]:
print(accuracy_score(y_test,y_pred))

0.9571428571428572


### Test Set Log Loss

In [41]:
log_loss(y_test, y_pred_prob)

0.127680274027093