# Probabilistic ML

In [2]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import sklearn

In [3]:
from sklearn.datasets import load_breast_cancer

In [4]:
# Load the breast-cancer dataset bundled with scikit-learn; the returned
# object exposes the 'data', 'target' and 'target_names' entries used below.
bc = load_breast_cancer()

In [5]:
# Display the loaded dataset object.
# NOTE(review): this prints every array in full; showing only the shape of
# bc['data'] would keep the notebook output much shorter.
bc

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0

In [6]:
# Pull the feature matrix, the integer class labels and the class names
# out of the dataset object in one step.
all_data_X, all_data_Y, y_labels = (
    bc[field] for field in ('data', 'target', 'target_names')
)

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Hold out 15% of the samples for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    all_data_X,
    all_data_Y,
    test_size=0.15,
    random_state=11,
)

In [9]:
# (n_samples, n_features) of the full feature matrix.
all_data_X.shape

(569, 30)

In [10]:
# Number of input features (second axis of the feature matrix); used as the
# input width of the first linear layer.
k = all_data_X.shape[1]

In [11]:
# Two linear layers: k input features -> hidden_dim hidden units -> 2 class scores.
hidden_dim = 32
lin_layer_1 = torch.nn.Linear(in_features=k, out_features=hidden_dim)
lin_layer_2 = torch.nn.Linear(in_features=hidden_dim, out_features=2)

In [13]:
# Classifier: linear -> sigmoid -> linear -> log-softmax.
# dim=1 normalises over the class axis of the (batch, classes) output.
# Leaving `dim` unset relies on a deprecated implicit choice and triggers a
# warning when the model is called.
model = torch.nn.Sequential(
    lin_layer_1,
    torch.nn.Sigmoid(),
    lin_layer_2,
    torch.nn.LogSoftmax(dim=1),
)

In [14]:
# Adam over all model parameters with a small, fixed learning rate.
lr = 1e-3
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [15]:
batch_size = 16
epochs = 1000

# Features as float32 and labels as int64: NLLLoss requires int64 class
# indices, and the platform-default integer dtype of the label array is not
# guaranteed to be 64-bit.
train_data = torch.utils.data.TensorDataset(
    torch.tensor(X_train).float(),
    torch.tensor(y_train).long(),
)
# Reshuffle the training samples at the start of every epoch.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

In [16]:
def train_epoch(loader, model, optimizer, loss_fn):
    """Run one optimisation pass over ``loader``.

    For every batch the gradients are reset, the loss of ``model`` on the
    batch is computed with ``loss_fn`` and back-propagated, and ``optimizer``
    takes one step.

    Args:
        loader: iterable of batches; element 0 of a batch is the inputs and
            element 1 the targets.
        model: callable mapping a batch of inputs to predictions.
        optimizer: object exposing ``zero_grad()`` and ``step()``.
        loss_fn: callable ``(predictions, targets) -> scalar loss tensor``.

    Returns:
        The loss of the *last* batch only, detached and converted to NumPy.
        It is not an epoch average, so it can fluctuate strongly between
        epochs.
    """
    for batch in loader:
        inputs, targets = batch[0], batch[1]

        optimizer.zero_grad()
        batch_loss = loss_fn(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

    return batch_loss.detach().numpy()

    

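The model ends in a `LogSoftmax` layer, so it is trained with PyTorch's [NLL Loss](https://pytorch.org/docs/stable/generated/torch.nn.NLLLoss.html), which expects log-probabilities as input.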

In [17]:
# Negative log-likelihood loss; it is fed the log-probabilities produced by
# the model's final LogSoftmax layer together with the integer class labels.
loss_fn = torch.nn.NLLLoss()

In [18]:
# Train for `epochs` passes over the training set and report progress every
# 50th epoch. The reported value is whatever train_epoch returns for that epoch.
for i in range(epochs):
    l = train_epoch(train_loader, model, optimizer, loss_fn)
    if i % 50:
        continue
    print(f'epoch {i} loss: {l}')

  input = module(input)


epoch 0 loss: 0.7584481239318848
epoch 50 loss: 0.05002618208527565
epoch 100 loss: 0.11602851003408432
epoch 150 loss: 0.17102168500423431
epoch 200 loss: 0.0057830810546875
epoch 250 loss: 0.09873395413160324
epoch 300 loss: 0.24445295333862305
epoch 350 loss: 0.006610189098864794
epoch 400 loss: 0.40958335995674133
epoch 450 loss: 0.017466740682721138
epoch 500 loss: 0.016956372186541557
epoch 550 loss: 0.01651722751557827
epoch 600 loss: 0.013682951219379902
epoch 650 loss: 0.7425881028175354
epoch 700 loss: 0.014808467589318752
epoch 750 loss: 0.17853085696697235
epoch 800 loss: 0.007590789347887039
epoch 850 loss: 0.12827709317207336
epoch 900 loss: 0.0015486382180824876
epoch 950 loss: 0.020334139466285706


### Experiment

In [19]:
class Experiment:
    """Bundle a model with its optimiser, loss function and data loaders.

    ``train()`` fits the model on ``train_loader`` and ``evaluate()`` measures
    classification accuracy on ``test_loader``.
    """

    def __init__(self, model, optimizer, loss_fn, train_loader, test_loader, epochs=100):
        """Store the components of the experiment.

        Args:
            model: callable mapping a batch of inputs to per-class scores.
            optimizer: optimiser that updates the model's parameters.
            loss_fn: callable ``(predictions, targets) -> scalar loss tensor``.
            train_loader: iterable of batches used by ``train()``; element 0
                of a batch is the inputs and element 1 the targets.
            test_loader: iterable of batches used by ``evaluate()``, same layout.
            epochs: number of passes over ``train_loader`` made by ``train()``.
        """
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.epochs = epochs

    def train(self):
        """Train the model for ``self.epochs`` epochs.

        Every 100th epoch (starting with epoch 0) the mean of that epoch's
        per-batch losses is printed.
        """
        for i in range(self.epochs):

            losses = []
            for batch in self.train_loader:
                # Use the optimiser owned by this experiment rather than
                # whatever `optimizer` happens to be defined globally.
                self.optimizer.zero_grad()

                batchX = batch[0]
                batchY = batch[1]

                preds = self.model(batchX)

                loss = self.loss_fn(preds, batchY)
                losses.append(loss.item())

                loss.backward()
                self.optimizer.step()

            if i % 100 == 0:
                # Plain average of the per-batch losses: a short final batch
                # is weighted slightly more than its sample count warrants,
                # which is acceptable for progress reporting.
                mean_loss = sum(losses) / len(losses) if losses else float('nan')
                print(f'epoch {i} loss: {mean_loss}')

    def evaluate(self, thresh=0.5):
        """Compute classification accuracy on ``self.test_loader``.

        The predicted class of a sample is the index of its largest model
        output along axis 1.

        Args:
            thresh: unused; kept so existing calls keep working.

        Returns:
            A 0-d float tensor holding ``correct / total``.

        Raises:
            ZeroDivisionError: if ``test_loader`` yields no samples.
        """
        correct = 0
        total = 0

        # No gradients are needed for evaluation.
        with torch.no_grad():
            for batch in self.test_loader:
                batchX = batch[0]
                batchY = batch[1]

                # Native torch argmax instead of routing a tensor through NumPy.
                preds = torch.argmax(self.model(batchX), dim=1)

                accs = (preds == batchY)
                correct += int(accs.sum())
                total += accs.shape[0]

        return torch.tensor(correct / total)
        
        
        
        

In [20]:
batch_size = 16

# Features as float32, labels as int64 (the dtype NLLLoss requires for class
# indices; the label array's platform-default integer dtype is not guaranteed
# to be 64-bit).
train_data = torch.utils.data.TensorDataset(
    torch.tensor(X_train).float(),
    torch.tensor(y_train).long(),
)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

test_data = torch.utils.data.TensorDataset(
    torch.tensor(X_test).float(),
    torch.tensor(y_test).long(),
)
# Accuracy does not depend on sample order, so the held-out set is not shuffled.
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [21]:
# Experiment 1: train 10 independently initialised networks with 32 hidden
# units and record the test accuracy of each.
exp_1_res = []
hidden_dim = 32
for i in range(10):
    print(f'rand init: {i+1}')
    # Fresh layers on every iteration so each run starts from a new random init.
    lin_layer_1 = torch.nn.Linear(k, hidden_dim)
    lin_layer_2 = torch.nn.Linear(hidden_dim, 2)
    model = torch.nn.Sequential(
        lin_layer_1,
        torch.nn.Sigmoid(),
        lin_layer_2,
        # Explicit class axis: omitting `dim` relies on a deprecated implicit
        # choice and triggers a warning.
        torch.nn.LogSoftmax(dim=1),
    )
    # `lr` is the learning rate defined in an earlier cell.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.NLLLoss()

    exp = Experiment(model, optimizer, loss_fn, train_loader, test_loader, epochs=100)

    exp.train()
    exp_1_res.append(exp.evaluate())

rand init: 1
epoch 0 loss: 0.65611082507718
rand init: 2
epoch 0 loss: 0.7145069491478705
rand init: 3
epoch 0 loss: 0.5800196124661353
rand init: 4
epoch 0 loss: 0.6436661751039566
rand init: 5
epoch 0 loss: 0.6319565926828692
rand init: 6
epoch 0 loss: 0.6017074584960938
rand init: 7
epoch 0 loss: 0.6176518471010269
rand init: 8
epoch 0 loss: 0.6355407468734249
rand init: 9
epoch 0 loss: 0.6978634249779486
rand init: 10
epoch 0 loss: 0.5877715080015121


In [22]:
# Test accuracy of each of the 10 runs with 32 hidden units.
exp_1_res

[tensor(0.9535),
 tensor(0.9535),
 tensor(0.9302),
 tensor(0.9302),
 tensor(0.9535),
 tensor(0.9651),
 tensor(0.9419),
 tensor(0.9535),
 tensor(0.9535),
 tensor(0.9302)]

Repeat the experiment with a larger hidden layer (128 units instead of 32).

In [23]:
# Experiment 2: same protocol as before, but with 128 hidden units.
exp_2_res = []
hidden_dim = 128
for i in range(10):
    print(f'rand init: {i+1}')

    # Fresh layers on every iteration so each run starts from a new random init.
    lin_layer_1 = torch.nn.Linear(k, hidden_dim)
    lin_layer_2 = torch.nn.Linear(hidden_dim, 2)
    model = torch.nn.Sequential(
        lin_layer_1,
        torch.nn.Sigmoid(),
        lin_layer_2,
        # Explicit class axis: omitting `dim` relies on a deprecated implicit
        # choice and triggers a warning.
        torch.nn.LogSoftmax(dim=1),
    )
    # `lr` is the learning rate defined in an earlier cell.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.NLLLoss()

    exp = Experiment(model, optimizer, loss_fn, train_loader, test_loader, epochs=100)

    exp.train()
    exp_2_res.append(exp.evaluate())

rand init: 1
epoch 0 loss: 0.5615937017625378


  input = module(input)


rand init: 2
epoch 0 loss: 0.6357685827439831
rand init: 3
epoch 0 loss: 0.5844228806034211
rand init: 4
epoch 0 loss: 0.6567003188594696
rand init: 5
epoch 0 loss: 0.5929158733737084
rand init: 6
epoch 0 loss: 0.5861065156998173
rand init: 7
epoch 0 loss: 0.617814463953818
rand init: 8
epoch 0 loss: 0.525787845734627
rand init: 9
epoch 0 loss: 0.6573209454936366
rand init: 10
epoch 0 loss: 0.5827255249023438


In [24]:
# Test accuracy of each of the 10 runs with 128 hidden units.
exp_2_res

[tensor(0.9651),
 tensor(0.9651),
 tensor(0.9535),
 tensor(0.9535),
 tensor(0.9419),
 tensor(0.9535),
 tensor(0.9419),
 tensor(0.9302),
 tensor(0.9535),
 tensor(0.9419)]

In [25]:
# Average test accuracy over the 10 random initialisations of each configuration.
exp_1_mean_acc = np.array(exp_1_res).mean()
exp_2_mean_acc = np.array(exp_2_res).mean()
print(f'32 hidden unit experiment mean acc: {exp_1_mean_acc}')
print(f'128 hidden unit experiment mean acc: {exp_2_mean_acc}')

32 hidden unit experiment mean acc: 0.9465116262435913
128 hidden unit experiment mean acc: 0.949999988079071


The larger model (128 hidden units) shows a minor improvement over the smaller one (32 hidden units) when trained with NLLLoss, whereas the same experiment with an MSE loss showed essentially no difference. The improvement is very small (<1%) and lies within the spread of the individual runs, so it is weak evidence at best, but it may suggest that NLLLoss makes training more complex models easier.