# Simple ANN training with torch

In [1]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import sklearn

In [2]:
from sklearn.datasets import load_breast_cancer

In [3]:
bc = load_breast_cancer()

In [4]:
bc

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0

In [5]:
all_data_X = bc['data']
all_data_Y = bc['target']
y_labels = bc['target_names']

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X_train, X_test, y_train, y_test = train_test_split(all_data_X, all_data_Y, test_size=0.15, random_state=11)

In [8]:
all_data_X.shape

(569, 30)

In [9]:
k = all_data_X.shape[1]

In [10]:
hidden_dim = 32
lin_layer_1 = torch.nn.Linear(k, hidden_dim)
lin_layer_2 = torch.nn.Linear(hidden_dim, 1)

In [11]:
model = torch.nn.Sequential(
    lin_layer_1,
    torch.nn.Sigmoid(),
    lin_layer_2,
    torch.nn.Sigmoid()
)

In [12]:
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [13]:
batch_size = 16
epochs = 1000

train_data = torch.utils.data.TensorDataset(torch.tensor(X_train).float(), torch.tensor(y_train.reshape(-1,1)).float())
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

In [14]:
def train_epoch(loader, model, optimizer, loss_fn):
    
    for batch in loader:
        optimizer.zero_grad()
        
        batchX = batch[0]
        batchY = batch[1]
        
        preds = model(batchX)
        
        loss = loss_fn(preds, batchY)
        # print(loss)
        loss.backward()
        optimizer.step()
    
    return loss.detach().numpy()

    

0g. Write a for loop that uses your code from 0e and 0f to train your neural net. You should use either the torch.nn.MSELoss error function, or the torch.nn.BCELoss error function (we will discuss cross-entropy later in the course).

In [15]:
loss_fn = torch.nn.BCELoss()

In [16]:
for i in range(epochs):
    l = train_epoch(train_loader, model, optimizer, loss_fn)
    if i % 50 == 0:
        print(f'epoch {i} loss: {l}')

epoch 0 loss: 0.6211494207382202
epoch 50 loss: 0.03889436274766922
epoch 100 loss: 1.1971546411514282
epoch 150 loss: 0.031006554141640663
epoch 200 loss: 0.20277239382266998
epoch 250 loss: 0.24325567483901978
epoch 300 loss: 0.019136415794491768
epoch 350 loss: 0.471358984708786
epoch 400 loss: 0.18913328647613525
epoch 450 loss: 0.2306215763092041
epoch 500 loss: 0.023762637749314308
epoch 550 loss: 0.007969596423208714
epoch 600 loss: 0.048988986760377884
epoch 650 loss: 0.0021723180543631315
epoch 700 loss: 0.19803769886493683
epoch 750 loss: 0.03306877240538597
epoch 800 loss: 0.1990635246038437
epoch 850 loss: 0.008778966031968594
epoch 900 loss: 0.00073231291025877
epoch 950 loss: 0.010038892738521099


## Test with 10 random inits

In [17]:
class Experiment:
    
    def __init__(self, model, optimizer, loss_fn, train_loader, test_loader, epochs=100):
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.epochs = epochs
        
    def train(self):
        
        for i in range(self.epochs):
            
            losses = []
            for batch in self.train_loader:
                optimizer.zero_grad()

                batchX = batch[0]
                batchY = batch[1]

                preds = self.model(batchX)

                loss = self.loss_fn(preds, batchY)
                losses.append(loss.detach().numpy())
                
                loss.backward()
                optimizer.step()
            
            
            if i % 100 == 0:
                mean_loss = np.array(losses).sum() / len(losses) # technically not quite right cuz partial batches will be weighted slightly higher (we will survive)
                print(f'epoch {i} loss: {mean_loss}')
        
    def evaluate(self, thresh=0.5):
        
        with torch.no_grad():
            
            correct = 0
            total = 0
            for batch in self.test_loader:
                batchX = batch[0]
                batchY = batch[1]
                
                preds = self.model(batchX)
                inference = (preds > thresh)
                
                accs = (inference == batchY)
                
                correct += accs.sum()
                total += accs.shape[0]
                


            return correct / total
        
        
        
        

In [18]:
batch_size = 16

train_data = torch.utils.data.TensorDataset(torch.tensor(X_train).float(), torch.tensor(y_train.reshape(-1,1)).float())
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

test_data = torch.utils.data.TensorDataset(torch.tensor(X_test).float(), torch.tensor(y_test.reshape(-1,1)).float())
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [19]:
exp_1_res = []
for i in range(10):
    print(f'rand init: {i+1}')
    hidden_dim = 32
    lin_layer_1 = torch.nn.Linear(k, hidden_dim)
    lin_layer_2 = torch.nn.Linear(hidden_dim, 1)
    model = torch.nn.Sequential(
        lin_layer_1,
        torch.nn.Sigmoid(),
        lin_layer_2,
        torch.nn.Sigmoid()
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.BCELoss()

    exp = Experiment(model, optimizer, loss_fn, train_loader, test_loader, epochs=100)
    
    exp.train()
    exp_1_res.append(exp.evaluate())

rand init: 1
epoch 0 loss: 0.6531494509789252
rand init: 2
epoch 0 loss: 0.6496071354035409
rand init: 3
epoch 0 loss: 0.6410527998401273
rand init: 4
epoch 0 loss: 0.6510386313161542
rand init: 5
epoch 0 loss: 0.6800644167007939
rand init: 6
epoch 0 loss: 0.6573259907384073
rand init: 7
epoch 0 loss: 0.6828846470002206
rand init: 8
epoch 0 loss: 0.6682565750614289
rand init: 9
epoch 0 loss: 0.7578258514404297
rand init: 10
epoch 0 loss: 0.7539026814122354


In [20]:
exp_1_res

[tensor(0.9419),
 tensor(0.9535),
 tensor(0.9419),
 tensor(0.9535),
 tensor(0.9419),
 tensor(0.9535),
 tensor(0.9419),
 tensor(0.9419),
 tensor(0.9535),
 tensor(0.9302)]

Change something about your neural net model. For example, you could make the middle layer bigger (> 32 nodes), or you could add more layers, or you could change the activation function. Then, run the experiment in 1a again with your changed model.

In [21]:
exp_2_res = []
for i in range(10):
    print(f'rand init: {i+1}')
    
    hidden_dim = 128
    lin_layer_1 = torch.nn.Linear(k, hidden_dim)
    lin_layer_2 = torch.nn.Linear(hidden_dim, 1)
    model = torch.nn.Sequential(
        lin_layer_1,
        torch.nn.Sigmoid(),
        lin_layer_2,
        torch.nn.Sigmoid()
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.BCELoss()

    exp = Experiment(model, optimizer, loss_fn, train_loader, test_loader, epochs=100)
    
    exp.train()
    exp_2_res.append(exp.evaluate())

rand init: 1
epoch 0 loss: 0.6440743477113785
rand init: 2
epoch 0 loss: 0.635819835047568
rand init: 3
epoch 0 loss: 0.64291504890688
rand init: 4
epoch 0 loss: 0.6215213037306263
rand init: 5
epoch 0 loss: 0.6578131644956527
rand init: 6
epoch 0 loss: 0.6081462983162172
rand init: 7
epoch 0 loss: 0.6010631438224546
rand init: 8
epoch 0 loss: 0.6221747244558027
rand init: 9
epoch 0 loss: 0.635071539109753
rand init: 10
epoch 0 loss: 0.6357970699187248


In [22]:
exp_2_res

[tensor(0.9535),
 tensor(0.9535),
 tensor(0.9535),
 tensor(0.9651),
 tensor(0.9419),
 tensor(0.9651),
 tensor(0.9535),
 tensor(0.9419),
 tensor(0.9419),
 tensor(0.9419)]

In [23]:
print(f'32 hidden unit experiment mean acc: {np.array(exp_1_res).mean()}')
print(f'128 hidden unit experiment mean acc: {np.array(exp_2_res).mean()}')

32 hidden unit experiment mean acc: 0.945348858833313
128 hidden unit experiment mean acc: 0.9511626958847046
