In [1]:
# importing our custom functions / classes
from helper import multi_acc, CustomMNISTDataset, Net

import pickle
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import torch.optim as optim

In [2]:
filename = 'training_data.p'

train_df = pd.read_csv('digit-recognizer/train.csv')
train_labels = train_df['label']

### loading the data if it's been previously generated ###
try:
    # `with` closes the handle even when unpickling fails part-way.
    # NOTE(review): pickle.load can execute arbitrary code stored in the file;
    # only load a cache that this notebook wrote itself.
    with open(filename, 'rb') as infile:
        train_input = pickle.load(infile)

### Generating and saving the data ###
except (OSError, EOFError, pickle.UnpicklingError):
    # Cache is missing or unreadable: rebuild it from the CSV. Only these
    # errors are caught so that typos, KeyboardInterrupt, etc. still surface.

    # Drop the label column without mutating train_df, so the frame looks the
    # same whether or not the cache was hit.
    np_train = train_df.drop(columns=['label']).to_numpy()

    # Every row is reshaped to 28x28 and stacked along axis 0. A single
    # reshape yields the same (n_rows, 28, 28) array as concatenating the rows
    # one by one, without re-copying the growing array on every iteration.
    train_input = np_train.reshape(len(np_train), 28, 28)

    print(np.shape(train_input))

    with open(filename, 'wb') as outfile:
        pickle.dump(train_input, outfile)

    print('saved successfully!')

In [3]:
# Seed PyTorch before the network is built. Conv2d/Linear layers (see the
# printed architecture) get randomly initialised weights at construction, so a
# seed that is only set in a later cell cannot make the starting point
# repeatable. 1 is the same value assigned to `random_seed` further down.
torch.manual_seed(1)

net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [4]:
# Materialise the network's parameter tensors so they can be counted and indexed.
params = [*net.parameters()]
print(f'net params len: {len(params)}')
# Dimensions of the first parameter tensor in the list.
print(params[0].shape)

net params len: 10
torch.Size([6, 1, 5, 5])


In [5]:
# Smoke test: push one random tensor of shape (1, 1, 28, 28) through the net
# and show the raw output.
# The tensor is named `sample_input` rather than `input` so the builtin
# input() is not shadowed for the rest of the kernel session.
sample_input = torch.randn(1, 1, 28, 28)
out = net(sample_input)
print(out)

tensor([[-0.0258,  0.0025, -0.0742,  0.0788, -0.0329, -0.0876, -0.0658,  0.0724,
         -0.1265, -0.0228]], grad_fn=<AddmmBackward>)


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [6]:
# Clear any previously accumulated gradients, then backpropagate a random
# upstream gradient of shape (1, 10) through `out`.
net.zero_grad()
upstream_grad = torch.randn(1, 10)
out.backward(gradient=upstream_grad)

In [7]:
# Reset the gradient buffers of every parameter, then show conv1's bias
# gradient as it stands right after the reset.
net.zero_grad()

conv1_bias_grad = net.conv1.bias.grad
print('net conv1.bias.grad before backward', conv1_bias_grad)

# Disabled follow-up kept from the original cell: run a backward pass and
# print the same gradient again for comparison.
# loss.backward()
# print('conv1.bias.grad after backward')
# print(net.conv1.bias.grad)

net conv1.bias.grad before backward tensor([0., 0., 0., 0., 0., 0.])


In [8]:
n_epochs = 10
learning_rate = 0.01

# One hand-written gradient-descent step: p <- p - lr * grad.
# Parameters whose .grad is None are skipped: depending on the PyTorch
# version, zero_grad() either fills gradients with zeros or resets them to
# None, and dereferencing None here would raise AttributeError.
# torch.no_grad() replaces the discouraged `.data` access for the in-place update.
with torch.no_grad():
    for param in net.parameters():
        if param.grad is None:
            continue
        param.sub_(param.grad * learning_rate)

random_seed = 1
torch.backends.cudnn.enabled = False
# NOTE(review): this seeds the random draws made from here on; it cannot
# influence the weights of `net`, which was constructed in an earlier cell.
torch.manual_seed(random_seed)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

# Per-epoch history, filled in by the training loop.
accuracy_stats = {
    'train': [],
    "val": []
}
loss_stats = {
    'train': [],
    "val": []
}

In [9]:
# Hold out 20% of the images for validation; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    train_input, train_labels.values, test_size=0.20, random_state=42
)

train_DS = CustomMNISTDataset(labels=torch.from_numpy(y_train).long(), imgs=X_train)
test_DS = CustomMNISTDataset(labels=torch.from_numpy(y_test).long(), imgs=X_test)

# shuffle=True re-draws the sample order at the start of every epoch. With the
# DataLoader default (shuffle=False) each epoch would replay exactly the same
# mini-batches in exactly the same order.
train_loader = torch.utils.data.DataLoader(train_DS, batch_size=50, shuffle=True)

# Validation only measures, so its order is left fixed.
# NOTE(review): batch_size=1 means one forward pass per validation sample. A
# larger batch would be faster but would change how the per-batch metrics in
# the training loop are averaged, so it is left as is.
val_loader = torch.utils.data.DataLoader(test_DS, batch_size=1)

In [10]:
# Training / validation loop, largely adapted from:
# https://towardsdatascience.com/pytorch-tabular-multiclass-classification-9f8211a123ab

print("Begin training.")
for e in tqdm(range(1, n_epochs+1)):

    # ---- training pass: one optimizer step per mini-batch ----
    net.train()
    train_epoch_loss, train_epoch_acc = 0, 0
    for batch_imgs, batch_labels in train_loader:
        optimizer.zero_grad()

        batch_pred = net(batch_imgs)
        batch_loss = criterion(batch_pred, batch_labels)
        batch_acc = multi_acc(batch_pred, batch_labels)

        batch_loss.backward()
        optimizer.step()

        train_epoch_loss += batch_loss.item()
        train_epoch_acc += batch_acc.item()

    # ---- validation pass: no gradient tracking, no weight updates ----
    net.eval()
    val_epoch_loss, val_epoch_acc = 0, 0
    with torch.no_grad():
        for batch_imgs, batch_labels in val_loader:
            batch_pred = net(batch_imgs)
            val_epoch_loss += criterion(batch_pred, batch_labels).item()
            val_epoch_acc += multi_acc(batch_pred, batch_labels).item()

    # ---- bookkeeping: store the per-batch averages for this epoch ----
    epoch_totals = {
        'train': (train_epoch_loss, train_epoch_acc, len(train_loader)),
        'val': (val_epoch_loss, val_epoch_acc, len(val_loader)),
    }
    for split, (loss_sum, acc_sum, n_batches) in epoch_totals.items():
        loss_stats[split].append(loss_sum/n_batches)
        accuracy_stats[split].append(acc_sum/n_batches)

    print(f'Epoch {e+0:03}: | Train Loss: {train_epoch_loss/len(train_loader):.5f} | Val Loss: {val_epoch_loss/len(val_loader):.5f} | Train Acc: {train_epoch_acc/len(train_loader):.3f}| Val Acc: {val_epoch_acc/len(val_loader):.3f}')


  0%|          | 0/10 [00:00<?, ?it/s]

Begin training.


 10%|█         | 1/10 [00:25<03:47, 25.28s/it]

Epoch 001: | Train Loss: 0.23678 | Val Loss: 0.28656 | Train Acc: 92.580| Val Acc: 93.238


 20%|██        | 2/10 [00:53<03:30, 26.30s/it]

Epoch 002: | Train Loss: 0.11699 | Val Loss: 0.11876 | Train Acc: 96.750| Val Acc: 97.107


 30%|███       | 3/10 [01:11<02:45, 23.62s/it]

Epoch 003: | Train Loss: 0.09595 | Val Loss: 0.12032 | Train Acc: 97.467| Val Acc: 96.750


 40%|████      | 4/10 [01:28<02:09, 21.62s/it]

Epoch 004: | Train Loss: 0.09826 | Val Loss: 0.11315 | Train Acc: 97.327| Val Acc: 97.405


 50%|█████     | 5/10 [01:44<01:40, 20.10s/it]

Epoch 005: | Train Loss: 0.10182 | Val Loss: 0.11692 | Train Acc: 97.396| Val Acc: 97.143


 60%|██████    | 6/10 [02:01<01:16, 19.04s/it]

Epoch 006: | Train Loss: 0.09699 | Val Loss: 0.09847 | Train Acc: 97.643| Val Acc: 97.714


 70%|███████   | 7/10 [02:18<00:55, 18.34s/it]

Epoch 007: | Train Loss: 0.08728 | Val Loss: 0.09660 | Train Acc: 97.881| Val Acc: 97.607


 80%|████████  | 8/10 [02:35<00:35, 17.97s/it]

Epoch 008: | Train Loss: 0.09193 | Val Loss: 0.09874 | Train Acc: 97.902| Val Acc: 97.917


 90%|█████████ | 9/10 [02:53<00:18, 18.08s/it]

Epoch 009: | Train Loss: 0.08733 | Val Loss: 0.17248 | Train Acc: 97.952| Val Acc: 97.500


100%|██████████| 10/10 [03:20<00:00, 20.05s/it]

Epoch 010: | Train Loss: 0.11715 | Val Loss: 0.13383 | Train Acc: 97.479| Val Acc: 97.048





__Trying some stuff with learning rate schedulers__

Just copied and pasted the cell above. TODO: modify the training loop to incorporate https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html#torch.optim.lr_scheduler.ReduceLROnPlateau

In [None]:
# Same training / validation loop as the cell above (adapted from
# https://towardsdatascience.com/pytorch-tabular-multiclass-classification-9f8211a123ab),
# extended with a ReduceLROnPlateau learning-rate scheduler.
#
# NOTE: this cell keeps training the existing `net` with the existing
# `optimizer`, and keeps appending to the same loss_stats / accuracy_stats lists.

# Multiply the learning rate by `factor` once the mean validation loss has
# stopped improving for more than `patience` consecutive epochs. The library
# default patience of 10 could never trigger within n_epochs = 10, hence 2.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=2
)

print("Begin training.")
for e in tqdm(range(1, n_epochs+1)):

    # TRAINING
    train_epoch_loss = 0
    train_epoch_acc = 0
    net.train()
    for X_train_batch, y_train_batch in train_loader:
        optimizer.zero_grad()

        y_train_pred = net(X_train_batch)

        train_loss = criterion(y_train_pred, y_train_batch)
        train_acc = multi_acc(y_train_pred, y_train_batch)

        train_loss.backward()
        optimizer.step()

        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()

    # VALIDATION
    val_epoch_loss = 0
    val_epoch_acc = 0
    net.eval()
    with torch.no_grad():
        for X_val_batch, y_val_batch in val_loader:

            y_val_pred = net(X_val_batch)

            val_loss = criterion(y_val_pred, y_val_batch)
            val_acc = multi_acc(y_val_pred, y_val_batch)

            val_epoch_loss += val_loss.item()
            val_epoch_acc += val_acc.item()

    # Per-batch averages for this epoch, computed once and reused below.
    train_loss_mean = train_epoch_loss/len(train_loader)
    val_loss_mean = val_epoch_loss/len(val_loader)
    train_acc_mean = train_epoch_acc/len(train_loader)
    val_acc_mean = val_epoch_acc/len(val_loader)

    loss_stats['train'].append(train_loss_mean)
    loss_stats['val'].append(val_loss_mean)
    accuracy_stats['train'].append(train_acc_mean)
    accuracy_stats['val'].append(val_acc_mean)

    # The scheduler watches the validation loss; it must be stepped once per
    # epoch, after validation, with the monitored value.
    scheduler.step(val_loss_mean)
    current_lr = optimizer.param_groups[0]['lr']

    print(f'Epoch {e+0:03}: | Train Loss: {train_loss_mean:.5f} | Val Loss: {val_loss_mean:.5f} | Train Acc: {train_acc_mean:.3f}| Val Acc: {val_acc_mean:.3f} | LR: {current_lr:.0e}')
