In [18]:
# importing our custom functions / classes
from helper import multi_acc, CustomMNISTDataset, Net

import pickle
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import torch.optim as optim

In [2]:
filename = 'training_data.p'

train_df = pd.read_csv('digit-recognizer/train.csv')
train_labels = train_df['label']

### loading the data if it's been previously generated ###
try:
    infile = open(filename,'rb')
    train_input = pickle.load(infile)
    infile.close()

### Generating and saving the data ###
except:
    train_df.drop(['label'],inplace=True, axis=1)
    np_train = train_df.to_numpy()
    
    two_d = np.reshape(np_train[0],[1,28,28])
    train_input = two_d

    for ind in tqdm(range(1,np.shape(np_train)[0])):
    #     if ind == 10:
    #         break
        two_d = np.reshape(np_train[ind],[1,28,28])
        train_input = np.concatenate((train_input, two_d), axis = 0)

    print(np.shape(train_input))

    outfile = open(filename,'wb')
    pickle.dump(train_input, outfile)
    outfile.close()

    print('saved successfully!')

In [3]:
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [4]:
params = list(net.parameters())
print(f'net params len: {len(params)}')
print(params[0].size())

net params len: 10
torch.Size([6, 1, 5, 5])


In [5]:
input = torch.randn(1, 1, 28, 28)
out = net(input)
print(out)

tensor([[ 0.0295,  0.0511,  0.0847, -0.1005,  0.0766, -0.0568, -0.0382,  0.0207,
         -0.0617, -0.1085]], grad_fn=<AddmmBackward>)


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [6]:
net.zero_grad()
out.backward(torch.randn(1, 10))

In [7]:
net.zero_grad()     # zeroes the gradient buffers of all parameters

print('net conv1.bias.grad before backward',net.conv1.bias.grad)

# loss.backward()

# print('conv1.bias.grad after backward')
# print(net.conv1.bias.grad)

net conv1.bias.grad before backward tensor([0., 0., 0., 0., 0., 0.])


In [8]:
n_epochs = 10
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

accuracy_stats = {
    'train': [],
    "val": []
}
loss_stats = {
    'train': [],
    "val": []
}

In [9]:
X_train, X_test, y_train, y_test = train_test_split(train_input, train_labels.values, test_size=0.20, random_state=42)

train_DS = CustomMNISTDataset(labels = torch.from_numpy(y_train).long(), imgs = X_train)
test_DS = CustomMNISTDataset(labels = torch.from_numpy(y_test).long(), imgs = X_test)

train_loader = torch.utils.data.DataLoader(train_DS, batch_size=50)
val_loader = torch.utils.data.DataLoader(test_DS, batch_size=1)

__Trying some stuff with learning rate schedulers__

just copied and paster the cell above. Modify the training loop to incorporate https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html#torch.optim.lr_scheduler.ReduceLROnPlateau

In [19]:
# a lot of this came from: https://towardsdatascience.com/pytorch-tabular-multiclass-classification-9f8211a123ab

# optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, verbose=True)

print("Begin training.")
for e in tqdm(range(1, n_epochs+1)):
    
    # TRAINING
    train_epoch_loss = 0
    train_epoch_acc = 0
    net.train()
    for index, (X_train_batch, y_train_batch) in enumerate(train_loader):
        print(f'Training on {index}/{len(train_loader)}', end='\r')
        optimizer.zero_grad()
        
        y_train_pred = net(X_train_batch)
        
        train_loss = criterion(y_train_pred, y_train_batch)
        train_acc = multi_acc(y_train_pred, y_train_batch)
        
        train_loss.backward()
        optimizer.step()
        
        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()
        
        
    # VALIDATION    
    with torch.no_grad():
        
        val_epoch_loss = 0
        val_epoch_acc = 0
        
        net.eval()
#         for X_val_batch, y_val_batch in val_loader:
        for index, (X_val_batch, y_val_batch) in enumerate(val_loader):
            print(f'Validating {index}/{len(val_loader)}', end='\r')
            y_val_pred = net(X_val_batch)
                        
            val_loss = criterion(y_val_pred, y_val_batch)
            val_acc = multi_acc(y_val_pred, y_val_batch)
            
            val_epoch_loss += val_loss.item()
            val_epoch_acc += val_acc.item()
            
            # this should reduce the learning rate when it hits a plateau
            scheduler.step(val_loss)
            
    loss_stats['train'].append(train_epoch_loss/len(train_loader))
    loss_stats['val'].append(val_epoch_loss/len(val_loader))
    accuracy_stats['train'].append(train_epoch_acc/len(train_loader))
    accuracy_stats['val'].append(val_epoch_acc/len(val_loader))
                              
    
    print(f'Epoch {e+0:03}: | Train Loss: {train_epoch_loss/len(train_loader):.5f} | Val Loss: {val_epoch_loss/len(val_loader):.5f} | Train Acc: {train_epoch_acc/len(train_loader):.3f}| Val Acc: {val_epoch_acc/len(val_loader):.3f}')

optimizer = optim.Adam(net.parameters(), lr=learning_rate)

  0%|          | 0/10 [00:00<?, ?it/s]

Begin training.
Epoch     4: reducing learning rate of group 0 to 1.0000e-03.
Epoch     7: reducing learning rate of group 0 to 1.0000e-04.
Epoch    10: reducing learning rate of group 0 to 1.0000e-05.
Epoch    13: reducing learning rate of group 0 to 1.0000e-06.
Epoch    16: reducing learning rate of group 0 to 1.0000e-07.
Epoch    19: reducing learning rate of group 0 to 1.0000e-08.
Validating 8355/8400

 10%|█         | 1/10 [00:21<03:15, 21.76s/it]

Epoch 001: | Train Loss: 0.12887 | Val Loss: 0.14601 | Train Acc: 97.443| Val Acc: 97.333
Validating 8337/8400

 20%|██        | 2/10 [00:45<02:58, 22.28s/it]

Epoch 002: | Train Loss: 0.07614 | Val Loss: 0.14601 | Train Acc: 98.369| Val Acc: 97.333
Validating 8355/8400

 30%|███       | 3/10 [01:16<02:55, 25.03s/it]

Epoch 003: | Train Loss: 0.07614 | Val Loss: 0.14601 | Train Acc: 98.369| Val Acc: 97.333
Validating 8376/8400

 40%|████      | 4/10 [01:43<02:33, 25.58s/it]

Epoch 004: | Train Loss: 0.07614 | Val Loss: 0.14601 | Train Acc: 98.369| Val Acc: 97.333
Validating 8389/8400

 50%|█████     | 5/10 [02:08<02:07, 25.53s/it]

Epoch 005: | Train Loss: 0.07614 | Val Loss: 0.14601 | Train Acc: 98.369| Val Acc: 97.333
Validating 8334/8400

 60%|██████    | 6/10 [02:33<01:41, 25.31s/it]

Epoch 006: | Train Loss: 0.07614 | Val Loss: 0.14600 | Train Acc: 98.369| Val Acc: 97.333
Validating 8306/8400

 70%|███████   | 7/10 [03:00<01:17, 25.81s/it]

Epoch 007: | Train Loss: 0.07614 | Val Loss: 0.14600 | Train Acc: 98.369| Val Acc: 97.333
Validating 8361/8400

 80%|████████  | 8/10 [03:36<00:57, 28.71s/it]

Epoch 008: | Train Loss: 0.07614 | Val Loss: 0.14600 | Train Acc: 98.369| Val Acc: 97.333
Validating 8387/8400

 90%|█████████ | 9/10 [04:27<00:35, 35.35s/it]

Epoch 009: | Train Loss: 0.07614 | Val Loss: 0.14600 | Train Acc: 98.369| Val Acc: 97.333
Validating 8341/8400

100%|██████████| 10/10 [05:03<00:00, 30.33s/it]

Epoch 010: | Train Loss: 0.07614 | Val Loss: 0.14600 | Train Acc: 98.369| Val Acc: 97.333



