# Initial Parameter Search
While all other techniques have value (and initial parameters don't really mean anything after many epochs), the truth is that a quick search of the best initial parameters can save large amounts of training time, and there isn't really any good reason not to do it.

The following is a small loop I run to perform the search quickly. We could create an optuna study, but the actual code to do so is more involved than the task warrants.

In [11]:
%load_ext autoreload
%autoreload 2

import sys; sys.path.append('./*'); sys.path.append('..')
import torch
import torch.nn as nn
from trainer import Trainer
import pandas as pd
import os.path as p

torch.manual_seed(0)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
d = '../sheet_data'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
cpu


In [3]:
from datasets.ClassificationSet import ClassificationSet
from models.SheetClassifier import SheetClassifier

best_parameters, best_score = None, 0
data = ClassificationSet(p.join(d, 'preprocessed_training_set'))
trainer = Trainer()

for i in range(20):
    model = SheetClassifier()

    trainer.run_experiment(model, data, epochs=5, criterion=nn.CrossEntropyLoss, train_shuffle=False, show=False)
    trainer.evaluate_model(model, show=True)

    if best_score < trainer.model_accuracy:
        best_score = trainer.model_accuracy
        best_parameters = model.state_dict()

[Evaluation over 10 Batches], Test Loss: 1.41, Accuracy: 0.74
[Evaluation over 10 Batches], Test Loss: 1.21, Accuracy: 0.71
[Evaluation over 10 Batches], Test Loss: 1.38, Accuracy: 0.71
[Evaluation over 10 Batches], Test Loss: 3.50, Accuracy: 0.48
[Evaluation over 10 Batches], Test Loss: 1.01, Accuracy: 0.74
[Evaluation over 10 Batches], Test Loss: 1.95, Accuracy: 0.72
[Evaluation over 10 Batches], Test Loss: 1.39, Accuracy: 0.71
[Evaluation over 10 Batches], Test Loss: 1.07, Accuracy: 0.73
[Evaluation over 10 Batches], Test Loss: 1.33, Accuracy: 0.72
[Evaluation over 10 Batches], Test Loss: 1.05, Accuracy: 0.73
[Evaluation over 10 Batches], Test Loss: 1.52, Accuracy: 0.66
[Evaluation over 10 Batches], Test Loss: 1.16, Accuracy: 0.74
[Evaluation over 10 Batches], Test Loss: 1.36, Accuracy: 0.72
[Evaluation over 10 Batches], Test Loss: 3.56, Accuracy: 0.50
[Evaluation over 10 Batches], Test Loss: 2.07, Accuracy: 0.66
[Evaluation over 10 Batches], Test Loss: 3.98, Accuracy: 0.46
[Evaluat

In [4]:
pd.to_pickle(best_parameters, 'best_initial_params.pkl')

In [None]:
training_parameters = pd.read_pickle('./best_params.pkl')
lr, wd, bs = training_parameters['lr'], training_parameters['weight_decay'], training_parameters['batch_size']

model = SheetClassifier()
model.load_state_dict(best_parameters)

trainer.run_experiment(model, data, epochs=50, learning_rate=lr, weight_decay=wd, batch_size=bs, criterion=nn.CrossEntropyLoss, show=True)
trainer.evaluate_model(model, show=True)

Epoch [1/50], Train Loss: 3.21, Accuracy: 0.47
Epoch [6/50], Train Loss: 0.76, Accuracy: 0.67
Epoch [11/50], Train Loss: 0.78, Accuracy: 0.66
Epoch [16/50], Train Loss: 0.74, Accuracy: 0.67
Epoch [21/50], Train Loss: 0.75, Accuracy: 0.67
Epoch [26/50], Train Loss: 0.74, Accuracy: 0.68
Epoch [31/50], Train Loss: 0.75, Accuracy: 0.67
Epoch [36/50], Train Loss: 0.85, Accuracy: 0.60
Epoch [41/50], Train Loss: 0.86, Accuracy: 0.60
Epoch [46/50], Train Loss: 0.82, Accuracy: 0.63
Experiment Complete
[Evaluation over 10 Batches], Test Loss: 1.21, Accuracy: 0.35


SheetClassifier(
  (model): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (lin1): Linear(in_features=16384, out_features=3, bias=True)
)

In [22]:
import torchvision.transforms.v2 as v2

model = SheetClassifier()
data2 = ClassificationSet(p.join(d, 'training_set'),
                          transform=v2.Compose([v2.ToImage(),
                                                v2.ToDtype(torch.float32, scale=True),
                                                v2.Normalize([.5],[.5]),
                                                v2.GaussianBlur(1)]))

trainer.run_experiment(model, data2, epochs=15, criterion=nn.CrossEntropyLoss, show=True)
trainer.evaluate_model(model, show=True)

Epoch [1/15], Train Loss: 3.14, Accuracy: 0.65
Epoch [6/15], Train Loss: 0.07, Accuracy: 0.98
Epoch [11/15], Train Loss: 0.11, Accuracy: 0.96
Experiment Complete
[Evaluation over 10 Batches], Test Loss: 0.55, Accuracy: 0.82


SheetClassifier(
  (model): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (lin1): Linear(in_features=16384, out_features=3, bias=True)
)

In [23]:
training_parameters = pd.read_pickle('./best_params.pkl')
lr, wd, bs = training_parameters['lr'], training_parameters['weight_decay'], training_parameters['batch_size']

model = SheetClassifier()
data2 = ClassificationSet(p.join(d, 'training_set'),
                          transform=v2.Compose([v2.ToImage(),
                                                v2.ToDtype(torch.float32, scale=True),
                                                v2.Normalize([.5],[.5]),
                                                v2.GaussianBlur(1)]))

model = SheetClassifier()
model.load_state_dict(best_parameters)

trainer.run_experiment(model, data, epochs=15, learning_rate=lr, weight_decay=wd, batch_size=bs, criterion=nn.CrossEntropyLoss, show=True)
trainer.evaluate_model(model, show=True)

Epoch [1/15], Train Loss: 2.71, Accuracy: 0.48
Epoch [6/15], Train Loss: 0.75, Accuracy: 0.67
Epoch [11/15], Train Loss: 0.77, Accuracy: 0.67
Experiment Complete
[Evaluation over 10 Batches], Test Loss: 1.01, Accuracy: 0.50


SheetClassifier(
  (model): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (lin1): Linear(in_features=16384, out_features=3, bias=True)
)