# Initial Parameter Search
Other techniques certainly have value, and the influence of the initial parameters fades after many epochs. Even so, a quick search for a good set of initial parameters can save a large amount of training time, and there is little reason not to do it.

The following is a small loop I run to perform the search quickly. I could create an Optuna study instead, but the code needed to set one up is more involved than the task warrants.

In [1]:
%load_ext autoreload
%autoreload 2

import sys; sys.path.append('./*'); sys.path.append('..')
import torch
import torch.nn as nn
from trainer import Trainer
import pandas as pd
import os.path as p

# Seed PyTorch's global RNG so repeated runs of the notebook start from the same state.
torch.manual_seed(0)

# Use the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Directory containing the sheet data (relative path).
d = '../sheet_data'


In [None]:
from datasets.ClassificationSet import ClassificationSet
from models.SheetClassifier import SheetClassifier

# Local import: only this cell needs it, to snapshot the winning state dict.
import copy

# Best state dict seen so far and the accuracy it achieved.
# `best_parameters` stays None if no trial ever scores above 0.
best_parameters, best_score = None, 0
data = ClassificationSet(p.join(d, 'preprocessed_training_set'))
trainer = Trainer()

# Each trial builds a freshly initialised model, trains it briefly and
# evaluates it; the weights of the highest-accuracy trial are kept.
for trial in range(10):
    model = SheetClassifier()

    trainer.run_experiment(model, data, epochs=10, criterion=nn.CrossEntropyLoss, train_shuffle=False, show=False)
    trainer.evaluate_model(model, show=True)

    # Read the accuracy once so the comparison and the stored score agree.
    accuracy = trainer.model_accuracy
    if best_score < accuracy:
        best_score = accuracy
        # state_dict() hands back tensors that share storage with the live
        # module, so take a deep copy to keep an independent snapshot that
        # later training or in-place edits of `model` cannot alter.
        best_parameters = copy.deepcopy(model.state_dict())

[Evaluation over 4 Batches], Test Loss: 1.71, Accuracy: 0.77
[Evaluation over 4 Batches], Test Loss: 1.42, Accuracy: 0.86
[Evaluation over 4 Batches], Test Loss: 2.23, Accuracy: 0.77
[Evaluation over 4 Batches], Test Loss: 2.51, Accuracy: 0.71
[Evaluation over 4 Batches], Test Loss: 2.17, Accuracy: 0.80
[Evaluation over 4 Batches], Test Loss: 2.57, Accuracy: 0.69
[Evaluation over 4 Batches], Test Loss: 2.50, Accuracy: 0.75
[Evaluation over 4 Batches], Test Loss: 1.38, Accuracy: 0.79
[Evaluation over 4 Batches], Test Loss: 1.88, Accuracy: 0.75
[Evaluation over 4 Batches], Test Loss: 3.62, Accuracy: 0.72
[Evaluation over 4 Batches], Test Loss: 1.77, Accuracy: 0.81
[Evaluation over 4 Batches], Test Loss: 0.95, Accuracy: 0.79
[Evaluation over 4 Batches], Test Loss: 1.77, Accuracy: 0.83
[Evaluation over 4 Batches], Test Loss: 2.08, Accuracy: 0.74
[Evaluation over 4 Batches], Test Loss: 1.84, Accuracy: 0.82
[Evaluation over 4 Batches], Test Loss: 1.74, Accuracy: 0.78
[Evaluation over 4 Batch

KeyboardInterrupt: 

In [4]:
# Write the best state dict found by the search loop to disk as a pickle.
pd.to_pickle(obj=best_parameters, filepath_or_buffer='best_initial_params.pkl')

In [13]:
# Load the stored training hyperparameters and pull out the three used below.
training_parameters = pd.read_pickle('./best_params.pkl')
lr, wd, bs = (
    training_parameters[key]
    for key in ('lr', 'weight_decay', 'batch_size')
)

# Fresh classifier, warm-started from the best weights kept by the search loop.
model = SheetClassifier()
model.load_state_dict(best_parameters)

# Full 40-epoch run with the loaded hyperparameters, followed by an evaluation pass.
trainer.run_experiment(
    model,
    data,
    epochs=40,
    learning_rate=lr,
    weight_decay=wd,
    batch_size=bs,
    mixed_precision=True,
    scheduler=True,
    pruning=False,
    criterion=nn.CrossEntropyLoss,
    show=True,
)
trainer.evaluate_model(model, show=True)

Epoch [1/40], Train Loss: 8.47, Accuracy: 0.73
Epoch [6/40], Train Loss: 1.14, Accuracy: 0.83
Epoch [11/40], Train Loss: 0.25, Accuracy: 0.92
Epoch [16/40], Train Loss: 0.16, Accuracy: 0.96
Epoch [21/40], Train Loss: 0.15, Accuracy: 0.96
Epoch [26/40], Train Loss: 0.15, Accuracy: 0.97
Epoch [31/40], Train Loss: 0.12, Accuracy: 0.98
Epoch [36/40], Train Loss: 0.12, Accuracy: 0.98
Experiment Complete
[Evaluation over 4 Batches], Test Loss: 0.30, Accuracy: 0.89


SheetClassifier(
  (model): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (lin1): Linear(in_features=16384, out_features=3, bias=True)
)