**Tuning Hyper-Parameters**<br/>
In this notebook we search for the best hyper-parameters by training the model with each configuration and comparing the resulting validation accuracy.

In [1]:
#setup
import json

import os
import sys
import time
import torch
import matplotlib.pyplot as plt
import warnings
import torch.nn as nn
# Silence library warnings so the tuning log stays readable.
warnings.simplefilter("ignore")

# Larger default font for every matplotlib figure in this notebook.
plt.rcParams.update({'font.size': 20})

# Per-user directory that is handed to the dataset loader as its root.
data_dir = os.path.expanduser('~/.pytorch-datasets')

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

#loading our models
from project.Trainer import train_and_eval
from project.Analyzer import SentimentAnalyzer
from project.Attention import AttentionAnalyzer

#loading the dataset
import torchtext.data
from torchtext.vocab import Vectors, GloVe

import torchtext.datasets

# Field describing how raw review text is tokenized (spaCy), lower-cased,
# wrapped in <sos>/<eos> markers and mapped to vocabulary indices.
review_parser = torchtext.data.Field(
    sequential=True,
    use_vocab=True,
    lower=True,
    init_token='<sos>',
    eos_token='<eos>',
    dtype=torch.long,
    tokenize='spacy',
    tokenizer_language='en_core_web_sm',
)

# Field for the sentiment labels: non-sequential targets that are mapped to
# numeric class ids (0, 1, 2) through their own vocabulary, without an <unk>.
label_parser = torchtext.data.Field(
    is_target=True,
    sequential=False,
    unk_token=None,
    use_vocab=True,
)

# Train / validation / test splits of SST with the coarse (non fine-grained) labels.
ds_train, ds_valid, ds_test = torchtext.datasets.SST.splits(
    review_parser,
    label_parser,
    root=data_dir,
    fine_grained=False,
)

# Vocabularies are built from the training split only; the text vocabulary is
# initialised with the pretrained GloVe 6B 300-dimensional vectors.
review_parser.build_vocab(ds_train, vectors=GloVe(name='6B', dim=300))
label_parser.build_vocab(ds_train)

# Embedding matrix aligned with the text vocabulary, placed on the compute device.
word_embeddings = review_parser.vocab.vectors.to(device=device)




cuda


In [2]:
def create_dataset(batch_size):
    """Build bucketed train/validation/test iterators over the SST splits.

    Args:
        batch_size: Number of examples per batch (a tunable hyper-parameter).

    Returns:
        tuple: ``(train_iter, valid_iter, test_iter)`` iterators created by
        ``torchtext.data.BucketIterator.splits`` over the module-level
        ``ds_train``, ``ds_valid`` and ``ds_test`` datasets.
    """
    # BucketIterator creates batches with samples of similar length (as
    # measured by sort_key) to minimize the number of <pad> tokens per batch.
    #
    # The iterators used to be built twice: a first set that received
    # device=device but was discarded, and a second set that was returned but
    # had no device. Build them once with both settings so the batches are
    # created directly on the same device as the model.
    train_iter, valid_iter, test_iter = torchtext.data.BucketIterator.splits(
        (ds_train, ds_valid, ds_test),
        batch_size=batch_size,
        sort_key=lambda x: len(x.text),
        repeat=False,
        shuffle=True,
        device=device,
    )
    return train_iter, valid_iter, test_iter
    
def create_model(hidden_dim, layers):
    """Instantiate the baseline and the attention sentiment models.

    Args:
        hidden_dim: Hidden size passed to both models.
        layers: Number of layers passed to both models.

    Returns:
        tuple: ``(model, attnModel)`` -- a ``SentimentAnalyzer`` and an
        ``AttentionAnalyzer`` built with the same dimensions, both moved to
        the module-level ``device``.
    """
    input_dim = len(review_parser.vocab)  # one embedding row per vocabulary entry
    embedding_dim = 300                   # matches the 300-d pretrained word vectors
    output_dim = 3                        # three sentiment classes (would be 5 with fine-grained labels)

    # Forward the requested number of layers. It used to be hard-coded to 2,
    # which made every value of `layers` in the search build the same network.
    model = SentimentAnalyzer(
        input_dim, embedding_dim, hidden_dim, output_dim, word_embeddings, layers=layers
    )
    attnModel = AttentionAnalyzer(
        input_dim, embedding_dim, hidden_dim, output_dim, word_embeddings, layers=layers
    )

    # Move both models to the compute device (previously only `model` was moved,
    # leaving the attention model on a different device than the embeddings).
    model = model.to(device)
    attnModel = attnModel.to(device)

    return model, attnModel

def training_model(lr, epochs, model, train_iter, test_iter, BATCH_SIZE, HIDDEN_DIM, layers):
    """Train ``model`` for one hyper-parameter configuration, with on-disk caching.

    Results are stored in ``Proj_results/Regular_<json of params>``. If a file
    for the same configuration already exists, no training happens and the
    best cached evaluation accuracy is returned instead.

    Args:
        lr: Learning rate for Adam; either a 0-dim tensor (e.g. an element of
            ``torch.logspace``) or a plain number.
        epochs: Number of training epochs.
        model: The model to train (its ``parameters()`` are optimised).
        train_iter: Iterator over the training batches.
        test_iter: Iterator over the batches used for evaluation.
        BATCH_SIZE: Batch size; only used to identify the configuration.
        HIDDEN_DIM: Hidden size; only used to identify the configuration.
        layers: Number of layers; only used to identify the configuration.

    Returns:
        The highest evaluation accuracy reached over all epochs.
    """
    # Accept both tensors and plain Python numbers for the learning rate.
    lr_value = lr.item() if hasattr(lr, "item") else float(lr)
    params = {'bs': BATCH_SIZE, 'hidden_dim': HIDDEN_DIM, 'lr': lr_value, 'layers': layers}
    results_path = f"Proj_results/Regular_{json.dumps(params)}"

    # Re-use a previous run of the same configuration when one is on disk.
    # Only "no usable cache" errors are caught: a missing/unreadable file, bad
    # JSON, or a missing/empty "test" list. A bare except here would also
    # swallow KeyboardInterrupt and hide real bugs.
    try:
        with open(results_path, "r") as results:
            data = json.loads(results.read())
        cached_best = max(data["test"])
    except (OSError, ValueError, KeyError, TypeError):
        pass
    else:
        print(f"loaded for params:{params}")
        return cached_best

    # Train with the requested learning rate and epoch count. Both used to be
    # overridden by hard-coded values (1e-4 and 20), so the learning-rate
    # sweep had no effect.
    # NOTE: result files written before this fix are named after the requested
    # lr but were trained with lr=1e-4; delete them to force a re-run.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr_value)
    loss_fn = nn.NLLLoss()
    train_accur, test_accur = train_and_eval(
        model, train_iter, test_iter, optimizer,
        loss_fn=loss_fn, epochs=epochs, verbose=False,
    )

    # Make sure the cache directory exists so a finished run is never lost
    # because the results file could not be created.
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    with open(results_path, "w") as results:
        results.write(json.dumps({'params': params, 'train': train_accur, 'test': test_accur}))

    return max(test_accur)  # best epoch; the last epoch's accuracy would be an alternative

In [3]:
# Hyper-parameter search space
batches = [16, 32]                             # batch sizes to try
hiddens = [2**i for i in range(4, 6 + 1)]      # hidden sizes: 16, 32, 64
layers = list(range(1, 3 + 1))                 # number of layers: 1 to 3 (could add more)
learning_rates = torch.logspace(-2, -6, 5)     # learning rates: 1e-2, 1e-3, ..., 1e-6
dropouts = [0, 0.1, 0.3, 0.5]                  # dropout rates from 0 to 0.5
epochs = 20                                    # number of epochs is kept fixed

# Mark the search as not finished yet; the file is overwritten with the best
# parameters once the search completes. The context manager guarantees the
# handle is closed even if the write fails.
with open("results.txt", "w") as file:
    file.write("pending")

In [9]:
from IPython.display import Markdown, display

# Exhaustive grid search: train (or load from cache) one model per
# configuration and keep the configuration with the best accuracy.
best_params = None
best_accur = 0
for batch in batches:
    # Build the iterators for the batch size under test. They used to be
    # created once with a fixed batch size of 32, so results recorded under
    # 'bs': 16 were not actually trained with 16.
    train_iter, valid_iter, test_iter = create_dataset(batch)
    for h in hiddens:
        for layer in layers:
            for drop in dropouts:
                # NOTE(review): `drop` is recorded in `params` but neither
                # create_model nor training_model accepts a dropout value, so
                # every dropout setting trains the same configuration.
                for lr in learning_rates:
                    params = {'bs': batch, 'hidden_dim': h, 'num_layers': layer, 'lr': lr.item(), 'dropout': drop}
                    model, attnModel = create_model(h, layer)
                    model = model.to(device)
                    print(f"training params:{json.dumps(params)}")
                    # Select hyper-parameters on the validation split; tuning
                    # against the test split would leak it into model selection.
                    accur = training_model(lr, epochs, model, train_iter, valid_iter, batch, h, layer)
                    print(f'params:{params}, accur:{accur}')
                    if accur > best_accur:
                        display(Markdown(f'**best:{accur}**'))
                        best_accur = accur
                        best_params = params

# Persist the winning configuration (the original referenced the undefined
# name `best_parameters`, which raised NameError after the whole search).
with open("results.txt", "w") as file:
    file.write(json.dumps(best_params))
print(best_params)

training params:{"bs": 16, "hidden_dim": 64, "num_layers": 1, "lr": 0.009999999776482582, "dropout": 0}
loaded for params:{'bs': 16, 'hidden_dim': 64, 'lr': 0.009999999776482582, 'layers': 1}
params:{'bs': 16, 'hidden_dim': 64, 'num_layers': 1, 'lr': 0.009999999776482582, 'dropout': 0}, accur:0.6790178571428571


**best:0.6790178571428571**

training params:{"bs": 16, "hidden_dim": 64, "num_layers": 1, "lr": 0.0010000000474974513, "dropout": 0}
loaded for params:{'bs': 16, 'hidden_dim': 64, 'lr': 0.0010000000474974513, 'layers': 1}
params:{'bs': 16, 'hidden_dim': 64, 'num_layers': 1, 'lr': 0.0010000000474974513, 'dropout': 0}, accur:0.6732142857142858
training params:{"bs": 16, "hidden_dim": 64, "num_layers": 1, "lr": 9.999999747378752e-05, "dropout": 0}
loaded for params:{'bs': 16, 'hidden_dim': 64, 'lr': 9.999999747378752e-05, 'layers': 1}
params:{'bs': 16, 'hidden_dim': 64, 'num_layers': 1, 'lr': 9.999999747378752e-05, 'dropout': 0}, accur:0.6723214285714286
training params:{"bs": 16, "hidden_dim": 64, "num_layers": 1, "lr": 9.999999747378752e-06, "dropout": 0}
loaded for params:{'bs': 16, 'hidden_dim': 64, 'lr': 9.999999747378752e-06, 'layers': 1}
params:{'bs': 16, 'hidden_dim': 64, 'num_layers': 1, 'lr': 9.999999747378752e-06, 'dropout': 0}, accur:0.6584821428571429
training params:{"bs": 16, "hidden_dim": 64, "num_la

KeyboardInterrupt: 