# Imports

In [16]:
import torch
import pandas as pd
import numpy as np
import statistics
import torch
import random
import time
import numpy as np
#from transformers import AdamW
from torch.utils.tensorboard import SummaryWriter
import pickle
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import shutil
import itertools

from GRU_model import *
from config import *

# Seeds for Comparisons:

In [17]:
torch.manual_seed(1)
np.random.seed(2)
random.seed(3)

In [18]:
Year

2020

# Read Data

In [19]:
with open('data/train_dataset_dict.pickle', 'rb') as f:
    train_dataset_dict = pickle.load(f)
    
with open('data/test_dataset_dict.pickle', 'rb') as f:
    test_dataset_dict = pickle.load(f)

with open(category_id_to_category_name_path, 'rb') as f:
    category_id_to_name_dict = pickle.load(f)
    
with open(categories_per_indent_path, 'rb') as f:
    categories_per_indent_dict = pickle.load(f)

# Create Dataloader

In [20]:
def create_dataloader(category_train_df, category_test_df):
    x_train = category_train_df.iloc[:,:-1].to_numpy()
    y_train = category_train_df.iloc[:,-1].to_numpy()
    x_test = category_test_df.iloc[:,:-1].to_numpy()
    y_test= category_test_df.iloc[:,-1].to_numpy()

    x_train= torch.from_numpy(x_train).to(torch.float32)
    y_train = torch.from_numpy(y_train).to(torch.float32)
    x_test = torch.from_numpy(x_test).to(torch.float32)
    y_test = torch.from_numpy(y_test).to(torch.float32)
    print('y test is: '+str(y_test))

    train_dataset = TensorDataset(x_train, y_train)
    test_dataset = TensorDataset(x_test, y_test)
    train_dataloader =  DataLoader(train_dataset, batch_size=BatchSize, shuffle=False)
    test_dataloader =  DataLoader(test_dataset, batch_size=BatchSize, shuffle=False)
    return train_dataloader, test_dataloader

# Model Configurations

In [21]:
#Define our device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 

# Training Loop

In [22]:
def training_loop(model, train_dataloader, optimizer):
    running_loss = 0
    model.train()
    predictions_list = []
    for inputs, labels in train_dataloader:
        # initialize calculated gradients (from prev step)
        optimizer.zero_grad()
        inputs, labels = inputs.to(device), labels.to(device)
        #Changing input shape - last batch size can change so we define it as input.shape[0]
        inputs = inputs.view(inputs.shape[0], SequenceLength, Features) 
        #model prediction
        pred = model(inputs)
        #append batch predictions to predictions list
        predictions_list.append(pred.view(1,-1))
        # calculate loss
        loss = Criterion(pred, labels.view(-1,1))
        # calculate the gradient
        loss.backward()
        # update parameters
        optimizer.step()
        #Add to loss of batch to epoch train loss
        running_loss+=loss.item()
    # Calculte the epoch train loss
    epoch_train_loss = running_loss/len(train_dataloader.dataset)
    return epoch_train_loss


In [23]:
def evaluation_loop(model, test_dataloader):
     # Evaluation
    # Initiate test loss, accuracy and f1 score to zero
    test_loss = 0
    # Change model to eval mode
    model.eval()
    # we dont need to update weights, so we define no_grad() to save memory

    predictions_list = []
    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs = inputs.view(inputs.shape[0], SequenceLength, Features)
            inputs, labels = inputs.to(device), labels.to(device)
            out = model(inputs)
            predictions_list.append(out.view(1,-1))
            test_batch_loss = Criterion(out, labels.view(-1,1))
            test_loss += test_batch_loss.item()
    # Calculate epoch loss
    epoch_predictions = torch.cat(predictions_list, dim=1)
    epoch_test_loss = test_loss/len(test_dataloader.dataset)
        
    return epoch_test_loss, epoch_predictions


In [24]:
def save_checkpoint(checkpoint, is_best, checkpoint_path, best_model_path):
    """
    checkpoint: checkpoint we want to save
    is_best: is this the best checkpoint; min validation loss
    checkpoint_path: path to save checkpoint
    best_model_path: path to save best model
    """
    # save checkpoint data to the path given, checkpoint_path
    torch.save(checkpoint, checkpoint_path)
    # if it is a best model, min validation loss
    if is_best:
        # copy that checkpoint file to best path given, best_model_path
        shutil.copyfile(checkpoint_path, best_model_path)

In [25]:
def load_checkpoint(checkpoint_path, model, optimizer):
    """
    checkpoint_path: path to save checkpoint
    model: model that we want to load checkpoint parameters into       
    optimizer: optimizer we defined in previous training
    """
    # load check point
    checkpoint = torch.load(checkpoint_path)
    # initialize state_dict from checkpoint to model
    model.load_state_dict(checkpoint['state_dict'])
    # initialize optimizer from checkpoint to optimizer
    optimizer.load_state_dict(checkpoint['optimizer'])
    # initialize valid_loss_min from checkpoint to valid_loss_min
    valid_loss_min = checkpoint['valid_loss_min']
    # return model, optimizer, epoch value, min validation loss 
    return model, optimizer, checkpoint['epoch'], valid_loss_min

In [26]:
def unify_model_weights(model):
    param_dict ={}
    for name, param in model.named_parameters():
        param_dict[name] = param

    param_dict['gru.bias_hh_l0'] = param_dict['gru.bias_hh_l0'].view(-1,1)
    param_dict['gru.bias_ih_l0'] = param_dict['gru.bias_ih_l0'].view(-1,1)
    unified_weights = torch.hstack((
            param_dict['gru.weight_ih_l0'],
            param_dict['gru.weight_hh_l0'],
            param_dict['gru.bias_ih_l0'],
            param_dict['gru.bias_hh_l0']))

    return unified_weights

In [27]:
def training_and_evaluation(model, train_dataloader, test_dataloader, optim, category, checkpoint_path, best_checkpoint_path):
   #results list
   train_loss_list = []
   test_loss_list = []

   ##Create writer for using tesndorboard
   #writer = SummaryWriter(log_dir=f'{TbDirectory}_{category}')

   min_test_loss = np.inf

   for epoch in range(Epochs):
      #initiate train epoch loss
      epoch_train_loss = training_loop(model, train_dataloader, optim)
      epoch_test_loss, epoch_test_predictions = evaluation_loop(model, test_dataloader)

      checkpoint = {
         'epoch': epoch + 1,
         'valid_loss_min': epoch_test_loss,
         'state_dict': model.state_dict(),
         'optimizer': optim.state_dict(),
        }
      
      # save checkpoint
      save_checkpoint(checkpoint, False, checkpoint_path, best_checkpoint_path)

      if epoch_test_loss <= min_test_loss:
         save_checkpoint(checkpoint, True, checkpoint_path, best_checkpoint_path)
         min_test_loss = epoch_test_loss

      train_loss_list.append(epoch_train_loss)
      test_loss_list.append(epoch_test_loss)

      ## Display those measures on tensorboard
      #writer.add_scalar(tag='loss/train', scalar_value=epoch_train_loss, global_step=epoch)
      #writer.add_scalar(tag='loss/test', scalar_value=epoch_test_loss, global_step=epoch)
    
   results = {'train_loss': train_loss_list, 'test_loss': test_loss_list} 
   return results

In [28]:
categories_lists = list(categories_per_indent_dict.values())
categories_id = list(itertools.chain.from_iterable(categories_lists))
categories = []
for category_id in categories_id:
    categories.append(category_id_to_name_dict[category_id])

In [29]:
len(categories)

350

In [30]:
categories

['Admission to movies, theaters, and concerts',
 'Bacon, breakfast sausage, and related products',
 'Bread other than white',
 'Canned fruits',
 'Canned vegetables',
 'Checking account and other bank services',
 'Chicken',
 'Cookies',
 'Crackers, bread, and cracker products',
 'Dried beans, peas, and lentils',
 'Frankfurters',
 'Fresh cakes and cupcakes',
 'Fresh fish and seafood',
 'Fresh sweetrolls, coffeecakes, doughnuts',
 'Frozen and refrigerated bakery products, pies, tarts, turnovers',
 'Frozen vegetables',
 'Ham',
 'Intracity mass transit',
 'Lunchmeats',
 'Oranges, including tangerines',
 'Other condiments',
 'Other pork including roasts and picnics',
 'Other uncooked poultry including turkey',
 'Parking fees and tolls',
 'Pork chops',
 'Prepared salads',
 'Processed fish and seafood',
 'Salt and other seasonings and spices',
 'Sauces and gravies',
 'Ship fare',
 'Uncooked beef roasts',
 'Uncooked beef steaks',
 'Uncooked ground beef',
 'Uncooked other beef and veal',
 'Video 

In [31]:
def pipline(train_dataset_dict, test_dataset_dict):
    results = {}
    weights = {}
    total_mse = 0
    for category in categories:
        train_dataloader, test_dataloader = create_dataloader(train_dataset_dict[category], test_dataset_dict[category])

        model = GRUModel(input_dim = Features, hidden_dim = HiddenSize, layer_dim = LayersDim, output_dim = OutputDim, dropout_prob = DropoutProb)
        model.to(device)
        
        optimizer = torch.optim.AdamW(model.parameters(), lr=Lr)

        parameters_file_name = category+'.pt'
        
        results[category] = training_and_evaluation(
                                model=model,
                                optim=optimizer,
                                train_dataloader=train_dataloader,
                                test_dataloader=test_dataloader,
                                category=category,
                                checkpoint_path=CheckpointPath+parameters_file_name,
                                best_checkpoint_path=BestcheckpointPath+parameters_file_name,
                            )
    
    return results

In [32]:
results = pipline(train_dataset_dict, test_dataset_dict)

y test is: tensor([ 0.6391,  1.0857, -0.0336, -0.6105,  0.9351,  1.7115, -0.6477, -0.0623,
         0.1235,  0.7583,  0.6077,  0.8134, -0.1959,  1.1259,  1.0698,  0.0103,
         0.8903,  1.4829, -0.5711, -0.5831,  0.9011,  1.5884, -0.2634,  0.8523,
        -0.1651,  1.0952,  1.4112, -0.2530,  0.4848,  0.0835, -0.1169, -0.3758])
y test is: tensor([-0.1851,  1.2816,  2.8335,  0.8918,  1.9928,  0.6837,  2.8696,  1.7859,
         2.3512,  2.2472,  0.0295, -1.7791,  1.2989,  0.9089,  2.1129,  0.2981,
        -1.7279,  1.0111,  0.1046,  0.5841, -0.9158, -0.9560, -0.4712, -0.3472,
        -0.5278,  0.2695, -2.1167, -0.9136, -1.2713, -0.3369,  1.8595,  2.6905])
y test is: tensor([-0.4559, -0.3370,  0.7200,  0.9577, -0.8949,  1.0335,  0.1873,  1.1615,
         1.1231,  0.9191,  0.5698,  1.1466,  0.4159,  0.7830,  1.9174,  0.4281,
         1.3073,  3.4226,  1.6978,  0.4899,  0.0441,  2.2267, -0.7486,  0.7856,
         1.6568, -0.3154, -0.7768,  0.8193,  0.9314,  0.9311, -1.2193,  0.0214])
y te

In [33]:
with open('data/model_results.pickle', 'wb') as handle:
    pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Create Best Model Per Category Dictionary

In [34]:
def create_dict_of_best_model_per_category(categories_list, dir_path):
    basic_model = GRUModel(input_dim = Features, hidden_dim = HiddenSize, layer_dim = LayersDim, output_dim = OutputDim, dropout_prob = DropoutProb)
    basic_optimizer = torch.optim.AdamW(basic_model.parameters(), lr=Lr)
    basic_model.to(device)

    best_models_dict = {}

    for category in categories_list:
        ckp_path = dir_path+category+'.pt'
        model, optimizer, checkpoint, valid_loss_min = load_checkpoint(ckp_path, basic_model, basic_optimizer)
        best_models_dict[category] = model
        
    return best_models_dict

In [35]:
categories_list = categories
dir_path = "checkpoints/best_checkpoints/"

best_models_dict = create_dict_of_best_model_per_category(categories_list, dir_path)

In [36]:
best_models_dict

{'Admission to movies, theaters, and concerts': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'Bacon, breakfast sausage, and related products': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'Bread other than white': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'Canned fruits': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'Canned vegetables': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'Checking account and other bank services': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'Chicken': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=Tru

# Get Best Predictions for Each Category 

In [37]:
def get_best_predictions_for_each_category(best_models_dict):
    best_predictions_dict = {}

    for category in list(best_models_dict.keys()):
        model = best_models_dict[category]
        train_dataloader, test_dataloader = create_dataloader(train_dataset_dict[category], test_dataset_dict[category])
        epoch_test_loss, epoch_predictions = evaluation_loop(model, test_dataloader)
        best_predictions_dict[category] = epoch_predictions

    return best_predictions_dict

In [38]:
best_predictions_dict = get_best_predictions_for_each_category(best_models_dict)

y test is: tensor([ 0.6391,  1.0857, -0.0336, -0.6105,  0.9351,  1.7115, -0.6477, -0.0623,
         0.1235,  0.7583,  0.6077,  0.8134, -0.1959,  1.1259,  1.0698,  0.0103,
         0.8903,  1.4829, -0.5711, -0.5831,  0.9011,  1.5884, -0.2634,  0.8523,
        -0.1651,  1.0952,  1.4112, -0.2530,  0.4848,  0.0835, -0.1169, -0.3758])
y test is: tensor([-0.1851,  1.2816,  2.8335,  0.8918,  1.9928,  0.6837,  2.8696,  1.7859,
         2.3512,  2.2472,  0.0295, -1.7791,  1.2989,  0.9089,  2.1129,  0.2981,
        -1.7279,  1.0111,  0.1046,  0.5841, -0.9158, -0.9560, -0.4712, -0.3472,
        -0.5278,  0.2695, -2.1167, -0.9136, -1.2713, -0.3369,  1.8595,  2.6905])
y test is: tensor([-0.4559, -0.3370,  0.7200,  0.9577, -0.8949,  1.0335,  0.1873,  1.1615,
         1.1231,  0.9191,  0.5698,  1.1466,  0.4159,  0.7830,  1.9174,  0.4281,
         1.3073,  3.4226,  1.6978,  0.4899,  0.0441,  2.2267, -0.7486,  0.7856,
         1.6568, -0.3154, -0.7768,  0.8193,  0.9314,  0.9311, -1.2193,  0.0214])
y te

In [39]:
with open('data/predictions_dict.pickle', 'wb') as handle:
    pickle.dump(best_predictions_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Get Best Model Weights:

In [40]:
def get_weights_per_category(category_list, dir_path):
    weights = {}
    for category in category_list:
        model = GRUModel(input_dim = Features, hidden_dim = HiddenSize, layer_dim = LayersDim, output_dim = OutputDim, dropout_prob = DropoutProb)
        model.to(device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=Lr)
        
        best_checkpoint_path = dir_path + category + '.pt'

        category_model, optimizer, checkpoint, valid_loss_min = load_checkpoint(best_checkpoint_path, model, optimizer)
        category_model_weights = unify_model_weights(category_model)

        weights[category] = category_model_weights
    
    return weights

In [41]:
def get_weights_per_category(category_id_list, dir_path):
    basic_model = GRUModel(input_dim = Features, hidden_dim = HiddenSize, layer_dim = LayersDim, output_dim = OutputDim, dropout_prob = DropoutProb)
    basic_optimizer = torch.optim.AdamW(basic_model.parameters(), lr=Lr)
    basic_model.to(device)

    best_models_weights_dict = {}

    for category_id in category_id_list:
        category_name = category_id_to_name_dict[category_id]
        ckp_path = dir_path+category_name+'.pt'
        model, optimizer, checkpoint, valid_loss_min = load_checkpoint(ckp_path, basic_model, basic_optimizer)
        category_model_weights = unify_model_weights(model)
        best_models_weights_dict[category_id] = category_model_weights
        
    return best_models_weights_dict

In [42]:
dir_path = '/Users/mvilenko/Library/CloudStorage/OneDrive-PayPal/CPI_HRNN - version 2.0/mayas_project/basic_model/checkpoints/best_checkpoints/'

In [43]:
category_id_list = []
 
# list out keys and values separately
key_list = list(category_id_to_name_dict.keys())
val_list = list(category_id_to_name_dict.values())

for cat_name in categories:
    position = val_list.index(cat_name)
    category_id_list.append(key_list[position])


In [44]:
weights_dict = get_weights_per_category(category_id_list, dir_path)

In [45]:
with open('/Users/mvilenko/Library/CloudStorage/OneDrive-PayPal/CPI_HRNN - version 2.0/pickle files/sgru_model_weights.pickle', 'wb') as handle:
    pickle.dump(weights_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [46]:
weights_dict

{290.0: tensor([[ 0.1243, -0.0435,  0.0220,  ..., -0.0869, -0.0317,  0.1265],
         [-0.0030,  0.0753,  0.0799,  ...,  0.0405, -0.0061,  0.1190],
         [-0.0499, -0.0270, -0.0156,  ..., -0.0356, -0.0385,  0.0051],
         ...,
         [-0.0272,  0.0036, -0.0201,  ...,  0.0191, -0.0271,  0.0388],
         [ 0.0942, -0.0152, -0.0345,  ...,  0.0374, -0.0660, -0.0614],
         [-0.1060, -0.0170, -0.0517,  ..., -0.1035, -0.0088, -0.0099]],
        grad_fn=<CatBackward0>),
 9102.0: tensor([[-0.0783,  0.0193,  0.0674,  ..., -0.0624, -0.0616,  0.0511],
         [ 0.1085,  0.1022,  0.0496,  ..., -0.0841, -0.0551, -0.0739],
         [-0.0736,  0.0457, -0.0073,  ...,  0.1089,  0.0500, -0.0087],
         ...,
         [ 0.0981, -0.0584,  0.0235,  ...,  0.0551, -0.0494, -0.0068],
         [-0.0084, -0.0626,  0.1210,  ...,  0.1223, -0.1064, -0.0054],
         [ 0.0318,  0.0423, -0.1005,  ...,  0.0780, -0.0113, -0.1126]],
        grad_fn=<CatBackward0>),
 4542.0: tensor([[ 0.0732, -0.0311,  