# Imports

In [1]:
import torch
import pandas as pd
import numpy as np
import statistics
import torch
import random
import time
import numpy as np
#from transformers import AdamW
from torch.utils.tensorboard import SummaryWriter
import pickle
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import shutil
import itertools

from GRU_model import *
from config import *

2023-06-18 13:09:04.678470: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Seeds for Comparisons:

In [2]:
# Seed each random number generator used in this notebook (PyTorch, NumPy and
# Python's built-in `random`) so that separate runs can be compared.
torch.manual_seed(1)
np.random.seed(2)
random.seed(3)

In [3]:
Lr

0.08071971994256645

# Read Data

In [4]:
# Load the pickled train/test dataset dictionaries and the category id -> name mapping.
# The three *_path names are not defined in this notebook; presumably they come
# from `from config import *` -- confirm.
# NOTE(review): pd.read_pickle unpickles arbitrary objects; only point it at trusted files.
train_dataset_dict = pd.read_pickle(train_dataset_dict_path)
test_dataset_dict = pd.read_pickle(test_dataset_dict_path)
category_id_to_name_dict = pd.read_pickle(category_id_to_category_name_path)

# Create Dataloader

In [5]:
def create_dataloader(category_train_df, category_test_df):
    """Wrap one category's train and test frames in PyTorch DataLoaders.

    For each frame, every column except the last is used as the input and the
    last column as the target; both are converted to float32 tensors.

    category_train_df: DataFrame with the training rows
    category_test_df: DataFrame with the test rows
    Returns (train_dataloader, test_dataloader); both use `BatchSize` and keep
    the original row order (shuffle=False).
    """
    def _frame_to_loader(frame):
        # Split the frame into inputs (all but the last column) and target (last column).
        input_array = frame.iloc[:, :-1].to_numpy()
        target_array = frame.iloc[:, -1].to_numpy()
        input_tensor = torch.from_numpy(input_array).to(torch.float32)
        target_tensor = torch.from_numpy(target_array).to(torch.float32)
        paired = TensorDataset(input_tensor, target_tensor)
        return DataLoader(paired, batch_size=BatchSize, shuffle=False)

    return _frame_to_loader(category_train_df), _frame_to_loader(category_test_df)

# Model Configurations

In [6]:
#Define our device
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 

# Training Loop

In [7]:
def training_loop(model, train_dataloader, optimizer):
    """Train `model` for one pass over `train_dataloader`.

    model: network to train (switched to train mode here)
    train_dataloader: yields (inputs, labels) batches
    optimizer: optimizer that updates the model parameters
    Returns the sum of the per-batch loss values divided by the number of
    samples in the dataset.

    NOTE(review): if `Criterion` already averages over the batch, this value is
    not a per-sample mean -- confirm the reduction used in the configuration.
    """
    running_loss = 0
    model.train()
    for inputs, labels in train_dataloader:
        # Reset the gradients accumulated by the previous step.
        optimizer.zero_grad()
        inputs, labels = inputs.to(device), labels.to(device)
        # The last batch can be smaller, so take the batch size from the tensor itself.
        inputs = inputs.view(inputs.shape[0], SequenceLength, Features)
        pred = model(inputs)
        # Labels are reshaped into a column before being passed to the criterion.
        loss = Criterion(pred, labels.view(-1, 1))
        loss.backward()
        optimizer.step()
        # Accumulate the scalar loss only; batch outputs are not kept, since
        # nothing reads them and storing them would hold every batch's output
        # tensor in memory until the epoch ends.
        running_loss += loss.item()
    # Calculate the epoch train loss.
    epoch_train_loss = running_loss / len(train_dataloader.dataset)
    return epoch_train_loss


In [8]:
def evaluation_loop(model, test_dataloader):
    """Evaluate `model` on `test_dataloader` without updating any weights.

    model: network to evaluate (switched to eval mode here)
    test_dataloader: yields (inputs, labels) batches
    Returns (epoch_test_loss, epoch_predictions): the summed per-batch loss
    divided by the number of samples in the dataset, and all batch outputs
    concatenated along dim 1 (each batch output is first viewed as one row).
    """
    model.eval()
    accumulated_loss = 0
    batch_outputs = []
    # Gradients are not needed for evaluation; disabling them saves memory.
    with torch.no_grad():
        for batch_inputs, batch_labels in test_dataloader:
            batch_inputs = batch_inputs.view(batch_inputs.shape[0], SequenceLength, Features)
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            batch_out = model(batch_inputs)
            batch_outputs.append(batch_out.view(1, -1))
            accumulated_loss += Criterion(batch_out, batch_labels.view(-1, 1)).item()

    epoch_predictions = torch.cat(batch_outputs, dim=1)
    epoch_test_loss = accumulated_loss / len(test_dataloader.dataset)
    return epoch_test_loss, epoch_predictions


In [9]:
def save_checkpoint(checkpoint, is_best, checkpoint_path, best_model_path):
    """
    Write a checkpoint to disk and, when it is the best one, duplicate it.

    checkpoint: object to serialize with torch.save
    is_best: whether this checkpoint is the best so far (min validation loss)
    checkpoint_path: file the checkpoint is always written to
    best_model_path: file that receives a copy when is_best is true
    """
    # The checkpoint is always written to checkpoint_path first.
    torch.save(checkpoint, checkpoint_path)
    if not is_best:
        return
    # Best model so far: copy the file that was just written to best_model_path.
    shutil.copyfile(checkpoint_path, best_model_path)

In [10]:
def load_checkpoint(checkpoint_path, model, optimizer):
    """
    Restore model and optimizer state from a checkpoint file.

    checkpoint_path: path of the checkpoint file to load
    model: model that we want to load checkpoint parameters into
    optimizer: optimizer whose state is restored from the checkpoint
    Returns (model, optimizer, epoch, valid_loss_min); `model` and `optimizer`
    are the same objects that were passed in, updated in place.
    Raises KeyError if the checkpoint lacks one of the keys 'state_dict',
    'optimizer', 'valid_loss_min' or 'epoch'.
    """
    # Deserialize onto the CPU so that a checkpoint written from a GPU model can
    # still be read on a machine without CUDA. load_state_dict then copies the
    # values into the model's existing parameters, wherever they live.
    # NOTE(review): torch.load is pickle-based; only load trusted checkpoint files.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    # Copy the saved parameters into the model.
    model.load_state_dict(checkpoint['state_dict'])
    # Restore the optimizer state.
    optimizer.load_state_dict(checkpoint['optimizer'])
    # Validation loss that was stored together with this checkpoint.
    valid_loss_min = checkpoint['valid_loss_min']
    return model, optimizer, checkpoint['epoch'], valid_loss_min

In [11]:
def unify_model_weights(model):
    """Stack the first GRU layer's weights and biases into one 2-D tensor.

    model: module whose named parameters include 'gru.weight_ih_l0',
        'gru.weight_hh_l0', 'gru.bias_ih_l0' and 'gru.bias_hh_l0'
    Returns the horizontal concatenation, in this column order, of the
    input-hidden weights, the hidden-hidden weights, the input-hidden bias and
    the hidden-hidden bias (each bias viewed as a single column).
    """
    named = dict(model.named_parameters())
    column_blocks = [
        named['gru.weight_ih_l0'],
        named['gru.weight_hh_l0'],
        # Biases are 1-D; view them as columns so they can sit next to the weights.
        named['gru.bias_ih_l0'].view(-1, 1),
        named['gru.bias_hh_l0'].view(-1, 1),
    ]
    return torch.hstack(column_blocks)

In [12]:
def training_and_evaluation(model, train_dataloader, test_dataloader, optim, category, checkpoint_path, best_checkpoint_path):
    """Train and evaluate `model` for `Epochs` epochs, checkpointing every epoch.

    model: network to train
    train_dataloader: batches used by training_loop
    test_dataloader: batches used by evaluation_loop
    optim: optimizer instance (the parameter name shadows the `optim` module
        inside this function)
    category: category name; currently only referenced by the commented-out
        TensorBoard writer
    checkpoint_path: file overwritten with the latest checkpoint every epoch
    best_checkpoint_path: file overwritten whenever the test loss is less than
        or equal to the lowest test loss seen so far
    Returns {'train_loss': [...], 'test_loss': [...]} with one entry per epoch.
    """
    # Per-epoch results.
    train_loss_list = []
    test_loss_list = []

    ## Create writer for using tensorboard
    #writer = SummaryWriter(log_dir=f'{TbDirectory}_{category}')

    min_test_loss = np.inf

    for epoch in range(Epochs):
        epoch_train_loss = training_loop(model, train_dataloader, optim)
        epoch_test_loss, _ = evaluation_loop(model, test_dataloader)

        checkpoint = {
            'epoch': epoch + 1,
            'valid_loss_min': epoch_test_loss,
            'state_dict': model.state_dict(),
            'optimizer': optim.state_dict(),
        }

        # Serialize the checkpoint once per epoch; save_checkpoint copies the
        # file to the best path when this epoch ties or beats the best loss.
        is_best = epoch_test_loss <= min_test_loss
        save_checkpoint(checkpoint, is_best, checkpoint_path, best_checkpoint_path)
        if is_best:
            min_test_loss = epoch_test_loss

        train_loss_list.append(epoch_train_loss)
        test_loss_list.append(epoch_test_loss)

        ## Display those measures on tensorboard
        #writer.add_scalar(tag='loss/train', scalar_value=epoch_train_loss, global_step=epoch)
        #writer.add_scalar(tag='loss/test', scalar_value=epoch_test_loss, global_step=epoch)

    results = {'train_loss': train_loss_list, 'test_loss': test_loss_list}
    return results

In [13]:
# Category names, in the iteration order of the id -> name mapping.
categories = list(category_id_to_name_dict.values())

In [14]:
len(categories)

52

In [15]:
categories

['Accommodation services',
 'Actual rentals for housing',
 'Alcoholic beverages',
 'Alcoholic beverages and tobacco',
 'All-items',
 'Audio-visual, photographic and information processing equipment',
 'Clothing',
 'Clothing and footwear',
 'Communications',
 'Education',
 'Electricity, gas and other fuels',
 'Financial services n.e.c.',
 'Food',
 'Food and non-alcoholic beverages',
 'Footwear',
 'Furnishings, household equipment and routine maintenance',
 'Furniture and furnishings, carpets and other floor coverings',
 'Glassware, tableware and household utensils',
 'Goods and services for routine household maintenance',
 'Health',
 'Household appliances',
 'Household textiles',
 'Housing, water, electricity, gas and other fuels',
 'Imputed rentals for housing',
 'Insurance',
 'Maintenance and repair of the dwelling',
 'Medical products, appliances and equipment',
 'Miscellaneous goods and services',
 'Newspapers, books and stationery',
 'Non-alcoholic beverages',
 'Operation of person

In [16]:
def pipline(train_dataset_dict, test_dataset_dict):
    """Train and evaluate one freshly initialised GRU model per category.

    Categories are taken from the global `category_id_to_name_dict`; both
    dataset dictionaries are indexed by category name.

    train_dataset_dict: category name -> training frame
    test_dataset_dict: category name -> test frame
    Returns a dict mapping category name to the result of
    training_and_evaluation for that category.
    """
    results = {}
    for num_categories, (category_id, category) in enumerate(category_id_to_name_dict.items(), start=1):
        # Progress output: running count, then the id and name being trained.
        print(f'num categories: {num_categories}')
        print(f'category id|name: {category_id}|{category}')

        train_dataloader, test_dataloader = create_dataloader(
            train_dataset_dict[category],
            test_dataset_dict[category],
        )

        model = GRUModel(
            input_dim=Features,
            hidden_dim=HiddenSize,
            layer_dim=LayersDim,
            output_dim=OutputDim,
            dropout_prob=DropoutProb,
        )
        model.to(device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=Lr)

        # Checkpoint files are named after the category.
        parameters_file_name = category + '.pt'
        latest_path = CheckpointPath + parameters_file_name
        best_path = BestcheckpointPath + parameters_file_name

        results[category] = training_and_evaluation(
            model=model,
            optim=optimizer,
            train_dataloader=train_dataloader,
            test_dataloader=test_dataloader,
            category=category,
            checkpoint_path=latest_path,
            best_checkpoint_path=best_path,
        )

    return results

In [17]:
# Train one model per category; `results` maps category name -> per-epoch train/test losses.
results = pipline(train_dataset_dict, test_dataset_dict)

num categories: 1
category id|name: 45|Accommodation services
num categories: 2
category id|name: 19|Actual rentals for housing
num categories: 3
category id|name: 15|Alcoholic beverages
num categories: 4
category id|name: 2|Alcoholic beverages and tobacco
num categories: 5
category id|name: 0|All-items
num categories: 6
category id|name: 38|Audio-visual, photographic and information processing equipment
num categories: 7
category id|name: 17|Clothing
num categories: 8
category id|name: 3|Clothing and footwear
num categories: 9
category id|name: 8|Communications
num categories: 10
category id|name: 10|Education
num categories: 11
category id|name: 23|Electricity, gas and other fuels
num categories: 12
category id|name: 50|Financial services n.e.c.
num categories: 13
category id|name: 13|Food
num categories: 14
category id|name: 1|Food and non-alcoholic beverages
num categories: 15
category id|name: 18|Footwear
num categories: 16
category id|name: 5|Furnishings, household equipment and 

In [18]:
# Persist the per-category loss histories returned by the pipeline.
with open('data/model_results.pickle', 'wb') as handle:
    pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Create Best Model Per Category Dictionary

In [19]:
def create_dict_of_best_model_per_category(categories_list, dir_path):
    """Load the best checkpoint of every category into its own model.

    categories_list: category names; each checkpoint file is expected at
        dir_path + category + '.pt'
    dir_path: directory prefix of the best checkpoints (used by plain string
        concatenation, so it must end with a path separator)
    Returns a dict mapping category name to a distinct GRUModel instance
    carrying that category's checkpointed weights.
    """
    best_models_dict = {}

    for category in categories_list:
        # Build a fresh model and optimizer for every category. load_checkpoint
        # updates the model it receives in place and returns that same object,
        # so sharing one instance across iterations would make every dictionary
        # entry alias a single model holding only the last checkpoint's weights.
        model = GRUModel(input_dim = Features, hidden_dim = HiddenSize, layer_dim = LayersDim, output_dim = OutputDim, dropout_prob = DropoutProb)
        optimizer = torch.optim.AdamW(model.parameters(), lr=Lr)
        model.to(device)

        ckp_path = dir_path + category + '.pt'
        model, _, _, _ = load_checkpoint(ckp_path, model, optimizer)
        best_models_dict[category] = model

    return best_models_dict

In [20]:
# Load the best checkpoint of every category from the relative checkpoints directory.
# NOTE(review): `dir_path` is reassigned to an absolute path in a later cell.
categories_list = categories
dir_path = "checkpoints/best_checkpoints/"

best_models_dict = create_dict_of_best_model_per_category(categories_list, dir_path)

In [21]:
best_models_dict

{'Accommodation services': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'Actual rentals for housing': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'Alcoholic beverages': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'Alcoholic beverages and tobacco': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'All-items': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'Audio-visual, photographic and information processing equipment': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'Clothing': GRUModel(
   (gru): GRU(1, 64, batch_first=True)
   (fc): Linear(in_features=64, out_features=1, bias=True)
 ),
 'C

# Get Best Predictions for Each Category 

In [22]:
def get_best_predictions_for_each_category(best_models_dict):
    """Run every category's model over that category's test set.

    Uses the global `train_dataset_dict` and `test_dataset_dict` to rebuild the
    dataloaders for each category.

    best_models_dict: category name -> model to evaluate
    Returns a dict mapping category name to the predictions tensor returned by
    evaluation_loop.
    """
    best_predictions_dict = {}

    for category, category_model in best_models_dict.items():
        # Only the test dataloader is needed; the train one is discarded.
        _, test_dataloader = create_dataloader(
            train_dataset_dict[category],
            test_dataset_dict[category],
        )
        _, category_predictions = evaluation_loop(category_model, test_dataloader)
        best_predictions_dict[category] = category_predictions

    return best_predictions_dict

In [23]:
# Test-set predictions of each category's loaded model, keyed by category name.
best_predictions_dict = get_best_predictions_for_each_category(best_models_dict)

In [24]:
# Persist the per-category prediction tensors.
with open('data/predictions_dict.pickle', 'wb') as handle:
    pickle.dump(best_predictions_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Get Best Model Weights:

In [25]:
def get_weights_per_category(category_list, dir_path):
    """Collect the unified GRU weight tensor of each category's best checkpoint.

    NOTE(review): a later cell defines another function with this same name
    (keyed by category id); once that cell runs, this definition is replaced.

    category_list: category names; each checkpoint is read from
        dir_path + category + '.pt'
    dir_path: directory prefix of the best checkpoints
    Returns a dict mapping category name to the tensor produced by
    unify_model_weights.
    """
    weights = {}
    for category in category_list:
        # A new model and optimizer are created for every category.
        fresh_model = GRUModel(
            input_dim=Features,
            hidden_dim=HiddenSize,
            layer_dim=LayersDim,
            output_dim=OutputDim,
            dropout_prob=DropoutProb,
        )
        fresh_model.to(device)
        fresh_optimizer = torch.optim.AdamW(fresh_model.parameters(), lr=Lr)

        loaded_model, _, _, _ = load_checkpoint(dir_path + category + '.pt', fresh_model, fresh_optimizer)
        weights[category] = unify_model_weights(loaded_model)

    return weights

In [26]:
def get_weights_per_category(category_id_list, dir_path):
    """Collect the unified GRU weight tensor of each category's best checkpoint, keyed by id.

    category_id_list: category ids; each id is translated to its name through
        the global `category_id_to_name_dict`, and the checkpoint is read from
        dir_path + name + '.pt'
    dir_path: directory prefix of the best checkpoints
    Returns a dict mapping category id to the tensor produced by
    unify_model_weights.
    """
    # A single model/optimizer pair is reused for all checkpoints. This is safe
    # because unify_model_weights concatenates with torch.hstack into a new
    # tensor, so each stored result is independent of the next load.
    shared_model = GRUModel(
        input_dim=Features,
        hidden_dim=HiddenSize,
        layer_dim=LayersDim,
        output_dim=OutputDim,
        dropout_prob=DropoutProb,
    )
    shared_optimizer = torch.optim.AdamW(shared_model.parameters(), lr=Lr)
    shared_model.to(device)

    best_models_weights_dict = {}
    for category_id in category_id_list:
        checkpoint_file = dir_path + category_id_to_name_dict[category_id] + '.pt'
        loaded_model, _, _, _ = load_checkpoint(checkpoint_file, shared_model, shared_optimizer)
        best_models_weights_dict[category_id] = unify_model_weights(loaded_model)

    return best_models_weights_dict

In [27]:
# NOTE(review): hard-coded absolute path inside one user's home directory; the
# notebook cannot be re-run elsewhere without editing it. This also overrides
# the relative `dir_path` assigned in an earlier cell.
dir_path = '/Users/mvilenko/Library/CloudStorage/OneDrive-PayPal/CPI_HRNN - version 2.0/mayas_project/basic_model_norway/checkpoints/best_checkpoints/'

In [28]:
# Build the list of category ids in the same order as `categories` by looking
# each name up in the id -> name mapping.
category_id_list = []
 
# list out keys and values separately
key_list = list(category_id_to_name_dict.keys())
val_list = list(category_id_to_name_dict.values())

for cat_name in categories:
    # list.index returns the first matching position, so if two ids shared a
    # name only the first id would be picked.
    position = val_list.index(cat_name)
    category_id_list.append(key_list[position])


In [29]:
# Unified GRU weight tensor of each category's best checkpoint, keyed by category id.
weights_dict = get_weights_per_category(category_id_list, dir_path)

In [30]:
# Persist the per-category weight tensors.
# NOTE(review): hard-coded absolute path inside one user's home directory.
# NOTE(review): the displayed tensors carry a grad_fn (CatBackward0), i.e. they
# are still attached to autograd when pickled -- confirm this is intended.
with open('/Users/mvilenko/Library/CloudStorage/OneDrive-PayPal/CPI_HRNN - version 2.0/pickle files/norway_sgru_model_weights.pickle', 'wb') as handle:
    pickle.dump(weights_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [31]:
weights_dict

{45: tensor([[-1.0324e-01, -4.2914e-01, -8.3811e-02,  ..., -1.0002e-03,
          -9.6098e-01, -8.6348e-01],
         [-1.5876e+00,  6.5696e-01, -3.9444e-01,  ..., -2.7079e-01,
          -1.7413e+00, -1.6642e+00],
         [ 7.8310e-01,  6.0179e-01,  2.9628e-01,  ...,  2.9923e-01,
          -6.0128e-01, -5.7441e-01],
         ...,
         [-5.4172e-01, -7.4301e-01, -3.8378e-01,  ...,  7.5334e-01,
           7.2861e-03, -1.9579e-01],
         [ 8.2305e-01, -4.0005e-02,  1.9541e-01,  ...,  3.2599e-01,
          -1.4596e+00, -6.5245e-01],
         [ 3.4290e-02,  6.4210e-01, -1.5608e-01,  ...,  3.6539e-02,
          -1.7264e+00, -1.0241e+00]], grad_fn=<CatBackward0>),
 19: tensor([[ 0.0813, -0.7061, -0.9026,  ..., -0.0651, -0.1227, -0.0775],
         [-0.3400, -0.0922,  0.1280,  ..., -0.0664, -0.1752, -0.1828],
         [-0.1504,  0.0319, -0.0244,  ..., -0.0558, -0.0686, -0.0922],
         ...,
         [-0.0113, -0.9250, -0.1227,  ...,  0.2479,  0.0667,  0.1201],
         [-0.0023,  0.32