In [6]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import os
from tqdm.notebook import tqdm
from utils import *
from IPython.display import clear_output

In [9]:
# Summarise every trained word-level model: for each hyperparameter combination,
# read its training log and saved model from `models/<combo>/`, record the
# trainable-parameter count and the average training wallclock, and save the
# resulting table to `logs.csv`.

# hyperparameter grid -- each combination has its own folder under `models/`
minimum_counts = [1, 3, 5]
embed_sizes = [192, 384, 576, 768, 960]
freeze_types = [True, False, None]

# vocab_sizes (from Word2Vec) -- one entry per minimum count, in the same order
vocab_sizes = [13332, 5338, 3600]
assert len(vocab_sizes) == len(minimum_counts), \
    "vocab_sizes must have exactly one entry per minimum count"

# column layout of the metrics table
log_columns = ["minimum_count", "embed_size", "freeze_type", "vocab_size",
               "average_train_time", "trainable_params"]

# derive the total from the grid so the progress message cannot go stale
total_models = len(minimum_counts) * len(embed_sizes) * len(freeze_types)

# rows are collected in a plain list and turned into a DataFrame once at the
# end; appending to a DataFrame row-by-row re-allocates it on every insert
records = []

# counter to track progress
counter = 0

# load all of our word-level models -- get parameter counts + average train time
for mc, vocab_size in zip(minimum_counts, vocab_sizes):
    for embed_size in embed_sizes:
        for freeze_type in freeze_types:

            # load in our time-elapsed logs (for TRAINING ONLY) - keep only entries at
            # or below the 99th percentile to ignore validation sets
            foldername = f"models/mc={mc}_embed-size={embed_size}_freeze-type={str(freeze_type)}"
            times = pd.read_csv(f"{foldername}/logs.csv").wallclock.values
            average_train_time = times[times <= np.quantile(times, q=0.99)].mean()

            # load in the model and count its weights; map_location="cpu" lets
            # checkpoints that were saved from a GPU load on a CPU-only machine
            # (we only need parameter counts, not a device-resident model).
            # NOTE(review): torch.load unpickles the full model object -- only
            # point this at checkpoints you trust. Recent PyTorch releases default
            # to weights_only=True and may need weights_only=False here; confirm
            # against the installed version.
            model = torch.load(f"{foldername}/model.pth", map_location="cpu")
            trainable_params = sum(
                param.numel() for param in model.parameters()
                if param.requires_grad)

            # record in our table (freeze_type is stored as its string form)
            records.append([mc, embed_size, str(freeze_type),
                            vocab_size, average_train_time, trainable_params])

            # status update -- overwrite the previous line instead of flooding output
            counter += 1
            clear_output(wait=True)
            print(f"Finished processing {counter} of {total_models} models.")

# build the metrics table in one go
logs = pd.DataFrame(records, columns=log_columns)

# save as a .csv
logs.to_csv("logs.csv", index=False)

Finished processing 45 of 45 models.
