In [1]:
# import dependencies
import nltk
import json
import io
import gzip
import torch
import string
import random
import jsonlines
import pandas as pd
import pickle as pkl
import numpy as np
from tqdm import tqdm
from functools import partial

import torch
import torch.nn as nn
import torch.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, RandomSampler, SequentialSampler, DataLoader
from sklearn.preprocessing import MultiLabelBinarizer

In [2]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device

'cuda:0'

In [11]:
PATH_TO_EMBEDDINGS_FOLDER = "/scratch/mz2476/wiki/embeddings/"
PATH_TO_DATA_FOLDER = "/scratch/mz2476/wiki/data/"
PATH_TO_MODELS_FOLDER = "/scratch/mz2476/wiki/models/"

## Load data

In [4]:
from preprocess import create_lookups_for_vocab, pad_collate_fn

[nltk_data] Downloading package stopwords to /home/mz2476/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
# LOAD vocab, tensor dataset, classes
vocab = torch.load(PATH_TO_DATA_FOLDER + "vocab_all_en.pt")
print("Vocab size is:", len(vocab))
index_to_word, word_to_index = create_lookups_for_vocab(vocab)

wiki_tensor_dataset = torch.load(PATH_TO_DATA_FOLDER + "wiki_tensor_dataset_vocab_all_en.pt")

classes = torch.load(PATH_TO_DATA_FOLDER + "classes_list.pt")
mlb = MultiLabelBinarizer(classes)

Vocab size is: 682850


In [6]:
wiki_tensor_dataset["train"].__getitem__(200)

(tensor([13030,  8330,  3721,  8330,  3721,   132,  2496, 13031,  4719,  3982,
         13031,  3178,   303,  5510, 13032,  8334,  2496, 13031,  4719,  1828,
          2496,  1985, 13033, 10701, 13034,     7,  5299,  2338,  6948,     5,
             9,     9,     8, 10510,   480, 13035, 13036, 11814, 13035, 13036,
           965,   933,  2789,     5,   223,    10,   933, 13037,  6777,  1646,
          3271, 13038,  2496, 13031,  4719,  1036, 13039,  1985,  2300,  1495,
           601, 13040,  1495,     5,     9,   208,     6,     5,     9,     9,
            11,   568,     5,     9,     9,   208, 13041,  1467,   403, 13042,
          9309,  1065, 13043, 13044, 13043, 13044,  2300,  2189,  1880,  8330,
          4719,   452,    10,     8,     8,     8, 13035, 13036,    21, 13045,
          2300, 13045,  2641,  3721,  4340,  4251, 13043, 13044, 13046,  2496,
         13031,  4719,  4340, 13045, 13047, 13048, 13049, 13050,  5496,  9571,
           648,     5,     9,    10,     8,     5,  

In [7]:
# create dataloader
wiki_loaders = {}

batch_size = 32

for split, wiki_dataset in wiki_tensor_dataset.items():
    wiki_loaders[split] = DataLoader(
        wiki_dataset, 
        batch_size=batch_size, 
        shuffle=True, 
        collate_fn=partial(pad_collate_fn, word_to_index=word_to_index)
    )

## Load the embeddings and make a pretrained embeddings matrix

In [8]:
import utils
import importlib
importlib.reload(utils)

<module 'utils' from '/home/mz2476/topic-modeling/topic-modeling/baseline/utils.py'>

In [9]:
# # Aligned fasstext. 2.5 million
embeddings = utils.load_vectors(PATH_TO_EMBEDDINGS_FOLDER + "wiki.en.align.vec")

# # CHANGE to googlenews vectors
# import gensim
 
# model = gensim.models.KeyedVectors.load("/scratch/mz2476/GoogleNews-vectors-negative300.bin", binary=True)  
 
# embeddings = model.vocab.keys()
# wordsInVocab = len(embeddings)
# print (wordsInVocab)

# # embeddings = load_vectors("/scratch/mz2476/GoogleNews-vectors-negative300.bin")



2519370it [03:08, 13337.59it/s]


In [10]:
#Creating the weight matrix for pretrained word embeddings
weights_matrix_ve = utils.create_embeddings_matrix(word_to_index, embeddings)

Total words in vocab: 682850
No. of words from vocab found in embeddings: 528314


## Model

In [102]:
import model
import importlib
importlib.reload(model)

from model import FinalModel
from torchcontrib.optim import SWA

In [103]:
options = {
    "VOCAB_SIZE": len(index_to_word),
    "dim_e": weights_matrix.shape[1],
    "pretrained_embeddings": weights_matrix,
    "num_layers": 2,
    "num_classes": len(mlb.classes_),
    "mid_features": 150,
    "dropout_rate": 0.2,
    "activation": nn.ReLU()
}
model = FinalModel(options)

if torch.cuda.is_available():
    model = model.to(device)
    
# Criterion and Optimizer
criterion = torch.nn.BCEWithLogitsLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
base_opt = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer = SWA(base_opt)

In [104]:
model

FinalModel(
  (layer_bag_of_words): BagOfWords(
    (embed_e): Embedding(595366, 300)
  )
  (layer_out): Sequential(
    (0): Linear(in_features=300, out_features=150, bias=True)
    (1): ReLU()
    (2): Linear(in_features=150, out_features=44, bias=True)
  )
)

## Hyperparameter tuning

Grid search vs. Random search

<ol>
    <li> dropout </li>
    <li> learning rate </li>
    <li> optimizer </li>
    <li> num of hidden layers </li>
    <li> dim of hidden layers </li>
    <li> take only first 500 words from the article </li>
    <li> TODO threshold </li>
<ol>

I focused on SWA optimizer.

In [34]:
# # one layer
# range_dropout = [0]
# range_num_hidden = [1]
# range_dim_hidden = [80, 120, 150]
# range_lr = [0.01]

# many layers
range_dropout = [0, 0.1, 0.2]
range_num_hidden = [2, 3]
range_dim_hidden = [40, 80, 120]
range_lr = [0.001, 0.01]

# # best hyperparams
# range_dropout = [0.2]
# range_num_hidden = [2]
# range_dim_hidden = [120, 150, 200]
# range_lr = [0.01]

In [35]:
import model
import importlib
importlib.reload(model)

from model import FinalModel
from torchcontrib.optim import SWA
import itertools

In [36]:
import warnings
warnings.filterwarnings('ignore')

from utils import test_model

def train_model(wiki_loaders, model, criterion, optimizer, num_epochs=10, device=device, model_name="model"):
    best_val_f1_micro = 0
    best_metrics_dict = {}
    for epoch in range(num_epochs):
        runnin_loss = 0.0
        for i, (data, length, labels) in enumerate(wiki_loaders["train"]):        
            model.train()
            data_batch, length_batch, label_batch = data.to(device),length.to(device), labels.float().to(device)

            optimizer.zero_grad()
            outputs = model(data_batch, length_batch)
            loss = criterion(outputs, label_batch)
            loss.backward()
            optimizer.step()

            runnin_loss += loss.item()
            #torch.nn.utils.clip_grad_norm(model.parameters(), 10)
            if i>0 and i % 1000 == 0:
                print('Epoch: [{}/{}], Step: [{}/{}], Train_loss: {}'.format(
                    epoch+1, num_epochs, i+1, len(wiki_loaders["train"]), runnin_loss / i))
            # validate every 300 iterations
            if i > 0 and i % 1000 == 0:
                optimizer.update_swa()
                metrics_dict = test_model(wiki_loaders["val"], model, device=device)
                print("Precision macro: {}, Recall macro: {}, F1 macro: {} ".format(
                    metrics_dict["precision_macro"], metrics_dict["recall_macro"], metrics_dict["f1_macro"]
                ))
                print("Precision micro: {}, Recall micro: {}, F1 micro: {} ".format(
                    metrics_dict["precision_micro"], metrics_dict["recall_micro"], metrics_dict["f1_micro"]
                ))

                if metrics_dict["f1_micro"] > best_val_f1_micro:
                    best_val_f1_micro = metrics_dict["f1_micro"]
                    best_metrics_dict = metrics_dict
                    optimizer.swap_swa_sgd()
                    torch.save(model.state_dict(), f"{PATH_TO_MODELS_FOLDER}en_{model_name}.pth")
                    print('Model Saved')
                    print()
    optimizer.swap_swa_sgd()
    return best_metrics_dict

In [None]:
# results_df_without_best = results_df

In [37]:
# results_df = pd.DataFrame(columns=[
#     "optimizer", "num_hidden", "dim_hidden", "dropout_rate", "learning_rate", "num_epochs", 
#     'precision_macro', 'recall_macro', 'f1_macro', 
#     'precision_micro', 'recall_micro', 'f1_micro'
# ])


for num_hidden, dim_hidden, dropout_rate, lr in itertools.product(range_num_hidden, range_dim_hidden, range_dropout, range_lr):
    # model
    options = {
        "VOCAB_SIZE": len(index_to_word),
        "dim_e": weights_matrix_ve.shape[1],
        "pretrained_embeddings": weights_matrix_ve,
        "num_layers": num_hidden,
        "num_classes": len(classes),
        "mid_features": dim_hidden,
        "dropout_rate": dropout_rate,
        "activation": nn.ReLU()
    }
    num_epochs = 10
    
    result = {
        "optimizer": "SWA", 
        "num_hidden": num_hidden,
        "dim_hidden": dim_hidden,
        "dropout_rate": dropout_rate,
        "learning_rate": lr,
        "num_epochs": num_epochs
    }
    print("\n", result)
    
    model = FinalModel(options)
    
    if torch.cuda.is_available():
        model = model.to(device)
    
    # Criterion and Optimizer
    criterion = torch.nn.BCEWithLogitsLoss()
    # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    base_opt = torch.optim.Adam(model.parameters(), lr=lr)
    optimizer = SWA(base_opt) 
    
    # train the model
    model_name = "_".join([str(key) + "_" + str(value) for key, value in result.items()])
    metrics_dict = train_model(wiki_loaders, model, criterion, optimizer, num_epochs=num_epochs, model_name=model_name)
    result.update(metrics_dict)
    
    results_df = results_df.append(result, ignore_index=True)
    results_df.to_csv("results/results_tuning_2_3_layers.csv")


 {'optimizer': 'SWA', 'num_hidden': 2, 'dim_hidden': 40, 'dropout_rate': 0, 'learning_rate': 0.001, 'num_epochs': 10}
Epoch: [1/10], Step: [1001/2499], Train_loss: 0.1613335421010852
Precision macro: 0.07763095820863863, Recall macro: 0.022133927127800327, F1 macro: 0.02803677062124604 
Precision micro: 0.6717482173592328, Recall micro: 0.15964471454449833, F1 micro: 0.2579792256846081 
Model Saved

Epoch: [1/10], Step: [2001/2499], Train_loss: 0.13147279946506024
Precision macro: 0.11809104702657391, Recall macro: 0.061221524474661226, F1 macro: 0.0676405812609879 
Precision micro: 0.7536959954069183, Recall micro: 0.3068427511248758, F1 micro: 0.43612956810631226 
Model Saved

Epoch: [2/10], Step: [1001/2499], Train_loss: 0.08414565202966333
Precision macro: 0.15466359242020145, Recall macro: 0.09444943925158747, F1 macro: 0.10662958689069836 
Precision micro: 0.7951121879279469, Recall micro: 0.4410681937708175, F1 micro: 0.5673908141020823 
Model Saved

Epoch: [2/10], Step: [2001/

Model Saved

Epoch: [5/10], Step: [2001/2499], Train_loss: 0.045241340330801905
Precision macro: 0.5935961974640986, Recall macro: 0.40162432873399223, F1 macro: 0.45300709048177407 
Precision micro: 0.8311810599946193, Recall micro: 0.72214106235026, F1 micro: 0.7728338701103781 
Model Saved

Epoch: [6/10], Step: [1001/2499], Train_loss: 0.04443852431699634
Precision macro: 0.6351501796401967, Recall macro: 0.4022228833570848, F1 macro: 0.46324434824108063 
Precision micro: 0.8586662771163538, Recall micro: 0.6869631274469702, F1 micro: 0.7632774964290352 
Epoch: [6/10], Step: [2001/2499], Train_loss: 0.04420813321415335
Precision macro: 0.63294608095334, Recall macro: 0.4154389094393022, F1 macro: 0.47938124212300726 
Precision micro: 0.8390406305204512, Recall micro: 0.7216151463799451, F1 micro: 0.7759102761458956 
Model Saved

Epoch: [7/10], Step: [1001/2499], Train_loss: 0.04428689499758184
Precision macro: 0.6029069795898866, Recall macro: 0.39880245318659147, F1 macro: 0.455150

Precision macro: 0.5383566964211329, Recall macro: 0.3095567281430236, F1 macro: 0.36634320053363945 
Precision micro: 0.8399765533411488, Recall micro: 0.6699000759656402, F1 micro: 0.7453593836351223 
Model Saved

Epoch: [10/10], Step: [2001/2499], Train_loss: 0.04996282046381384
Precision macro: 0.5387569722860307, Recall macro: 0.3092359896188814, F1 macro: 0.3663281541815256 
Precision micro: 0.8417335001103672, Recall micro: 0.6684976333781336, F1 micro: 0.7451797811360082 

 {'optimizer': 'SWA', 'num_hidden': 2, 'dim_hidden': 40, 'dropout_rate': 0.1, 'learning_rate': 0.01, 'num_epochs': 10}
Epoch: [1/10], Step: [1001/2499], Train_loss: 0.09957867152988911
Precision macro: 0.26034356474413944, Recall macro: 0.13783823045531732, F1 macro: 0.15619433959436096 
Precision micro: 0.803516211679477, Recall micro: 0.5314673055571788, F1 micro: 0.6397720877884074 
Model Saved

Epoch: [1/10], Step: [2001/2499], Train_loss: 0.08068148791603744
Precision macro: 0.4117928284563574, Recall ma

Precision macro: 0.3587469613088194, Recall macro: 0.17181313160027176, F1 macro: 0.20207192032294996 
Precision micro: 0.8318999218139171, Recall micro: 0.559574592415123, F1 micro: 0.6690888764673001 
Epoch: [5/10], Step: [1001/2499], Train_loss: 0.05985771159827709
Precision macro: 0.4086369437389045, Recall macro: 0.20218833925281496, F1 macro: 0.24002676833976244 
Precision micro: 0.8370883882149047, Recall micro: 0.5927072985449658, F1 micro: 0.6940130003421143 
Model Saved

Epoch: [5/10], Step: [2001/2499], Train_loss: 0.05995417389832437
Precision macro: 0.4164411991128871, Recall macro: 0.20027846281074657, F1 macro: 0.23667165367735252 
Precision micro: 0.8303955811875559, Recall micro: 0.5973821071699877, F1 micro: 0.6948749320282762 
Model Saved

Epoch: [6/10], Step: [1001/2499], Train_loss: 0.056579754617065194
Precision macro: 0.4441704263370419, Recall macro: 0.23129980080294568, F1 macro: 0.27352593707734896 
Precision micro: 0.8337280909521554, Recall micro: 0.61707473

Model Saved

Epoch: [9/10], Step: [2001/2499], Train_loss: 0.0419212734894827
Precision macro: 0.6420684272835433, Recall macro: 0.46890087997055657, F1 macro: 0.516845698949471 
Precision micro: 0.8253856493631185, Recall micro: 0.7535207152457196, F1 micro: 0.7878176930596286 
Epoch: [10/10], Step: [1001/2499], Train_loss: 0.041473331943154335
Precision macro: 0.64841616267524, Recall macro: 0.4728196700507425, F1 macro: 0.5288279309651372 
Precision micro: 0.8232939263151311, Recall micro: 0.7691228890317303, F1 micro: 0.7952870090634442 
Model Saved

Epoch: [10/10], Step: [2001/2499], Train_loss: 0.041732057453133166
Precision macro: 0.6563796483158757, Recall macro: 0.4731986137201607, F1 macro: 0.5275840622652775 
Precision micro: 0.8366339890782288, Recall micro: 0.7430608309472331, F1 micro: 0.7870760089130973 

 {'optimizer': 'SWA', 'num_hidden': 2, 'dim_hidden': 80, 'dropout_rate': 0, 'learning_rate': 0.001, 'num_epochs': 10}
Epoch: [1/10], Step: [1001/2499], Train_loss: 0.14

Model Saved

Epoch: [4/10], Step: [2001/2499], Train_loss: 0.04206683901138604
Precision macro: 0.6361788981719751, Recall macro: 0.47578629825211044, F1 macro: 0.5233490115479097 
Precision micro: 0.8417789038538471, Recall micro: 0.7377432361362707, F1 micro: 0.7863349008128055 
Epoch: [5/10], Step: [1001/2499], Train_loss: 0.040664397651329634
Precision macro: 0.6917799094831893, Recall macro: 0.4874054682349925, F1 macro: 0.543914854331587 
Precision micro: 0.8298335467349552, Recall micro: 0.7574358674691755, F1 micro: 0.7919836250878319 
Model Saved

Epoch: [5/10], Step: [2001/2499], Train_loss: 0.040856748732738195
Precision macro: 0.6444962768339741, Recall macro: 0.4894134532041894, F1 macro: 0.5344626112903257 
Precision micro: 0.8191717134642442, Recall micro: 0.7825045287208555, F1 micro: 0.8004184100418409 
Model Saved

Epoch: [6/10], Step: [1001/2499], Train_loss: 0.04039205405302346
Precision macro: 0.6747897653006243, Recall macro: 0.5028756609868804, F1 macro: 0.554483

Precision macro: 0.5570126226515768, Recall macro: 0.3679056903056623, F1 macro: 0.42217098329046476 
Precision micro: 0.8411992945326279, Recall micro: 0.6967802255595161, F1 micro: 0.7622091536691383 
Model Saved

Epoch: [9/10], Step: [2001/2499], Train_loss: 0.04626221817731857
Precision macro: 0.5993456693311546, Recall macro: 0.37624498620813307, F1 macro: 0.43081878394296974 
Precision micro: 0.840849926123971, Recall micro: 0.698357973470461, F1 micro: 0.7630083636595799 
Model Saved

Epoch: [10/10], Step: [1001/2499], Train_loss: 0.0450018363147974
Precision macro: 0.5823232653311243, Recall macro: 0.3881703382237813, F1 macro: 0.4399912284297571 
Precision micro: 0.8373939228031755, Recall micro: 0.7150119791971016, F1 micro: 0.7713790386130812 
Model Saved

Epoch: [10/10], Step: [2001/2499], Train_loss: 0.045324886388145386
Precision macro: 0.5968843683455428, Recall macro: 0.3804581802030391, F1 macro: 0.4371502675214689 
Precision micro: 0.8417236089806164, Recall micro: 0.

Epoch: [3/10], Step: [2001/2499], Train_loss: 0.06183473600819707
Precision macro: 0.3855679830473306, Recall macro: 0.19376788810883247, F1 macro: 0.23000923573366955 
Precision micro: 0.8256262833675565, Recall micro: 0.5873897037340033, F1 micro: 0.6864244741873805 
Epoch: [4/10], Step: [1001/2499], Train_loss: 0.05614892731979489
Precision macro: 0.4346780167986582, Recall macro: 0.22974691372693978, F1 macro: 0.2721614326347502 
Precision micro: 0.8378314580218743, Recall micro: 0.6222170279904167, F1 micro: 0.7141036818456175 
Model Saved

Epoch: [4/10], Step: [2001/2499], Train_loss: 0.056379694627597926
Precision macro: 0.43778010931927264, Recall macro: 0.2273467935454587, F1 macro: 0.26787722889590593 
Precision micro: 0.8270955768635941, Recall micro: 0.6250219131654298, F1 micro: 0.7119986686636711 
Epoch: [5/10], Step: [1001/2499], Train_loss: 0.05303310260362923
Precision macro: 0.524237616679755, Recall macro: 0.27699303798920005, F1 macro: 0.3281567687342629 
Precision 

Epoch: [8/10], Step: [2001/2499], Train_loss: 0.03996502546034753
Precision macro: 0.7407214491203914, Recall macro: 0.5218365553274494, F1 macro: 0.5754217412386878 
Precision micro: 0.8260762151812735, Recall micro: 0.7815695669958511, F1 micro: 0.803206822003363 
Model Saved

Epoch: [9/10], Step: [1001/2499], Train_loss: 0.039831197841092944
Precision macro: 0.7332671322501366, Recall macro: 0.500245361430969, F1 macro: 0.5569116262943682 
Precision micro: 0.83280430135389, Recall micro: 0.7512417460410215, F1 micro: 0.7899231950844855 
Epoch: [9/10], Step: [2001/2499], Train_loss: 0.040014788595028224
Precision macro: 0.7165103419399753, Recall macro: 0.5178763358562315, F1 macro: 0.5679686748650299 
Precision micro: 0.8271817428443935, Recall micro: 0.7582539589785543, F1 micro: 0.7912195121951221 
Epoch: [10/10], Step: [1001/2499], Train_loss: 0.039303801646456124
Precision macro: 0.7222768487806164, Recall macro: 0.4960993852996312, F1 macro: 0.5579203756050976 
Precision micro:

Model Saved

Epoch: [3/10], Step: [1001/2499], Train_loss: 0.04234453810751438
Precision macro: 0.6516890260841843, Recall macro: 0.4800706488643533, F1 macro: 0.5267861108362948 
Precision micro: 0.8398915415647114, Recall micro: 0.7421258692222287, F1 micro: 0.7879878389278402 
Model Saved

Epoch: [3/10], Step: [2001/2499], Train_loss: 0.04168704004585743
Precision macro: 0.6603816600496363, Recall macro: 0.49618784401024774, F1 macro: 0.5461755369759607 
Precision micro: 0.836507413509061, Recall micro: 0.7417752585753521, F1 micro: 0.7862983151635282 
Epoch: [4/10], Step: [1001/2499], Train_loss: 0.0399586783926934
Precision macro: 0.7031994496628118, Recall macro: 0.4736989371537309, F1 macro: 0.5428589046209477 
Precision micro: 0.8271902556619932, Recall micro: 0.7619353707707591, F1 micro: 0.7932230198320963 
Model Saved

Epoch: [4/10], Step: [2001/2499], Train_loss: 0.04025269797071815
Precision macro: 0.6755903420514063, Recall macro: 0.47532756302233353, F1 macro: 0.52899088

Precision macro: 0.5320415806146319, Recall macro: 0.35837132516104037, F1 macro: 0.41340541231071926 
Precision micro: 0.8468826847967598, Recall micro: 0.6842166773797698, F1 micro: 0.7569087559391061 
Epoch: [8/10], Step: [1001/2499], Train_loss: 0.04532984853908419
Precision macro: 0.5801303712826203, Recall macro: 0.38745546143322956, F1 macro: 0.4410293884475637 
Precision micro: 0.8389206305500103, Recall micro: 0.7121486589142757, F1 micro: 0.770353982300885 
Model Saved

Epoch: [8/10], Step: [2001/2499], Train_loss: 0.04558108008094132
Precision macro: 0.5888127032532707, Recall macro: 0.3720031592733124, F1 macro: 0.42874037249901653 
Precision micro: 0.8470437930300234, Recall micro: 0.697364576637644, F1 micro: 0.7649509646817512 
Epoch: [9/10], Step: [1001/2499], Train_loss: 0.04436332703568041
Precision macro: 0.5897040072643247, Recall macro: 0.3905062526470875, F1 macro: 0.44839504781241124 
Precision micro: 0.8433080192294294, Recall micro: 0.7072985449658155, F1 micro

Precision macro: 0.3416159999972966, Recall macro: 0.1545569755550397, F1 macro: 0.18014687074848001 
Precision micro: 0.8161688086141021, Recall micro: 0.549231578332262, F1 micro: 0.656606937021901 
Model Saved

Epoch: [2/10], Step: [2001/2499], Train_loss: 0.06783419903181494
Precision macro: 0.3592409143457006, Recall macro: 0.1707717704229251, F1 macro: 0.20072529438250972 
Precision micro: 0.8305707450444293, Recall micro: 0.5680476830479753, F1 micro: 0.6746712010271715 
Model Saved

Epoch: [3/10], Step: [1001/2499], Train_loss: 0.05867591248638928
Precision macro: 0.43617174711177886, Recall macro: 0.21329681896388664, F1 macro: 0.2497680082421527 
Precision micro: 0.8323818585474152, Recall micro: 0.6134517618185006, F1 micro: 0.7063414634146341 
Model Saved

Epoch: [3/10], Step: [2001/2499], Train_loss: 0.05874686063732952
Precision macro: 0.44135220391550667, Recall macro: 0.20846369435731021, F1 macro: 0.24470418881779388 
Precision micro: 0.8231010180109632, Recall micro: 

Model Saved

Epoch: [7/10], Step: [1001/2499], Train_loss: 0.03881788587197661
Precision macro: 0.7141912485768067, Recall macro: 0.494685846873231, F1 macro: 0.5624084684537679 
Precision micro: 0.8605500792720755, Recall micro: 0.7295038859346695, F1 micro: 0.7896268184693233 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.03877784068416804
Precision macro: 0.682189289855683, Recall macro: 0.5376806839471797, F1 macro: 0.5801318519448745 
Precision micro: 0.8197691982357561, Recall micro: 0.7928475428037165, F1 micro: 0.8060836501901142 
Model Saved

Epoch: [8/10], Step: [1001/2499], Train_loss: 0.03782598140835762
Precision macro: 0.755464903756047, Recall macro: 0.5092463763480808, F1 macro: 0.5757316625977802 
Precision micro: 0.8496861579121242, Recall micro: 0.7514754864722726, F1 micro: 0.7975688414785412 
Epoch: [8/10], Step: [2001/2499], Train_loss: 0.03786924143135548
Precision macro: 0.7270303946167099, Recall macro: 0.5439452082314756, F1 macro: 0.6004014305173666 
Precis

KeyboardInterrupt: 

In [38]:
# results_df

In [None]:
# results_df.to_csv("results_tuning.csv")