In [5]:
# import dependencies
import nltk
import json
import io
import gzip
import torch
import string
import random
import jsonlines
import pandas as pd
import pickle as pkl
import numpy as np
from tqdm import tqdm
from functools import partial

import torch
import torch.nn as nn
import torch.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, RandomSampler, SequentialSampler, DataLoader
from sklearn.preprocessing import MultiLabelBinarizer


In [6]:
# import utils
# import importlib
# importlib.reload(utils)

In [7]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device

'cuda:0'

In [8]:
PATH_TO_FOLDER = "../../"

## Load data

Preprocess the data (the functions are in `preprocess.py`):
<ol>
    <li> Remove stopwords. </li>
    <li> Remove rows with missing labels. </li>
    <li> Remove rows with no tokens. </li>
    <li> Create a set of all categories. Binarize the labels. </li>
    <li> Split in train/val/test. </li>
    <li> Build vocabulary for train. </li>
</ol>

Make DataLoader:
<ol>
    <li> Tokenize train/val/test. </li>
    <li> Create batches using collate function that pads the short sentences. </li>
</ol>

Use pretrained embeddings:
<ol>
    <li> Load pretrained embeddings. </li>
    <li> Create embedding matrix for given vocabulary. Words that are in given vocabualry but not in pretrained embeddings have zero embedding vector. </li>
</ol>

In [9]:
# load the dataframe from pickle file
import pickle as pkl

wiki_df =  pkl.load(open(PATH_TO_FOLDER + "wikitext_tokenized.p", "rb"))

In [10]:
wiki_df.head()

Unnamed: 0,QID,mid_level_categories,tokens
0,Q2000864,[Culture.Philosophy and religion],"[affirming, the, consequent, sometimes, called..."
1,Q1064113,[History_And_Society.Business and economics],"[growth, two, six, two, zero, one, six, zero, ..."
2,Q6941060,[Geography.Europe],"[the, museum, of, work, or, arbetets, museum, ..."
3,Q843920,"[History_And_Society.History and society, STEM...","[like, this, one, in, dorset, england, arable,..."
4,Q178999,"[STEM.Biology, STEM.Medicine]","[an, axon, from, greek, axis, or, nerve, fiber..."


In [13]:
# import preprocess
# import importlib
# importlib.reload(preprocess)

from preprocess import remove_stop_words, train_validate_test_split
from preprocess import tokenize_dataset, TensoredDataset, pad_collate_fn

[nltk_data] Downloading package stopwords to /home/mz2476/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [14]:
#Removing stop words
wiki_df['tokens'] = wiki_df["tokens"].apply(remove_stop_words)
wiki_df.head()

Unnamed: 0,QID,mid_level_categories,tokens
0,Q2000864,[Culture.Philosophy and religion],"[affirming, consequent, sometimes, called, con..."
1,Q1064113,[History_And_Society.Business and economics],"[growth, two, six, two, zero, one, six, zero, ..."
2,Q6941060,[Geography.Europe],"[museum, work, arbetets, museum, swedish, muse..."
3,Q843920,"[History_And_Society.History and society, STEM...","[like, one, dorset, england, arable, land, lat..."
4,Q178999,"[STEM.Biology, STEM.Medicine]","[axon, greek, axis, nerve, fiber, long, slende..."


In [15]:
#Removing rows with missing labels
mask = wiki_df.mid_level_categories.apply(lambda x: len(x) > 0)
wiki_df = wiki_df[mask]
wiki_df = wiki_df.reset_index(drop=True)
wiki_df.shape

(99969, 3)

In [16]:
#Removing rows with no tokens
mask = wiki_df.tokens.apply(lambda x: len(x) > 0)
wiki_df = wiki_df[mask]
wiki_df = wiki_df.reset_index(drop=True)
wiki_df.shape

(99960, 3)

In [17]:
# Binarize the labels
# labels list: mlb.classes_
mlb = MultiLabelBinarizer()
wiki_df["labels"] = list(mlb.fit_transform(wiki_df.mid_level_categories))
wiki_df.head()

Unnamed: 0,QID,mid_level_categories,tokens,labels
0,Q2000864,[Culture.Philosophy and religion],"[affirming, consequent, sometimes, called, con...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ..."
1,Q1064113,[History_And_Society.Business and economics],"[growth, two, six, two, zero, one, six, zero, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,Q6941060,[Geography.Europe],"[museum, work, arbetets, museum, swedish, muse...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,Q843920,"[History_And_Society.History and society, STEM...","[like, one, dorset, england, arable, land, lat...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,Q178999,"[STEM.Biology, STEM.Medicine]","[axon, greek, axis, nerve, fiber, long, slende...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [18]:
# train/val/test split
wiki_train, wiki_valid, wiki_test = train_validate_test_split(wiki_df, seed=1)

wiki_train = wiki_train.reset_index(drop=True)
wiki_valid = wiki_valid.reset_index(drop=True)
wiki_test = wiki_test.reset_index(drop=True)

In [19]:
# Building vocabulary
vocab = list(set([y for x in list(wiki_train['tokens']) for y in x]))

print("Vocab size is: {}".format(len(vocab)))

Vocab size is: 595364


In [20]:
word_to_index = {"<pad>":0, "<unk>":1}
for word in vocab:
    if word not in word_to_index:
        word_to_index[word] = len(word_to_index)
index_to_word = {v:k for k, v in word_to_index.items()}

In [21]:
# CHANGE max number of tokens 
max_num_tokens = 500
wiki_tokenized_train = tokenize_dataset(wiki_train, word_to_index, max_num_tokens=max_num_tokens)
wiki_tokenized_val = tokenize_dataset(wiki_valid, word_to_index, max_num_tokens=max_num_tokens)
wiki_tokenized_test = tokenize_dataset(wiki_test, word_to_index, max_num_tokens=max_num_tokens)

100%|██████████| 79968/79968 [00:05<00:00, 14360.07it/s]
100%|██████████| 9996/9996 [00:00<00:00, 12697.08it/s]
100%|██████████| 9996/9996 [00:00<00:00, 12833.77it/s]


In [22]:
wiki_tokenized_datasets = {}
wiki_tokenized_datasets['X_train'] = wiki_tokenized_train
wiki_tokenized_datasets['X_val'] = wiki_tokenized_val
wiki_tokenized_datasets['X_test'] = wiki_tokenized_test

wiki_tokenized_datasets['y_train'] = list(wiki_train.labels)
wiki_tokenized_datasets['y_val'] = list(wiki_valid.labels)
wiki_tokenized_datasets['y_test'] = list(wiki_test.labels)

In [23]:
wiki_tensor_dataset = {}
wiki_tensor_dataset['train'] = TensoredDataset(
    wiki_tokenized_datasets['X_train'], wiki_tokenized_datasets['y_train']
)
wiki_tensor_dataset['val'] = TensoredDataset(
    wiki_tokenized_datasets['X_val'], wiki_tokenized_datasets['y_val']
)
wiki_tensor_dataset['test'] = TensoredDataset(
    wiki_tokenized_datasets['X_test'], wiki_tokenized_datasets['y_test']
)

In [40]:
wiki_tensor_dataset["train"].__getitem__(90)

(tensor([164870, 443205, 388602, 361458, 491134, 429789, 164870, 573021, 497131,
         524145, 377983,  74744, 377983, 327308, 292454, 245377, 198652, 226397,
         310381,   2640,  23873,  23873]),
 tensor([22.]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))

In [25]:
# create dataloader
wiki_loaders = {}

batch_size = 32

for split, wiki_dataset in wiki_tensor_dataset.items():
    wiki_loaders[split] = DataLoader(
        wiki_dataset, 
        batch_size=batch_size, 
        shuffle=True, 
        collate_fn=partial(pad_collate_fn, word_to_index=word_to_index)
    )

## Load the embeddings and make a pretrained embeddings matrix

In [26]:
import utils
import importlib
importlib.reload(utils)

<module 'utils' from '/home/mz2476/topic-modeling/topic-modeling/baseline/utils.py'>

In [27]:
# 2.5 million
embeddings = utils.load_vectors(PATH_TO_FOLDER + "wiki.en.align.vec")

2519370it [03:34, 11760.34it/s]


In [28]:
#Creating the weight matrix for pretrained word embeddings
vocab_size = len(index_to_word)
embed_dim = len(embeddings["apple"])
weights_matrix = np.zeros((vocab_size,embed_dim))

words_found = 0
for i, word in enumerate(word_to_index):
    if word in embeddings.keys():
        weights_matrix[i] = embeddings[word]
        words_found += 1
    else:
        weights_matrix[i] = np.zeros(embed_dim)
weights_matrix = torch.FloatTensor(weights_matrix)

In [29]:
print("Total words in vocab: {}".format(len(vocab)))
print("No. of words from vocab found in fastText: {}".format(words_found))

Total words in vocab: 595364
No. of words from vocab found in fastText: 470346


## Model

In [24]:
import model
import importlib
importlib.reload(model)

from model import FinalModel
from torchcontrib.optim import SWA

In [25]:
options = {
    "VOCAB_SIZE": len(index_to_word),
    "dim_e": weights_matrix.shape[1],
    "pretrained_embeddings": weights_matrix,
    "num_layers": 2,
    "num_classes": len(mlb.classes_),
    "mid_features": 100,
    "dropout_rate": 0.2,
    "activation": nn.ReLU()
}
model = FinalModel(options)

if torch.cuda.is_available():
    model = model.to(device)
    
# Criterion and Optimizer
criterion = torch.nn.BCEWithLogitsLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
base_opt = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer = SWA(base_opt)

In [26]:
model

FinalModel(
  (layer_bag_of_words): BagOfWords(
    (embed_e): Embedding(595366, 300)
  )
  (layer_out): Sequential(
    (0): Linear(in_features=300, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=44, bias=True)
  )
)

## Training

In [27]:
# import warnings
# warnings.filterwarnings('ignore')

# from utils import test_model

# best_val_f1_micro = 0
# num_epochs = 20
# for epoch in range(num_epochs):
#     runnin_loss = 0.0
#     for i, (data, length, labels) in enumerate(wiki_loaders["train"]):        
#         model.train()
#         data_batch, length_batch, label_batch = data.to(device),length.to(device), labels.float().to(device)

#         optimizer.zero_grad()
#         outputs = model(data_batch, length_batch)
#         loss = criterion(outputs, label_batch)
#         loss.backward()
#         optimizer.step()

#         runnin_loss += loss.item()
#         #torch.nn.utils.clip_grad_norm(model.parameters(), 10)
#         if i>0 and i % 300 == 0:
#             print('Epoch: [{}/{}], Step: [{}/{}], Train_loss: {}'.format(
#                 epoch+1, num_epochs, i+1, len(wiki_loaders["train"]), runnin_loss / i))
#         # validate every 300 iterations
#         if i > 0 and i % 300 == 0:
#             metrics_dict = test_model(wiki_loaders["val"], model, device=device)
#             print("Precision macro: {}, Recall macro: {}, F1 macro: {} ".format(
#                 metrics_dict["precision_macro"], metrics_dict["recall_macro"], metrics_dict["f1_macro"]
#             ))
#             print("Precision micro: {}, Recall micro: {}, F1 micro: {} ".format(
#                 metrics_dict["precision_micro"], metrics_dict["recall_micro"], metrics_dict["f1_micro"]
#             ))
            
#             if metrics_dict["f1_micro"] > best_val_f1_micro:
#                 best_val_f1_micro = metrics_dict["f1_micro"]
#                 optimizer.swap_swa_sgd()
#                 torch.save(model.state_dict(), 'baseline.pth')
#                 print('Model Saved')
#                 print()
# optimizer.swap_swa_sgd()

In [28]:
# word_to_index

In [29]:
# model.load_state_dict(torch.load("../../baseline.pth"))

In [30]:
# model

## Hyperparameter tuning

Grid search vs. Random search

<ol>
    <li> dropout </li>
    <li> ?? learning rate </li>
    <li> optimizer </li>
    <li> num of hidden layers </li>
    <li> dim of hidden layers </li>
    <li> * take only first 500/1000 words from the article </li>
<ol>

I focused on SWA optimizer.

In [34]:
# # one layer
# range_dropout = [0]
# range_num_hidden = [1]
# range_dim_hidden = [40, 80, 120]
# range_lr = [0.001, 0.01]

# many layers
range_dropout = [0, 0.1, 0.2]
range_num_hidden = [2, 3]
range_dim_hidden = [40, 80, 120]
range_lr = [0.001, 0.01]

In [35]:
import model
import importlib
importlib.reload(model)

from model import FinalModel
from torchcontrib.optim import SWA
import itertools

In [36]:
import warnings
warnings.filterwarnings('ignore')

from utils import test_model

def train_model(wiki_loaders, model, criterion, optimizer, num_epochs=10, device=device):
    best_val_f1_micro = 0
    best_metrics_dict = {}
    for epoch in range(num_epochs):
        runnin_loss = 0.0
        for i, (data, length, labels) in enumerate(wiki_loaders["train"]):        
            model.train()
            data_batch, length_batch, label_batch = data.to(device),length.to(device), labels.float().to(device)

            optimizer.zero_grad()
            outputs = model(data_batch, length_batch)
            loss = criterion(outputs, label_batch)
            loss.backward()
            optimizer.step()

            runnin_loss += loss.item()
            #torch.nn.utils.clip_grad_norm(model.parameters(), 10)
            if i>0 and i % 1000 == 0:
                print('Epoch: [{}/{}], Step: [{}/{}], Train_loss: {}'.format(
                    epoch+1, num_epochs, i+1, len(wiki_loaders["train"]), runnin_loss / i))
            # validate every 300 iterations
            if i > 0 and i % 1000 == 0:
                optimizer.update_swa()
                metrics_dict = test_model(wiki_loaders["val"], model, device=device)
                print("Precision macro: {}, Recall macro: {}, F1 macro: {} ".format(
                    metrics_dict["precision_macro"], metrics_dict["recall_macro"], metrics_dict["f1_macro"]
                ))
                print("Precision micro: {}, Recall micro: {}, F1 micro: {} ".format(
                    metrics_dict["precision_micro"], metrics_dict["recall_micro"], metrics_dict["f1_micro"]
                ))

                if metrics_dict["f1_micro"] > best_val_f1_micro:
                    best_val_f1_micro = metrics_dict["f1_micro"]
                    best_metrics_dict = metrics_dict
#                     optimizer.swap_swa_sgd()
#                     torch.save(model.state_dict(), f"{PATH_TO_FOLDER}baseline_models_params/{model_name}.pth")
#                     print('Model Saved')
#                     print()
    optimizer.swap_swa_sgd()
    return best_metrics_dict

In [37]:
results_df = pd.DataFrame(columns=[
    "optimizer", "num_hidden", "dim_hidden", "dropout_rate", "learning_rate", "num_epochs", 
    'precision_macro', 'recall_macro', 'f1_macro', 
    'precision_micro', 'recall_micro', 'f1_micro'
])


for num_hidden, dim_hidden, dropout_rate, lr in itertools.product(range_num_hidden, range_dim_hidden, range_dropout, range_lr):
    # model
    options = {
        "VOCAB_SIZE": len(index_to_word),
        "dim_e": weights_matrix.shape[1],
        "pretrained_embeddings": weights_matrix,
        "num_layers": num_hidden,
        "num_classes": len(mlb.classes_),
        "mid_features": dim_hidden,
        "dropout_rate": dropout_rate,
        "activation": nn.ReLU()
    }
    num_epochs = 10
    
    result = {
        "optimizer": "SWA", 
        "num_hidden": num_hidden,
        "dim_hidden": dim_hidden,
        "dropout_rate": dropout_rate,
        "learning_rate": lr,
        "num_epochs": num_epochs
    }
    print("\n", result)
    
    model = FinalModel(options)
    
    if torch.cuda.is_available():
        model = model.to(device)
    
    # Criterion and Optimizer
    criterion = torch.nn.BCEWithLogitsLoss()
    # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    base_opt = torch.optim.Adam(model.parameters(), lr=lr)
    optimizer = SWA(base_opt) 
    
    # train the model
    metrics_dict = train_model(wiki_loaders, model, criterion, optimizer, num_epochs=num_epochs)
    result.update(metrics_dict)
    
    results_df = results_df.append(result, ignore_index=True)
    results_df.to_csv("results/results_tuning_2_3_layers_maxlen_500.csv")


 {'optimizer': 'SWA', 'num_hidden': 2, 'dim_hidden': 40, 'dropout_rate': 0, 'learning_rate': 0.001, 'num_epochs': 10}
Epoch: [1/10], Step: [1001/2499], Train_loss: 0.15921992294490336
Precision macro: 0.057917949610036136, Recall macro: 0.02070349329910915, F1 macro: 0.028041222781815298 
Precision micro: 0.7162206805417906, Recall micro: 0.12668731373809386, F1 micro: 0.21529294935451843 
Epoch: [1/10], Step: [2001/2499], Train_loss: 0.13101045458763838
Precision macro: 0.11735284777738592, Recall macro: 0.06062856167597581, F1 macro: 0.06742932498089074 
Precision micro: 0.7609964412811387, Recall micro: 0.3123940863670894, F1 micro: 0.4429530201342282 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.08174411727488042
Precision macro: 0.187709587514583, Recall macro: 0.0940581292541711, F1 macro: 0.10804515746230227 
Precision micro: 0.8213893163332601, Recall micro: 0.4366855606848595, F1 micro: 0.570218610507039 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.07741684293001891
Pr

Epoch: [6/10], Step: [1001/2499], Train_loss: 0.04311453340016305
Precision macro: 0.6160361108426119, Recall macro: 0.42958265547793834, F1 macro: 0.48855031273813676 
Precision micro: 0.8429968323922324, Recall micro: 0.7153625898439783, F1 micro: 0.7739529002686898 
Epoch: [6/10], Step: [2001/2499], Train_loss: 0.04303271537646651
Precision macro: 0.6596358140113641, Recall macro: 0.4658841155763817, F1 macro: 0.5213805078121794 
Precision micro: 0.8411728772144166, Recall micro: 0.7241862911237071, F1 micro: 0.7783081077686366 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.04245577777735889
Precision macro: 0.6734738189403374, Recall macro: 0.46312987314568926, F1 macro: 0.5236036516070004 
Precision micro: 0.817143217824907, Recall micro: 0.7565009057441711, F1 micro: 0.7856535987377109 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.04262932132277638
Precision macro: 0.6657080089562707, Recall macro: 0.44780102193823423, F1 macro: 0.5149590063754437 
Precision micro: 0.8469536


 {'optimizer': 'SWA', 'num_hidden': 2, 'dim_hidden': 40, 'dropout_rate': 0.1, 'learning_rate': 0.01, 'num_epochs': 10}
Epoch: [1/10], Step: [1001/2499], Train_loss: 0.09096962071955204
Precision macro: 0.36782331407501767, Recall macro: 0.18261082820297944, F1 macro: 0.21193317756278854 
Precision micro: 0.8106152970820125, Recall micro: 0.5827733302167942, F1 micro: 0.678066358444384 
Epoch: [1/10], Step: [2001/2499], Train_loss: 0.07416215118579567
Precision macro: 0.516728570457947, Recall macro: 0.27348649523610413, F1 macro: 0.3238467476150298 
Precision micro: 0.8313627406396847, Recall micro: 0.640974697598317, F1 micro: 0.7238591744481472 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.049673843979835514
Precision macro: 0.5421520429744597, Recall macro: 0.3581487800159572, F1 macro: 0.4114335694903086 
Precision micro: 0.8348050829205256, Recall micro: 0.6794834336469351, F1 micro: 0.7491785323110625 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.04847733356058598
Precisio

Epoch: [6/10], Step: [1001/2499], Train_loss: 0.05170941969566047
Precision macro: 0.5209956418901522, Recall macro: 0.2787227023554815, F1 macro: 0.3341416463767046 
Precision micro: 0.8377498485766203, Recall micro: 0.6465844679483433, F1 micro: 0.7298571946835527 
Epoch: [6/10], Step: [2001/2499], Train_loss: 0.05147047310974449
Precision macro: 0.5232681644045903, Recall macro: 0.2990066025207265, F1 macro: 0.35373262345442275 
Precision micro: 0.8376119402985075, Recall micro: 0.6558756500905745, F1 micro: 0.7356864287352932 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.04976532462798059
Precision macro: 0.5119511427447475, Recall macro: 0.32165572378557544, F1 macro: 0.3730647527703929 
Precision micro: 0.8350104973575617, Recall micro: 0.6739905335125344, F1 micro: 0.745909590635711 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.04962912438903004
Precision macro: 0.5216849859873479, Recall macro: 0.32276437632634986, F1 macro: 0.37455304400831685 
Precision micro: 0.8307835

Epoch: [1/10], Step: [1001/2499], Train_loss: 0.144156963840127
Precision macro: 0.12103301941443148, Recall macro: 0.04751702898736054, F1 macro: 0.054537181664419225 
Precision micro: 0.7455923545888944, Recall micro: 0.264418862852802, F1 micro: 0.3903890949874903 
Epoch: [1/10], Step: [2001/2499], Train_loss: 0.11628943858481944
Precision macro: 0.13280337522435579, Recall macro: 0.08160651927362478, F1 macro: 0.0921777449436474 
Precision micro: 0.8097161320101546, Recall micro: 0.4100391515222346, F1 micro: 0.5443966018852554 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.07034102689474821
Precision macro: 0.29497333845347556, Recall macro: 0.14021160034977528, F1 macro: 0.16558655692080274 
Precision micro: 0.8174690007293947, Recall micro: 0.5239291766493309, F1 micro: 0.6385812471065846 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.06720424460060895
Precision macro: 0.42247449222842093, Recall macro: 0.17070872621809083, F1 macro: 0.2015239037908872 
Precision micro: 0.82

Precision macro: 0.7322754482324055, Recall macro: 0.4887740873715481, F1 macro: 0.5443834620179124 
Precision micro: 0.8266042273129983, Recall micro: 0.7632793782504529, F1 micro: 0.7936806926933009 
Epoch: [6/10], Step: [2001/2499], Train_loss: 0.0393534677317366
Precision macro: 0.6929251486273018, Recall macro: 0.4881061376051475, F1 macro: 0.554875681249072 
Precision micro: 0.8385409857497712, Recall micro: 0.7496055630222638, F1 micro: 0.791583104501558 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.038996341672725976
Precision macro: 0.7200653312566581, Recall macro: 0.5036295971237902, F1 macro: 0.5738907319059935 
Precision micro: 0.8494731897188108, Recall micro: 0.7396715946940922, F1 micro: 0.7907790341725496 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.03896323175588623
Precision macro: 0.7363518788484772, Recall macro: 0.4981759465887389, F1 macro: 0.5571364925886854 
Precision micro: 0.8204919061362781, Recall micro: 0.7641559048676445, F1 micro: 0.79132250158845

Epoch: [1/10], Step: [1001/2499], Train_loss: 0.08017346029356122
Precision macro: 0.46171422905849213, Recall macro: 0.2199982145333129, F1 macro: 0.2587360107921002 
Precision micro: 0.8313567362428842, Recall micro: 0.6144451586513177, F1 micro: 0.7066294815362386 
Epoch: [1/10], Step: [2001/2499], Train_loss: 0.06594208792410791
Precision macro: 0.5247374202203763, Recall macro: 0.36967767013753206, F1 macro: 0.42182864456505925 
Precision micro: 0.8339115044247788, Recall micro: 0.688307134926664, F1 micro: 0.7541455919072925 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.045515495229512454
Precision macro: 0.5981393245901212, Recall macro: 0.414824869084603, F1 macro: 0.46906092073662137 
Precision micro: 0.8402164235326347, Recall micro: 0.7168819026471104, F1 micro: 0.7736646276092578 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.044698548722080886
Precision macro: 0.6832327400395676, Recall macro: 0.4428677340133809, F1 macro: 0.49832055121204805 
Precision micro: 0.81126

Precision macro: 0.5569612770017284, Recall macro: 0.3621710437742363, F1 macro: 0.41943613277740854 
Precision micro: 0.8435190449118818, Recall micro: 0.6936247297376263, F1 micro: 0.7612634279300947 
Epoch: [6/10], Step: [2001/2499], Train_loss: 0.04584634846355766
Precision macro: 0.5880772250996915, Recall macro: 0.37445368156257647, F1 macro: 0.4293867398832013 
Precision micro: 0.8401913202504044, Recall micro: 0.6980073628235844, F1 micro: 0.7625279285030322 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.04440433394163847
Precision macro: 0.5763327799728839, Recall macro: 0.39208206308858456, F1 macro: 0.4442076142891214 
Precision micro: 0.8375421454620519, Recall micro: 0.7112721322970841, F1 micro: 0.7692599380648424 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.044549899446778
Precision macro: 0.577494278688312, Recall macro: 0.39963941951715554, F1 macro: 0.45277631903971843 
Precision micro: 0.8378378378378378, Recall micro: 0.7173493835096125, F1 micro: 0.7729261766

Epoch: [1/10], Step: [1001/2499], Train_loss: 0.13636404048651457
Precision macro: 0.11362074486778663, Recall macro: 0.052624252026950334, F1 macro: 0.060978297017297095 
Precision micro: 0.7754964658364187, Recall micro: 0.2692689768012622, F1 micro: 0.39973975276512697 
Epoch: [1/10], Step: [2001/2499], Train_loss: 0.11002953597158194
Precision macro: 0.2208234759629599, Recall macro: 0.10475912221240848, F1 macro: 0.11935994833176222 
Precision micro: 0.7949245541838135, Recall micro: 0.47408402968503477, F1 micro: 0.5939456056224606 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.06532959870994091
Precision macro: 0.3855325475707885, Recall macro: 0.16910302442908895, F1 macro: 0.20153033999853856 
Precision micro: 0.8147836336083516, Recall micro: 0.565534973412026, F1 micro: 0.6676554792866752 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.0629808770827949
Precision macro: 0.42406580052374027, Recall macro: 0.20002614857065246, F1 macro: 0.23961055470617906 
Precision micro: 

Precision macro: 0.7160748107506628, Recall macro: 0.5053990495818438, F1 macro: 0.5678479574793066 
Precision micro: 0.8308614704954245, Recall micro: 0.7692981943551686, F1 micro: 0.7988955640512169 
Epoch: [6/10], Step: [2001/2499], Train_loss: 0.03830531266564503
Precision macro: 0.7142920514145265, Recall macro: 0.5459377925794846, F1 macro: 0.5965834352666968 
Precision micro: 0.8194326585040611, Recall micro: 0.7899842225208905, F1 micro: 0.8044390229389188 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.0380645357882604
Precision macro: 0.732128112076614, Recall macro: 0.5123516688742034, F1 macro: 0.5805723755373593 
Precision micro: 0.8412667573343695, Recall micro: 0.7590720504879331, F1 micro: 0.798058610309025 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.03789351520081982
Precision macro: 0.7367653385711985, Recall macro: 0.5178284615899948, F1 macro: 0.5772714087651294 
Precision micro: 0.8262151144559423, Recall micro: 0.7698241103254836, F1 micro: 0.797023413394639

Epoch: [1/10], Step: [1001/2499], Train_loss: 0.07760655555129051
Precision macro: 0.47024990399045286, Recall macro: 0.2521273786323328, F1 macro: 0.2951176053544268 
Precision micro: 0.8365810107910876, Recall micro: 0.6296967217904517, F1 micro: 0.7185437087417483 
Epoch: [1/10], Step: [2001/2499], Train_loss: 0.06394812887348235
Precision macro: 0.5197638475719214, Recall macro: 0.35770992040325544, F1 macro: 0.4126813360919713 
Precision micro: 0.8375503162737206, Recall micro: 0.6808858762344416, F1 micro: 0.7511361804995971 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.04439932489395142
Precision macro: 0.6073519283663632, Recall macro: 0.4093186269891568, F1 macro: 0.46625711945742976 
Precision micro: 0.8217894177080564, Recall micro: 0.7224332378893239, F1 micro: 0.7689150107286126 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.04381175547093153
Precision macro: 0.6453381887264276, Recall macro: 0.44165622394499415, F1 macro: 0.49244624224691264 
Precision micro: 0.84074

Precision macro: 0.5817601780873157, Recall macro: 0.392252566119444, F1 macro: 0.4461023207027095 
Precision micro: 0.8431591432358893, Recall micro: 0.7061882779173727, F1 micro: 0.7686192202505883 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.04364843302033842
Precision macro: 0.5790036334805413, Recall macro: 0.4050145679935968, F1 macro: 0.4555478294927539 
Precision micro: 0.8402782513810271, Recall micro: 0.7199789633611874, F1 micro: 0.7754909365558912 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.043516783931292595
Precision macro: 0.5748626317822277, Recall macro: 0.4070644262588185, F1 macro: 0.45976595150097416 
Precision micro: 0.8338801473049883, Recall micro: 0.7277508327002863, F1 micro: 0.777209186220669 
Epoch: [8/10], Step: [1001/2499], Train_loss: 0.04294166282564402
Precision macro: 0.5836192437061455, Recall macro: 0.3976959045131155, F1 macro: 0.4528373881782642 
Precision micro: 0.8458054626532887, Recall micro: 0.7093437737392625, F1 micro: 0.771587478150

Epoch: [1/10], Step: [2001/2499], Train_loss: 0.11574636036157608
Precision macro: 0.13825829295652622, Recall macro: 0.08820664751913755, F1 macro: 0.09856946557933863 
Precision micro: 0.8156235404016815, Recall micro: 0.4081692280722258, F1 micro: 0.5440666744557386 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.07166537376865745
Precision macro: 0.24398103876876578, Recall macro: 0.1276465682194606, F1 macro: 0.14598977128175597 
Precision micro: 0.8187722989662428, Recall micro: 0.5229942149243265, F1 micro: 0.6382826986164598 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.06902579127065837
Precision macro: 0.2719979382102876, Recall macro: 0.15582256190579902, F1 macro: 0.1769902165466558 
Precision micro: 0.812046383582214, Recall micro: 0.5688073394495413, F1 micro: 0.6690034364261168 
Epoch: [3/10], Step: [1001/2499], Train_loss: 0.06223594386875629
Precision macro: 0.342815606632342, Recall macro: 0.18135631524155313, F1 macro: 0.20974580738599335 
Precision micro: 0.8228

Precision macro: 0.5988624980782887, Recall macro: 0.40758248892859966, F1 macro: 0.4629935850094855 
Precision micro: 0.8242622950819672, Recall micro: 0.7345293052065681, F1 micro: 0.7768130272224454 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.0451319096442312
Precision macro: 0.6246148517991488, Recall macro: 0.3904141065509503, F1 macro: 0.4500549550693489 
Precision micro: 0.8469576343474552, Recall micro: 0.6962543095892012, F1 micro: 0.764247458388121 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.045149185807444155
Precision macro: 0.5852519691600826, Recall macro: 0.4023786178679728, F1 macro: 0.45487589728858224 
Precision micro: 0.832366473294659, Recall micro: 0.7294454508268567, F1 micro: 0.7775147928994083 
Epoch: [8/10], Step: [1001/2499], Train_loss: 0.04508294541761279
Precision macro: 0.633269863640506, Recall macro: 0.40315591530881867, F1 macro: 0.4613627635230595 
Precision micro: 0.8443966115817247, Recall micro: 0.7106293461111436, F1 micro: 0.771759479612

Epoch: [1/10], Step: [2001/2499], Train_loss: 0.08205952916853129
Precision macro: 0.377908636679229, Recall macro: 0.1986125644978971, F1 macro: 0.23622834648741886 
Precision micro: 0.8388254486133768, Recall micro: 0.6009466487465669, F1 micro: 0.700234909610867 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.05984596536308527
Precision macro: 0.466277683763165, Recall macro: 0.28347055767618556, F1 macro: 0.3254842674086581 
Precision micro: 0.8222954106107253, Recall micro: 0.6711272132297084, F1 micro: 0.739060489060489 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.05881220149807632
Precision macro: 0.5344880450876562, Recall macro: 0.2662284017303579, F1 macro: 0.3188390814327839 
Precision micro: 0.8368389423076923, Recall micro: 0.6509671010343014, F1 micro: 0.7322925225965489 
Epoch: [3/10], Step: [1001/2499], Train_loss: 0.056550253948196766
Precision macro: 0.5216366921688649, Recall macro: 0.29469839209150844, F1 macro: 0.3464098739927599 
Precision micro: 0.8240525089

Precision macro: 0.47496203035714585, Recall macro: 0.2768662221316592, F1 macro: 0.32912426459429617 
Precision micro: 0.8510605356866388, Recall micro: 0.6424355752936364, F1 micro: 0.7321767506909527 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.05635429961234331
Precision macro: 0.49319697154471026, Recall macro: 0.28033381706461974, F1 macro: 0.33147243519990177 
Precision micro: 0.8433996113021378, Recall micro: 0.659323321451528, F1 micro: 0.7400872388573678 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.05623367530293763
Precision macro: 0.546822995967095, Recall macro: 0.29510115115770896, F1 macro: 0.3498079959280013 
Precision micro: 0.8564428451114003, Recall micro: 0.653655115993689, F1 micro: 0.7414330218068536 
Epoch: [8/10], Step: [1001/2499], Train_loss: 0.055470702858641745
Precision macro: 0.49248917082003785, Recall macro: 0.2988849242736315, F1 macro: 0.3541815476832773 
Precision micro: 0.8492570579494799, Recall micro: 0.6679717174078186, F1 micro: 0.7477839

Epoch: [1/10], Step: [2001/2499], Train_loss: 0.10780827636830509
Precision macro: 0.18723651441783826, Recall macro: 0.10469812634657762, F1 macro: 0.11864273785384438 
Precision micro: 0.8292380755729919, Recall micro: 0.4693507859522001, F1 micro: 0.5994253516922273 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.0647628179192543
Precision macro: 0.311365530239528, Recall macro: 0.1674408553324832, F1 macro: 0.19641837123470737 
Precision micro: 0.8363334498427123, Recall micro: 0.5592824168760592, F1 micro: 0.6703085057954267 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.06247532264702022
Precision macro: 0.3796478922846708, Recall macro: 0.1994410798826686, F1 macro: 0.23173785733286764 
Precision micro: 0.8417362270450751, Recall micro: 0.5892596271840121, F1 micro: 0.6932251744405871 
Epoch: [3/10], Step: [1001/2499], Train_loss: 0.054594326239079234
Precision macro: 0.4393873518094574, Recall macro: 0.2461746646803321, F1 macro: 0.29036588353076803 
Precision micro: 0.83502

Precision macro: 0.6812406583151309, Recall macro: 0.48321145403113214, F1 macro: 0.5336334497678278 
Precision micro: 0.8351648351648352, Recall micro: 0.754981592941039, F1 micro: 0.7930515913206273 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.04047713184729219
Precision macro: 0.6931145424966016, Recall macro: 0.44080582198745905, F1 macro: 0.5023701612844168 
Precision micro: 0.8278024562320355, Recall micro: 0.740489686203471, F1 micro: 0.7817155547330433 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.04069216725602746
Precision macro: 0.6725915018779126, Recall macro: 0.5080584444543695, F1 macro: 0.5560558421413889 
Precision micro: 0.838452566096423, Recall micro: 0.7560918599894817, F1 micro: 0.7951451835919495 
Epoch: [8/10], Step: [1001/2499], Train_loss: 0.040297376489266755
Precision macro: 0.6856330757754305, Recall macro: 0.48352526263456247, F1 macro: 0.5382338034722454 
Precision micro: 0.8210131101126362, Recall micro: 0.7794659031145913, F1 micro: 0.79970023980

Epoch: [1/10], Step: [2001/2499], Train_loss: 0.07226512005273253
Precision macro: 0.4943114459107067, Recall macro: 0.32577340389970044, F1 macro: 0.360082265554097 
Precision micro: 0.8048643329115703, Recall micro: 0.6690819844562613, F1 micro: 0.7307189125370943 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.051819091457873584
Precision macro: 0.557995798365737, Recall macro: 0.3681195541145214, F1 macro: 0.41606328934437414 
Precision micro: 0.8290046233342399, Recall micro: 0.7124992695611524, F1 micro: 0.7663492662078502 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.05128204574249685
Precision macro: 0.5916097325404469, Recall macro: 0.366687829174172, F1 macro: 0.4196952682593602 
Precision micro: 0.8316581196581196, Recall micro: 0.7107462163267692, F1 micro: 0.7664629151175248 
Epoch: [3/10], Step: [1001/2499], Train_loss: 0.04885962312854827
Precision macro: 0.6039305514268769, Recall macro: 0.3936387003383521, F1 macro: 0.44588129299228957 
Precision micro: 0.842040397

Precision macro: 0.535561117235536, Recall macro: 0.3630099184096291, F1 macro: 0.41106968262994 
Precision micro: 0.8410951365481186, Recall micro: 0.7144860632267866, F1 micro: 0.7726382306477094 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.04822840136662126
Precision macro: 0.5512006897512257, Recall macro: 0.36014911716281556, F1 macro: 0.41123044794746116 
Precision micro: 0.8457490999723069, Recall micro: 0.7138432770408462, F1 micro: 0.7742180815666889 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.047897883104160426
Precision macro: 0.5469268012364696, Recall macro: 0.37289220474289503, F1 macro: 0.4235556266821442 
Precision micro: 0.8428610503282276, Recall micro: 0.7202711389002513, F1 micro: 0.7767589879320668 
Epoch: [8/10], Step: [1001/2499], Train_loss: 0.0466722111441195
Precision macro: 0.5681824096839797, Recall macro: 0.3751030977351963, F1 macro: 0.42303833421959847 
Precision micro: 0.8339079697652831, Recall micro: 0.7349383509612575, F1 micro: 0.78130144432

Epoch: [1/10], Step: [2001/2499], Train_loss: 0.10021068346500396
Precision macro: 0.23171483960599534, Recall macro: 0.12341549360802881, F1 macro: 0.14042744238071073 
Precision micro: 0.8324236330556354, Recall micro: 0.5079763922164436, F1 micro: 0.6309333720423864 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.060278641080483794
Precision macro: 0.4266578571930124, Recall macro: 0.19736925235166905, F1 macro: 0.2339525493955252 
Precision micro: 0.8361600526922444, Recall micro: 0.5934669549465319, F1 micro: 0.6942137461977511 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.05786326465476304
Precision macro: 0.462903390722628, Recall macro: 0.2620368604353917, F1 macro: 0.30975249037373254 
Precision micro: 0.8385908358277688, Recall micro: 0.6384619879623677, F1 micro: 0.7249684825160906 
Epoch: [3/10], Step: [1001/2499], Train_loss: 0.05105982833728194
Precision macro: 0.5012526593278674, Recall macro: 0.30083131575550687, F1 macro: 0.3549667165684775 
Precision micro: 0.8417

Precision macro: 0.683112588186061, Recall macro: 0.4767863620107758, F1 macro: 0.5298161393731088 
Precision micro: 0.8346349964500097, Recall micro: 0.7556243791269794, F1 micro: 0.7931669017972153 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.04008946685306728
Precision macro: 0.6791170310019266, Recall macro: 0.48421268730505107, F1 macro: 0.531809384684331 
Precision micro: 0.830791530123519, Recall micro: 0.7703500262957985, F1 micro: 0.7994299748339955 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.04007507802825421
Precision macro: 0.7075053714527659, Recall macro: 0.48511888268387504, F1 macro: 0.5503568225880637 
Precision micro: 0.8429489302510432, Recall micro: 0.7436451820253608, F1 micro: 0.7901893821794473 
Epoch: [8/10], Step: [1001/2499], Train_loss: 0.03962388139590621
Precision macro: 0.6675610811030623, Recall macro: 0.46522002391074957, F1 macro: 0.5214004250285933 
Precision micro: 0.8287602265575834, Recall micro: 0.7695319347864197, F1 micro: 0.798048662242

Epoch: [1/10], Step: [2001/2499], Train_loss: 0.06738194921519608
Precision macro: 0.5586083962785972, Recall macro: 0.3508016100295773, F1 macro: 0.40309435940549593 
Precision micro: 0.8194027815653122, Recall micro: 0.7023315608017296, F1 micro: 0.7563638652024794 
Epoch: [2/10], Step: [1001/2499], Train_loss: 0.04830069831199944
Precision macro: 0.5810203070671314, Recall macro: 0.39960987418330496, F1 macro: 0.4472243726729613 
Precision micro: 0.8176318063958513, Recall micro: 0.7186349558814936, F1 micro: 0.764943708403309 
Epoch: [2/10], Step: [2001/2499], Train_loss: 0.048030391653999686
Precision macro: 0.6038337633390896, Recall macro: 0.3866744157403936, F1 macro: 0.44145490025930517 
Precision micro: 0.8508275717578043, Recall micro: 0.7119149184830246, F1 micro: 0.775197251208959 
Epoch: [3/10], Step: [1001/2499], Train_loss: 0.04597555894218385
Precision macro: 0.5900267869862037, Recall macro: 0.41228122692043007, F1 macro: 0.46339802272319824 
Precision micro: 0.849078

Precision macro: 0.5895113790184081, Recall macro: 0.4099479648808291, F1 macro: 0.45437199269583234 
Precision micro: 0.8421572500987752, Recall micro: 0.7473265938175656, F1 micro: 0.791913062323911 
Epoch: [7/10], Step: [1001/2499], Train_loss: 0.0431359302457422
Precision macro: 0.5778843548853563, Recall macro: 0.41918017423671566, F1 macro: 0.4640605470245095 
Precision micro: 0.8356780368728721, Recall micro: 0.7601823175363759, F1 micro: 0.7961444308445533 
Epoch: [7/10], Step: [2001/2499], Train_loss: 0.042893412466160956
Precision macro: 0.5943807682086151, Recall macro: 0.4279402025005849, F1 macro: 0.47476112125546793 
Precision micro: 0.8481596101929282, Recall micro: 0.7527026237363408, F1 micro: 0.7975851393188854 
Epoch: [8/10], Step: [1001/2499], Train_loss: 0.04184435125626624
Precision macro: 0.6072442351576505, Recall macro: 0.43554200075809973, F1 macro: 0.48076862345043964 
Precision micro: 0.8373150579645168, Recall micro: 0.7639221644363934, F1 micro: 0.79893662

In [38]:
# results_df

In [None]:
# results_df.to_csv("results_tuning.csv")