In [95]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

np.random.seed(42)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init
from tqdm.auto import tqdm, trange
from collections import Counter
import random
from torch import optim

import pandas as pd
import pickle

import wandb

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
import seaborn as sns

# Sort of smart tokenization
from nltk.tokenize import RegexpTokenizer
from gensim import corpora

# Attention plotting
import matplotlib.pyplot as plt
import pickle

import os
import sys
cur_dir = os.path.dirname(os.path.abspath("__file__"))  # Gets the current notebook directory
src_dir = os.path.join(cur_dir, '../src')  # Constructs the path to the 'src' directory
# Add the 'src' directory to sys.path
if src_dir not in sys.path:
    sys.path.append(src_dir)

from utils import *

In [3]:
dictionary = corpora.Dictionary.load('../data/comments.dict')
index_to_word = dictionary.id2token
word_to_index = dictionary.token2id
tokenizer = RegexpTokenizer(r'\w+')

In [80]:
class DocumentAttentionClassifier(nn.Module):
    
    def __init__(self, vocab_size, num_heads, embeddings_fname, num_classes=24):
        '''
        Creates the new classifier model. embeddings_fname is a string containing the
        filename with the saved pytorch parameters (the state dict) for the Embedding
        object that should be used to initialize this class's word Embedding parameters
        '''
        super(DocumentAttentionClassifier, self).__init__()
        
        torch.set_default_dtype(torch.float32)  # Set default to float64
        
        # Save the input arguments to the state
        self.vocab_size = vocab_size
        self.word_embeddings = torch.load(embeddings_fname)
        self.embedding_size = self.word_embeddings.embedding_dim
        self.num_heads = num_heads
        self.embeddings_fname = embeddings_fname

        self.attention = torch.empty((num_heads, self.embedding_size),dtype=torch.float32, requires_grad=True)
        torch.nn.init.uniform_(self.attention, a=-0.5, b=0.5)

        self.linear = nn.Linear(self.embedding_size * num_heads, num_classes)
        self.sigmoid = torch.nn.Sigmoid()

        init.normal_(self.linear.weight, mean=0.0, std=1.0)
        init.constant_(self.linear.bias, 0.0)
        pass
    

    def forward(self, word_ids):

        word_embeddings = self.word_embeddings(word_ids).squeeze(0)

        r = torch.einsum('ij,kj->ik', word_embeddings, self.attention)

        a = torch.softmax(r, dim=0)      

        d = torch.einsum('ij,ik->jk', word_embeddings, a).transpose(0, 1)

        d = d.flatten()

        output = self.linear(d)
        # output = self.sigmoid(output)

        return output

In [37]:
sent_train_df = pd.read_csv('../data/train.csv', index_col=0)
sent_dev_df = pd.read_csv('../data/test.csv', index_col=0)

In [38]:
sent_train_df['label'] = sent_train_df.apply(lambda x: np.array(x[2:].values), axis=1)
sent_dev_df['label'] = sent_dev_df.apply(lambda x: np.array(x[2:].values), axis=1)

In [49]:
train_list = []
dev_list = []

key = "Review"

for df in [sent_train_df, sent_dev_df]:
    df['text'] = df[key].str.lower()
    # if 'label' in df.columns:
    #     df['label'] = df['label'].astype(float)
    for _, row in tqdm(df.iterrows(), total=len(df)):
        word_list = tokenizer.tokenize(row['text'])
        index_list = np.array([word_to_index[word] for word in word_list if word in word_to_index])

        if df is sent_train_df:
            label = row['label'].astype(float)
            train_list.append((index_list, label))
        elif df is sent_dev_df:
            label = row['label'].astype(float)
            dev_list.append((index_list, label))
    

  0%|          | 0/9955 [00:00<?, ?it/s]

  0%|          | 0/2489 [00:00<?, ?it/s]

In [23]:
len(train_list), len(dev_list)

(9955, 2489)

In [97]:
batch_size = 1
embedding_size = 50
learning_rate = 5e-5
epochs = 200
max_steps = 160000

In [81]:
model = DocumentAttentionClassifier(vocab_size=len(word_to_index), num_heads=4, embeddings_fname='../data/embedding_layer.pth', num_classes=train_list[0][1].shape[0])

loss_function = nn.CrossEntropyLoss() # Example for a classification task
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)  # Adjust learning rate as needed

train_dataloader = DataLoader(train_list, batch_size=batch_size, shuffle=True)


In [99]:
def run_eval(model, eval_data):
    '''
    Scores the model on the evaluation data and returns the F1
    '''
    with torch.no_grad():
        all_preds = []
        all_labels = []
        for inputs, label in eval_data:
            inputs = torch.tensor(inputs).long()
            label = torch.tensor(label).float()
            try:
                pred= model(inputs)
                all_preds.append(pred)
                all_labels.append(label)
            except:
                continue
        
        te_label = np.array([label.numpy() for label in all_labels])
        te_pred = np.array([pred.numpy() for pred in all_preds])
        test_metrics = get_modified_evaluation(te_label, te_pred, list_metrics=["hamming_loss"], threshold=0.25) 

        return test_metrics

In [101]:

for epoch in tqdm(range(epochs)):

    loss_sum = 0
    
    # TODO: use your DataLoader to iterate over the data
    for step, data in enumerate(train_dataloader):

        # NOTE: since you created the data np.array instances,
        # these have now been converted to Tensor objects for us
        word_ids, label = data    
        
        # TODO: Fill in all the training details here
        try:
            outputs = model(word_ids.squeeze(0))
        except:
            continue
        
        optimizer.zero_grad()
        loss = loss_function(outputs, label[0])
        loss_sum += loss.item()

        loss.backward()
        optimizer.step()

        # print(f'epochs: {epoch}, step: {step}, loss: {loss.item()}')
        
        if step > max_steps:
            print('Max steps reached')
            break

    # Evaluate the model after each epoch
    if epoch % 1 == 0:
        test_metrics = run_eval(model, dev_list)
        print(f'Epoch {epoch} test metrics: {test_metrics}')
        wandb.log(test_metrics)
        
        
# # once you finish training, it's good practice to switch to eval.
# model.eval()

  0%|          | 0/200 [00:00<?, ?it/s]

Epoch 0 test metrics: {'hamming_loss': 0.17588124916231068}
Epoch 1 test metrics: {'hamming_loss': 0.17206138587320735}
Epoch 2 test metrics: {'hamming_loss': 0.1690457043291784}
Epoch 3 test metrics: {'hamming_loss': 0.16547714783541081}
Epoch 4 test metrics: {'hamming_loss': 0.16021645891971587}
Epoch 5 test metrics: {'hamming_loss': 0.15733480766653263}
Epoch 6 test metrics: {'hamming_loss': 0.15560916767189384}
Epoch 7 test metrics: {'hamming_loss': 0.1515715051601662}
Epoch 8 test metrics: {'hamming_loss': 0.15048250904704463}
Epoch 9 test metrics: {'hamming_loss': 0.14768462672564}
Epoch 10 test metrics: {'hamming_loss': 0.14542286556761827}
Epoch 11 test metrics: {'hamming_loss': 0.1426752446052808}
Epoch 12 test metrics: {'hamming_loss': 0.14094960461064202}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 13 test metrics: {'hamming_loss': 0.1383862752982174}
Epoch 14 test metrics: {'hamming_loss': 0.13580619219943707}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 15 test metrics: {'hamming_loss': 0.1339967832730197}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 16 test metrics: {'hamming_loss': 0.13228789706473662}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 17 test metrics: {'hamming_loss': 0.13067953357458786}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 18 test metrics: {'hamming_loss': 0.12860206406647903}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 19 test metrics: {'hamming_loss': 0.12607224232676584}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 20 test metrics: {'hamming_loss': 0.12439686369119421}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 21 test metrics: {'hamming_loss': 0.12282200777375687}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 22 test metrics: {'hamming_loss': 0.1216157351561453}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 23 test metrics: {'hamming_loss': 0.1196890497252379}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 24 test metrics: {'hamming_loss': 0.1187675914756735}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 25 test metrics: {'hamming_loss': 0.11695818254925613}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 26 test metrics: {'hamming_loss': 0.1152995577000402}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 27 test metrics: {'hamming_loss': 0.1132388419782871}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 28 test metrics: {'hamming_loss': 0.11270272081490416}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 29 test metrics: {'hamming_loss': 0.11114461868382254}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 30 test metrics: {'hamming_loss': 0.10970379305723094}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 31 test metrics: {'hamming_loss': 0.10821270607157217}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 32 test metrics: {'hamming_loss': 0.10749229325827638}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 33 test metrics: {'hamming_loss': 0.1065038198632891}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 34 test metrics: {'hamming_loss': 0.10581691462270473}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 35 test metrics: {'hamming_loss': 0.10471116472322745}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 36 test metrics: {'hamming_loss': 0.10318657016485726}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 37 test metrics: {'hamming_loss': 0.10260018764240718}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 38 test metrics: {'hamming_loss': 0.1013939150247956}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 39 test metrics: {'hamming_loss': 0.10048921056158693}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 40 test metrics: {'hamming_loss': 0.09963476745744539}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 41 test metrics: {'hamming_loss': 0.0990316311486396}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 42 test metrics: {'hamming_loss': 0.09879707813965956}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 43 test metrics: {'hamming_loss': 0.09802640396729662}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 44 test metrics: {'hamming_loss': 0.09722222222222222}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 45 test metrics: {'hamming_loss': 0.09747352901755797}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 46 test metrics: {'hamming_loss': 0.0964850556225707}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 47 test metrics: {'hamming_loss': 0.09561385873207345}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 48 test metrics: {'hamming_loss': 0.09522852164589198}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 49 test metrics: {'hamming_loss': 0.09465889290979762}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 50 test metrics: {'hamming_loss': 0.09373743466023321}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 51 test metrics: {'hamming_loss': 0.09328508242862887}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 52 test metrics: {'hamming_loss': 0.09305052941964884}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 53 test metrics: {'hamming_loss': 0.09194477952017156}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 54 test metrics: {'hamming_loss': 0.09167671893848009}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 55 test metrics: {'hamming_loss': 0.09109033641603002}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 56 test metrics: {'hamming_loss': 0.09092279855247286}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 57 test metrics: {'hamming_loss': 0.09057096903900282}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 58 test metrics: {'hamming_loss': 0.09001809408926417}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 59 test metrics: {'hamming_loss': 0.09006835544833132}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 60 test metrics: {'hamming_loss': 0.08958249564401555}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 61 test metrics: {'hamming_loss': 0.08961600321672698}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 62 test metrics: {'hamming_loss': 0.08941495778045838}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 63 test metrics: {'hamming_loss': 0.08904637448063263}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 64 test metrics: {'hamming_loss': 0.08867779118080686}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 65 test metrics: {'hamming_loss': 0.0882086851628468}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 66 test metrics: {'hamming_loss': 0.0884097305991154}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 67 test metrics: {'hamming_loss': 0.0884097305991154}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 68 test metrics: {'hamming_loss': 0.08815842380377965}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 69 test metrics: {'hamming_loss': 0.08784010186302105}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 70 test metrics: {'hamming_loss': 0.08770607157217532}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 71 test metrics: {'hamming_loss': 0.08735424205870526}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 72 test metrics: {'hamming_loss': 0.08765581021310816}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 73 test metrics: {'hamming_loss': 0.08742125720412813}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 74 test metrics: {'hamming_loss': 0.08733748827234955}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 75 test metrics: {'hamming_loss': 0.08740450341777242}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 76 test metrics: {'hamming_loss': 0.08710293526336953}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 77 test metrics: {'hamming_loss': 0.08695215118616807}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 78 test metrics: {'hamming_loss': 0.08665058303176518}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 79 test metrics: {'hamming_loss': 0.08671759817718805}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 80 test metrics: {'hamming_loss': 0.08660032167269803}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 81 test metrics: {'hamming_loss': 0.086365768663718}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 82 test metrics: {'hamming_loss': 0.08634901487736228}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 83 test metrics: {'hamming_loss': 0.0864662913818523}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 84 test metrics: {'hamming_loss': 0.0859636777911808}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 85 test metrics: {'hamming_loss': 0.08598043157753653}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 86 test metrics: {'hamming_loss': 0.08609770808202653}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 87 test metrics: {'hamming_loss': 0.08577938614126793}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 88 test metrics: {'hamming_loss': 0.08619823080016084}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 89 test metrics: {'hamming_loss': 0.08606420050931511}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 90 test metrics: {'hamming_loss': 0.08594692400482509}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 91 test metrics: {'hamming_loss': 0.08567886342313363}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 92 test metrics: {'hamming_loss': 0.08634901487736228}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 93 test metrics: {'hamming_loss': 0.08618147701380512}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 94 test metrics: {'hamming_loss': 0.08649979895456374}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 95 test metrics: {'hamming_loss': 0.08591341643211366}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 96 test metrics: {'hamming_loss': 0.08609770808202653}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 97 test metrics: {'hamming_loss': 0.08606420050931511}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 98 test metrics: {'hamming_loss': 0.08624849215922799}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 99 test metrics: {'hamming_loss': 0.08628199973193942}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 100 test metrics: {'hamming_loss': 0.08649979895456374}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 101 test metrics: {'hamming_loss': 0.08634901487736228}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 102 test metrics: {'hamming_loss': 0.08655006031363088}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 103 test metrics: {'hamming_loss': 0.08629875351829513}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 104 test metrics: {'hamming_loss': 0.08661707545905374}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 105 test metrics: {'hamming_loss': 0.08604744672295939}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 106 test metrics: {'hamming_loss': 0.0867846133226109}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 107 test metrics: {'hamming_loss': 0.08634901487736228}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 108 test metrics: {'hamming_loss': 0.08658356788634232}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 109 test metrics: {'hamming_loss': 0.08643278380914086}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 110 test metrics: {'hamming_loss': 0.08695215118616807}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 111 test metrics: {'hamming_loss': 0.08690188982710093}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 112 test metrics: {'hamming_loss': 0.08690188982710093}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 113 test metrics: {'hamming_loss': 0.08691864361345664}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 114 test metrics: {'hamming_loss': 0.08690188982710093}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 115 test metrics: {'hamming_loss': 0.08718670419514811}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 116 test metrics: {'hamming_loss': 0.08716995040879238}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 117 test metrics: {'hamming_loss': 0.08695215118616807}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 118 test metrics: {'hamming_loss': 0.0867678595362552}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 119 test metrics: {'hamming_loss': 0.08711968904972524}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 120 test metrics: {'hamming_loss': 0.08723696555421526}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 121 test metrics: {'hamming_loss': 0.08698565875887951}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 122 test metrics: {'hamming_loss': 0.08732073448599384}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 123 test metrics: {'hamming_loss': 0.08762230264039673}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 124 test metrics: {'hamming_loss': 0.08753853370861815}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 125 test metrics: {'hamming_loss': 0.08752177992226243}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 126 test metrics: {'hamming_loss': 0.08737099584506099}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 127 test metrics: {'hamming_loss': 0.08755528749497386}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 128 test metrics: {'hamming_loss': 0.08763905642675245}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 129 test metrics: {'hamming_loss': 0.08747151856319528}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 130 test metrics: {'hamming_loss': 0.08762230264039673}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 131 test metrics: {'hamming_loss': 0.08767256399946388}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 132 test metrics: {'hamming_loss': 0.08757204128132957}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 133 test metrics: {'hamming_loss': 0.08805790108564536}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 134 test metrics: {'hamming_loss': 0.08825894652191395}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 135 test metrics: {'hamming_loss': 0.08804114729928963}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 136 test metrics: {'hamming_loss': 0.08804114729928963}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 137 test metrics: {'hamming_loss': 0.08819193137649109}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 138 test metrics: {'hamming_loss': 0.0884097305991154}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 139 test metrics: {'hamming_loss': 0.0884097305991154}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 140 test metrics: {'hamming_loss': 0.08856051467631684}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 141 test metrics: {'hamming_loss': 0.08849349953089398}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 142 test metrics: {'hamming_loss': 0.08854376088996113}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 143 test metrics: {'hamming_loss': 0.08884532904436403}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 144 test metrics: {'hamming_loss': 0.08852700710360542}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 145 test metrics: {'hamming_loss': 0.08857726846267257}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 146 test metrics: {'hamming_loss': 0.08874480632622973}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 147 test metrics: {'hamming_loss': 0.08839297681275968}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 148 test metrics: {'hamming_loss': 0.08867779118080686}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 149 test metrics: {'hamming_loss': 0.08904637448063263}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 150 test metrics: {'hamming_loss': 0.08889559040343117}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 151 test metrics: {'hamming_loss': 0.08892909797614261}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 152 test metrics: {'hamming_loss': 0.08872805253987401}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 153 test metrics: {'hamming_loss': 0.08897935933520976}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 154 test metrics: {'hamming_loss': 0.08892909797614261}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 155 test metrics: {'hamming_loss': 0.08911338962605549}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 156 test metrics: {'hamming_loss': 0.08904637448063263}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 157 test metrics: {'hamming_loss': 0.08918040477147836}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 158 test metrics: {'hamming_loss': 0.08946521913952553}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 159 test metrics: {'hamming_loss': 0.08964951078943842}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 160 test metrics: {'hamming_loss': 0.08929768127596836}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 161 test metrics: {'hamming_loss': 0.08949872671223696}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 162 test metrics: {'hamming_loss': 0.0895322342849484}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 163 test metrics: {'hamming_loss': 0.08966626457579413}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 164 test metrics: {'hamming_loss': 0.08951548049859268}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 165 test metrics: {'hamming_loss': 0.08956574185765984}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 166 test metrics: {'hamming_loss': 0.08968301836214984}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 167 test metrics: {'hamming_loss': 0.08961600321672698}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 168 test metrics: {'hamming_loss': 0.08996783273019703}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 169 test metrics: {'hamming_loss': 0.08954898807130411}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 170 test metrics: {'hamming_loss': 0.089733279721217}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 171 test metrics: {'hamming_loss': 0.09000134030290846}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 172 test metrics: {'hamming_loss': 0.09000134030290846}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 173 test metrics: {'hamming_loss': 0.08991757137112988}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 174 test metrics: {'hamming_loss': 0.0901521243801099}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 175 test metrics: {'hamming_loss': 0.09010186302104276}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 176 test metrics: {'hamming_loss': 0.09047044632086852}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 177 test metrics: {'hamming_loss': 0.09025264709824421}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 178 test metrics: {'hamming_loss': 0.09023589331188848}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 179 test metrics: {'hamming_loss': 0.0903531698163785}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 180 test metrics: {'hamming_loss': 0.09036992360273421}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 181 test metrics: {'hamming_loss': 0.09060447661171425}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 182 test metrics: {'hamming_loss': 0.09067149175713711}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 183 test metrics: {'hamming_loss': 0.09063798418442567}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 184 test metrics: {'hamming_loss': 0.09058772282535853}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 185 test metrics: {'hamming_loss': 0.09078876826162713}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 186 test metrics: {'hamming_loss': 0.09077201447527142}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 187 test metrics: {'hamming_loss': 0.09083902962069428}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 188 test metrics: {'hamming_loss': 0.09083902962069428}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 189 test metrics: {'hamming_loss': 0.09092279855247286}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 190 test metrics: {'hamming_loss': 0.09080552204798284}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 191 test metrics: {'hamming_loss': 0.09107358262967431}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 192 test metrics: {'hamming_loss': 0.0909563061251843}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 193 test metrics: {'hamming_loss': 0.09124112049323146}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 194 test metrics: {'hamming_loss': 0.09115735156145288}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 195 test metrics: {'hamming_loss': 0.09132488942501005}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 196 test metrics: {'hamming_loss': 0.09132488942501005}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 197 test metrics: {'hamming_loss': 0.09132488942501005}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)


Epoch 198 test metrics: {'hamming_loss': 0.09149242728856721}
Epoch 199 test metrics: {'hamming_loss': 0.09162645757941294}


  y_pred_normalized = y_prob / np.max(y_prob, axis=1, keepdims=True)
