In [0]:
import pandas as pd
import random
import io
import ast
import numpy as np
import os
import re
import string
import pickle
import copy
import torch.nn.functional as functional
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tnrange
from tqdm import tqdm_notebook as tqdm
from collections import Counter
from bs4 import BeautifulSoup
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report

import torch, torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import nltk
from nltk.stem import WordNetLemmatizer
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from sklearn import linear_model
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics.pairwise import cosine_distances, cosine_similarity

In [0]:
random.seed(13)
np.random.seed(13)
torch.manual_seed(13)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(13)
torch.cuda.manual_seed(13)

### Loading texts, split on train, val and test


In [0]:
!wget https://www.dropbox.com/s/n8nfxxh5azocrrm/train_LASER.csv?dl=0
!wget https://www.dropbox.com/s/v7msk8zxkkiqc6q/X_test_translated.txt?dl=0
!wget https://www.dropbox.com/s/je9am5c77ytfcaf/test_LASER.csv?dl=0

In [4]:
#открываем файл с train dataset
df_train = pd.read_csv("train_LASER.csv?dl=0").dropna()
train_texts = list(df_train['abstracts'])
train_labels = list(df_train['labels'])

len(train_texts)

80948

In [5]:
#открываем файл с test_dataset
df_test = pd.read_csv("test_LASER.csv?dl=0").dropna()
test_texts = list(df_test['lemm_abstracts'])              #лемм там нет, просто название неправильное не дала
test_labels = list(df_test['labels'])

len(test_texts)

10150

In [0]:
test_texts[1000]

In [0]:
#разбиваем на train и val английские тексты
from sklearn.model_selection import train_test_split

train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, \
                                                          test_size=0.15, shuffle = True, random_state=42, stratify=train_labels)

In [7]:
print(len(train_labels))
print(len(val_labels))

68805
12143


Нужно почистить тексты

In [0]:
def cleaning(text):
    clean_txt = []
    for sentence in text.split('.'):   
        sentence = re.sub(r'[•⋅−]', ' ', sentence)
        pattern = re.compile(r'(\s,){2,}')                 # регулярка для того чтобы несколько подряд запятых заменять на одну
        sentence = re.sub(pattern, ', ', sentence)
        sent = sentence.strip()
        sent = re.sub(r'( )+', ' ', sent)
        sent = re.sub(' ,', ',', sent)
     #   print(sent.split())
        if len(sent.lstrip(',').rstrip(',').strip().split()) > 2 and 'ф лы' not in sent:
      #  if 'ф лы' not in sent:
             clean_txt.append(sent.lstrip(',').rstrip(',').strip())
    return '. '.join(clean_txt)          

In [0]:
test_texts = [cleaning(text) for text in test_texts]
val_texts = [cleaning(text) for text in val_texts]
train_texts = [cleaning(text) for text in train_texts]

In [0]:
with open('X_test_translated.txt?dl=0', encoding='utf-8') as of:
    translated_texts = of.readlines()
    of.close()
    
translated_texts = [cleaning(text.strip()) for text in translated_texts]

In [11]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(train_labels)
#add_labels = label_encoder.fit_transform(df_add['us_code'])
y_train[:10]

array([ 58, 199, 218,   7, 196, 115,  69,   1,  13,  23])

In [12]:
y_val = label_encoder.transform(val_labels)
y_val[:10]

array([ 23,  82,  57, 164,  99, 146,  24, 161, 194,  16])

In [0]:
y_test = label_encoder.transform(test_labels)

In [13]:
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print(f'Using device: {DEVICE}')

Using device: cuda


### Loading LASER embeddings

In [15]:
!pip install laserembeddings

Installing collected packages: transliterate, subword-nmt, sacremoses, laserembeddings
Successfully installed laserembeddings-1.0.1 sacremoses-0.0.35 subword-nmt-0.3.7 transliterate-1.10.2


In [16]:
!python -m laserembeddings download-models
from laserembeddings import Laser

laser = Laser()

embeddings = laser.embed_sentences(train_texts[:100], lang='en')
embeddings.shape

Downloading models into /usr/local/lib/python3.6/dist-packages/laserembeddings/data

✅   Downloaded https://dl.fbaipublicfiles.com/laser/models/93langs.fcodes    
✅   Downloaded https://dl.fbaipublicfiles.com/laser/models/93langs.fvocab    
✅   Downloaded https://dl.fbaipublicfiles.com/laser/models/bilstm.93langs.2018-12-26.pt    

✨ You're all set!


(100, 1024)

In [0]:
laser.embed_sentences('an original image at high speed.', lang='en') == laser.embed_sentences('an original image at high speed', lang='en') 

array([[False, False, False, ..., False, False, False]])

In [0]:
laser1 = laser.embed_sentences('Hello world', lang='en')
laser2 = laser.embed_sentences('hello world', lang='en')
laser1 == laser2

array([[ True,  True,  True, ...,  True,  True,  True]])

### Texts vectorization

In [0]:
lens_train = [len(text.split()) for text in train_texts]
np.min(lens_train)

5

In [17]:
X_train = laser.embed_sentences(train_texts, lang='en') 
#X_train = np.load('X_train_LASER.npy?dl=0')
X_train.shape

(68805, 1024)

In [18]:
X_val = laser.embed_sentences(val_texts, 'en')
X_val.shape

(12143, 1024)

In [0]:
X_test = laser.embed_sentences(test_texts, 'ru')
X_translated = laser.embed_sentences(translated_texts, 'en')

### Experiments

#### Cosine similarity

In [52]:
np.sort(cosine_distances(X_val[0].reshape(1,-1), X_train))

array([[0.08893418, 0.09236324, 0.09371287, ..., 0.53890795, 0.5399134 ,
        0.57637364]], dtype=float32)

In [55]:
best_indices = np.argsort(cosine_distances(X_val[0].reshape(1,-1), X_train))[0]
best_indices

array([53837, 42818, 64710, ..., 35765,  2050, 17590])

In [56]:
list(y_train[best_indices[:10]])

[23, 199, 22, 31, 23, 3, 25, 18, 108, 138]

In [57]:
print("True class: ", y_val[0])
print('Top 10 predicted: ', y_train[best_indices][:10])

True class:  23
Top 10 predicted:  [ 23 199  22  31  23   3  25  18 108 138]


In [0]:
def predict_by_similar(matrix_pred, matrix_train, labels_train, K): 
    pred_labels = np.array([])
    sim_matrix = cosine_distances(matrix_pred, matrix_train)
    for i in tqdm(range(0, len(matrix_pred), 1)):
        TOP_K = y_train[np.argsort(sim_matrix[i])[:K]]
        most_freq = Counter(TOP_K).most_common()[0]
        if most_freq[1] == 1:
            y_pred = TOP_K[0]
        else:
            y_pred = most_freq[0]
        pred_labels = np.hstack((pred_labels, y_pred))
    return pred_labels



In [106]:
y_train[np.argsort(sim_matrix[0])[:10]]

array([ 23, 199,  22,  31,  23,   3,  25,  18, 108, 138])

In [92]:
sim_matrix[4, :15]

array([0.1971345 , 0.19444329, 0.19963491, 0.23229307, 0.19571179,
       0.18956578, 0.19715023, 0.1648798 , 0.1850096 , 0.17626286,
       0.21125406, 0.31484997, 0.17388046, 0.21035683, 0.17515051],
      dtype=float32)

In [95]:
Counter(y_train[np.argsort(sim_matrix[4])[:15]]).most_common()[0][0]

99

In [111]:
y_pred = predict_by_similar(X_test, X_train, y_train, 10)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=10150.0), HTML(value='')))




In [112]:
y_pred[:10]

array([124., 221., 100.,  17., 132., 137., 193., 216.,  41.,   7.])

In [113]:
y_test[:10]

array([163, 221,   1,  45, 114, 111, 159, 221, 191, 130])

In [110]:
f1_score(y_pred, y_val, average='micro')*100

26.01498805896401

In [114]:
f1_score(y_pred, y_test, average='micro')*100

11.517241379310345

#### SVC Classifier

In [0]:
def predict(y_true, y_pred):
    print('micro: ', f1_score(y_true, y_pred, average='micro')*100)
    print('weighted: ', f1_score(y_true, y_pred, average='weighted')*100)
    

In [0]:
from sklearn import linear_model

clf = linear_model.SGDClassifier(max_iter=10000, tol=1e-3, early_stopping=True, random_state=42, verbose=0,\
                                 learning_rate='adaptive', eta0 = 1e-2, n_jobs=-1, alpha=1e-4)
clf.fit(X_train, y_train)
print('f1_train:' + '\n')
predict(y_train, clf.predict(X_train))
print('f1_val:' + '\n')
predict(y_val, clf.predict(X_val))
print('f1_test:' + '\n')
predict(y_test, clf.predict(X_test))
print('f1_test_translated:' + '\n')
predict(y_test, clf.predict(X_translated))
print('==========================')


f1_train:

micro:  56.33311532592108
weighted:  55.70234535342989
f1_val:

micro:  43.572428559664004
weighted:  42.049889347509094
f1_test:

micro:  28.778325123152708
weighted:  28.34019262102049
f1_test_translated:

micro:  38.73891625615764
weighted:  37.50617373516699


####  Logistic regression

In [0]:
from tqdm import tnrange
def iterate_minibatches(data, labels, batch_size=100, shuffle=True, verbose=True):
    indices = np.arange(len(data))
    if shuffle:
        indices = np.random.permutation(indices)        
    irange = tnrange if verbose else range
 #   print(indices)
    for start in irange(0, len(indices), batch_size):
        yield data[indices[start: start + batch_size]], labels[indices[start: start + batch_size]]

In [0]:
class LogisticRegression(nn.Module):
    def __init__(self, emb_dim, num_labels):
        super().__init__()
        self.linear = nn.Linear(emb_dim, num_labels)
        nn.init.xavier_normal_(self.linear.weight)

    def forward(self, emb):
        return F.log_softmax(self.linear(emb), dim=1) 

In [0]:
def train_model_epoch(train_dataset, labels, model, optimizer, loss_function, batch_size):
    model.train()
    train_loss = 0
    train_predictions = np.array([])
    pred_labels = np.array([])
    
    for texts, labs in iterate_minibatches(np.array(train_dataset), np.array(labels), batch_size=batch_size, shuffle=True, verbose=False):
        emb_ix = torch.tensor(texts, dtype=torch.float32).to(DEVICE) #cuda
     #   print(labels[i: i+batch_size])
        res_labels = torch.tensor(labs).to(DEVICE)     #cuda 
      #  print(labels.shape)
        optimizer.zero_grad()
        output = model(emb_ix)
 
        loss = loss_function(output, res_labels)
        train_loss += loss.item()
   #     train_acc += (output.argmax(1) == res_labels).sum().item()
      #  print(train_acc)
        
        y_pred = output.argmax(1).cpu().numpy()
        train_predictions = np.hstack((train_predictions, y_pred))
        pred_labels = np.hstack((pred_labels, res_labels.cpu().numpy()))
     #   print(train_f1)
     #   break
        loss.backward()
        optimizer.step()

    train_f1_micro = f1_score(pred_labels, train_predictions, average='micro')*100
    train_f1_weighted = f1_score(pred_labels, train_predictions, average='weighted')*100

    return train_loss / (len(train_dataset)//batch_size), train_f1_micro, train_f1_weighted

In [0]:
def test(test_dataset, labels, model, loss_function, batch_size):
    model.eval()
#    acc = 0
    test_predictions = np.array([])
    test_loss = 0
    for texts, labs in iterate_minibatches(np.array(test_dataset), np.array(labels), batch_size=batch_size, shuffle=False, verbose=False):
        with torch.no_grad():
            emb_ix = torch.tensor(texts, dtype = torch.float32).to(DEVICE) #cuda
            res_labels = torch.tensor(labs).to(DEVICE)   #cuda
        #  print(labels.shape)
            output = model(emb_ix)
            y_pred = output.argmax(1).cpu().numpy()
            loss = loss_function(output, res_labels)
            test_loss += loss.item()
            test_predictions = np.hstack((test_predictions, y_pred))

    test_f1_micro = f1_score(labels, test_predictions, average='micro')*100
    test_f1_weighted = f1_score(labels, test_predictions, average='weighted')*100


    return test_loss / (len(test_dataset)//batch_size), test_f1_micro, test_f1_weighted

In [0]:
def objective(trial):
    model = LogisticRegression(1024,225).to(DEVICE)
    loss_function = nn.NLLLoss()
    N_EPOCHS = 600
    # Generate the optimizers.
    optimizer_name = "Adam"
    lr = trial.suggest_loguniform("lr", 1e-3, 1e-2)
    decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3)
    optimizer = getattr(torch.optim, optimizer_name)(model.parameters(), lr=lr, weight_decay=decay)
    print('Training...')
    best_loss = 10
    max_step = 2
    step=0
    for epoch in tnrange(N_EPOCHS):
        train_loss, train_f1_micro, train_f1_weighted = train_model_epoch(X_train, y_train, model, optimizer, loss_function,\
                                                                          batch_size=300)
        val_loss, val_f1_micro, val_f1_weighted = test(X_val, y_val, model, loss_function, batch_size=300)
        if val_loss > best_loss:
            step += 1
        
            if step > max_step:
                break
        else:
            best_loss = val_loss
            step = 0
    print('Finished training.')
    return val_f1_micro



In [0]:
!pip install optuna

Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/bc/7a/b6e3ddae75af5f7d987b4fc6be7bb38d9c9c195bd662d8762a75163c1103/optuna-1.4.0.tar.gz (183kB)
[K     |████████████████████████████████| 184kB 4.8MB/s 
[?25hCollecting alembic
[?25l  Downloading https://files.pythonhosted.org/packages/60/1e/cabc75a189de0fbb2841d0975243e59bde8b7822bacbb95008ac6fe9ad47/alembic-1.4.2.tar.gz (1.1MB)
[K     |████████████████████████████████| 1.1MB 43.7MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting cliff
[?25l  Downloading https://files.pythonhosted.org/packages/b9/17/57187872842bf9f65815b6969b515528ec7fd754137d2d3f49e3bc016175/cliff-3.1.0-py3-none-any.whl (80kB)
[K     |████████████████████████████████| 81kB 9.4MB/s 
[?25hCollecting cmaes>=0.3.2
  Downloading https://files.pythonhosted.org/packages/03/de/6ed34ebc0e5c34ed371d898540bca36edb

In [0]:
import optuna

In [0]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

complete_trials = [t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of complete trials: ", len(complete_trials))

In [0]:
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)
print(trial)

Best trial:
  Value:  44.239479535534876
FrozenTrial(number=28, value=44.239479535534876, datetime_start=datetime.datetime(2020, 5, 23, 19, 29, 0, 220238), datetime_complete=datetime.datetime(2020, 5, 23, 19, 32, 11, 324618), params={'lr': 0.00158379347383459, 'weight_decay': 1.012968757988202e-06}, distributions={'lr': LogUniformDistribution(high=0.01, low=0.001), 'weight_decay': LogUniformDistribution(high=0.001, low=1e-06)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=28, state=TrialState.COMPLETE)


In [0]:
optuna.visualization.plot_parallel_coordinate(study)

In [0]:
random.seed(13)
np.random.seed(13)
torch.manual_seed(13)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(13)
torch.cuda.manual_seed(13)

In [0]:
#train model with best parameters again

N_EPOCHS = 600
model = LogisticRegression(1024,225).to(DEVICE) 
optimizer = torch.optim.Adam(model.parameters(), lr=0.00158379347383459, weight_decay=1.012968757988202e-06)
loss_function = nn.NLLLoss()


print('Training...')
best_loss = 10
max_step = 2
step=0
for epoch in tnrange(N_EPOCHS+1):
    train_loss, train_f1_micro, train_f1_weighted = train_model_epoch(X_train, y_train, model, optimizer, loss_function, batch_size=300)
 #   train_loss, train_acc = train_model_epoch_sample(train_texts, labels, model, optimizer)
    val_loss, val_f1_micro, val_f1_weighted = test(X_val, y_val, model, loss_function, batch_size=300)
    if val_loss + 1e-3 > best_loss:
      step += 1
      
      if step > max_step:
        break
    else:
      best_loss = val_loss
      step = 0
      weights_model = copy.deepcopy(model.state_dict())
           
    
      new_model = LogisticRegression(1024,225).to(DEVICE)
      new_model.load_state_dict(weights_model)

    if epoch % 10 == 0:
     #   test_loss, test_f1_micro, test_f1_weighted = test(X_val, y_val, model, batch_size=300)
        print(f'\nTrain loss: {train_loss:.8f} | Train f1 micro: {train_f1_micro:.4f}  | Train f1 weighted: {train_f1_weighted:.4f}' )   
            #   print(f'Test accuracy {test_acc:.4f}')
        print(f'\nVal f1 micro: {val_f1_micro:.4f} | Val f1 weighted: {val_f1_weighted:.4f}')

    
print('Finished training.')

In [0]:
def compute_f1(model, data, labels):
    model.eval()
    emb_ix = torch.tensor(data, dtype = torch.float32).to(DEVICE)
    output = model(emb_ix)
    y_pred = output.argmax(1).cpu().numpy()
    print('f1_micro= ', f1_score(labels, y_pred, average='micro')*100)
    print('f1_weighted= ', f1_score(labels, y_pred, average='weighted')*100)

In [0]:
print('Train f1')
compute_f1(new_model, X_train, y_train)
print('Val f1')
compute_f1(new_model, X_val, y_val)
print('Test f1')
compute_f1(new_model, X_test, y_test)
print('Test translted f1')
compute_f1(new_model, X_translated, y_test)

Train f1
f1_micro=  54.75764842671318
f1_weighted=  53.85162058199088
Val f1
f1_micro=  43.86889566005106
f1_weighted=  42.15593858017622
Test f1
f1_micro=  31.724137931034484
f1_weighted=  29.560348770258894
Test translted f1
f1_micro=  39.69458128078818
f1_weighted=  37.44134027249809


#### FFNN

In [0]:
def FFNN(trial):
    # We optimize the number of layers, hidden untis and dropout ratio
    n_layers = trial.suggest_int("n_layers", 1, 2)
    layers = []

    in_features = 1024
    for i in range(n_layers):
        out_features = trial.suggest_int("n_units_l{}".format(i), 200, 700)
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())

        in_features = out_features

    layers.append(nn.Linear(in_features, 225))
    p = trial.suggest_uniform("dropout", 0.0, 0.2)
    layers.append(nn.Dropout(p))
    layers.append(nn.LogSoftmax(dim=1))

    return nn.Sequential(*layers)

In [0]:
def objective(trial):
    model = FFNN(trial).to(DEVICE)
    loss_function = nn.NLLLoss()
    N_EPOCHS = 600
    
    lr = trial.suggest_loguniform("lr", 1e-4, 1e-3)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6)
    loss_function = nn.NLLLoss()

    print('Training...')
    best_loss = 10
    max_step = 2
    step=0
    for epoch in tnrange(N_EPOCHS):
        train_loss, train_f1_micro, train_f1_weighted = train_model_epoch(X_train, y_train, model, optimizer, loss_function,\
                                                                          batch_size=300)
        val_loss, val_f1_micro, val_f1_weighted = test(X_val, y_val, model, loss_function, batch_size=300)
        if val_loss > best_loss:
            step += 1
        
            if step > max_step:
                break
        else:
            best_loss = val_loss
            step = 0
    print('Finished training.')
    return val_f1_micro



In [0]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

complete_trials = [t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of complete trials: ", len(complete_trials))

In [0]:
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)
print(trial)

Best trial:
  Value:  45.96063575722639
FrozenTrial(number=32, value=45.96063575722639, datetime_start=datetime.datetime(2020, 5, 24, 13, 53, 17, 152052), datetime_complete=datetime.datetime(2020, 5, 24, 13, 59, 27, 379546), params={'n_layers': 1, 'n_units_l0': 657, 'dropout': 0.16912642932471764, 'lr': 0.00010063387856914983}, distributions={'n_layers': IntUniformDistribution(high=2, low=1, step=1), 'n_units_l0': IntUniformDistribution(high=700, low=200, step=1), 'dropout': UniformDistribution(high=0.2, low=0.0), 'lr': LogUniformDistribution(high=0.001, low=0.0001)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=32, state=TrialState.COMPLETE)


In [0]:
optuna.visualization.plot_parallel_coordinate(study)

In [0]:
random.seed(13)
np.random.seed(13)
torch.manual_seed(13)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(13)
torch.cuda.manual_seed(13)

In [0]:
#train model with best parameters again

N_EPOCHS = 400
model = nn.Sequential(nn.Linear(1024, 657), nn.ReLU(), nn.Linear(657, 225), nn.Dropout(0.17), nn.LogSoftmax(dim=1)).to(DEVICE) 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-6)
loss_function = nn.NLLLoss()


print('Training...')
best_loss = 10
max_step = 2
step=0
for epoch in tnrange(N_EPOCHS+1):
    train_loss, train_f1_micro, train_f1_weighted = train_model_epoch(X_train, y_train, model, optimizer, loss_function, batch_size=300)
 #   train_loss, train_acc = train_model_epoch_sample(train_texts, labels, model, optimizer)
    val_loss, val_f1_micro, val_f1_weighted = test(X_val, y_val, model, loss_function, batch_size=300)
    if val_loss +1e-3 > best_loss:
      step += 1
      
      if step > max_step:
        break
    else:
      best_loss = val_loss
      step = 0
      weights_model = copy.deepcopy(model.state_dict())
           
    
      new_model = nn.Sequential(nn.Linear(1024, 657), nn.ReLU(), nn.Linear(657, 225), nn.Dropout(0.17), nn.LogSoftmax(dim=1)).to(DEVICE)
      new_model.load_state_dict(weights_model)

  #  if epoch % 10 == 0:
     #   test_loss, test_f1_micro, test_f1_weighted = test(X_val, y_val, model, batch_size=300)
    print(f'\nTrain loss: {train_loss:.8f} | Train f1 micro: {train_f1_micro:.4f}  | Train f1 weighted: {train_f1_weighted:.4f}' )   
    #   print(f'Test accuracy {test_acc:.4f}')
    print(f'\nVal f1 micro: {val_f1_micro:.4f} | Val f1 weighted: {val_f1_weighted:.4f}')

print('Finished training.')

In [0]:
def compute_f1(model, data, labels):
    model.eval()
    emb_ix = torch.tensor(data, dtype = torch.float32).to(DEVICE)
    output = model(emb_ix)
    y_pred = output.argmax(1).cpu().numpy()
    print('f1_micro= ', f1_score(labels, y_pred, average='micro')*100)
    print('f1_weighted= ', f1_score(labels, y_pred, average='weighted')*100)

In [0]:
print('Train f1')
compute_f1(new_model, X_train, y_train)
print('Val f1')
compute_f1(new_model, X_val, y_val)
print('Test f1')
compute_f1(new_model, X_test, y_test)
print('Test translted f1')
compute_f1(new_model, X_translated, y_test)

Train f1
f1_micro=  56.45665285953056
f1_weighted=  55.49080862270126
Val f1
f1_micro=  45.22770320349173
f1_weighted=  44.014904549370705
Test f1
f1_micro=  32.81773399014779
f1_weighted=  31.145818976247263
Test translted f1
f1_micro=  40.38423645320197
f1_weighted=  38.695325589117424
