In [None]:
%load_ext autoreload
%autoreload 2

from tqdm import tqdm
from tqdm import tqdm_notebook

import numpy as np
import pandas as pd
import os
import gc

### brMoral Dataset

In [None]:
# Location of the main BRMoral CSV, relative to the notebook's working directory.
data_path = "../../data/BRMoral/brmoral.csv"
# Location of the ternary "mf" CSV; it is loaded into its own frame (dfcorpus_mf).
mf_data_path = "../../data/BRMoral/mf-ternary.csv"

### Loading dataset and splitting by task (*ap*, *mf* or *st*)

In [None]:
# Both files share one format: ';'-separated, ISO-8859-1 encoded, with the
# literal string 'na' read as a missing value.
dfcorpus, dfcorpus_mf = (
    pd.read_csv(csv_path, na_values=['na'], sep=';', encoding="ISO-8859-1")
    for csv_path in (data_path, mf_data_path)
)

In [None]:
# Sort the columns of dfcorpus into buckets according to their name:
#   "t."  -> text columns          "st." -> stance classes
#   "mf." -> moral-foundation classes
#   "ap." -> author-profiling classes
# The column whose name contains "concat" is remembered as the concatenated text.
text_cols, ap_classes, st_classes, mf_classes = [], [], [], []
concat_text = ""

for col in dfcorpus.columns:
    if col.startswith("t."):
        text_cols += [col]
    elif col.startswith("st."):
        st_classes += [col]
    elif col.startswith("mf."):
        mf_classes += [col]
    elif "concat" in col and not col.startswith("ap."):
        concat_text = col
    else:
        # Reached by "ap." columns and by every column matching none of the rules above.
        # NOTE(review): un-prefixed columns are therefore treated as AP classes —
        # confirm that this catch-all is intended.
        ap_classes += [col]

In [None]:
# One frame per task: its text column(s) followed by its class columns, keeping
# only the rows that have no missing value in any of the selected columns.
# The mf frame is cut from dfcorpus_mf; the other two come from dfcorpus.
ap_corpus = dfcorpus[[concat_text, *ap_classes]].dropna(axis=0, how="any")
mf_corpus = dfcorpus_mf[[concat_text, *mf_classes]].dropna(axis=0, how="any")
st_corpus = dfcorpus[[*text_cols, *st_classes]].dropna(axis=0, how="any")

#### Printing the number of instances and features of each task dataset 

In [None]:
# Report the (rows, columns) shape of each task corpus, one per line.
print(
    f"ap_corpus: {ap_corpus.shape}",
    f"mf_corpus: {mf_corpus.shape}",
    f"st_corpus: {st_corpus.shape}",
    sep="\n",
)

#### Distribution of the values of each class for the Author Profiling task

In [None]:
for classe in ap_classes:
    # Number of rows per distinct value of this class column (counted on the text column).
    print(ap_corpus.groupby(classe)[concat_text].count(), end="\n\n\n")

#### Distribution of the values of each class for the Moral Foundations task

In [None]:
for classe in mf_classes:
    # Number of rows per distinct value of this class column (counted on the text column).
    print(mf_corpus.groupby(classe)[concat_text].count(), end="\n\n\n")

#### Visualization of the values of each class for Stance task

In [None]:
for classe in st_classes:
    # Number of rows per distinct value of this class column, counted on the
    # first text column (st_corpus has no missing values after dropna).
    print(st_corpus.groupby(classe)[text_cols[0]].count(), end="\n\n\n")

---

### Preprocessing the texts

In [None]:
import nltk

In [None]:
# Download NLTK's "punkt" resource (network access on first run); the
# nltk.word_tokenize call in the tokenizing cell relies on it.
nltk.download("punkt")

##### Tokenizing the texts

In [None]:
# Count the tokens of every text in each text column. `s` ends up with one
# column per text column and one row per text, holding the token counts.
txt_aux = {}
for col in tqdm_notebook(text_cols):
    # NLTK looks the Punkt model up by its lowercase language name; a
    # capitalised "Portuguese" only resolves on case-insensitive filesystems.
    txt_aux[col] = [
        len(nltk.word_tokenize(text, language="portuguese"))
        for text in dfcorpus[col]
    ]

s = pd.DataFrame(txt_aux)

In [None]:
# Summary of the token counts per text column, reporting every percentile
# from 0% to 99% in 1% steps (used to pick a sequence length).
s.describe(percentiles=[0.01*i for i in range(100)])

Keeping only the last 100 words of each text discards few words, as the percentiles above show.

---

#### Importing the StratifiedKFold to split the data

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

#### Training LSTM for each task

In [None]:
import sys
sys.path.append("../")

from py_util.bilstm_attention import LSTM_Network
from py_util.preprocessing import PreProcess

In [None]:
def get_filtered_feat_tgt(feat, tgt, train_idx, test_idx):
    """Split features and targets into train/test parts by row position.

    Parameters
    ----------
    feat, tgt : sequence or pandas.Series
        Features and targets, aligned row by row. Each is wrapped in a
        ``pandas.Series``.
    train_idx, test_idx : array-like of int
        Positional row indices, e.g. as yielded by ``StratifiedKFold.split``.

    Returns
    -------
    tuple of pandas.Series
        ``(train_features, train_targets, test_features, test_targets)``.
    """
    feat_series = pd.Series(feat)
    tgt_series = pd.Series(tgt)

    # Select by position (.iloc): plain `series[idx]` is label-based and picks
    # the wrong rows (or raises) when the input carries a non-default index.
    return (
        feat_series.iloc[train_idx],
        tgt_series.iloc[train_idx],
        feat_series.iloc[test_idx],
        tgt_series.iloc[test_idx],
    )

---

#### Defining the general function to train the NNs for each task

In [None]:
def fit_task(task_, corpus_, classes_, text_var_, nets, seq_length, n_folds,
             attention, attn_depth_model, attn_num_heads, dropout_prob, embed_dim, n_rec_layers, lstm_layer_size, save_dir_root,
             batch_size, early_stopping, learning_rate, n_epochs, random_state_valid, train_print, valid_size,
             suffix="", rdn_ste_split=None, test_size=None, save_preproc_root=None, pretr_emb_path=None):
    """Train one network per class column of a task.

    For each class a ``PreProcess`` object is built from the class's text and
    target columns, then ``fit_fold`` is called either once per stratified CV
    fold (``n_folds > 1``) or once on a single stratified hold-out split.

    Parameters
    ----------
    task_ : str
        Task name; used only to build output directory paths.
    corpus_ : pandas.DataFrame
        Frame holding the text column(s) and the class columns.
    classes_ : sequence of str
        Class (target) column names; one model (or one per fold) is trained for each.
    text_var_ : str or list of str
        Text column name. When a list, ``text_var_[i]`` is the text column for
        ``classes_[i]``.
    nets : dict
        ``nets[cls]`` is reset to an empty list for every class.
    seq_length : int
        Passed to ``PreProcess``; also part of the pre-processing directory name.
    n_folds : int or None
        Number of CV folds. ``None`` or a value ``<= 1`` selects the hold-out split.
    attention, attn_depth_model, attn_num_heads, dropout_prob, embed_dim,
    n_rec_layers, lstm_layer_size, batch_size, early_stopping, learning_rate,
    n_epochs, random_state_valid, train_print, valid_size
        Forwarded unchanged to ``fit_fold``; some also appear in the model
        directory name.
    save_dir_root : str
        Root directory for model outputs and log files.
    suffix : str, optional
        Appended to the model directory name; a leading "_" is added if missing.
    rdn_ste_split : int or None, optional
        Random state of the CV / hold-out split.
    test_size : float or None, optional
        ``test_size`` of the hold-out split (ignored when using CV).
    save_preproc_root : str or None, optional
        Root for pre-processing outputs; falls back to ``save_dir_root``.
    pretr_emb_path : str or None, optional
        Passed to ``PreProcess`` as ``pretr_emb_path``.
    """
    att_name = f'_attn_{attn_depth_model}_{attn_num_heads}' if attention else ''
    suffix = suffix if suffix.startswith("_") or suffix=="" else f"_{suffix}"
    model_name = f"bilstm_{embed_dim}_{lstm_layer_size}_early{early_stopping}{att_name}{suffix}"

    # Loop-invariant: resolve the pre-processing root and the split mode once.
    save_preproc_root = save_preproc_root or save_dir_root
    use_cv = bool(n_folds and n_folds > 1)

    for cls_idx, cls in tqdm_notebook(enumerate(classes_), desc="Topics", total=len(classes_)):
        nets[cls] = []
        save_dir_preproc = f"{save_preproc_root}/pre_process/seq_{seq_length}/{task_}/{cls}"
        os.makedirs(save_dir_preproc, exist_ok=True)

        # A list of text columns is matched to the classes by position.
        text_col = text_var_[cls_idx] if isinstance(text_var_, list) else text_var_
        pre_proc = PreProcess(corpus_[text_col], corpus_[cls],
                              seq_length, save_dir=save_dir_preproc, pretr_emb_path=pretr_emb_path)

        if use_cv:
            k_folf_gen = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=rdn_ste_split)
            iterator_ = enumerate(k_folf_gen.split(pre_proc.features, corpus_[cls]))

            for k, (train_idx, test_idx) in tqdm_notebook(iterator_, desc="Fold", total=n_folds):
                save_dir = f"{save_dir_root}/{model_name}/{task_}/{cls}/cv{k}/"
                os.makedirs(save_dir, exist_ok=True)

                # The context manager closes (and flushes) the log even if training fails.
                with open(f"{save_dir}/log.txt", mode="w") as log_file:
                    print(f"\n\n{cls} - Fold {k}", file=log_file)

                    fit_fold(cls, attention, attn_depth_model, attn_num_heads, dropout_prob, embed_dim, n_rec_layers,
                             lstm_layer_size, pre_proc, save_dir, batch_size, early_stopping, learning_rate, log_file,
                             n_epochs, random_state_valid, train_print, valid_size, nets,
                             train_idx=train_idx, test_idx=test_idx)
        else:
            trn_tst_spl = train_test_split(pre_proc.features,
                                           pre_proc.vec_targets,
                                           test_size=test_size,
                                           random_state=rdn_ste_split,
                                           stratify=pre_proc.vec_targets)

            save_dir = f"{save_dir_root}/{model_name}/{task_}/{cls}/"
            os.makedirs(save_dir, exist_ok=True)

            # The context manager closes (and flushes) the log even if training fails.
            with open(f"{save_dir}/log.txt", mode="w") as log_file:
                print(f"\n\n{cls}", file=log_file)

                fit_fold(cls, attention, attn_depth_model, attn_num_heads, dropout_prob, embed_dim, n_rec_layers,
                         lstm_layer_size, pre_proc, save_dir, batch_size, early_stopping, learning_rate, log_file,
                         n_epochs, random_state_valid, train_print, valid_size, nets, trn_tst_spl=trn_tst_spl)
        # Drop the reference before the next class builds its own PreProcess.
        del pre_proc
    
def fit_fold(cls, attention, attn_depth_model, attn_num_heads, dropout_prob, embed_dim, n_rec_layers, lstm_layer_size,
             pre_proc, save_dir, batch_size, early_stopping, learning_rate, log_file, n_epochs,
             random_state, train_print, valid_size, nets=None, train_idx=None, test_idx=None, trn_tst_spl=None):
    """Build, train and tear down one bidirectional ``LSTM_Network``.

    The train/test data comes from exactly one of two sources:

    * ``trn_tst_spl`` - a 4-item sequence in ``train_test_split`` order
      ``(train_features, test_features, train_targets, test_targets)``; or
    * ``train_idx`` / ``test_idx`` - row indices applied to
      ``pre_proc.features`` and ``pre_proc.vec_targets``.

    Parameters
    ----------
    cls
        Class name. Not used by this function; kept for the caller's signature.
    attention, attn_depth_model, attn_num_heads, dropout_prob, embed_dim,
    n_rec_layers, lstm_layer_size, save_dir
        Forwarded to the ``LSTM_Network`` constructor.
    pre_proc
        Pre-processing object; passed to the network and, in index mode, the
        source of features and targets.
    batch_size, early_stopping, learning_rate, log_file, n_epochs,
    random_state, train_print, valid_size
        Forwarded to ``LSTM_Network.fit`` (``n_epochs`` as ``num_epochs``,
        ``train_print`` as ``show_every_epoch``).
    nets : dict or None, optional
        Not used; trained networks are destroyed rather than stored.
    train_idx, test_idx : array-like or None, optional
        Row indices for index mode.
    trn_tst_spl : sequence or None, optional
        Precomputed split for hold-out mode.

    Raises
    ------
    ValueError
        If neither ``trn_tst_spl`` nor both ``train_idx`` and ``test_idx`` are given.
    """
    if trn_tst_spl is None and (train_idx is None or test_idx is None):
        raise ValueError("fit_fold needs either trn_tst_spl or both train_idx and test_idx")

    gc.collect()
    lstm_net = LSTM_Network(pre_proc         = pre_proc,
                            attention        = attention,
                            attn_depth_model = attn_depth_model,
                            attn_num_heads   = attn_num_heads,
                            bi_dir           = True,
                            dropout_prob     = dropout_prob,
                            embed_dim        = embed_dim,
                            n_rec_layers     = n_rec_layers,
                            lstm_layer_size  = lstm_layer_size,
                            save_dir         = save_dir)

    try:
        # Explicit None test: truthiness is ambiguous for array-like containers.
        if trn_tst_spl is not None:
            # train_test_split order: features first (train, test), then targets.
            train_features, test_features, train_targets, test_targets = trn_tst_spl
        else:
            train_features, train_targets, test_features, test_targets = get_filtered_feat_tgt(
                feat      = pre_proc.features,
                tgt       = pre_proc.vec_targets,
                train_idx = train_idx,
                test_idx  = test_idx)

        lstm_net.fit(train_x          = list(train_features),
                     train_y          = list(train_targets),
                     batch_size       = batch_size,
                     early_stopping   = early_stopping,
                     learning_rate    = learning_rate,
                     log_file         = log_file,
                     num_epochs       = n_epochs,
                     random_state     = random_state,
                     show_every_epoch = train_print,
                     test_x           = list(test_features),
                     test_y           = list(test_targets),
                     valid_size       = valid_size)
    finally:
        # Always tear the network down, even when training raises, so a failed
        # run does not leave its graph alive for the next one.
        lstm_net.destroy_graph()

#### Training the networks

##### AP

In [None]:
# Task parameters for the author-profiling (AP) runs: corpus, class columns,
# text column, result holder and output locations.
task_ = "ap"
corpus_ = ap_corpus
classes_ = ap_classes
text_var_ = concat_text
nets = ap_nets = {}
suffix = "skipgram_folha_train_test_split"
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
save_dir_root = "D:/USP/Mestrado/Stance/checkpoints/brMoral"
save_preproc_root = f"{save_dir_root}/preproc_skipgram_folha"
pretr_emb_path = "D:/USP/Mestrado/Stance/checkpoints/embeddings/w2v_skipgram"

# Split parameters: with n_folds = None, fit_task uses a single stratified
# train/test split (test_size) instead of StratifiedKFold.
n_folds = None
rdn_ste_split = 123
test_size = 0.2

# Pre-processing parameters
seq_length = 800

# LSTM parameters
dropout_prob = 0.5
embed_dim = 128
n_rec_layers = 1
lstm_layer_size = 64

# Attention parameters
attention = True
attn_depth_model = 32
attn_num_heads = 1

# Training parameters
n_epochs = 50
batch_size = 100
learning_rate = 0.001
train_print = True
early_stopping = 10
random_state_valid = 42
valid_size = 0.25


# Disabled single run with the values above; the grid-search cell that follows
# overrides suffix, pretr_emb_path, embed_dim, lstm_layer_size and attn_depth_model.
# fit_task(task_, corpus_, classes_, text_var_, nets, seq_length, n_folds,
#          attention, attn_depth_model, attn_num_heads, dropout_prob, embed_dim, n_rec_layers, lstm_layer_size, save_dir_root,
#          batch_size, early_stopping, learning_rate, n_epochs, random_state_valid, train_print, valid_size,
#          suffix=suffix, rdn_ste_split=rdn_ste_split, test_size=test_size)

In [None]:
# Hyper-parameter grid for the AP task.
# LSTM layer sizes 32 and 64 were listed as candidates but are disabled.
lstm_layer_size_list = [8, 16, 128]

attn_depth_model_list = [8, 16, 32]

# Each entry: (pre-trained embedding file, run-name suffix, embedding dimension).
# Disabled candidates from the same folder: cbow_s50/s100/s300/s1000,
# glove_s100/s1000 and skip_s50/s100/s1000.
pretr_emb_path_list = [
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\glove_s300.txt', "nilc_glove_s300_train_test_split", 300),
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\glove_s600.txt', "nilc_glove_s600_train_test_split", 600),
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\skip_s300.txt', "nilc_skip_s300_train_test_split", 300),
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\skip_s600.txt', "nilc_skip_s600_train_test_split", 600),
]

from itertools import product

# Full Cartesian product of the three lists, in a fixed order.
prod = list(product(lstm_layer_size_list, attn_depth_model_list, pretr_emb_path_list))
# NOTE(review): the first 24 combinations are skipped — presumably they were
# trained in an earlier session. Use prod[0:] to run the whole grid.
prod = tqdm_notebook(prod[24:], desc="Combination")

# Each iteration overrides the corresponding globals set in the AP parameter cell.
for lstm_layer_size, attn_depth_model, (pretr_emb_path, suffix, embed_dim) in prod:
    fit_task(task_, corpus_, classes_, text_var_, nets, seq_length, n_folds,
             attention, attn_depth_model, attn_num_heads, dropout_prob, embed_dim, n_rec_layers, lstm_layer_size, save_dir_root,
             batch_size, early_stopping, learning_rate, n_epochs, random_state_valid, train_print, valid_size,
             suffix=suffix, rdn_ste_split=rdn_ste_split, test_size=test_size,
             save_preproc_root=save_preproc_root, pretr_emb_path=pretr_emb_path)

##### MF (Ternary)

In [None]:
# Task parameters for the moral-foundations (MF, ternary) runs: corpus, class
# columns, text column, result holder and output locations.
task_ = "mf"
corpus_ = mf_corpus
classes_ = mf_classes
text_var_ = concat_text
nets = mf_nets = {}
suffix = "skipgram_folha_train_test_split"
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
save_dir_root = "D:/USP/Mestrado/Stance/checkpoints/brMoral"
save_preproc_root = f"{save_dir_root}/preproc_skipgram_folha"
pretr_emb_path = "D:/USP/Mestrado/Stance/checkpoints/embeddings/w2v_skipgram"

# Split parameters: with n_folds = None, fit_task uses a single stratified
# train/test split (test_size) instead of StratifiedKFold.
n_folds = None
rdn_ste_split = 123
test_size = 0.2

# Pre-processing parameters
seq_length = 800

# LSTM parameters
dropout_prob = 0.5
embed_dim = 128
n_rec_layers = 1
lstm_layer_size = 64

# Attention parameters
attention = True
attn_depth_model = 32
attn_num_heads = 1

# Training parameters
n_epochs = 50
batch_size = 100
learning_rate = 0.001
train_print = True
early_stopping = 10
random_state_valid = 42
valid_size = 0.25


# Disabled single run with the values above; the grid-search cell that follows
# overrides suffix, pretr_emb_path, embed_dim, lstm_layer_size and attn_depth_model.
# fit_task(task_, corpus_, classes_, text_var_, nets, seq_length, n_folds,
#          attention, attn_depth_model, attn_num_heads, dropout_prob, embed_dim, n_rec_layers, lstm_layer_size, save_dir_root,
#          batch_size, early_stopping, learning_rate, n_epochs, random_state_valid, train_print, valid_size,
#          suffix=suffix, rdn_ste_split=rdn_ste_split, test_size=test_size)

In [None]:
# Hyper-parameter grid for the MF task.
# LSTM layer sizes 32 and 64 were listed as candidates but are disabled.
lstm_layer_size_list = [8, 16, 128]

attn_depth_model_list = [8, 16, 32]

# Each entry: (pre-trained embedding file, run-name suffix, embedding dimension).
# Disabled candidates from the same folder: cbow_s50/s100/s300/s1000,
# glove_s100/s1000 and skip_s50/s100/s1000.
pretr_emb_path_list = [
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\glove_s300.txt', "nilc_glove_s300_train_test_split", 300),
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\glove_s600.txt', "nilc_glove_s600_train_test_split", 600),
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\skip_s300.txt', "nilc_skip_s300_train_test_split", 300),
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\skip_s600.txt', "nilc_skip_s600_train_test_split", 600),
]

from itertools import product

# Full Cartesian product of the three lists, in a fixed order.
prod = list(product(lstm_layer_size_list, attn_depth_model_list, pretr_emb_path_list))
# NOTE(review): the first 3 combinations are skipped — presumably they were
# trained in an earlier session. Use prod[0:] to run the whole grid.
prod = tqdm_notebook(prod[3:], desc="Combination")

# Each iteration overrides the corresponding globals set in the MF parameter cell.
for lstm_layer_size, attn_depth_model, (pretr_emb_path, suffix, embed_dim) in prod:
    fit_task(
        task_=task_,
        corpus_=corpus_,
        classes_=classes_,
        text_var_=text_var_,
        nets=nets,
        seq_length=seq_length,
        n_folds=n_folds,
        attention=attention,
        attn_depth_model=attn_depth_model,
        attn_num_heads=attn_num_heads,
        dropout_prob=dropout_prob,
        embed_dim=embed_dim,
        n_rec_layers=n_rec_layers,
        lstm_layer_size=lstm_layer_size,
        save_dir_root=save_dir_root,
        batch_size=batch_size,
        early_stopping=early_stopping,
        learning_rate=learning_rate,
        n_epochs=n_epochs,
        random_state_valid=random_state_valid,
        train_print=train_print,
        valid_size=valid_size,
        suffix=suffix,
        rdn_ste_split=rdn_ste_split,
        test_size=test_size,
        save_preproc_root=save_preproc_root,
        pretr_emb_path=pretr_emb_path,
    )

##### ST

In [None]:
# Task parameters for the stance (ST) runs: corpus, class columns, result
# holder and output locations. text_var_ is a list here, so fit_task pairs
# text_cols[i] with st_classes[i].
task_ = "st"
corpus_ = st_corpus
classes_ = st_classes
text_var_ = text_cols
nets = st_nets = {}
suffix = "skipgram_folha_train_test_split"
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
save_dir_root = "D:/USP/Mestrado/Stance/checkpoints/brMoral"
save_preproc_root = f"{save_dir_root}/preproc_skipgram_folha"
pretr_emb_path = "D:/USP/Mestrado/Stance/checkpoints/embeddings/w2v_skipgram"

# Split parameters: with n_folds = None, fit_task uses a single stratified
# train/test split (test_size) instead of StratifiedKFold.
n_folds = None
rdn_ste_split = 123
test_size = 0.2

# Pre-processing parameters (shorter sequences than the 800 used for AP and MF)
seq_length = 100

# LSTM parameters
dropout_prob = 0.6
embed_dim = 128
n_rec_layers = 1
lstm_layer_size = 64

# Attention parameters
attention = True
attn_depth_model = 32
attn_num_heads = 1

# Training parameters
n_epochs = 50
batch_size = 100
learning_rate = 0.001
train_print = True
early_stopping = 10
random_state_valid = 42
valid_size = 0.25


# Disabled single run with the values above; the grid-search cells that follow
# override suffix, pretr_emb_path, embed_dim, lstm_layer_size and attn_depth_model.
# fit_task(task_, corpus_, classes_, text_var_, nets, seq_length, n_folds,
#          attention, attn_depth_model, attn_num_heads, dropout_prob, embed_dim, n_rec_layers, lstm_layer_size, save_dir_root,
#          batch_size, early_stopping, learning_rate, n_epochs, random_state_valid, train_print, valid_size,
#          suffix=suffix, rdn_ste_split=rdn_ste_split, test_size=test_size)

In [None]:
# Hyper-parameter grid for the ST task (100- and 300-dimensional embeddings).
# LSTM layer sizes 32 and 64 were listed as candidates but are disabled.
lstm_layer_size_list = [8, 16, 128]

attn_depth_model_list = [8, 16, 32]

# Each entry: (pre-trained embedding file, run-name suffix, embedding dimension).
# Disabled candidates from the same folder: cbow_s50/s100/s300/s1000,
# glove_s600/s1000 and skip_s50/s600/s1000.
pretr_emb_path_list = [
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\glove_s100.txt', "nilc_glove_s100_train_test_split", 100),
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\glove_s300.txt', "nilc_glove_s300_train_test_split", 300),
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\skip_s100.txt', "nilc_skip_s100_train_test_split", 100),
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\skip_s300.txt', "nilc_skip_s300_train_test_split", 300),
]

from itertools import product

# Full Cartesian product of the three lists, in a fixed order.
prod = list(product(lstm_layer_size_list, attn_depth_model_list, pretr_emb_path_list))
# NOTE(review): the first 9 combinations are skipped — presumably they were
# trained in an earlier session. Use prod[0:] to run the whole grid.
prod = tqdm_notebook(prod[9:], desc="Combination")

# Each iteration overrides the corresponding globals set in the ST parameter cell.
for lstm_layer_size, attn_depth_model, (pretr_emb_path, suffix, embed_dim) in prod:
    fit_task(
        task_=task_,
        corpus_=corpus_,
        classes_=classes_,
        text_var_=text_var_,
        nets=nets,
        seq_length=seq_length,
        n_folds=n_folds,
        attention=attention,
        attn_depth_model=attn_depth_model,
        attn_num_heads=attn_num_heads,
        dropout_prob=dropout_prob,
        embed_dim=embed_dim,
        n_rec_layers=n_rec_layers,
        lstm_layer_size=lstm_layer_size,
        save_dir_root=save_dir_root,
        batch_size=batch_size,
        early_stopping=early_stopping,
        learning_rate=learning_rate,
        n_epochs=n_epochs,
        random_state_valid=random_state_valid,
        train_print=train_print,
        valid_size=valid_size,
        suffix=suffix,
        rdn_ste_split=rdn_ste_split,
        test_size=test_size,
        save_preproc_root=save_preproc_root,
        pretr_emb_path=pretr_emb_path,
    )

In [None]:
# Second ST grid: same LSTM / attention sizes, but only the 600-dimensional
# GloVe embedding. LSTM layer sizes 32 and 64 are disabled.
lstm_layer_size_list = [8, 16, 128]

attn_depth_model_list = [8, 16, 32]

# Each entry: (pre-trained embedding file, run-name suffix, embedding dimension).
# Every other cbow/glove/skip candidate from the same folder is disabled here.
pretr_emb_path_list = [
    ('D:/USP/Mestrado/Stance/checkpoints/embeddings/NILCtemp\\glove_s600.txt', "nilc_glove_s600_train_test_split", 600),
]

from itertools import product

# Full Cartesian product of the three lists; nothing is skipped in this cell.
prod = list(product(lstm_layer_size_list, attn_depth_model_list, pretr_emb_path_list))
prod = tqdm_notebook(prod, desc="Combination")

# Each iteration overrides the corresponding globals set in the ST parameter cell.
for lstm_layer_size, attn_depth_model, (pretr_emb_path, suffix, embed_dim) in prod:
    fit_task(
        task_=task_,
        corpus_=corpus_,
        classes_=classes_,
        text_var_=text_var_,
        nets=nets,
        seq_length=seq_length,
        n_folds=n_folds,
        attention=attention,
        attn_depth_model=attn_depth_model,
        attn_num_heads=attn_num_heads,
        dropout_prob=dropout_prob,
        embed_dim=embed_dim,
        n_rec_layers=n_rec_layers,
        lstm_layer_size=lstm_layer_size,
        save_dir_root=save_dir_root,
        batch_size=batch_size,
        early_stopping=early_stopping,
        learning_rate=learning_rate,
        n_epochs=n_epochs,
        random_state_valid=random_state_valid,
        train_print=train_print,
        valid_size=valid_size,
        suffix=suffix,
        rdn_ste_split=rdn_ste_split,
        test_size=test_size,
        save_preproc_root=save_preproc_root,
        pretr_emb_path=pretr_emb_path,
    )

In [None]:
import time
# Wait 60 seconds before the next cell runs.
# NOTE(review): presumably a grace period ahead of the shutdown cell — confirm.
time.sleep(60)

In [None]:
# WARNING: runs the operating system's `shutdown -h` command, so "Run All"
# ends by taking the machine down. The exact effect of -h depends on the OS
# (NOTE(review): verify on the target machine). Remove or guard this cell
# before sharing the notebook.
! shutdown -h