# Revisión de resultados de traducción español - arhuaco
Este notebook calcula las métricas en test de los diferentes modelos entrenados con finetuning de un traductor español-finlandés para traducir de español a arhuaco.

# Librerías

In [1]:
from glob import glob
import pandas as pd
import numpy as np
from tqdm.auto import tqdm, trange
import sys
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from datasets import load_dataset, DatasetDict, Dataset
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer
import pandas as pd

In [3]:
from transformers import Seq2SeqTrainingArguments
from transformers import DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM
from transformers import EarlyStoppingCallback
from transformers import Seq2SeqTrainer

import torch

import numpy as np
import pickle
import evaluate

In [4]:
#from google.colab import drive
#drive.mount('/content/drive')

# Funciones auxiliares 

In [5]:
def preprocess_dataset(path_dataset: str, lang_output: str):
  """
  Load a parallel corpus from a CSV file and split it into train, test and
  validation sets wrapped in a DatasetDict.

  The CSV columns ``esp``, ``wayuu`` and ``arh`` (whichever are present) are
  renamed: ``esp`` becomes ``es`` and the other two become ``lang_output``.
  Every row is then stored as a ``{'es': ..., lang_output: ...}`` pair.

  input:
  - path_dataset: path of the CSV file to read
  - lang_output: key under which the non-Spanish sentence is stored

  output:
  - dataset_dict: DatasetDict with "train", "test" and "validation" splits
    (20% of the rows are held out and halved into validation and test);
    each split has an "id" column and a "translation" column
  """
  # Read the corpus and normalise the column names
  frame = pd.read_csv(path_dataset).rename(
      columns={'esp': 'es', 'wayuu': lang_output, 'arh': lang_output}
  )

  # One dictionary per sentence pair
  pairs = [
      {'es': es_text, lang_output: other_text}
      for es_text, other_text in zip(frame['es'], frame[lang_output])
  ]

  # Fixed seeds keep the split reproducible between runs
  train_rows, held_out = train_test_split(pairs, test_size=0.2, random_state=42)
  val_rows, test_rows = train_test_split(held_out, test_size=0.5, random_state=42)

  def as_dataset(rows):
    # Ids are simply the position of each pair inside its own split
    return Dataset.from_dict({"id": list(range(len(rows))), "translation": rows})

  return DatasetDict({
      "train": as_dataset(train_rows),
      "test": as_dataset(test_rows),
      "validation": as_dataset(val_rows),
  })

# Tokenización

In [6]:
def tokenizar(dataset_dict, tokenizer, max_length = 150, src_lang = 'es', tgt_lang = 'fi'):
  """
  Tokenize every split of a translation DatasetDict.

  Each example is expected to carry a "translation" dictionary holding the
  source sentence under ``src_lang`` and the target sentence under
  ``tgt_lang``. The defaults ('es' -> 'fi') reproduce the previous hard-coded
  behaviour, so existing callers are unaffected.

  input:
  - dataset_dict: DatasetDict with at least a "train" split
  - tokenizer: callable tokenizer accepting ``text_target``, ``max_length``
    and ``truncation``
  - max_length: maximum number of tokens kept per sentence (longer ones are
    truncated)
  - src_lang: key of the source sentence inside "translation"
  - tgt_lang: key of the target sentence inside "translation"

  output:
  - tokenized_datasets: result of mapping the tokenizer over every split,
    with the original columns removed
  - tokenizer: the same tokenizer object that was passed in
  """

  def preprocess_function(examples):
    pairs = examples["translation"]
    return tokenizer(
        [pair[src_lang] for pair in pairs],
        text_target=[pair[tgt_lang] for pair in pairs],
        max_length=max_length,
        truncation=True,
    )

  # Drop the raw columns so only the tokenizer output remains
  tokenized_datasets = dataset_dict.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset_dict["train"].column_names,
  )

  return tokenized_datasets, tokenizer

# Evaluamos el mejor modelo en cuanto a BLEU en entrenamiento

Veamos el modelo que dio la mejor métrica BLEU en validación durante el entrenamiento.

In [12]:
# Directory that holds the pickled evaluation metrics of the fine-tuned models.
model_path = "../results/arhuaco"

# Maps the path of each metrics pickle to the 'eval_bleu' value stored in it.
eval_blues = {}

for pickle_path in glob(model_path + '/*'):
  # Keep metric pickles only: skip entries that are not pickles and the
  # 'resultados' files.
  if 'pickle' not in pickle_path or 'resultados' in pickle_path:
    continue
  with open(pickle_path, 'rb') as file:
    eval_blues[pickle_path] = pickle.load(file)['eval_bleu']

In [16]:
# Path of the metrics pickle with the highest stored 'eval_bleu'.
res_max = max(eval_blues, key=eval_blues.get)

Este es el modelo con mejor score BLEU para el conjunto de validación entre los entrenados

In [17]:
res_max

'/content/drive/MyDrive/Colab Notebooks/Talleres NLP/Proyecto/results/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_10_0.0002.pickle'

In [26]:
path_data = '/content/drive/MyDrive/Colab Notebooks/Talleres NLP/Proyecto/data_clean/arhuaco'

# Recover the dataset name from '..._metrica_despues_<DATASET>_<x>_<y>.pickle'
# (the two trailing fields are presumably epochs and learning rate).
# Splitting from the right keeps multi-word names such as 'COMP_NC' intact;
# a plain split('_')[-3] would return only 'NC' for those files.
d = res_max.split('.pickle')[0].split('metrica_despues_')[-1].rsplit('_', 2)[0]

# Load the data and rebuild the train/test/validation split
dataset_dict = preprocess_dataset(path_data + '/' + d + '.csv', lang_output = 'fi')

# Load the fine-tuned model and tokenizer saved next to the metrics pickle
name = res_max.split('.pickle')[0].replace('metrica_despues', 'modelo')
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSeq2SeqLM.from_pretrained(name)

# One DataFrame per split, built from the 'translation' dictionaries
df_test = pd.DataFrame(dataset_dict['test']['translation'])
df_train = pd.DataFrame(dataset_dict['train']['translation'])
df_validation = pd.DataFrame(dataset_dict['validation']['translation'])

def translate(text, src_lang='es', tgt_lang='fi', a=32, b=3, max_input_length=1024, num_beams=4, **kwargs):
    """Translate a string or a batch of strings with the global model.

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        text: a string or a sequence of strings to translate.
        src_lang: value assigned to ``tokenizer.src_lang``.
        tgt_lang: value assigned to ``tokenizer.tgt_lang``; its token id is
            also passed to ``generate`` as ``forced_bos_token_id``.
        a, b: generation is capped at ``a + b * input_length`` new tokens,
            where ``input_length`` is the padded token length of the batch.
        max_input_length: inputs are truncated to this many tokens.
        num_beams: number of beams passed to ``generate``.
        **kwargs: forwarded to ``model.generate``.

    Returns:
        The decoded generated sequences, with special tokens removed.
    """
    tokenizer.src_lang = src_lang
    tokenizer.tgt_lang = tgt_lang
    batch = tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=max_input_length,
    ).to(model.device)
    # Output budget grows linearly with the (padded) input length
    token_budget = int(a + b * batch.input_ids.shape[1])
    target_bos_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    generated = model.generate(
        **batch,
        forced_bos_token_id=target_bos_id,
        max_new_tokens=token_budget,
        num_beams=num_beams,
        **kwargs
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)

def batched_translate(texts, batch_size=16, **kwargs):
    """Translate texts in batches of similar length.

    Texts are sorted from longest to shortest so each batch holds sentences
    of comparable length, translated with ``translate`` and finally put back
    in their original order.

    Args:
        texts: iterable of strings (a list, tuple or pandas Series).
        batch_size: number of texts sent to ``translate`` per call.
        **kwargs: forwarded to ``translate``.

    Returns:
        List of translations aligned with ``texts``; an empty list when
        ``texts`` is empty.
    """
    texts = list(texts)
    if not texts:
        # zip(*sorted(...)) below cannot be unpacked for an empty input
        return []
    idxs, texts2 = zip(*sorted(enumerate(texts), key=lambda p: len(p[1]), reverse=True))
    results = []
    for i in trange(0, len(texts2), batch_size):
        results.extend(translate(texts2[i: i+batch_size], **kwargs))
    # Undo the length sort; compare on the original index only
    return [p for _, p in sorted(zip(idxs, results), key=lambda pair: pair[0])]

Obtenemos las predicciones

In [27]:
# Translate the Spanish side of the test split in length-sorted batches.
arh_translated_test = batched_translate(df_test["es"], src_lang="es", tgt_lang="fi")

  0%|          | 0/35 [00:00<?, ?it/s]

In [None]:
#esp_translated_validation = batched_translate(df_validation.es, src_lang='es', tgt_lang='fi')

In [None]:
#esp_translated_train = batched_translate(df_train.es, src_lang='es', tgt_lang='fi')

# Métricas en test

In [32]:
df_test.head()

Unnamed: 0,es,fi
0,se pusieron entonces los fariseos a estudiar l...,"inuri gunu a'kumuya jinari sumu ne nanunki, ag..."
1,uno de ellos fue corriendo a empapar una espon...,uye'ki jesuri juma'nisi akunnajuri unwicha una.
2,"los hijos de ruben janoc, falu, jesron y carmi.","ruben zu gumusinu jinari anok, falu, hesron, ..."
3,pedro le contesto aunque todos te abandonen...,pedruse'ri pinna michusa ne unige'ki nun nu...
4,en aquel momento se les abrieron los ojos y de...,ey uye'ri i'munugwi tikiri nukawin wina'zunki ...


In [24]:
import sacrebleu
# BLEU scorer with sacrebleu's default settings.
bleu_calc = sacrebleu.BLEU()
# chrF scorer with word_order=2; its score is printed as "chrF2++" below.
chrf_calc = sacrebleu.CHRF(word_order=2)

In [35]:
# Corpus-level BLEU and chrF of the test translations against the single
# reference stored in the 'fi' column.
for scorer in (bleu_calc, chrf_calc):
    print(scorer.corpus_score(arh_translated_test, [df_test['fi'].tolist()]))

BLEU = 3.19 20.2/5.0/2.0/0.8 (BP = 0.893 ratio = 0.899 hyp_len = 11634 ref_len = 12945)
chrF2++ = 25.61


In [36]:
# Persist the test translations of the best model next to the metric pickles.
output_path = f"{model_path}/resultados_traducciones_mejor_modelo_test_arhuaco.pickle"
with open(output_path, 'wb') as file:
  pickle.dump(arh_translated_test, file)

# Métricas en test para los modelos de COMP

In [26]:

def translate(text, src_lang='es', tgt_lang='fi', a=32, b=3, max_input_length=128, num_beams=4, **kwargs):
    """Translate a string or a batch of strings with the global model.

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        text: a string or a sequence of strings to translate.
        src_lang: value assigned to ``tokenizer.src_lang``.
        tgt_lang: value assigned to ``tokenizer.tgt_lang``; its token id is
            also passed to ``generate`` as ``forced_bos_token_id``.
        a, b: generation is capped at ``a + b * input_length`` new tokens,
            where ``input_length`` is the padded token length of the batch.
        max_input_length: inputs are truncated to this many tokens.
        num_beams: number of beams passed to ``generate``.
        **kwargs: forwarded to ``model.generate``.

    Returns:
        The decoded generated sequences, with special tokens removed.
    """
    tokenizer.src_lang = src_lang
    tokenizer.tgt_lang = tgt_lang
    batch = tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=max_input_length,
    ).to(model.device)
    # Output budget grows linearly with the (padded) input length
    token_budget = int(a + b * batch.input_ids.shape[1])
    target_bos_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    generated = model.generate(
        **batch,
        forced_bos_token_id=target_bos_id,
        max_new_tokens=token_budget,
        num_beams=num_beams,
        **kwargs
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)

def batched_translate(texts, batch_size=16, **kwargs):
    """Translate texts in batches of similar length.

    Texts are sorted from longest to shortest so each batch holds sentences
    of comparable length, translated with ``translate`` and finally put back
    in their original order.

    Args:
        texts: iterable of strings (a list, tuple or pandas Series).
        batch_size: number of texts sent to ``translate`` per call.
        **kwargs: forwarded to ``translate``.

    Returns:
        List of translations aligned with ``texts``; an empty list when
        ``texts`` is empty.
    """
    texts = list(texts)
    if not texts:
        # zip(*sorted(...)) below cannot be unpacked for an empty input
        return []
    idxs, texts2 = zip(*sorted(enumerate(texts), key=lambda p: len(p[1]), reverse=True))
    results = []
    for i in trange(0, len(texts2), batch_size):
        results.extend(translate(texts2[i: i+batch_size], **kwargs))
    # Undo the length sort; compare on the original index only
    return [p for _, p in sorted(zip(idxs, results), key=lambda pair: pair[0])]

In [27]:
# Metric pickles whose path contains 'COMP' but not 'NC'.
keys_nd = [path for path in eval_blues if 'COMP' in path and 'NC' not in path]
keys_nd

['../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_10_2e-05.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_5_0.0002.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_10_0.0002.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_3_2e-05.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_5_2e-05.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_3_0.0002.pickle']

In [28]:
path_data = '../data/arhuaco'
d = 'COMP'

# Load the corpus and rebuild its train/test/validation split
dataset_dict = preprocess_dataset(f'{path_data}/{d}.csv', lang_output = 'fi')

# One DataFrame per split, built from the 'translation' dictionaries
df_test, df_train, df_validation = (
    pd.DataFrame(dataset_dict[split]['translation'])
    for split in ('test', 'train', 'validation')
)

In [29]:
df_test.shape

(574, 2)

In [30]:
# The references are the same for every model, so build them once
references = [df_test['fi'].tolist()]

for model_name in keys_nd:
    # Load the fine-tuned model and tokenizer stored next to the metrics pickle
    name = model_name.split('.pickle')[0].replace('metrica_despues', 'modelo')
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSeq2SeqLM.from_pretrained(name)

    # Translate one sentence at a time; translate() returns a one-element
    # list for a single string, hence the [0]. Iterating over the values
    # avoids depending on the DataFrame index labels.
    ar_pred2 = [translate(sentence)[0] for sentence in tqdm(df_test['es'].tolist())]

    # Results
    print(f'\n\n ---------------------- Resultados {model_name}')
    print(bleu_calc.corpus_score(ar_pred2, references))
    print(chrf_calc.corpus_score(ar_pred2, references))

    # Save. The f-prefix is required: without it every model wrote to the
    # same file literally named '..._{name}_arhuaco.pickle'.
    name = name.split('/')[-1]
    with open(f'../results/arhuaco/resultados_traducciones_{name}_arhuaco.pickle', 'wb') as file:
        pickle.dump(ar_pred2, file)

100%|██████████| 574/574 [22:15<00:00,  2.33s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_10_2e-05.pickle
BLEU = 0.87 14.8/1.9/0.4/0.1 (BP = 0.802 ratio = 0.820 hyp_len = 11628 ref_len = 14188)
chrF2++ = 18.64


100%|██████████| 574/574 [32:24<00:00,  3.39s/it]  




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_5_0.0002.pickle
BLEU = 2.28 18.8/4.2/1.4/0.5 (BP = 0.843 ratio = 0.854 hyp_len = 12114 ref_len = 14188)
chrF2++ = 23.04


100%|██████████| 574/574 [19:00<00:00,  1.99s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_10_0.0002.pickle
BLEU = 2.88 21.4/4.9/1.8/0.8 (BP = 0.832 ratio = 0.845 hyp_len = 11987 ref_len = 14188)
chrF2++ = 24.74


100%|██████████| 574/574 [23:23<00:00,  2.44s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_3_2e-05.pickle
BLEU = 0.27 8.6/0.4/0.1/0.0 (BP = 0.870 ratio = 0.878 hyp_len = 12455 ref_len = 14188)
chrF2++ = 13.05


100%|██████████| 574/574 [1:42:16<00:00, 10.69s/it]    




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_5_2e-05.pickle
BLEU = 0.42 9.0/0.7/0.2/0.1 (BP = 0.809 ratio = 0.825 hyp_len = 11702 ref_len = 14188)
chrF2++ = 14.23


100%|██████████| 574/574 [20:56<00:00,  2.19s/it]



 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_3_0.0002.pickle
BLEU = 1.52 16.2/2.9/1.0/0.3 (BP = 0.802 ratio = 0.819 hyp_len = 11619 ref_len = 14188)
chrF2++ = 20.73





# Métricas en test para los modelos de COMP_NC

In [31]:

def translate(text, src_lang='es', tgt_lang='fi', a=32, b=3, max_input_length=128, num_beams=4, **kwargs):
    """Translate a string or a batch of strings with the global model.

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        text: a string or a sequence of strings to translate.
        src_lang: value assigned to ``tokenizer.src_lang``.
        tgt_lang: value assigned to ``tokenizer.tgt_lang``; its token id is
            also passed to ``generate`` as ``forced_bos_token_id``.
        a, b: generation is capped at ``a + b * input_length`` new tokens,
            where ``input_length`` is the padded token length of the batch.
        max_input_length: inputs are truncated to this many tokens.
        num_beams: number of beams passed to ``generate``.
        **kwargs: forwarded to ``model.generate``.

    Returns:
        The decoded generated sequences, with special tokens removed.
    """
    tokenizer.src_lang = src_lang
    tokenizer.tgt_lang = tgt_lang
    batch = tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=max_input_length,
    ).to(model.device)
    # Output budget grows linearly with the (padded) input length
    token_budget = int(a + b * batch.input_ids.shape[1])
    target_bos_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    generated = model.generate(
        **batch,
        forced_bos_token_id=target_bos_id,
        max_new_tokens=token_budget,
        num_beams=num_beams,
        **kwargs
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)

def batched_translate(texts, batch_size=16, **kwargs):
    """Translate texts in batches of similar length.

    Texts are sorted from longest to shortest so each batch holds sentences
    of comparable length, translated with ``translate`` and finally put back
    in their original order.

    Args:
        texts: iterable of strings (a list, tuple or pandas Series).
        batch_size: number of texts sent to ``translate`` per call.
        **kwargs: forwarded to ``translate``.

    Returns:
        List of translations aligned with ``texts``; an empty list when
        ``texts`` is empty.
    """
    texts = list(texts)
    if not texts:
        # zip(*sorted(...)) below cannot be unpacked for an empty input
        return []
    idxs, texts2 = zip(*sorted(enumerate(texts), key=lambda p: len(p[1]), reverse=True))
    results = []
    for i in trange(0, len(texts2), batch_size):
        results.extend(translate(texts2[i: i+batch_size], **kwargs))
    # Undo the length sort; compare on the original index only
    return [p for _, p in sorted(zip(idxs, results), key=lambda pair: pair[0])]

In [32]:
# Metric pickles whose path contains 'COMP_NC'.
keys_nd = [path for path in eval_blues if 'COMP_NC' in path]
keys_nd

['../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_3_0.0002.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_5_0.0002.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_3_2e-05.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_10_0.0002.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_10_2e-05.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_5_2e-05.pickle']

In [33]:
path_data = '../data/arhuaco'
d = 'COMP_NC'

# Load the corpus and rebuild its train/test/validation split
dataset_dict = preprocess_dataset(f'{path_data}/{d}.csv', lang_output = 'fi')

# One DataFrame per split, built from the 'translation' dictionaries
df_test, df_train, df_validation = (
    pd.DataFrame(dataset_dict[split]['translation'])
    for split in ('test', 'train', 'validation')
)

In [35]:
# The references are the same for every model, so build them once
references = [df_test['fi'].tolist()]

for model_name in keys_nd:
    # Load the fine-tuned model and tokenizer stored next to the metrics pickle
    name = model_name.split('.pickle')[0].replace('metrica_despues', 'modelo')
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSeq2SeqLM.from_pretrained(name)

    # Translate one sentence at a time; translate() returns a one-element
    # list for a single string, hence the [0]. Iterating over the values
    # avoids depending on the DataFrame index labels.
    ar_pred2 = [translate(sentence)[0] for sentence in tqdm(df_test['es'].tolist())]

    # Results
    print(f'\n\n ---------------------- Resultados {model_name}')
    print(bleu_calc.corpus_score(ar_pred2, references))
    print(chrf_calc.corpus_score(ar_pred2, references))

    # Save. The f-prefix is required: without it every model wrote to the
    # same file literally named '..._{name}_arhuaco.pickle'.
    name = name.split('/')[-1]
    print(name)
    with open(f'../results/arhuaco/resultados_traducciones_{name}_arhuaco.pickle', 'wb') as file:
        pickle.dump(ar_pred2, file)

100%|██████████| 563/563 [18:14<00:00,  1.94s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_3_0.0002.pickle
BLEU = 1.63 16.5/3.1/1.0/0.3 (BP = 0.852 ratio = 0.862 hyp_len = 11425 ref_len = 13258)
chrF2++ = 21.65
fine_tunning_finlandes_arhuaco_modelo_COMP_NC_3_0.0002


100%|██████████| 563/563 [18:10<00:00,  1.94s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_5_0.0002.pickle
BLEU = 2.69 18.7/4.3/1.7/0.6 (BP = 0.880 ratio = 0.887 hyp_len = 11759 ref_len = 13258)
chrF2++ = 23.72
fine_tunning_finlandes_arhuaco_modelo_COMP_NC_5_0.0002


100%|██████████| 563/563 [23:57<00:00,  2.55s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_3_2e-05.pickle
BLEU = 0.23 6.5/0.4/0.1/0.0 (BP = 0.889 ratio = 0.895 hyp_len = 11861 ref_len = 13258)
chrF2++ = 12.44
fine_tunning_finlandes_arhuaco_modelo_COMP_NC_3_2e-05


100%|██████████| 563/563 [17:59<00:00,  1.92s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_10_0.0002.pickle
BLEU = 3.66 21.7/5.6/2.2/1.0 (BP = 0.910 ratio = 0.914 hyp_len = 12116 ref_len = 13258)
chrF2++ = 26.51
fine_tunning_finlandes_arhuaco_modelo_COMP_NC_10_0.0002


100%|██████████| 563/563 [18:51<00:00,  2.01s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_10_2e-05.pickle
BLEU = 0.80 14.2/1.9/0.5/0.1 (BP = 0.805 ratio = 0.821 hyp_len = 10890 ref_len = 13258)
chrF2++ = 18.32
fine_tunning_finlandes_arhuaco_modelo_COMP_NC_10_2e-05


100%|██████████| 563/563 [24:58<00:00,  2.66s/it]



 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_COMP_NC_5_2e-05.pickle
BLEU = 0.15 7.9/0.5/0.1/0.0 (BP = 0.821 ratio = 0.835 hyp_len = 11071 ref_len = 13258)
chrF2++ = 13.69
fine_tunning_finlandes_arhuaco_modelo_COMP_NC_5_2e-05





# Métricas en test para los modelos de Biblia

In [36]:

def translate(text, src_lang='es', tgt_lang='fi', a=32, b=3, max_input_length=128, num_beams=4, **kwargs):
    """Translate a string or a batch of strings with the global model.

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        text: a string or a sequence of strings to translate.
        src_lang: value assigned to ``tokenizer.src_lang``.
        tgt_lang: value assigned to ``tokenizer.tgt_lang``; its token id is
            also passed to ``generate`` as ``forced_bos_token_id``.
        a, b: generation is capped at ``a + b * input_length`` new tokens,
            where ``input_length`` is the padded token length of the batch.
        max_input_length: inputs are truncated to this many tokens.
        num_beams: number of beams passed to ``generate``.
        **kwargs: forwarded to ``model.generate``.

    Returns:
        The decoded generated sequences, with special tokens removed.
    """
    tokenizer.src_lang = src_lang
    tokenizer.tgt_lang = tgt_lang
    batch = tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=max_input_length,
    ).to(model.device)
    # Output budget grows linearly with the (padded) input length
    token_budget = int(a + b * batch.input_ids.shape[1])
    target_bos_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    generated = model.generate(
        **batch,
        forced_bos_token_id=target_bos_id,
        max_new_tokens=token_budget,
        num_beams=num_beams,
        **kwargs
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)

def batched_translate(texts, batch_size=16, **kwargs):
    """Translate texts in batches of similar length.

    Texts are sorted from longest to shortest so each batch holds sentences
    of comparable length, translated with ``translate`` and finally put back
    in their original order.

    Args:
        texts: iterable of strings (a list, tuple or pandas Series).
        batch_size: number of texts sent to ``translate`` per call.
        **kwargs: forwarded to ``translate``.

    Returns:
        List of translations aligned with ``texts``; an empty list when
        ``texts`` is empty.
    """
    texts = list(texts)
    if not texts:
        # zip(*sorted(...)) below cannot be unpacked for an empty input
        return []
    idxs, texts2 = zip(*sorted(enumerate(texts), key=lambda p: len(p[1]), reverse=True))
    results = []
    for i in trange(0, len(texts2), batch_size):
        results.extend(translate(texts2[i: i+batch_size], **kwargs))
    # Undo the length sort; compare on the original index only
    return [p for _, p in sorted(zip(idxs, results), key=lambda pair: pair[0])]

In [37]:
# Metric pickles whose path contains 'BIBLIA'.
keys_nd = [path for path in eval_blues if 'BIBLIA' in path]
keys_nd

['../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_5_2e-05.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_10_0.0002.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_3_2e-05.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_3_0.0002.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_5_0.0002.pickle',
 '../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_10_2e-05.pickle']

In [38]:
path_data = '../data/arhuaco'
d = 'BIBLIA'

# Load the corpus and rebuild its train/test/validation split
dataset_dict = preprocess_dataset(f'{path_data}/{d}.csv', lang_output = 'fi')

# One DataFrame per split, built from the 'translation' dictionaries
df_test, df_train, df_validation = (
    pd.DataFrame(dataset_dict[split]['translation'])
    for split in ('test', 'train', 'validation')
)

In [39]:
# The references are the same for every model, so build them once
references = [df_test['fi'].tolist()]

for model_name in keys_nd:
    # Load the fine-tuned model and tokenizer stored next to the metrics pickle
    name = model_name.split('.pickle')[0].replace('metrica_despues', 'modelo')
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSeq2SeqLM.from_pretrained(name)

    # Translate one sentence at a time; translate() returns a one-element
    # list for a single string, hence the [0]. Iterating over the values
    # avoids depending on the DataFrame index labels.
    ar_pred2 = [translate(sentence)[0] for sentence in tqdm(df_test['es'].tolist())]

    # Results
    print(f'\n\n ---------------------- Resultados {model_name}')
    print(bleu_calc.corpus_score(ar_pred2, references))
    print(chrf_calc.corpus_score(ar_pred2, references))

    # Save. The f-prefix is required: without it every model wrote to the
    # same file literally named '..._{name}_arhuaco.pickle'.
    name = name.split('/')[-1]
    print(name)
    with open(f'../results/arhuaco/resultados_traducciones_{name}_arhuaco.pickle', 'wb') as file:
        pickle.dump(ar_pred2, file)

100%|██████████| 557/557 [24:41<00:00,  2.66s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_5_2e-05.pickle
BLEU = 0.37 8.3/0.7/0.1/0.0 (BP = 0.863 ratio = 0.872 hyp_len = 11282 ref_len = 12945)
chrF2++ = 13.93
fine_tunning_finlandes_arhuaco_modelo_BIBLIA_5_2e-05


100%|██████████| 557/557 [17:20<00:00,  1.87s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_10_0.0002.pickle
BLEU = 3.17 20.3/5.0/2.0/0.8 (BP = 0.882 ratio = 0.889 hyp_len = 11505 ref_len = 12945)
chrF2++ = 25.53
fine_tunning_finlandes_arhuaco_modelo_BIBLIA_10_0.0002


100%|██████████| 557/557 [23:32<00:00,  2.54s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_3_2e-05.pickle
BLEU = 0.15 7.4/0.4/0.1/0.0 (BP = 0.901 ratio = 0.905 hyp_len = 11720 ref_len = 12945)
chrF2++ = 12.49
fine_tunning_finlandes_arhuaco_modelo_BIBLIA_3_2e-05


100%|██████████| 557/557 [18:52<00:00,  2.03s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_3_0.0002.pickle
BLEU = 1.86 17.0/3.2/1.0/0.4 (BP = 0.896 ratio = 0.901 hyp_len = 11661 ref_len = 12945)
chrF2++ = 21.87
fine_tunning_finlandes_arhuaco_modelo_BIBLIA_3_0.0002


100%|██████████| 557/557 [17:48<00:00,  1.92s/it]




 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_5_0.0002.pickle
BLEU = 2.71 19.0/4.2/1.6/0.7 (BP = 0.896 ratio = 0.901 hyp_len = 11664 ref_len = 12945)
chrF2++ = 24.15
fine_tunning_finlandes_arhuaco_modelo_BIBLIA_5_0.0002


100%|██████████| 557/557 [20:49<00:00,  2.24s/it]



 ---------------------- Resultados ../results/arhuaco/fine_tunning_finlandes_arhuaco_metrica_despues_BIBLIA_10_2e-05.pickle
BLEU = 0.96 14.9/2.0/0.5/0.1 (BP = 0.844 ratio = 0.855 hyp_len = 11072 ref_len = 12945)
chrF2++ = 19.09
fine_tunning_finlandes_arhuaco_modelo_BIBLIA_10_2e-05





In [7]:
import pickle
from glob import glob

# Print the stored evaluation metrics of every pickle in arhuaco_reves.
# Paths are sorted so the listing is the same on every run.
# NOTE: pickle.load can execute arbitrary code; only read pickles produced
# by this project.
for g in sorted(glob('../results/arhuaco_reves/*.pickle')):
    with open(g, 'rb') as file:
        print('\n',g)
        print(pickle.load(file))


 ../results/arhuaco_reves/arhuaco_metrica_despues_COMP_3_2e-05_REVES.pickle
{'eval_loss': 3.663532018661499, 'eval_bleu': 0.6863517490896596, 'eval_runtime': 64.0529, 'eval_samples_per_second': 8.946, 'eval_steps_per_second': 0.141, 'epoch': 3.0}

 ../results/arhuaco_reves/arhuaco_metrica_despues_COMP_5_2e-05_REVES.pickle
{'eval_loss': 3.5439751148223877, 'eval_bleu': 0.7665199640397524, 'eval_runtime': 65.1476, 'eval_samples_per_second': 8.795, 'eval_steps_per_second': 0.138, 'epoch': 5.0}

 ../results/arhuaco_reves/arhuaco_metrica_despues_COMP_10_0.0002_REVES.pickle
{'eval_loss': 3.5056161880493164, 'eval_bleu': 3.6185704653853294, 'eval_runtime': 59.8403, 'eval_samples_per_second': 9.575, 'eval_steps_per_second': 0.15, 'epoch': 10.0}

 ../results/arhuaco_reves/arhuaco_metrica_despues_COMP_5_0.0002_REVES.pickle
{'eval_loss': 3.239234685897827, 'eval_bleu': 2.684949197916594, 'eval_runtime': 60.2437, 'eval_samples_per_second': 9.511, 'eval_steps_per_second': 0.149, 'epoch': 5.0}

 ..