# Librerías

In [None]:
!pip install datasets
!pip install sacremoses
!pip install sacrebleu
!pip install evaluate
!pip install transformers[sentencepiece]
!pip install transformers[torch]

Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyarrow-hotfix, dill, multiprocess, datasets
Successfully installed datasets-2.15.0 dill-0.3.7 multiprocess-0.70.15 pyarrow-hotfix-0.6
Collecting sacremoses
  Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
[2K     

In [None]:
from glob import glob
import pandas as pd
import numpy as np
from tqdm.auto import tqdm, trange
import sys
import os

In [None]:
from datasets import load_dataset, DatasetDict, Dataset
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer
import pandas as pd

In [None]:
from transformers import Seq2SeqTrainingArguments
from transformers import DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM
from transformers import EarlyStoppingCallback
from transformers import Seq2SeqTrainer

import torch

import numpy as np
import pickle
import evaluate

In [None]:
# Mount Google Drive at /content/drive; model_path further down points inside this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import sacrebleu
# Corpus-level scorers shared by the evaluation cells of this notebook.
bleu_calc = sacrebleu.BLEU()
# word_order=2 also scores word n-grams; the cell outputs report this metric as chrF2++.
chrf_calc = sacrebleu.CHRF(word_order=2)

# Funciones auxiliares

In [None]:
def preprocess_dataset(path_dataset: str, lang_output: str):
  """
  Load a parallel corpus from a CSV file and package it as a DatasetDict.

  The 'esp' column is renamed to 'es' and a 'wayuu' or 'arh' column is renamed
  to `lang_output`. The rows are then split 80/10/10 into train, validation
  and test with a fixed seed (random_state = 42).

  input:
  - path_dataset: path of the CSV file to process
  - lang_output: key under which the non-Spanish sentence is stored in every
    translation pair

  output:
  - dataset_dict: DatasetDict with "train", "test" and "validation" splits;
    each split has an "id" column (0..n-1) and a "translation" column of
    {'es': ..., lang_output: ...} dicts
  """
  # Read the CSV and normalise the column names
  frame = pd.read_csv(path_dataset)
  frame = frame.rename(columns = {'esp': 'es', 'wayuu': lang_output, 'arh': lang_output})

  # One {'es': ..., lang_output: ...} record per row
  pairs = []
  for _, row in frame.iterrows():
    pairs.append({'es': row['es'], lang_output: row[lang_output]})

  # 80% train; the remaining 20% is halved into validation and test
  train_pairs, holdout = train_test_split(pairs, test_size = 0.2, random_state = 42)
  val_pairs, test_pairs = train_test_split(holdout, test_size = 0.5, random_state = 42)

  def build_split(records):
    # Wrap a list of translation pairs together with sequential ids
    return Dataset.from_dict({"id": list(range(len(records))), "translation": records})

  # Splits are built and inserted in the order train, test, validation
  return DatasetDict({
    "train": build_split(train_pairs),
    "test": build_split(test_pairs),
    "validation": build_split(val_pairs),
  })

#

In [None]:
def tokenizar(dataset_dict, tokenizer, max_length = 150, src_lang = "es", tgt_lang = "fi"):
  """
  Tokenize every split of a DatasetDict of translation pairs.

  input:
  - dataset_dict: DatasetDict with a "train" split and a "translation" column
    holding one dict per example, keyed by language code
  - tokenizer: callable tokenizer; it is invoked with the source sentences and
    the keyword arguments `text_target`, `max_length` and `truncation`
  - max_length: value forwarded to the tokenizer as `max_length`
    (truncation is enabled)
  - src_lang: key of the source sentence inside each translation dict
  - tgt_lang: key of the target sentence inside each translation dict
    (defaults to "fi", the key this notebook passes as `lang_output`)

  output:
  - (tokenized_datasets, tokenizer): the mapped dataset, with the columns of
    the "train" split removed, and the same tokenizer that was passed in
  """

  def preprocess_function(examples):
    # Split each batch of translation pairs into source and target sentences
    pairs = examples["translation"]
    inputs = [ex[src_lang] for ex in pairs]
    targets = [ex[tgt_lang] for ex in pairs]
    return tokenizer(
      inputs, text_target=targets, max_length=max_length, truncation=True
    )

  # Tokenize all splits in batches, dropping the raw columns
  tokenized_datasets = dataset_dict.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset_dict["train"].column_names,
  )

  return tokenized_datasets, tokenizer

# Evaluamos el mejor modelo en cuanto a BLEU en entrenamiento

Veamos qué modelo obtuvo la mejor métrica BLEU sobre el conjunto de validación durante el entrenamiento.

In [None]:
model_path = "/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu"
eval_blues = {}

# Collect the validation BLEU stored in every metric pickle under model_path.
for res in glob(model_path + '/*'):
  # Skip anything that is not a pickle, and skip the saved translation results
  if 'pickle' not in res or 'resultados' in res:
    continue
  # NOTE: pickle.load executes arbitrary code; only use it on trusted files
  with open(res, 'rb') as file:
    blue_score = pickle.load(file)['eval_bleu']
  eval_blues[res] = blue_score

In [None]:
eval_blues

{'/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_10_2e-05.pickle': 3.841921542846619,
 '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_10_0.0002.pickle': 9.944518207708816,
 '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_3_2e-05.pickle': 1.0976952286993389,
 '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_3_0.0002.pickle': 6.985037271297072,
 '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_5_2e-05.pickle': 1.9693512837568947,
 '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_5_0.0002.pickle': 8.630473080537001,
 '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_10_2e-05.pickle': 3.8415706127918066,
 '/content/drive/

In [None]:
# Path of the metric file with the highest validation BLEU
res_max = max(eval_blues, key=eval_blues.get)

Este es el modelo con el mejor puntaje BLEU sobre el conjunto de validación entre los modelos entrenados.

In [None]:
res_max, eval_blues[res_max]

('../results/wayuu/metrica_despues_COMP_NC_10_0.0002.pickle',
 10.092185362846715)

In [None]:
# NOTE(review): this is a relative path, while model_path points into the mounted Drive — confirm the working directory.
path_data = '../data/wayuu'

# Metric files are named metrica_despues_<dataset>_<x>_<y>.pickle (the two
# trailing fields look like epochs and learning rate — TODO confirm).
# Strip the prefix and drop those two fields so that single-token dataset
# names (e.g. 'COMP') are recovered as well as two-token ones ('COMP_NC').
stem = os.path.splitext(os.path.basename(res_max))[0]
d = '_'.join(stem.replace('metrica_despues_', '', 1).split('_')[:-2])
print(d)

# Load the data
dataset_dict = preprocess_dataset(path_data + '/' + d + '.csv', lang_output = 'fi')

# Load the model and tokenizer stored next to the metric file
name = res_max.split('.pickle')[0].replace('metrica_despues', 'modelo')
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSeq2SeqLM.from_pretrained(name)

# One DataFrame per split, with columns 'es' and 'fi'
df_test = pd.DataFrame(dataset_dict['test']['translation'])
df_train = pd.DataFrame(dataset_dict['train']['translation'])
df_validation = pd.DataFrame(dataset_dict['validation']['translation'])

COMP_NC


In [None]:
def translate(text, src_lang='es', tgt_lang='fi', a=32, b=3, max_input_length=128, num_beams=4, **kwargs):
    """Translate `text` using the notebook-level `model` and `tokenizer`.

    Args:
        text: a sentence or a list of sentences to translate.
        src_lang: value assigned to `tokenizer.src_lang`.
        tgt_lang: value assigned to `tokenizer.tgt_lang`; its token id is
            passed to `generate` as `forced_bos_token_id`.
        a, b: the generation budget is `int(a + b * L)` new tokens, where L is
            the second dimension of the tokenized `input_ids`.
        max_input_length: inputs are truncated to this many tokens.
        num_beams: beam width passed to `generate`.
        **kwargs: forwarded unchanged to `model.generate`.

    Returns:
        The list produced by `tokenizer.batch_decode` with special tokens skipped.
    """
    tokenizer.src_lang, tokenizer.tgt_lang = src_lang, tgt_lang
    encoded = tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=max_input_length,
    )
    # Move the batch to the device the model lives on before generating
    on_device = encoded.to(model.device)
    bos_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    budget = int(a + b * encoded.input_ids.shape[1])
    generated = model.generate(
        **on_device,
        forced_bos_token_id=bos_id,
        max_new_tokens=budget,
        num_beams=num_beams,
        **kwargs
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)

def batched_translate(texts, batch_size=16, **kwargs):
    """Translate texts in batches of similar length.

    The texts are sorted by decreasing character length so that each batch
    needs little padding, translated `batch_size` at a time with `translate`,
    and returned in the original input order.

    Args:
        texts: iterable of strings to translate.
        batch_size: number of texts sent to `translate` per call.
        **kwargs: forwarded to `translate`.

    Returns:
        List of translations aligned with `texts`; an empty list for empty input.
    """
    texts = list(texts)
    # Unpacking zip(*...) of nothing raises ValueError, so handle empty input here
    if not texts:
        return []
    idxs, texts2 = zip(*sorted(enumerate(texts), key=lambda p: len(p[1]), reverse=True))
    results = []
    for i in trange(0, len(texts2), batch_size):
        results.extend(translate(texts2[i: i+batch_size], **kwargs))
    # Indices are unique, so sorting the pairs restores the input order
    return [p for i, p in sorted(zip(idxs, results))]

Obtenemos las predicciones

In [None]:
df_test

Unnamed: 0,es,fi
0,renunciamos a actuar de forma oculta y avergon...,"tu tekirajakat anain jia, nnojotsu taattajaain..."
1,"si el mundo los odia a ustedes, sepan que prim...",aashajaashi jesus nuchikua namuin muleka kan...
2,como podemos ser mas compasivos,kasa waa'inrajatka supula alinjatuin wayuu wapula
3,jesus acababa de ense arles a sus discipulos q...,ni'ikuin jesuu na nikirajuinkana sunain achunt...
4,con que se divierten hoy dia muchas personas,kasa naainjaka ma'in na wayuukana maa'ulu yaa
...,...,...
8124,ma ana vere a mis hermanos,tereena huata tawalayu
8125,pero es preciso que sean constantes en el cump...,anakaja nnojorule juu'ulaain suulia anoujaa. m...
8126,pero poco a poco sus sentimientos se haran mas...,mapa ki'raleeshi'iya naya sunain muin nakuwa'ipa
8127,sin embargo eso no quiere decir que no tengamo...,kamaneepaja'a waya sumuin wayuu supushua'a


In [None]:
# Translate the test sentences one at a time. translate() returns the list
# from batch_decode, so wayuu_pred is a list of lists (a later cell takes w[0]).
wayuu_pred = []
for i in trange(0, len(df_test.values)):
    translated = translate(df_test.es[i])
    wayuu_pred.append(translated)

100%|██████████| 8129/8129 [2:58:14<00:00,  1.32s/it]  


# Métricas en test

In [None]:
df_test.head()

Unnamed: 0,es,fi
0,renunciamos a actuar de forma oculta y avergon...,"tu tekirajakat anain jia, nnojotsu taattajaain..."
1,"si el mundo los odia a ustedes, sepan que prim...",aashajaashi jesus nuchikua namuin muleka kan...
2,como podemos ser mas compasivos,kasa waa'inrajatka supula alinjatuin wayuu wapula
3,jesus acababa de ense arles a sus discipulos q...,ni'ikuin jesuu na nikirajuinkana sunain achunt...
4,con que se divierten hoy dia muchas personas,kasa naainjaka ma'in na wayuukana maa'ulu yaa


In [None]:
# Re-creates the same scorers already defined in the earlier sacrebleu cell.
import sacrebleu
bleu_calc = sacrebleu.BLEU()
chrf_calc = sacrebleu.CHRF(word_order=2)

In [None]:
# Keep only the first decoded string of each per-sentence result list.
wayuu_pred2 = [w[0] for w in wayuu_pred]

In [None]:
# corpus_score receives the hypotheses and a list of reference lists;
# a single reference list (the 'fi' column of the test split) is used here.
print(bleu_calc.corpus_score(wayuu_pred2, [df_test['fi'].tolist()]))
print(chrf_calc.corpus_score(wayuu_pred2, [df_test['fi'].tolist()]))

BLEU = 7.48 31.0/12.0/5.6/2.9 (BP = 0.849 ratio = 0.859 hyp_len = 95933 ref_len = 111620)
chrF2++ = 31.90


In [None]:
# Persist the raw predictions (the list of lists, not wayuu_pred2) under model_path.
with open(model_path + '/resultados_traducciones_mejor_modelo_test_wayuu.pickle', 'wb') as file:
  pickle.dump(wayuu_pred, file)

# Revisión otros modelos - No diccionario

In [None]:
# Metric files whose path contains 'ND_' (this substring does not match the 'NDU_' files).
keys_nd = [k for k in eval_blues.keys() if 'ND_' in k]
keys_nd

['../results/wayuu/metrica_despues_COMP_ND_3_0.0002.pickle',
 '../results/wayuu/metrica_despues_COMP_ND_10_0.0002.pickle',
 '../results/wayuu/metrica_despues_COMP_ND_5_2e-05.pickle',
 '../results/wayuu/metrica_despues_COMP_ND_5_0.0002.pickle',
 '../results/wayuu/metrica_despues_COMP_ND_3_2e-05.pickle',
 '../results/wayuu/metrica_despues_COMP_ND_10_2e-05.pickle']

In [None]:
path_data = '../data/wayuu'

# Dataset evaluated in this section
d = 'COMP_ND'

# Load the data
dataset_dict = preprocess_dataset(path_data + '/' + d + '.csv', lang_output = 'fi')

# One DataFrame per split, with columns 'es' and 'fi'
df_test = pd.DataFrame(dataset_dict['test']['translation'])
df_train = pd.DataFrame(dataset_dict['train']['translation'])
df_validation = pd.DataFrame(dataset_dict['validation']['translation'])

In [None]:
# Evaluate every model listed in keys_nd on the current test split.
for model_name in keys_nd:
    # Load the model and tokenizer stored next to the metric file
    name = model_name.split('.pickle')[0].replace('metrica_despues', 'modelo')
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSeq2SeqLM.from_pretrained(name)

    # Translate sentence by sentence; translate() reads the global model/tokenizer set above
    print('Comienza a predecir')
    wayuu_pred = []
    for i in trange(0, len(df_test.values)):
        translated = translate(df_test.es[i])
        wayuu_pred.append(translated)
    # Each prediction is a list; keep its first decoded string
    wayuu_pred2 = [w[0] for w in wayuu_pred]

    # Results
    print(f'\n\n ---------------------- Resultados {model_name}\n')
    print(bleu_calc.corpus_score(wayuu_pred2, [df_test['fi'].tolist()]))
    print(chrf_calc.corpus_score(wayuu_pred2, [df_test['fi'].tolist()]))

    # Save. The filename must be an f-string: without the prefix every model
    # wrote to the same literal '{name}' file and overwrote the previous one.
    name = name.split('/')[-1]
    with open(model_path + f'/resultados_traducciones_{name}_wayuu.pickle', 'wb') as file:
        pickle.dump(wayuu_pred, file)

Comienza a predecir


100%|██████████| 894/894 [35:55<00:00,  2.41s/it]




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_ND_3_0.0002.pickle

BLEU = 0.68 28.6/5.3/1.1/0.2 (BP = 0.281 ratio = 0.441 hyp_len = 22764 ref_len = 51637)
chrF2++ = 15.47
Comienza a predecir


100%|██████████| 894/894 [37:26<00:00,  2.51s/it]




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_ND_10_0.0002.pickle

BLEU = 2.53 36.6/10.9/4.1/1.8 (BP = 0.342 ratio = 0.482 hyp_len = 24907 ref_len = 51637)
chrF2++ = 21.37
Comienza a predecir


100%|██████████| 894/894 [51:29<00:00,  3.46s/it]  




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_ND_5_2e-05.pickle

BLEU = 0.17 8.1/0.9/0.1/0.0 (BP = 0.641 ratio = 0.692 hyp_len = 35726 ref_len = 51637)
chrF2++ = 9.15
Comienza a predecir


100%|██████████| 894/894 [37:06<00:00,  2.49s/it]




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_ND_5_0.0002.pickle

BLEU = 1.47 31.7/7.7/2.3/0.7 (BP = 0.327 ratio = 0.472 hyp_len = 24381 ref_len = 51637)
chrF2++ = 18.49
Comienza a predecir


100%|██████████| 894/894 [50:45<00:00,  3.41s/it]  




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_ND_3_2e-05.pickle

BLEU = 0.08 3.9/0.2/0.0/0.0 (BP = 0.610 ratio = 0.669 hyp_len = 34544 ref_len = 51637)
chrF2++ = 7.55
Comienza a predecir


100%|██████████| 894/894 [37:25<00:00,  2.51s/it]




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_ND_10_2e-05.pickle

BLEU = 0.37 20.5/3.0/0.4/0.0 (BP = 0.348 ratio = 0.487 hyp_len = 25127 ref_len = 51637)
chrF2++ = 12.79


# Revisión otros modelos - No diccionario palabras únicas

In [None]:
path_data = '../data/wayuu'

# Dataset evaluated in this section
d = 'COMP_NDU'
print(d)

# Load the data
dataset_dict = preprocess_dataset(path_data + '/' + d + '.csv', lang_output = 'fi')

# One DataFrame per split, with columns 'es' and 'fi'
df_test = pd.DataFrame(dataset_dict['test']['translation'])
df_train = pd.DataFrame(dataset_dict['train']['translation'])
df_validation = pd.DataFrame(dataset_dict['validation']['translation'])

COMP_NDU


In [None]:
# Metric files whose path contains 'NDU_'; note this reuses (overwrites) the name keys_nd.
keys_nd = [k for k in eval_blues.keys() if 'NDU_' in k]
keys_nd

['../results/wayuu/metrica_despues_COMP_NDU_10_2e-05.pickle',
 '../results/wayuu/metrica_despues_COMP_NDU_5_0.0002.pickle',
 '../results/wayuu/metrica_despues_COMP_NDU_3_0.0002.pickle',
 '../results/wayuu/metrica_despues_COMP_NDU_3_2e-05.pickle',
 '../results/wayuu/metrica_despues_COMP_NDU_5_2e-05.pickle',
 '../results/wayuu/metrica_despues_COMP_NDU_10_0.0002.pickle']

In [None]:
# Evaluate every model listed in keys_nd on the current test split.
for model_name in keys_nd:
    # Load the model and tokenizer stored next to the metric file
    name = model_name.split('.pickle')[0].replace('metrica_despues', 'modelo')
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSeq2SeqLM.from_pretrained(name)

    # Translate sentence by sentence; translate() reads the global model/tokenizer set above
    print('Comienza a predecir')
    wayuu_pred = []
    for i in trange(0, len(df_test.values)):
        translated = translate(df_test.es[i])
        wayuu_pred.append(translated)
    # Each prediction is a list; keep its first decoded string
    wayuu_pred2 = [w[0] for w in wayuu_pred]

    # Results
    print(f'\n\n ---------------------- Resultados {model_name}\n')
    print(bleu_calc.corpus_score(wayuu_pred2, [df_test['fi'].tolist()]))
    print(chrf_calc.corpus_score(wayuu_pred2, [df_test['fi'].tolist()]))

    # Save. The filename must be an f-string: without the prefix every model
    # wrote to the same literal '{name}' file and overwrote the previous one.
    name = name.split('/')[-1]
    with open(model_path + f'/resultados_traducciones_{name}_wayuu.pickle', 'wb') as file:
        pickle.dump(wayuu_pred, file)

Comienza a predecir


100%|██████████| 7925/7925 [4:48:02<00:00,  2.18s/it]     




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_NDU_10_2e-05.pickle

BLEU = 2.65 24.2/6.2/1.8/0.5 (BP = 0.769 ratio = 0.792 hyp_len = 86944 ref_len = 109834)
chrF2++ = 24.93
Comienza a predecir


100%|██████████| 7925/7925 [2:44:16<00:00,  1.24s/it]     




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_NDU_5_0.0002.pickle

BLEU = 6.47 30.4/11.1/4.8/2.3 (BP = 0.832 ratio = 0.845 hyp_len = 92780 ref_len = 109834)
chrF2++ = 30.82
Comienza a predecir


100%|██████████| 7925/7925 [6:44:44<00:00,  3.06s/it]     




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_NDU_3_0.0002.pickle

BLEU = 5.33 28.5/9.7/3.8/1.6 (BP = 0.824 ratio = 0.838 hyp_len = 92056 ref_len = 109834)
chrF2++ = 29.27
Comienza a predecir


100%|██████████| 7925/7925 [3:05:25<00:00,  1.40s/it]  




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_NDU_3_2e-05.pickle

BLEU = 0.89 17.6/2.7/0.4/0.1 (BP = 0.791 ratio = 0.810 hyp_len = 88985 ref_len = 109834)
chrF2++ = 20.20
Comienza a predecir


100%|██████████| 7925/7925 [5:51:16<00:00,  2.66s/it]     




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_NDU_5_2e-05.pickle

BLEU = 1.49 20.6/4.0/0.8/0.2 (BP = 0.775 ratio = 0.797 hyp_len = 87567 ref_len = 109834)
chrF2++ = 22.22
Comienza a predecir


100%|██████████| 7925/7925 [5:09:23<00:00,  2.34s/it]    




 ---------------------- Resultados ../results/wayuu/metrica_despues_COMP_NDU_10_0.0002.pickle

BLEU = 7.53 32.1/12.4/5.8/3.0 (BP = 0.826 ratio = 0.839 hyp_len = 92163 ref_len = 109834)
chrF2++ = 31.98


# Revisión otros modelos - No constitución

In [None]:
def translate(model, tokenizer, text, src_lang='es', tgt_lang='fi', a=32, b=3, max_input_length=128, num_beams=4, **kwargs):
    """Translate `text` with the given `model` and `tokenizer`.

    Args:
        model: object exposing `device` and `generate(...)`.
        tokenizer: tokenizer matching the model; its `src_lang` and `tgt_lang`
            attributes are overwritten by this call.
        text: a sentence or a list of sentences to translate.
        src_lang: value assigned to `tokenizer.src_lang`.
        tgt_lang: value assigned to `tokenizer.tgt_lang`; its token id is
            passed to `generate` as `forced_bos_token_id`.
        a, b: the generation budget is `int(a + b * L)` new tokens, where L is
            the second dimension of the tokenized `input_ids`.
        max_input_length: inputs are truncated to this many tokens.
        num_beams: beam width passed to `generate`.
        **kwargs: forwarded unchanged to `model.generate`.

    Returns:
        The list produced by `tokenizer.batch_decode` with special tokens skipped.
    """
    tokenizer.src_lang, tokenizer.tgt_lang = src_lang, tgt_lang
    encoded = tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=max_input_length,
    )
    # Move the batch to the device the model lives on before generating
    on_device = encoded.to(model.device)
    bos_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    budget = int(a + b * encoded.input_ids.shape[1])
    generated = model.generate(
        **on_device,
        forced_bos_token_id=bos_id,
        max_new_tokens=budget,
        num_beams=num_beams,
        **kwargs
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)

def batched_translate(model, tokenizer, texts, batch_size=16, **kwargs):
    """Translate texts in batches of similar length.

    The texts are sorted by decreasing character length so that each batch
    needs little padding, translated `batch_size` at a time with `translate`,
    and returned in the original input order.

    Args:
        model: model forwarded to `translate`.
        tokenizer: tokenizer forwarded to `translate`.
        texts: iterable of strings to translate.
        batch_size: number of texts sent to `translate` per call.
        **kwargs: forwarded to `translate`.

    Returns:
        List of translations aligned with `texts`; an empty list for empty input.
    """
    texts = list(texts)
    # Unpacking zip(*...) of nothing raises ValueError, so handle empty input here
    if not texts:
        return []
    idxs, texts2 = zip(*sorted(enumerate(texts), key=lambda p: len(p[1]), reverse=True))
    results = []
    for i in trange(0, len(texts2), batch_size):
        results.extend(translate(model, tokenizer, texts2[i: i+batch_size], **kwargs))
    # Indices are unique, so sorting the pairs restores the input order
    return [p for i, p in sorted(zip(idxs, results))]

In [None]:
# Here the data is read from the mounted Drive (earlier cells used the relative '../data/wayuu').
path_data = '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/data/wayuu'

# Dataset evaluated in this section
d = 'COMP_NC'
print(d)

# Load the data
dataset_dict = preprocess_dataset(path_data + '/' + d + '.csv', lang_output = 'fi')

# One DataFrame per split, with columns 'es' and 'fi'
df_test = pd.DataFrame(dataset_dict['test']['translation'])
df_train = pd.DataFrame(dataset_dict['train']['translation'])
df_validation = pd.DataFrame(dataset_dict['validation']['translation'])

COMP_NC


In [None]:
# Metric files whose path contains 'NC_'.
keys_nc = [k for k in eval_blues.keys() if 'NC_' in k]
keys_nc

['/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_3_2e-05.pickle',
 '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_3_0.0002.pickle',
 '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_5_2e-05.pickle',
 '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_5_0.0002.pickle',
 '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_10_2e-05.pickle',
 '/content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_10_0.0002.pickle']

In [None]:
# Use the GPU when one is present; otherwise run on CPU instead of failing on .to('cuda')
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Evaluate every model listed in keys_nc on the current test split.
for model_name in keys_nc:
    # Load the model and tokenizer stored next to the metric file
    name = model_name.split('.pickle')[0].replace('metrica_despues', 'modelo')
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSeq2SeqLM.from_pretrained(name).to(device)

    # Translate in batches; the result is a flat list of strings in test order
    wayuu_pred = batched_translate(model, tokenizer, df_test.es, src_lang='es', tgt_lang='fi')

    # Results
    print(f'\n\n ---------------------- Resultados {model_name}\n')
    print(bleu_calc.corpus_score(wayuu_pred, [df_test['fi'].tolist()]))
    print(chrf_calc.corpus_score(wayuu_pred, [df_test['fi'].tolist()]))

    # Save the predictions of this model under its own file name
    name = name.split('/')[-1]
    with open(model_path + f'/resultados_traducciones_{name}_wayuu.pickle', 'wb') as file:
        pickle.dump(wayuu_pred, file)

  0%|          | 0/509 [00:00<?, ?it/s]



 ---------------------- Resultados /content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_3_2e-05.pickle

BLEU = 0.88 17.6/2.7/0.4/0.1 (BP = 0.777 ratio = 0.798 hyp_len = 89097 ref_len = 111620)
chrF2++ = 20.00


  0%|          | 0/509 [00:00<?, ?it/s]



 ---------------------- Resultados /content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_3_0.0002.pickle

BLEU = 5.02 28.8/9.7/3.8/1.6 (BP = 0.779 ratio = 0.800 hyp_len = 89349 ref_len = 111620)
chrF2++ = 28.89


  0%|          | 0/509 [00:00<?, ?it/s]



 ---------------------- Resultados /content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_5_2e-05.pickle

BLEU = 1.50 20.7/4.0/0.9/0.2 (BP = 0.746 ratio = 0.774 hyp_len = 86343 ref_len = 111620)
chrF2++ = 21.89


  0%|          | 0/509 [00:00<?, ?it/s]



 ---------------------- Resultados /content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_5_0.0002.pickle

BLEU = 6.26 29.6/10.8/4.6/2.1 (BP = 0.834 ratio = 0.846 hyp_len = 94486 ref_len = 111620)
chrF2++ = 30.61


  0%|          | 0/509 [00:00<?, ?it/s]



 ---------------------- Resultados /content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_10_2e-05.pickle

BLEU = 2.76 24.7/6.4/1.9/0.6 (BP = 0.750 ratio = 0.777 hyp_len = 86716 ref_len = 111620)
chrF2++ = 24.99


  0%|          | 0/509 [00:00<?, ?it/s]



 ---------------------- Resultados /content/drive/MyDrive/Datos traduccion wayuu - español/traduccion esp wayuu/metrica_despues_COMP_NC_10_0.0002.pickle

BLEU = 7.50 31.0/12.0/5.6/2.9 (BP = 0.851 ratio = 0.861 hyp_len = 96120 ref_len = 111620)
chrF2++ = 31.92
