# Set up

In [1]:
# Reviews CSV (updated)
%%capture
!wget https://www.dropbox.com/scl/fi/6u1yfcnnf4jqmhedx519u/Reviews.csv?rlkey=xqmvvohkq0i0k7hho79fs43b6&st=mexudbu2&dl=0
!mv Reviews.csv?rlkey=xqmvvohkq0i0k7hho79fs43b6 reviews.csv
# Metadata for each restaurant
!wget https://www.dropbox.com/scl/fi/cxckzuj81gsnlsvclqnza/metadata.json.gz?rlkey=d4xerrcwbeyt09oi01f9f4wru&st=sv6cnpzh&dl=0
!mv metadata.json.gz?rlkey=d4xerrcwbeyt09oi01f9f4wru metadata.json.gz
# LLaVa Image Descriptions
!wget https://www.dropbox.com/scl/fi/50pmwvytozpz0cl1p054f/tiny_LLaVa_images_descriptions.json.gz?rlkey=7vreygmtd16lohs3bx6yvmwdk&st=9568qz84&dl=0
!mv tiny_LLaVa_images_descriptions.json.gz?rlkey=7vreygmtd16lohs3bx6yvmwdk tiny_LLaVa_images_descriptions.json.gz

# Libraries

In [2]:
# HuggingFace requirements
!pip install transformers datasets evaluate accelerate

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m4

In [3]:
!pip install nltk



In [4]:
# HuggingFace
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch
# Data visualization and manipulation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
# Metadata
import gzip
import json

In [6]:
# Evaluation
import copy
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk import word_tokenize
from nltk.translate import meteor

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


## Data Preprocessing

In [7]:
df = pd.read_csv('reviews.csv')
print(f"Tamaño dataset: {df.shape}")

# Eliminamos datos nulos ...
df = df.dropna()
df.info()

Tamaño dataset: (8334, 9)
<class 'pandas.core.frame.DataFrame'>
Index: 8038 entries, 0 to 8333
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Unnamed: 0    8038 non-null   int64 
 1   user_id       8038 non-null   object
 2   gmap_id       8038 non-null   object
 3   rating        8038 non-null   int64 
 4   text          8038 non-null   object
 5   img_url       8038 non-null   object
 6   img_filename  8038 non-null   object
 7   state         8038 non-null   object
 8   rest_id       8038 non-null   int64 
dtypes: int64(3), object(6)
memory usage: 628.0+ KB


In [8]:
# Obtenemos el json de la metadata
def parse(path):
  g = gzip.open(path, 'r')
  for l in g:
    yield json.loads(l)

metadata = list(parse('metadata.json.gz'))
metadata = metadata[0]
# Obtenemos json de descripciones de imagenes
descriptions = list(parse('tiny_LLaVa_images_descriptions.json.gz'))
descriptions = descriptions[0]

In [9]:
# indice metadata
new_metadata = []
for value in metadata.values():
    new_metadata.extend(value)
idx2metadata = {}
for data in new_metadata:
    gmap_id = data['gmap_id']
    del data['gmap_id']
    idx2metadata[gmap_id] = data
# indice descripcion
idx2description = {}
for key, data in descriptions.items():
    key = key.split("/")[-1].split(".")[0]
    idx2description[key] = data

## Review Generation


#### Flan T5

In [10]:
def generate_review(user_id, item_id, image, previous_reviews, model, tokenizer, print_prompt):
    """
    Genera una review a partir de uid, itid, y una concatenación de previous reviews. Retorna la review generada.
    """
    service_options = "N/A"
    if 'MISC' in idx2metadata[item_id] and 'Service options' in idx2metadata[item_id]['MISC']:
        service_options = ", ".join(idx2metadata[item_id]['MISC']['Service options'])

    prompt = f"""Provide a detailed and unique recommendation for the following restaurant. Use the provided details and avoid repeating information unnecessarily. Highlight the restaurant's features, customer experience, and what makes it stand out. Tailor the recommendation based on the context and information available.

### Restaurant Details ###
Name: {idx2metadata[item_id]['name']}
Average Rating: {idx2metadata[item_id]['avg_rating']}
Description: {idx2metadata[item_id]['description']}
Categories: {", ".join(idx2metadata[item_id]['category'])}
Service Options: {service_options}
Image Description: {image}

### Previous Reviews ###
{previous_reviews if previous_reviews else 'No reviews available'}

### Guideline for Writing the Recommendation ###
- Highlight the unique features of the restaurant (e.g., menu items, ambiance, service quality).
- Tailor the response based on available service options (e.g., delivery, takeaway, or dine-in).
- If relevant, mention scenarios or audiences for whom the restaurant is ideal (e.g., families, couples, groups).
- Avoid using the exact same phrasing repeatedly, and do not copy this example directly.

Now, write a tailored recommendation based on the above information, keep it concise."""
    if (print_prompt):
      print("Prompt:\n----------------------\n" + prompt + "\n----------------------\n")
    inputs = tokenizer(prompt, return_tensors="pt").input_ids
    outputs = model.generate(
        inputs,
        max_new_tokens=250,
        no_repeat_ngram_size=3,
        temperature=0.7,
        top_k=50,
        top_p=0.9
        do_sample=True
    )
    generated_review = tokenizer.decode(outputs[0], skip_special_tokens=True)
    #print("Tokenized Input:", tokenizer.decode(inputs[0]))
    #print("Model Raw Output:", tokenizer.decode(outputs[0]))
    return generated_review.strip()

def generate_review_for_user_item_pair(user_id, item_id, df, model, tokenizer, print_prompt):
    """
    Recolecta hasta 3 reviews previas del usuario y genera una review a partir de uid y itid. Retorna la review generada.
    """
    # Par usuario-restaurante específico
    row = df[(df['user_id'] == user_id) & (df['gmap_id'] == item_id)]
    row = row.iloc[0]

    rating = row.get('rating')
    if rating is None or np.isnan(rating):  # Si rating no existe, utilizamos promedio
        rating = idx2metadata[item_id]['avg_rating']
    image_description = idx2description[item_id]
    # Obtenemos reviews pasadas del restaurante con ratings similares
    restaurant_reviews = df[(df['gmap_id'] == item_id) & (df['rating'].isin([rating - 1, rating, rating + 1]))].head(3)
    # Eliminamos la review del usuario objetivo
    restaurant_reviews = restaurant_reviews[restaurant_reviews['user_id'] != user_id]
    previous_reviews = "\n* ".join(restaurant_reviews['text'])
    context = previous_reviews

    return generate_review(user_id, item_id, image_description, context, model, tokenizer, print_prompt)


In [11]:
# Cargamos modelo flat T5 de huggingface
model_name = "google/flan-t5-large"
model_flan_t5 = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer_flan_t5 = T5Tokenizer.from_pretrained(model_name)
#tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


### Generar Explicación Con Flan-T5

### Elegir par usuario, restaurante

In [30]:
# Obtenemos row aleatoriamente
random_row = df.sample(1) # Para elegir usuario manualmente cambiar esta linea
user_id = random_row['user_id'].values[0]
item_id = random_row['gmap_id'].values[0]

In [31]:
print(idx2metadata[item_id]) # Imprimimos metadata del restaurante para saber que estamos recomendando

{'name': "Chris's Coffee & Custard", 'address': "Chris's Coffee & Custard, 1824 9th St SE Suite B, Roanoke, VA 24013", 'description': None, 'latitude': 37.2553194, 'longitude': -79.9242714, 'category': ['Coffee shop', 'Business to business service', 'Cafe', 'Ice cream shop', 'Restaurant'], 'avg_rating': 4.9, 'num_of_reviews': 48, 'price': None, 'hours': [['Saturday', '9AM–9PM'], ['Sunday', 'Closed'], ['Monday', 'Closed'], ['Tuesday', '7:30AM–8:30PM'], ['Wednesday', '7:30AM–8:30PM'], ['Thursday', '7:30AM–8:30PM'], ['Friday', '7:30AM–8:30PM']], 'MISC': {'Service options': ['Takeout', 'Dine-in', 'Delivery'], 'Highlights': ['Fast service'], 'Popular for': ['Solo dining'], 'Accessibility': ['Wheelchair accessible entrance'], 'Offerings': ['Coffee', 'Quick bite'], 'Dining options': ['Dessert'], 'Amenities': ['Good for kids'], 'Atmosphere': ['Casual'], 'Payments': ['Credit cards']}, 'state': 'Open ⋅ Closes 9PM', 'relative_results': None, 'url': 'https://www.google.com/maps/place//data=!4m2!3m

In [32]:
# Generamos Explicación
generated_review = generate_review_for_user_item_pair(user_id, item_id, df, model_flan_t5, tokenizer_flan_t5, True)
print(f"Explicación generada: \n{generated_review}")
print(f"\n\nExplicación real: \n{df[(df['user_id'] == user_id) & (df['gmap_id'] == item_id)]['text'].values[0]}")

Prompt:
----------------------
Provide a detailed and unique recommendation for the following restaurant. Use the provided details and avoid repeating information unnecessarily. Highlight the restaurant's features, customer experience, and what makes it stand out. Tailor the recommendation based on the context and information available.

### Restaurant Details ###
Name: Chris's Coffee & Custard
Average Rating: 4.9
Description: None
Categories: Coffee shop, Business to business service, Cafe, Ice cream shop, Restaurant
Service Options: Takeout, Dine-in, Delivery
Image Description: The image features a close-up view of a partially eaten cookie, showcasing the delicious treat's crumbs and filling. The cookie is placed on a plate, which is positioned on a counter. The scene suggests that someone has enjoyed the cookie, and the plate is now empty.

### Previous Reviews ###
No reviews available

### Guideline for Writing the Recommendation ###
- Highlight the unique features of the restauran



1.   Raffel, C., Shazeer, N., Roberts, A., Lee, K., Narang, S., Matena, M., ... & Liu, P. J. (2020). Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res., 21(140), 1-67.
https://huggingface.co/google-t5/t5-base#uses




## HuggingFace SMOL

In [25]:
def generate_review_hugging_face(user_id, item_id, image, previous_reviews, model, tokenizer, print_prompt):
    """
    Genera una review a partir de uid, itid, y una concatenación de previous reviews. Retorna la review generada.
    """
    service_options = ""
    if 'MISC' in idx2metadata[item_id] and 'Service options' in idx2metadata[item_id]['MISC']:
        service_options = ", ".join(idx2metadata[item_id]['MISC']['Service options'])

    prompt = f"""Provide a detailed and unique recommendation for the following restaurant. Use the provided details and avoid repeating information unnecessarily. Highlight the restaurant's features, customer experience, and what makes it stand out. Tailor the recommendation based on the context and information available.

### Restaurant Details ###
Name: {idx2metadata[item_id]['name']}
Average Rating: {idx2metadata[item_id]['avg_rating']}
Description: {idx2metadata[item_id]['description']}
Categories: {", ".join(idx2metadata[item_id]['category'])}
Service Options: {service_options}
Image Description: {image}

### Previous Reviews ###
{previous_reviews if previous_reviews else 'No reviews available'}

### Guideline for Writing the Recommendation ###
- Highlight the unique features of the restaurant (e.g., menu items, ambiance, service quality).
- Tailor the response based on available service options (e.g., delivery, takeaway, or dine-in).
- If relevant, mention scenarios or audiences for whom the restaurant is ideal (e.g., families, couples, groups).
- Avoid using the exact same phrasing repeatedly, and do not copy this example directly.

Now, write a tailored recommendation based on the above information, keep it concise."""
    if (print_prompt):
      print("Prompt:\n----------------------\n" + prompt + "\n----------------------\n")
    messages = [{"role": "user", "content": prompt}]
    input_text= hugging_face_tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = hugging_face_tokenizer.encode(input_text, return_tensors="pt").to(device)
    outputs = hugging_face_model.generate(inputs, max_new_tokens=250, temperature=0.2, top_p=0.9, do_sample=True)
    result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
    return result.strip()



def generate_review_for_user_item_pair_hugging_face(user_id, item_id, df, model, tokenizer, print_prompt):
    """
    Recolecta hasta 3 reviews previas del usuario y genera una review a partir de uid y itid. Retorna la review generada.
    """
    # Par usuario-restaurante específico
    row = df[(df['user_id'] == user_id) & (df['gmap_id'] == item_id)]
    row = row.iloc[0]

    rating = row.get('rating')
    if rating is None or np.isnan(rating):  # Si rating no existe, utilizamos promedio
        rating = idx2metadata[item_id]['avg_rating']
    image_url = row['img_url']
    image_description = descriptions.get(image_url, "NA")
    # Obtenemos reviews pasadas del usuario con ratings similares
    user_reviews = df[(df['user_id'] == user_id) & (df['rating'].isin([rating - 1, rating, rating + 1]))].head(3)
    # Eliminamos la review del restaurant objetivo
    user_reviews = user_reviews[user_reviews['gmap_id'] != item_id]
    previous_reviews = "\n* ".join(user_reviews['text'])
    context = previous_reviews

    return generate_review_hugging_face(user_id, item_id, image_description, context, model, tokenizer, print_prompt)


In [26]:
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "HuggingFaceTB/SmolLM2-1.7B-Instruct"

device = "cpu" # for GPU usage or "cpu" for CPU usage
hugging_face_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
hugging_face_model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)


tokenizer_config.json:   0%|          | 0.00/3.76k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/801k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.10M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/655 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/792 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.42G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

### Generar Explicación con SmolLM

### Definir par usuario, restaurante

In [34]:
# Obtenemos row aleatoriamente
random_row = df.sample(1) # Para elegir usuario manualmente cambiar esta linea
user_id = random_row['user_id'].values[0]
item_id = random_row['gmap_id'].values[0]

In [35]:
print(idx2metadata[item_id]) # Imprimimos metadata del restaurante para saber que estamos recomendando

{'name': 'Noodles and Company', 'address': 'Noodles and Company, 1601 Willow Lawn Dr, Richmond, VA 23230, United States', 'description': "Counter-serve chain offering international noodle & pasta dishes from mac 'n' cheese to pad Thai.", 'latitude': 37.583113999999995, 'longitude': -77.4963361, 'category': ['Restaurant', 'Down home cooking restaurant', 'Takeout restaurant', 'Pasta shop', 'Salad shop', 'Soup restaurant'], 'avg_rating': 4.1, 'num_of_reviews': 395, 'price': '₩', 'hours': [['Monday', '11AM–9PM'], ['Tuesday', '11AM–9PM'], ['Wednesday', '11AM–9PM'], ['Thursday', '11AM–10PM'], ['Friday', '11AM–10PM'], ['Saturday', '11AM–10PM'], ['Sunday', '11AM–9PM']], 'MISC': {'Service options': ['Curbside pickup', 'No-contact delivery', 'Delivery', 'Takeaway', 'Dine-in'], 'Health and safety': ['Mask required'], 'Popular for': ['Lunch', 'Dinner', 'Solo dining'], 'Accessibility': ['Wheelchair-accessible toilet'], 'Offerings': ['Cocktails', 'Comfort food', 'Halal food', 'Happy-hour drinks', 'H

In [36]:
# Generamos review
generated_review = generate_review_for_user_item_pair_hugging_face(user_id, item_id, df, hugging_face_model, hugging_face_tokenizer, True)
print(f"Explicación generada: \n{generated_review}")
print(f"\n\nExplicación real: \n{df[(df['user_id'] == user_id) & (df['gmap_id'] == item_id)]['text'].values[0]}")

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Prompt:
----------------------
Provide a detailed and unique recommendation for the following restaurant. Use the provided details and avoid repeating information unnecessarily. Highlight the restaurant's features, customer experience, and what makes it stand out. Tailor the recommendation based on the context and information available.

### Restaurant Details ###
Name: Noodles and Company
Average Rating: 4.1
Description: Counter-serve chain offering international noodle & pasta dishes from mac 'n' cheese to pad Thai.
Categories: Restaurant, Down home cooking restaurant, Takeout restaurant, Pasta shop, Salad shop, Soup restaurant
Service Options: Curbside pickup, No-contact delivery, Delivery, Takeaway, Dine-in
Image Description: NA

### Previous Reviews ###
No reviews available

### Guideline for Writing the Recommendation ###
- Highlight the unique features of the restaurant (e.g., menu items, ambiance, service quality).
- Tailor the response based on available service options (e.g.,

1. Allal, L. B., Lozhkov, A., Bakouch, E., Blázquez, G. M., Tunstall, L., Piqueres, A., Marafioti, A., Zakka, C., von Werra, L., & Wolf, T. (2024). SmolLM2 - with great data, comes great performance.


# Evaluation


In [33]:
# https://www.digitalocean.com/community/tutorials/automated-metrics-for-evaluating-generated-text
def calculate_bleu(candidate, reference):
    '''
    candidate, reference: generated and ground-truth sentences
    '''
    reference = word_tokenize(reference)
    candidate = word_tokenize(candidate)
    smoothing = SmoothingFunction().method1
    score = sentence_bleu([reference], candidate, smoothing_function=smoothing)
    return round(score, 4)

def calculate_meteor(candidate, reference):
  '''
  candidate, reference: tokenized list of words in the sentence
  '''
  reference = word_tokenize(reference)
  candidate = word_tokenize(candidate)
  meteor_score = round(meteor([candidate],reference), 4)
  return meteor_score

### Evaluation of single case

In [37]:
# Buscamos review del user a ese item
user_reviews = df[(df['user_id'] == user_id) & (df['gmap_id'] == item_id)]
print(f"BLEU: {calculate_bleu(generated_review, user_reviews['text'].values[0])}")
print(f"METEOR: {calculate_meteor(generated_review, user_reviews['text'].values[0])}")

BLEU: 0.0059
METEOR: 0.1357


### Flan

In [38]:
# Generamos reviews para 20 pares de user id e item id de manera aleatoria y evaluamos
num_pairs = 20
bleu_scores = []
meteor_scores = []
for i in range(num_pairs):
    print(f"Generando {i}")
    random_row = df.sample(1)
    user_id = random_row['user_id'].values[0]
    item_id = random_row['gmap_id'].values[0]
    generated_review = generate_review_for_user_item_pair(user_id, item_id, df, model_flan_t5, tokenizer_flan_t5, False)
    user_reviews = df[(df['user_id'] == user_id) & (df['gmap_id'] == item_id)]
    bleu_scores.append(calculate_bleu(generated_review, user_reviews['text'].values[0]))
    meteor_scores.append(calculate_meteor(generated_review, user_reviews['text'].values[0]))
    if i % 10 == 0:
      print(f"BLEU promedio: {np.mean(bleu_scores)}")
      print(f"METEOR promedio: {np.mean(meteor_scores)}")
print(f"BLEU promedio FINAL: {np.mean(bleu_scores)}")
print(f"METEOR promedio FINAL: {np.mean(meteor_scores)}")


Generando 0
BLEU promedio: 0.0114
METEOR promedio: 0.0704
Generando 1
Generando 2
Generando 3
Generando 4
Generando 5
Generando 6
Generando 7
Generando 8
Generando 9
Generando 10
BLEU promedio: 0.00787272727272727
METEOR promedio: 0.16823636363636363
Generando 11
Generando 12
Generando 13
Generando 14
Generando 15
Generando 16
Generando 17
Generando 18
Generando 19
BLEU promedio FINAL: 0.0068000000000000005
METEOR promedio FINAL: 0.152145


### SmolLM

In [39]:
# Generamos reviews para 20 pares de user id e item id de manera aleatoria y evaluamos
num_pairs = 20
bleu_scores = []
meteor_scores = []
for i in range(num_pairs):
    print(f"Generando {i}")
    random_row = df.sample(1)
    user_id = random_row['user_id'].values[0]
    item_id = random_row['gmap_id'].values[0]
    generated_review = generate_review_for_user_item_pair_hugging_face(user_id, item_id, df, hugging_face_model, hugging_face_tokenizer, False)
    user_reviews = df[(df['user_id'] == user_id) & (df['gmap_id'] == item_id)]
    bleu_scores.append(calculate_bleu(generated_review, user_reviews['text'].values[0]))
    meteor_scores.append(calculate_meteor(generated_review, user_reviews['text'].values[0]))
    if i % 10 == 0:
      print(f"BLEU promedio: {np.mean(bleu_scores)}")
      print(f"METEOR promedio: {np.mean(meteor_scores)}")
print(f"BLEU promedio FINAL: {np.mean(bleu_scores)}")
print(f"METEOR promedio FINAL: {np.mean(meteor_scores)}")


Generando 0
BLEU promedio: 0.0037
METEOR promedio: 0.0332
Generando 1
Generando 2
Generando 3
Generando 4
Generando 5
Generando 6
Generando 7
Generando 8
Generando 9
Generando 10
BLEU promedio: 0.0048090909090909096
METEOR promedio: 0.07007272727272727
Generando 11
Generando 12
Generando 13
Generando 14
Generando 15
Generando 16
Generando 17
Generando 18
Generando 19
BLEU promedio FINAL: 0.004685
METEOR promedio FINAL: 0.07603499999999999
