Este notebook asume que se está ejecutando en Google Colab, y que el dataset `LLM-Redial` disponible en https://drive.google.com/drive/folders/1TIP4PFm9z0C4R4--KnHoWuiB1uK-dv5m se encuentra descargado en el drive del usuario.

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import shutil
import os

source_path = '/content/gdrive/MyDrive/Proyecto LLMonkeys/Tools.py'
destination_path = '/content/Tools.py'

shutil.copy(source_path, destination_path)

'/content/Tools.py'

In [3]:
import zipfile

zip_path = '/content/gdrive/MyDrive/Proyecto LLMonkeys/LLM_Redial.zip'
extract_path = '/content/LLM_Redial'

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

In [4]:
from transformers import AutoTokenizer

model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [5]:
import Tools as t

# Cargar datos del Dataset

path = "./LLM_Redial/Movie"

final_data_path = '{}/final_data.jsonl'.format(path)
Conversation_path = '{}/Conversation.txt'.format(path)
user_map_path = '{}/user_ids.json'.format(path)
item_map_path = '{}/item_map.json'.format(path)

final_data = t.read_jsonl(final_data_path)
user_map = t.read_json(user_map_path)
item_map = t.read_json(item_map_path)
Conversation = t.read_dialogue(Conversation_path)

**Separación de la información de los usuarios en train y test**


In [6]:
import json
import random

path = './LLM_Redial/Movie/final_data.jsonl'

# Cada entrada de final_data.jsonl se ve así
#{
#  "A30Q8X8B1S3GGT": {
#    "history_interaction": [...],
#    "user_might_like": [...],
#    "Conversation": [...]
#  }
#}


with open(path, 'r', encoding='utf-8') as file:
    data = [json.loads(line) for line in file]

# Dividimos los dialogos en 80% & 20% para training y testing.
n_conversations = 10089 # *Por qué 10089? son casi 30k
train_len = n_conversations * 0.8
test_len = n_conversations * 0.2

train_conv = []
test_conv = []
used_users = []

aux = []

# Integramos el 20% de usuarios a Test
while len(aux) < test_len:
    try:
        convs = []
        # "La semilla random.seed(42) se reinicia dentro del bucle, por lo que no tiene efecto real. Debería estar fuera del bucle para resultados reproducibles."
        random.seed(42)

        # Usando used_users aseguramos que un mismo usuario no aparezca en Train y Test al mismo tiempo
        user_id = random.choice(list(set(user_map.keys())^set(used_users)))
        used_users.append(user_id)

        # Guardamos la info del usuario y sus dialogos
        user_data = next((item[user_id] for item in data if user_id in item), None)
        user_conversations = user_data.get("Conversation", [])

        for i in range(len(user_conversations)):
            selected_conversation = user_conversations[i]
            conversation_details = list(selected_conversation.values())[0]
            conversation_id = conversation_details["conversation_id"]
            if conversation_id != 10088:
                conversation = Conversation[Conversation.index(f"{conversation_id}\n"):Conversation.index(f"{conversation_id+1}\n")]
            else:
                conversation = Conversation[Conversation.index(f"{conversation_id}\n"):]

            convs.append(conversation)
            aux.append(conversation)
        test_conv.append([user_id,convs])
    except ValueError:
        print("ValueError: ", user_id, f"{conversation_id}\n")

# Integramos el resto de los usuarios y su info a Training
for user_id in list(set(user_map.keys()) ^ set(used_users)):
    try:
        convs = []
        user_data = next((item[user_id] for item in data if user_id in item), None)
        user_conversations = user_data.get("Conversation", [])
        for i in range(len(user_conversations)):
            selected_conversation = user_conversations[i]
            conversation_details = list(selected_conversation.values())[0]
            conversation_id = conversation_details["conversation_id"]
            if conversation_id != 10088:
                conversation = Conversation[Conversation.index(f"{conversation_id}\n"):Conversation.index(f"{conversation_id+1}\n")]
            else:
                conversation = Conversation[Conversation.index(f"{conversation_id}\n"):]

            convs.append(conversation)
        train_conv.append([user_id,convs])
    except ValueError:
        print("ValueError: ", user_id, f"{conversation_id}\n")

print("Número de usuarios en conjunto de TEST:", len(test_conv))
print("Número de usuarios en conjunto de TRAIN:", len(train_conv))
print("Total de usuarios en test + train:", len(test_conv) + len(train_conv))
print("Total de conversaciones originales esperadas:", n_conversations)

Número de usuarios en conjunto de TEST: 605
Número de usuarios en conjunto de TRAIN: 2526
Total de usuarios en test + train: 3131
Total de conversaciones originales esperadas: 10089


In [7]:
# Darle formato de lista a las respuestas
def format_ans(ans,n,i):
    try:
        idx = [ans.index(f"{i}. ") for i in range(1, n+1)]
        idx_c = idx[n-1] + ans[idx[n-1]:].index(chr(34))
        idx_end = idx_c + ans[idx_c+1:].index(chr(34))

        answer_list = [ans[idx[i]+3:idx[i+1]-1] for i in range(n-1)]
        answer_list.append(ans[idx[n-1]+3:idx_end+2])
        answer_str = ", ".join(answer_list)
        answer = f"{i}. {answer_str}\n"
        i+=1

    except Exception as e:
        answer = e

    finally:
        return answer, i

# Output ex. 1. The Matrix, Inception, Interstellar, Arrival

### Selección de diálogo a utilizar

In [8]:
num_test_items = 10

In [9]:
# Generamos ejemplos de conversaciones al azar

rand_conversations = []

for n in range(num_test_items):
  random.seed(n)
  rand_user = random.choice(test_conv)
  user_id = rand_user[0]
  user_conversation  = random.choice(rand_user[1])

  user_data = next((item[user_id] for item in data if user_id in item), None)
  convs = user_data.get("Conversation", [])

  for i in range(len(rand_user[1])):
    if user_conversation == rand_user[1][i]:
      rand_user_conv_id = i

  dialog = "\n\n".join(user_conversation.split("\n\n")[1:4])
  dialog_id = user_conversation.split("\n\n")[0]
  dialog_ground_truth = list(convs[rand_user_conv_id].values())[0]["rec_item"]

  rand_conversations.append([
      rand_user_conv_id, # 0: Índice de conversación
      dialog, # 1: Texto del diálogo parcial
      user_data, # 2: Info estructurada del usuario
      dialog_id, # 3: ID de conversación
      dialog_ground_truth # 4: Item recomendado como verdad
      ])

print(rand_conversations[0][4]) # dialog_ground_truth

['B00AAA62TI']


In [10]:
# Appendeamos a rand_conversations las interacciones del usuario para tenerlas a mano

for i in range(num_test_items):
  user_data = rand_conversations[i][2]
  rand_user_interactions = user_data.get("history_interactions", [])

  rand_user_interactions = [item_map[m] for m in rand_user_interactions]
  rand_conversations[i].append(rand_user_interactions)

In [11]:
# Preparamos ejemplos representativos aleatorios para entrenar

few_shot_data = []
random.seed(42)
train_users = random.sample(train_conv, 5)

for u in train_users:
  user_data = next((item[u[0]] for item in data if u[0] in item), None)
  user_interactions = user_data.get("history_interaction", [])
  user_interactions = [item_map[m] for m in user_interactions]

  conversation = min(u[1], key=len)
  conversation[conversation.index("User:"):-2]
  convs = user_data.get("Conversation", [])
  for c in convs:
    user_likes = list(c.values())[0]["user_likes"]
    user_dislikes = list(c.values())[0]["user_dislikes"]
    recs= list(c.values())[0]["rec_item"]

    user_likes = [item_map[m] for m in user_likes]
    user_dislikes = [item_map[m] for m in user_dislikes]
    recs = [item_map[m] for m in recs]

  # * Por qué comentaron lo final? Para entrenar puede ser mucho mas util ejemplos de que le gustan a los usuarios y que no, no?
  few_shot_data.append({
      # "user_interactions": user_interactions,
      "conversation": conversation,
      # "user_likes": user_likes,
      # "user_dislikes": user_dislikes,
      # "recs": recs
  })

  # * Finalmente, por qué usamos solamente las conversaciones como información relevante para few shot y no al reves con la lista de
  # rand conversation?

## Distil

### Zero-Shot sin interacciones históricas:

In [12]:
import torch

print("GPU disponible:", torch.cuda.is_available())

GPU disponible: True


In [13]:
from transformers import pipeline

outputs_z_n = []

for n in range(num_test_items):
  k = 12
  outputs = []

  pipe = pipeline(
      "text-generation",
      model=model_name,
      device_map="auto",
      torch_dtype=torch.float16,
  )

  ctx = (
      "YOU ARE A MOVIE RECOMMENDATION SYSTEM."
      "GENERATE A NUMBERED LIST OF 10 MOVIES. \n"
      "RULES: \n"
      "1. DO NOT write dialogs, explanations nor additional text or information. \n"
      "2. DO NOT repeat movies mentioned in the conversation. \n"
      "3. Response format: \n"
      "1. \"Movie name 1\" \n"
      "2. \"Movie name 2\" \n"
      "... \n"
      "10. \"Movie name 10\" \n"
      "Use ONLY this format and NOTHING else."
  )

  msg = (
      "Based on the following conversation: \n"
      f"{rand_conversations[n][1]} \n\n" # * Es zero shot, pero aun asi le entregamos dialogos de ejemplo?
      "Generate a list of 10 recommended movies (JUST NAMES, ONE PER LINE):"
  )

  messages = [{"role": "user", "content": ctx + msg}]

  # prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
  # Error: ValueError: Cannot use chat template functions because tokenizer.chat_template is not set
  # pipe.tokenizer... solo funciona con modelos tipo mistral, gpt2-chat, llama, etc.
  # distilgpt2 no es un modelo conversacional, no tiene plantilla de chat cargada

  prompt = ctx + msg
  inputs = pipe.tokenizer(prompt, return_tensors="pt").to("cuda")

  with torch.no_grad():
      for i in range(k):
          output_ids = pipe.model.generate(
              **inputs,
              max_new_tokens=200,
              do_sample=True,
              temperature=0.4,
              top_k=50,
              top_p=0.95
          )
          decoded = pipe.tokenizer.decode(output_ids[0], skip_special_tokens=True)
          outputs.append(decoded)
  outputs_z_n.append(outputs)

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_t

### Zero-Shot con interacciones históricas:

In [15]:
from transformers import pipeline

k = 20
outputs_z_s = []

pipe = pipeline(
    "text-generation",
    model=model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

ctx = (
    "YOU ARE A MOVIE RECOMMENDATION SYSTEM."
    "GENERATE A NUMBERED LIST OF 10 MOVIES. \n"
    "RULES: \n"
    "1. DO NOT write dialogs, explanations nor additional text or information. \n"
    "2. DO NOT repeat movies mentioned in the conversation. \n"
    "3. Response format: \n"
    "1. \"Movie name 1\" \n"
    "2. \"Movie name 2\" \n"
    "... \n"
    "10. \"Movie name 10\" \n"
    "Use ONLY this format and NOTHING else."
)

msg = (
    "Based on the following conversation: \n"
    f"{dialog} \n\n"
    "And the movies the user has previously interaced with: \n"
    f"{rand_user_interactions}\n\n"
    "Generate a list of 10 recommended movies (JUST NAMES, ONE PER LINE):"
)

messages = [{"role": "user", "content": ctx + msg}]

prompt = ctx + msg
inputs = pipe.tokenizer(prompt, return_tensors="pt").to("cuda")

with torch.no_grad():
    for i in range(k):
        output_ids = pipe.model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95
        )
        decoded = pipe.tokenizer.decode(output_ids[0], skip_special_tokens=True)
        outputs_z_s.append(decoded)

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id

### Few-Shot sin interacciones históricas

In [18]:
from transformers import pipeline
k = 20
outputs_f_n = []
# Pipeline Initialization
pipe = pipeline(
    "text-generation",
    model=model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

ctx = (
    "YOU ARE A MOVIE RECOMMENDATION SYSTEM."
    "GENERATE A NUMBERED LIST OF 10 MOVIES. \n"
    "RULES: \n"
    "1. DO NOT write dialogs, explanations nor additional text or information. \n"
    "2. DO NOT repeat movies mentioned in the conversation. \n"
    "3. Response format: \n"
    "1. \"Movie name 1\" \n"
    "2. \"Movie name 2\" \n"
    "... \n"
    "10. \"Movie name 10\" \n"
    "Use ONLY this format and NOTHING else."
)

msg = (
    "Based on these 4 examples: \n"
    f"{few_shot_data[0]}\n"
    f"{few_shot_data[1]}\n"
    f"{few_shot_data[2]}\n"
    f"{few_shot_data[3]}\n\n"
    "And based on the following conversation: \n"
    f"{dialog} \n\n"
    "Generate a list of 10 recommended movies (JUST NAMES, NO EXTRA INFO, ONE PER LINE):"
)

prompt = ctx + msg
# * prompt muy largo, tiene 1024 tokens como maximo distil
inputs = pipe.tokenizer(prompt, return_tensors="pt").to("cuda")

with torch.no_grad():
    for i in range(k):
        output_ids = pipe.model.generate(
            **inputs,
            max_new_tokens=180,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95
        )
        decoded = pipe.tokenizer.decode(output_ids[0], skip_special_tokens=True)
        outputs_f_n.append(decoded)

Device set to use cpu
Token indices sequence length is longer than the specified maximum sequence length for this model (1510 > 1024). Running this sequence through the model will result in indexing errors


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


### Few-Shot con interacción histórica

In [None]:
from transformers import pipeline

k = 20
outputs_f_s = []

pipe = pipeline(
    "text-generation",
    model=model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

ctx = (
    "YOU ARE A MOVIE RECOMMENDATION SYSTEM."
    "GENERATE A NUMBERED LIST OF 10 MOVIES. \n"
    "RULES: \n"
    "1. DO NOT write dialogs, explanations nor additional text or information. \n"
    "2. DO NOT repeat movies mentioned in the conversation. \n"
    "3. Response format: \n"
    "1. \"Movie name 1\" \n"
    "2. \"Movie name 2\" \n"
    "... \n"
    "10. \"Movie name 10\" \n"
    "Use ONLY this format and NOTHING else."
)

msg = (
    "Based on these 4 examples: \n"
    f"{few_shot_data[0]}\n"
    f"{few_shot_data[1]}\n"
    f"{few_shot_data[2]}\n"
    f"{few_shot_data[3]}\n\n"
    "And based on the following conversation: \n"
    f"{dialog} \n\n"
    "And the movies the user has previously interaced with: \n"
    f"{rand_user_interactions[:5]}\n\n"
    "Generate a list of 10 recommended movies (JUST NAMES, NO EXTRA INFO, ONE PER LINE):"
)

prompt = ctx + msg
inputs = pipe.tokenizer(prompt, return_tensors="pt").to("cuda")

with torch.no_grad():
    for i in range(k):
        output_ids = pipe.model.generate(
            **inputs,
            max_new_tokens=180,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95
        )
        decoded = pipe.tokenizer.decode(output_ids[0], skip_special_tokens=True)
        outputs_f_s.append(decoded)

Device set to use cuda:0


## Evaluación

In [None]:
user_conversation_id = int(user_conversation[:user_conversation.index("User:")-2])
user_data = next((item[rand_user[0]] for item in data if rand_user[0] in item), None)
convs = user_data.get("Conversation", [])
ground_truth = [item_map[m] for m in list(convs[rand_user_conv_id].values())[0]["rec_item"]]


print(user_conversation_id)
print(ground_truth)

7295
['My All American']


In [None]:
for c in rand_conversations:
  print(c[4])
  ground_truth = [item_map[m] for m in c[4]]
  print(ground_truth)

['B001SO6KNS']
[' NEW Ip Man - Ip Man (blu-ray) (Blu-ray)']
['B008220CQU']
['End of Watch']
['B001FB55M6']
['Yes Man']
['B00HE1EEQ4']
['The Wind Rises']
['6304286805']
['Sunday in New York VHS']
['B00000F2LK']
["Breakin' [VHS]"]
['6304946805']
['B-24 Trilogy: The Victory Bombers VHS']
['6303038816']
['Kansas City Confidential VHS']
['B00005JPTK']
['I Am Legend']
['B000GYI3PY']
['Mrs. Palfrey at the Claremont']


In [None]:
import numpy as np
import re
from sklearn.metrics import ndcg_score

# Funciones generadas por DeepSeek
def recall_at_k(generated_recommendations, ground_truth, k=10):
    # Tomar las primeras K recomendaciones generadas
    top_k = generated_recommendations[:k]

    # Convertir a conjuntos para comparación
    generated_set = set(top_k)
    true_set = set(ground_truth)

    # Calcular intersección (recomendaciones correctas en top K)
    hits = len(generated_set.intersection(true_set))

    # Evitar división por cero
    return hits / len(true_set) if len(true_set) > 0 else 0.0

def ndcg_at_k(generated_recommendations, ground_truth, k=10):
    # Crear una lista binaria de relevancia (1 si está en ground truth, 0 si no)
    relevance = [1 if item in ground_truth else 0 for item in generated_recommendations[:k]]

    # Crear el "ideal ranking" (todas las relevantes primero)
    ideal_relevance = sorted(relevance, reverse=True)

    # Calcular NDCG
    return ndcg_score([relevance], [ideal_relevance])

pattern = r"\d+\.\s(.*?)\s\(\d{4}\)"

### Recall@5 y NDCG@5:

In [None]:
# Zero-Shot
z_n_rec_lists = []
i=1
for outputs in outputs_z_n:
  rec_lists = []
  for out in outputs:
      ans, i = format_ans(out[out.index("<|assistant|>"):],10,i)
      # print(ans)
      listed = out[out.index("<|assistant|>")+13:].replace("\n",", ")[2:]
      print_list = True
      nums = [f"{i}." for i in range(1,11)]
      for num in nums:
        if num not in listed:
          print_list = False
      if print_list:
        good_list = re.findall(pattern, listed)
        rec_lists.append(good_list)

  rec_lists = [e[:10] for e in rec_lists if len(e) >= 10]
  z_n_rec_lists.append(rec_lists)

In [None]:
z_s_rec_lists = []
i=1
for outputs in outputs_z_s:
  rec_lists = []
  for out in outputs:
      ans, i = format_ans(out[out.index("<|assistant|>"):],10,i)
      # print(ans)
      listed = out[out.index("<|assistant|>")+13:].replace("\n",", ")[2:]
      print_list = True
      nums = [f"{i}." for i in range(1,11)]
      for num in nums:
        if num not in listed:
          print_list = False
      if print_list:
        good_list = re.findall(pattern, listed)
        rec_lists.append(good_list)

  rec_lists = [e[:10] for e in rec_lists if len(e) >= 10]
  z_s_rec_lists.append(rec_lists)

In [None]:
recall_zn_1_10 = 0
ndcg_zn_1_10 = 0
best_recall_zn_10 = 0.0
best_ndcg_zn_10 = 0.0

for i in range(num_test_items):
  # Zero-shot sin sampling
  recall_zn_1_10 += recall_at_k(z_n_rec_lists[i][0], rand_conversations[i][4], k=10)
  ndcg_zn_1_10 += ndcg_at_k(z_n_rec_lists[i][0], rand_conversations[i][4], k=10)

  # Zero-shot con sampling
  best_r = 0.0
  best_n = 0.0

  for l in z_n_rec_lists[i]:
    recall = recall_at_k(l, rand_conversations[i][4], k=10)
    ndcg = ndcg_at_k(l, rand_conversations[i][4], k=10)
    if recall > best_r:
      best_r = recall
    if ndcg > best_n:
      best_n = ndcg

  best_recall_zn_10 += best_r
  best_ndcg_zn_10 += best_n
print(recall_zn_1_10/num_test_items, ndcg_zn_1_10/num_test_items)
print(best_recall_zn_10/num_test_items, best_ndcg_zn_10/num_test_items)

0.0 0.0
0.0 0.0


In [None]:
recall_zs_1_10 = 0
ndcg_zs_1_10 = 0
best_recall_zs_10 = 0.0
best_ndcg_zs_10 = 0.0

for i in range(num_test_items):
  # Zero-shot sin sampling
  recall_zs_1_10 += recall_at_k(z_s_rec_lists[i][0], rand_conversations[i][4], k=10)
  ndcg_zs_1_10 += ndcg_at_k(z_s_rec_lists[i][0], rand_conversations[i][4], k=10)

  # Zero-shot con sampling
  best_r = 0.0
  best_n = 0.0

  for l in z_s_rec_lists[i]:
    recall = recall_at_k(l, rand_conversations[i][4], k=10)
    ndcg = ndcg_at_k(l, rand_conversations[i][4], k=10)
    if recall > best_r:
      best_r = recall
    if ndcg > best_n:
      best_n = ndcg

  best_recall_zs_10 += best_r
  best_ndcg_zs_10 += best_n

print(recall_zs_1_10/num_test_items, ndcg_zs_1_10/num_test_items)
print(best_recall_zs_10/num_test_items, best_ndcg_zs_10/num_test_items)

In [None]:
# Zero-Shot
z_n_rec_lists = []
z_s_rec_lists = []
i=1
for out in outputs_z_n[0]:
    ans, i = format_ans(out[out.index("<|assistant|>"):],10,i)
    # print(ans)
    listed = out[out.index("<|assistant|>")+13:].replace("\n",", ")[2:]
    print_list = True
    nums = [f"{i}." for i in range(1,11)]
    for num in nums:
      if num not in listed:
        print_list = False
    if print_list:
      good_list = re.findall(pattern, listed)
      z_n_rec_lists.append(good_list)
i=1
for out in outputs_z_s:
    ans, i = format_ans(out[out.index("<|assistant|>"):],10,i)
    # print(ans)
    listed = out[out.index("<|assistant|>")+13:].replace("\n",", ")[2:]
    print_list = True
    nums = [f"{i}." for i in range(1,11)]
    for num in nums:
      if num not in listed:
        print_list = False
    if print_list:
      good_list = re.findall(pattern, listed)
      z_s_rec_lists.append(good_list)

z_n_rec_lists = [e[:10] for e in z_n_rec_lists if len(e) >= 10]
z_s_rec_lists = [e[:10] for e in z_s_rec_lists if len(e) >= 10]

# Zero-shot sin sampling

recall_zn_1_5 = recall_at_k(z_n_rec_lists[0], ground_truth, k=5)
ndcg_zn_1_5 = ndcg_at_k(z_n_rec_lists[0], ground_truth, k=5)

recall_zs_1_5 = recall_at_k(z_s_rec_lists[0], ground_truth, k=5)
ndcg_zs_1_5 = ndcg_at_k(z_s_rec_lists[0], ground_truth, k=5)

print(recall_zn_1_5,ndcg_zn_1_5)
print(recall_zs_1_5,ndcg_zs_1_5)

# Zero-shot con sampling
best_recall_zn_5 = 0.0
best_ndcg_zn_5 = 0.0

for l in z_n_rec_lists:
  recall = recall_at_k(l, ground_truth, k=5)
  ndcg = ndcg_at_k(l, ground_truth, k=5)
  if recall > best_recall_zn_5:
    best_recall_zn_5 = recall
  if ndcg > best_ndcg_zn_5:
    best_ndcg_zn_5 = ndcg

best_recall_zs_5 = 0.0
best_ndcg_zs_5 = 0.0

for l in z_s_rec_lists:
  recall = recall_at_k(l, ground_truth, k=5)
  ndcg = ndcg_at_k(l, ground_truth, k=5)
  if recall > best_recall_zs_5:
    best_recall_zs_5 = recall
  if ndcg > best_ndcg_zs_5:
    best_ndcg_zs_5 = ndcg

print(best_recall_zn_5,best_ndcg_zn_5)
print(best_recall_zs_5,best_ndcg_zs_5)

0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0


In [None]:
# Few-Shot
f_n_rec_lists = []
f_s_rec_lists = []
i=1
for out in outputs_f_n:
    ans, i = format_ans(out[out.index("<|assistant|>"):],10,i)
    # print(ans)
    listed = out[out.index("<|assistant|>")+13:].replace("\n",", ")[2:]
    print_list = True
    nums = [f"{i}." for i in range(1,11)]
    for num in nums:
      if num not in listed:
        print_list = False
    if print_list:
      good_list = re.findall(pattern, listed)
      f_n_rec_lists.append(good_list)
i=1
for out in outputs_f_s:
    ans, i = format_ans(out[out.index("<|assistant|>"):],10,i)
    # print(ans)
    listed = out[out.index("<|assistant|>")+13:].replace("\n",", ")[2:]
    print_list = True
    nums = [f"{i}." for i in range(1,11)]
    for num in nums:
      if num not in listed:
        print_list = False
    if print_list:
      good_list = re.findall(pattern, listed)
      f_s_rec_lists.append(good_list)

f_n_rec_lists = [e[:10] for e in f_n_rec_lists if len(e) >= 10]
f_s_rec_lists = [e[:10] for e in f_s_rec_lists if len(e) >= 10]

# Few-shot sin sampling

recall_fn_1_5 = recall_at_k(f_n_rec_lists[0], ground_truth, k=5)
ndcg_fn_1_5 = ndcg_at_k(f_n_rec_lists[0], ground_truth, k=5)

recall_fs_1_5 = recall_at_k(f_s_rec_lists[0], ground_truth, k=5)
ndcg_fs_1_5 = ndcg_at_k(f_s_rec_lists[0], ground_truth, k=5)

print(recall_fn_1_5,ndcg_fn_1_5)
print(recall_fs_1_5,ndcg_fs_1_5)

# Few-shot con sampling
best_recall_fn_5 = 0.0
best_ndcg_fn_5 = 0.0

for l in f_n_rec_lists:
  recall = recall_at_k(l, ground_truth, k=5)
  ndcg = ndcg_at_k(l, ground_truth, k=5)
  if recall > best_recall_fn_5:
    best_recall_fn_5 = recall
  if ndcg > best_ndcg_fn_5:
    best_ndcg_fn_5 = ndcg

best_recall_fs_5 = 0.0
best_ndcg_fs_5 = 0.0

for l in f_s_rec_lists:
  recall = recall_at_k(l, ground_truth, k=5)
  ndcg = ndcg_at_k(l, ground_truth, k=5)
  if recall > best_recall_fs_5:
    best_recall_fs_5 = recall
  if ndcg > best_ndcg_fs_5:
    best_ndcg_fs_5 = ndcg

print(best_recall_fn_5,best_ndcg_fn_5)
print(best_recall_fs_5,best_ndcg_fs_5)

0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0


### Recall@10 y NDCG@10:

Zero-Shot:

In [None]:

# Zero-shot sin sampling

recall_zn_1_10 = recall_at_k(z_n_rec_lists[0], ground_truth, k=10)
ndcg_zn_1_10 = ndcg_at_k(z_n_rec_lists[0], ground_truth, k=10)

recall_zs_1_10 = recall_at_k(z_s_rec_lists[0], ground_truth, k=10)
ndcg_zs_1_10 = ndcg_at_k(z_s_rec_lists[0], ground_truth, k=10)

print(recall_zn_1_10,ndcg_zn_1_10)
print(recall_zs_1_10,ndcg_zs_1_10)

# Zero-shot con sampling
best_recall_zn_10 = 0.0
best_ndcg_zn_10 = 0.0

for l in z_n_rec_lists:
  recall = recall_at_k(l, ground_truth, k=10)
  ndcg = ndcg_at_k(l, ground_truth, k=10)
  if recall > best_recall_zn_10:
    best_recall_zn_10 = recall
  if ndcg > best_ndcg_zn_10:
    best_ndcg_zn_10 = ndcg

best_recall_zs_10 = 0.0
best_ndcg_zs_10 = 0.0

for l in z_s_rec_lists:
  recall = recall_at_k(l, ground_truth, k=10)
  ndcg = ndcg_at_k(l, ground_truth, k=10)
  if recall > best_recall_zs_10:
    best_recall_zs_10 = recall
  if ndcg > best_ndcg_zs_10:
    best_ndcg_zs_10 = ndcg

print(best_recall_zn_10,best_ndcg_zn_10)
print(best_recall_zs_10,best_ndcg_zs_10)

0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0


Few-Shot

In [None]:

# Few-shot sin sampling

recall_fn_1_10 = recall_at_k(f_n_rec_lists[0], ground_truth, k=10)
ndcg_fn_1_10 = ndcg_at_k(f_n_rec_lists[0], ground_truth, k=10)

recall_fs_1_10 = recall_at_k(f_s_rec_lists[0], ground_truth, k=10)
ndcg_fs_1_10 = ndcg_at_k(f_s_rec_lists[0], ground_truth, k=10)

print(recall_fn_1_10,ndcg_fn_1_10)
print(recall_fs_1_10,ndcg_fs_1_10)

# Few-shot con sampling
best_recall_fn_10 = 0.0
best_ndcg_fn_10 = 0.0

for l in f_n_rec_lists:
  recall = recall_at_k(l, ground_truth, k=10)
  ndcg = ndcg_at_k(l, ground_truth, k=10)
  if recall > best_recall_fn_10:
    best_recall_fn_10 = recall
  if ndcg > best_ndcg_fn_10:
    best_ndcg_fn_10 = ndcg

best_recall_fs_10 = 0.0
best_ndcg_fs_10 = 0.0

for l in f_s_rec_lists:
  recall = recall_at_k(l, ground_truth, k=10)
  ndcg = ndcg_at_k(l, ground_truth, k=10)
  if recall > best_recall_fs_10:
    best_recall_fs_10 = recall
  if ndcg > best_ndcg_fs_10:
    best_ndcg_fs_10 = ndcg

print(best_recall_fn_10,best_ndcg_fn_10)
print(best_recall_fs_10,best_ndcg_fs_10)

0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0


In [None]:
print("\nZero-Shot sin interacciones históricas:")
print(f"Sin sampling: Recall@5: {recall_zn_1_5}, NDCG@5: {ndcg_zn_1_5}, Recall@10: {recall_zn_1_10}, NDCG@10: {ndcg_zn_1_10}")
print(f"Con sampling: Recall@5: {best_recall_zn_5}, NDCG@5: {best_ndcg_zn_5}, Recall@10: {best_recall_zn_10}, NDCG@10: {best_ndcg_zn_10}")

print("\nZero-Shot con interacciones históricas:")
print(f"Sin sampling: Recall@5: {recall_zs_1_5}, NDCG@5: {ndcg_zs_1_5}, Recall@10: {recall_zs_1_10}, NDCG@10: {ndcg_zs_1_10}")
print(f"Con sampling: Recall@5: {best_recall_zs_5}, NDCG@5: {best_ndcg_zs_5}, Recall@10: {best_recall_zs_10}, NDCG@10: {best_ndcg_zs_10}")

print("\nFew-Shot sin interacciones históricas:")
print(f"Sin sampling: Recall@5: {recall_fn_1_5}, NDCG@5: {ndcg_fn_1_5}, Recall@10: {recall_fn_1_10}, NDCG@10: {ndcg_fn_1_10}")
print(f"Con sampling: Recall@5: {best_recall_fn_5}, NDCG@5: {best_ndcg_fn_5}, Recall@10: {best_recall_fn_10}, NDCG@10: {best_ndcg_fn_10}")

print("\nFew-Shot con interacciones históricas:")
print(f"Sin sampling: Recall@5: {recall_fs_1_5}, NDCG@5: {ndcg_fs_1_5}, Recall@10: {recall_fs_1_10}, NDCG@10: {ndcg_fs_1_10}")
print(f"Con sampling: Recall@5: {best_recall_fs_5}, NDCG@5: {best_ndcg_fs_5}, Recall@10: {best_recall_fs_10}, NDCG@10: {best_ndcg_fs_10}")

Random:
Sin sampling: Recall@5: 0.0, NDCG@5: 0.0, Recall@10: 0.0, NDCG@10: 0.0
Con sampling: Recall@5: 0.0, NDCG@5: 0.0, Recall@10: 0.0, NDCG@10: 0.0

Most Popular:
Sin sampling: Recall@5: 0.0, NDCG@5: 0.0, Recall@10: 0.0, NDCG@10: 0.0
Con sampling: Recall@5: 0.0, NDCG@5: 0.0, Recall@10: 0.0, NDCG@10: 0.0

Zero-Shot sin interacciones históricas:
Sin sampling: Recall@5: 0.0, NDCG@5: 0.0, Recall@10: 0.0, NDCG@10: 0.0
Con sampling: Recall@5: 0.0, NDCG@5: 0.0, Recall@10: 0.0, NDCG@10: 0.0

Zero-Shot con interacciones históricas:
Sin sampling: Recall@5: 0.0, NDCG@5: 0.0, Recall@10: 0.0, NDCG@10: 0.0
Con sampling: Recall@5: 0.0, NDCG@5: 0.0, Recall@10: 0.0, NDCG@10: 0.0

Few-Shot sin interacciones históricas:
Sin sampling: Recall@5: 0.0, NDCG@5: 0.0, Recall@10: 0.0, NDCG@10: 0.0
Con sampling: Recall@5: 0.0, NDCG@5: 0.0, Recall@10: 0.0, NDCG@10: 0.0

Few-Shot con interacciones históricas:
Sin sampling: Recall@5: 0.0, NDCG@5: 0.0, Recall@10: 0.0, NDCG@10: 0.0
Con sampling: Recall@5: 0.0, NDCG@