# **Instalación e Importación de Librerías**

In [1]:
!pip install seaborn  --upgrade
!pip install surprise --upgrade

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hcanceled
[31mERROR: Operation cancelled by user[0m[31m
[0mTraceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/base_command.py", line 179, in exc_logging_wrapper
    status = run_func(*args)
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/req_command.py", line 67, i

In [3]:
import pandas as pd
import seaborn as sns
import numpy as np
import scipy.sparse as sparse
import matplotlib.pyplot as plt
import time
import psutil
from tabulate import tabulate
from collections import defaultdict
import random


# **Carga de Datos**

In [4]:
!wget -O LLM-ReDial-2024.zip "https://www.dropbox.com/scl/fi/x9avfdx2a1k6uq97f0efj/LLM-ReDial-2024.zip?rlkey=ijqpf91d13d6lowek3ebjvd0n&e=2&dl=1"
!unzip LLM-ReDial-2024.zip
!unzip LLM_Redial.zip
!rm LLM_Redial.zip
!rm LLM-ReDial-2024.zip
!rm -rf __MACOSX

--2024-11-11 22:48:33--  https://www.dropbox.com/scl/fi/x9avfdx2a1k6uq97f0efj/LLM-ReDial-2024.zip?rlkey=ijqpf91d13d6lowek3ebjvd0n&e=2&dl=1
Resolving www.dropbox.com (www.dropbox.com)... 162.125.5.18, 2620:100:601d:18::a27d:512
Connecting to www.dropbox.com (www.dropbox.com)|162.125.5.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://uc54ac117aacc2a84a08a1b5b39d.dl.dropboxusercontent.com/cd/0/inline/CeN3ZaLNNNIS2SiW6HqYEdltWkO1OwMwfxjojq7NBT4OGiv7X4DGC4d6Zgcl14ckX7lX7eMrQszGvY3wsiDVxeCnzVKRi8WSNlgS6jfp6nrVdAj772pGOEFqxfurVFWRpUI/file?dl=1# [following]
--2024-11-11 22:48:34--  https://uc54ac117aacc2a84a08a1b5b39d.dl.dropboxusercontent.com/cd/0/inline/CeN3ZaLNNNIS2SiW6HqYEdltWkO1OwMwfxjojq7NBT4OGiv7X4DGC4d6Zgcl14ckX7lX7eMrQszGvY3wsiDVxeCnzVKRi8WSNlgS6jfp6nrVdAj772pGOEFqxfurVFWRpUI/file?dl=1
Resolving uc54ac117aacc2a84a08a1b5b39d.dl.dropboxusercontent.com (uc54ac117aacc2a84a08a1b5b39d.dl.dropboxusercontent.com)... 162.125.5.15, 2620:100:601d:15::a27d

# **Random Baseline**

**Función para leer datos de un usuario**

In [5]:
### Directamente desde sample_data/Tools.py
import json
import csv

def read_user_data(filename, user_id):
    with open(filename, 'r', encoding='utf-8') as file:
        data = [json.loads(line) for line in file]

    for entry in data:
        if user_id in entry:
            return entry[user_id]

    return None


**Evaluar baseline**

In [7]:
def recall_at_k(r, k, m):
    return (np.asarray(r)[:k] != 0).sum() / m

def dcg_at_k(r, k):
    r = np.asfarray(r)[:k]
    if r.size:
        return np.sum(np.subtract(np.power(2, r), 1) / np.log2(np.arange(2, r.size + 2)))
    return 0.

def idcg_at_k(k):
    return dcg_at_k(np.ones(k), k)

def ndcg_at_k(r, k, max_relevant):
    idcg = idcg_at_k(min(k, max_relevant))
    if not idcg:
        return 0.
    return dcg_at_k(r, k) / idcg

In [None]:
import os
from most_popular import MostPopular

def evaluate(items_map_path, users_map_path, final_data_path, n, sample_percent=0.1):
    users_map = json.load(open(users_map_path, 'r'))
    items_map = json.load(open(items_map_path, 'r'))

    user_ids = list(users_map.keys())
    items_ids = list(items_map.keys())

    sample_size = int(sample_percent * len(user_ids))
    sampled_users = random.sample(user_ids, sample_size)

    data = []
    with open(os.path.join("Movie", "final_data.jsonl"), "r") as f:
        for line in f:
            data.append(json.loads(line))

    model = MostPopular(data)
    model.fit()
    recommendations = model.recommend(n)

    mean_ndcg = 0
    mean_recall = 0
    for user in sampled_users:
        user_data = read_user_data(final_data_path, user)
        if not user_data:
            continue

        user_might_like = [items_map[id] for id in user_data["user_might_like"]]
        rel_vector = np.isin(recommendations, user_might_like, assume_unique=True).astype(int)

        mean_ndcg += ndcg_at_k(rel_vector, 20, len(user_might_like))
        mean_recall += recall_at_k(rel_vector, 20, len(user_might_like))

    mean_ndcg /= sample_size
    mean_recall /= sample_size

    return mean_ndcg, mean_recall

In [12]:
movies_items_map_path = "Movie/item_map.json"
movies_users_map_path = "Movie/user_ids.json"
movies_final_data_path = "Movie/final_data.jsonl"

books_items_map_path = "Books/item_map.json"
books_users_map_path = "Books/user_ids.json"
books_final_data_path = "Books/final_data.jsonl"

**Evaluar para películas**

In [None]:
ndcg, recall = evaluate(movies_items_map_path, movies_users_map_path, movies_final_data_path, 20, 0.01)

In [20]:
print(f"NDCG: {ndcg}")
print(f"Recall: {recall}")

NDCG: 0.0
Recall: 0.0


**Evaluar para libros**

In [None]:
evaluate_random_baseline(books_items_map_path, books_users_map_path, books_final_data_path, 5)

0.0