In [1]:
import pandas as pd
import random

In [2]:
# carga a dataframe del archivo parquet para identifica userId
stream = pd.read_parquet('reviews.parquet')

In [3]:
# cargar plata_stars.parquet para las recomendacions
plata = pd.read_parquet('plata_stars.parquet')

In [4]:
# generar indices para la función des_recommendations
indices = pd.Series(plata.index, index=plata['show_id']).drop_duplicates()

In [5]:
# cargar cos_sim array, generado a partir de:
"""
tfidf = TfidfVectorizer(stop_words='english')
tfidf_description = tfidf.fit_transform(plata['description'])
cos_sim = linear_kernel(tfidf_description, tfidf_description)
"""
import numpy as np
cos_sim_file = 'cos_sim.npy'
cos_sim = np.load(cos_sim_file)

In [6]:
# Definimos funcion para obtener una lista de 3 películas con las mejores puntuaciones dadas por un usuario

def get_tres(userId):
    
    # Filtramos el dataframe para incluir sólo las películas calificadas por el usuario
    df = stream[stream['userId'] == userId]
    
    # Definimos los posibles valores de rating
    rating_values=[10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

    # Creamos diccionario donde las llaves son los posibles rating y los valores son una lista de movieIds que tienen ese rating
    rating_dict = {}
    for rating in rating_values:
        rating_dict[str(rating)] = df[df['rating'] == rating]['movieId'].values.tolist()
        if len(rating_dict[str(rating)]) >= 3:
            break

    # Create a new dictionary where the keys are rating values and the values are lists of movieIds that are the user's top rated movies for that rating value
    user_top_movies = {}
    z = 3
    for rating in rating_values:
        user_top_movies[str(rating)] = rating_dict[str(rating)]
        z = z - len(user_top_movies[str(rating)])
        if z <= 0:
            break

    # Create a list of 3 recommended movies by selecting the user's top rated movies from the highest rating value to the lowest rating value
    final_list = []
    for rating in rating_values:
        movies = user_top_movies[str(rating)]
        if len(movies) > 0:
            if len(movies) <= 3 - len(final_list):
                final_list += movies
            else:
                final_list += random.sample(movies, 3 - len(final_list))
        if len(final_list) == 3:
            break

    # Return the final list of recommended movies
    return final_list

In [9]:
# Función para obtener las recomendaciones
def des_recommendations(showId, cos_sim):
    idx = indices[showId]
    sim_scores = list(enumerate(cos_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:16]
    movie_indices = [i[0] for i in sim_scores]
    return plata.loc[movie_indices, 'show_id'].tolist()

In [26]:
def get_final_recommendations(the_3, cos_sim):
    final_list = []
    final_list += get_list1(the_3, cos_sim)
    final_list += get_list2(the_3, cos_sim)
    final_list += get_list3(the_3, cos_sim)
    final_list = sorted(final_list, key=lambda x: x[1], reverse=True)[:10]
    return final_list

def get_list1(the_3, cos_sim):
    showId = the_3[0]
    return des_recommendations(showId, cos_sim)

def get_list2(the_3, cos_sim):
    if len(the_3) > 1:
        showId = the_3[1]
        return des_recommendations(showId, cos_sim)
    else:
        return []

def get_list3(the_3, cos_sim):
    if len(the_3) > 2:
        showId = the_3[2]
        return des_recommendations(showId, cos_sim)
    else:
        return []

In [28]:
def get_user_recommendations(userId):
    if userId not in stream['userId'].values:
        return 'Usuario no existente'
    else:
        the_3 = get_tres(userId)
        recommended_movies = get_final_recommendations(the_3, cos_sim)
        return recommended_movies

In [31]:
userId = 1055
numero_de_recomendaciones = 10


show_id_list = get_user_recommendations(userId)

for show_id in show_id_list:
    title = plata.loc[plata['show_id'] == show_id, 'title'].iloc[0]
    print(title)

robert
back to the 90s
celebrity ghost stories
the forest of love
true horror
speak up: empower your ideas
futmalls.com
acapulco shore
the prince family
paranormal solutions inc.
