In [27]:
import numpy as np
import pandas as pd
import sqlite3 as sql
from sklearn.preprocessing import MinMaxScaler
from ipywidgets import interact ## para análisis interactivo
from sklearn import neighbors ### basado en contenido un solo producto consumido
import joblib



#### conectar_base_de_Datos

conn=sql.connect('db_movies')
cur=conn.cursor()

#### ver tablas disponibles en base de datos ###

cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
cur.fetchall()


[('ratings',),
 ('movies',),
 ('usuarios_sel',),
 ('consolidacion',),
 ('movies_sel',),
 ('ratings_final',),
 ('base_lista2',),
 ('filtro3',),
 ('base_lista3',),
 ('filtro_rat',),
 ('filtro_rat2',),
 ('consolidacion2',),
 ('movies2',),
 ('base_lista3_a',)]

# <h1> 1. sistemas basados en popularidad

In [30]:
#### Peliculas películas mejor calificadas
pd.read_sql("""select title,
            avg(rating) as prom_rating,
            count(*) as num_visitas
            from base_lista3_a
            group by title
            --having num_visitas
            order by  prom_rating desc
            limit 10
            """, conn)

Unnamed: 0,title,prom_rating,num_visitas
0,"Three Billboards Outside Ebbing, Missouri (2017)",4.833333,6
1,Paths of Glory (1957),4.75,8
2,Hedwig and the Angry Inch (2000),4.714286,7
3,"Last Picture Show, The (1971)",4.666667,6
4,Secrets & Lies (1996),4.625,8
5,"Streetcar Named Desire, A (1951)",4.615385,13
6,"Verdict, The (1982)",4.583333,6
7,Ran (1985),4.5,11
8,Paperman (2012),4.5,6
9,Harold and Maude (1971),4.5,18


In [58]:
#### Las peliculas mas vistas con su rating 
pd.read_sql("""select title,
            avg(rating) as prom_rating,
            count(*) as num_visitas
            from base_lista3_a
            group by title
            order by num_visitas desc
            limit 10
            """, conn)

Unnamed: 0,title,prom_rating,num_visitas
0,Forrest Gump (1994),4.173701,308
1,"Shawshank Redemption, The (1994)",4.442953,298
2,Pulp Fiction (1994),4.164336,286
3,"Silence of the Lambs, The (1991)",4.148077,260
4,"Matrix, The (1999)",4.184825,257
5,Star Wars: Episode IV - A New Hope (1977),4.230435,230
6,Braveheart (1995),4.050228,219
7,Jurassic Park (1993),3.756881,218
8,Terminator 2: Judgment Day (1991),3.980583,206
9,Schindler's List (1993),4.220588,204


In [32]:
# Películas mejor calificadas por el año en que fueron estrenadas.
cm=pd.read_sql("""select year,
            title,
            avg(rating) as prom_rating,
            count(*) as num_visitas
            from base_lista3_a
            group by  year, title
            order by year desc, prom_rating desc
            limit 10
            """, conn)
cm

Unnamed: 0,year,title,prom_rating,num_visitas
0,2018,Avengers: Infinity War - Part I (2018),3.863636,11
1,2018,Deadpool 2 (2018),3.85,10
2,2017,"Three Billboards Outside Ebbing, Missouri (2017)",4.833333,6
3,2017,Logan (2017),4.236842,19
4,2017,Untitled Spider-Man Reboot (2017),4.083333,12
5,2017,Thor: Ragnarok (2017),4.0,16
6,2017,Guardians of the Galaxy 2 (2017),3.909091,22
7,2017,Pirates of the Caribbean: Dead Men Tell No Tal...,3.785714,7
8,2017,The Shape of Water (2017),3.666667,6
9,2017,Blade Runner 2049 (2017),3.625,12


In [33]:
#### Películas mejores calificadas por el año en que fueron vistas.
pd.read_sql("""select year,
            title,
            avg(rating) as prom_rating,
            count(*) as num_visitas
            from base_lista3_a
            group by  year, title
            order by year desc, prom_rating desc
            limit 10
            """, conn)


Unnamed: 0,year,title,prom_rating,num_visitas
0,2018,Avengers: Infinity War - Part I (2018),3.863636,11
1,2018,Deadpool 2 (2018),3.85,10
2,2017,"Three Billboards Outside Ebbing, Missouri (2017)",4.833333,6
3,2017,Logan (2017),4.236842,19
4,2017,Untitled Spider-Man Reboot (2017),4.083333,12
5,2017,Thor: Ragnarok (2017),4.0,16
6,2017,Guardians of the Galaxy 2 (2017),3.909091,22
7,2017,Pirates of the Caribbean: Dead Men Tell No Tal...,3.785714,7
8,2017,The Shape of Water (2017),3.666667,6
9,2017,Blade Runner 2049 (2017),3.625,12


# <h1> 2.1 Sistema de recomendación basado en contenido un solo producto - Manual

In [55]:
# Se importan los datos.
movies=pd.read_sql('select * from movies2', conn )
movies=movies.drop('index', axis=1)
movies.head(3)

Unnamed: 0,level_0,movieId,title,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,...,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,year
0,0,1,Toy Story (1995),0,0,1,1,1,1,0,...,0,0,0,0,0,0,0,0,0,1995.0
1,1,2,Jumanji (1995),0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1995.0
2,2,3,Grumpier Old Men (1995),0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,1995.0


In [36]:
# Escalado del año
sc=MinMaxScaler()
movies[["year_sc"]]=sc.fit_transform(movies[['year']])

In [56]:
# Se borran las columnas con información no relevante.
movies_dum=movies.drop(columns=['movieId','year','title','level_0','(no genres listed)'])
movies_dum.head(3)

Unnamed: 0,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0


In [39]:
# Películas recomendadas ejemplo por una sola película
movie='Jumanji (1995)'
ind_movie=movies[movies['title']==movie].index.values.astype(int)[0]
similar_movies=movies_dum.corrwith(movies_dum.iloc[ind_movie,:],axis=1)

similar_movies=similar_movies.sort_values(ascending=False)
top_similar_movies=similar_movies.to_frame(name="correlación").iloc[0:11,]
top_similar_movies['title']=movies["title"]
top_similar_movies


Unnamed: 0,correlación,title
1,1.0,Jumanji (1995)
53,1.0,"Indian in the Cupboard, The (1995)"
9553,0.99999,Gulliver's Travels (1996)
109,0.99999,"NeverEnding Story III, The (1994)"
8718,0.99983,The Cave of the Golden Rose (1991)
1618,0.999734,"NeverEnding Story II: The Next Chapter, The (1..."
3574,0.999632,Harry Potter and the Sorcerer's Stone (a.k.a. ...
6074,0.998992,"Chronicles of Narnia: The Lion, the Witch and ..."
1799,0.998917,Santa Claus: The Movie (1985)
1556,0.998917,Return to Oz (1985)


In [40]:
# Películas recomendados ejemplo para visualización todos las películas
def recomendacion(movie = list(movies['title'])):

    ind_movie=movies[movies['title']==movie].index.values.astype(int)[0] #### obtener indice de libro seleccionado de lista
    similar_movies=movies_dum.corrwith(movies_dum.iloc[ind_movie,:],axis=1) ## correlación entre libro seleccionado y todos los otros
    similar_movies=similar_movies.sort_values(ascending=False) #### ordenar correlaciones
    top_similar_movies=similar_movies.to_frame(name="correlación").iloc[1:11,] ### el 11 es número de libros recomendados
    top_similar_movies['title']=movies["title"]### agregaro los nombres (como tiene mismo indice no se debe cruzar)
    return top_similar_movies

In [41]:
print(interact(recomendacion))

interactive(children=(Dropdown(description='movie', options=('Toy Story (1995)', 'Jumanji (1995)', 'Grumpier O…

<function recomendacion at 0x00000166631448B0>


# <h1> 2.2 Sistema de recomendación basado en contenido KNN un solo producto visto

In [43]:
# Crear el modelo y entrenarlo
model = neighbors.NearestNeighbors(n_neighbors=11, metric='cosine')
model.fit(movies_dum)
dist, idlist = model.kneighbors(movies_dum)

In [44]:
# Crear la lista de películas recomendadas según una sola película
movie_list_name = []
movie_name = 'Jumanji (1995)'
movie_id = movies[movies['title'] == movie_name].index[0]

for newid in idlist[movie_id]:
    recommended_movie = movies.loc[newid].title
    if recommended_movie != movie_name:  # Excluir la película original
        movie_list_name.append(recommended_movie)

movie_list_name

['Indian in the Cupboard, The (1995)',
 "Gulliver's Travels (1996)",
 'NeverEnding Story III, The (1994)',
 'The Cave of the Golden Rose (1991)',
 'NeverEnding Story II: The Next Chapter, The (1990)',
 "Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)",
 'Chronicles of Narnia: The Lion, the Witch and the Wardrobe, The (2005)',
 'Santa Claus: The Movie (1985)',
 'Return to Oz (1985)',
 'NeverEnding Story, The (1984)']

In [45]:
# Crear la lista de películas recomendadas según todas las película existentes en la base
def movieRecommender(movie_name = list(movies['title'].value_counts().index)):
    movie_list_name = []
    movie_id = movies[movies['title'] == movie_name].index
    movie_id = movie_id[0]
    for newid in idlist[movie_id]:
        recommended_movie = movies.loc[newid].title
        if recommended_movie != movie_name:  # Excluir la película original
            movie_list_name.append(recommended_movie)

    movie_list_name = pd.DataFrame(movie_list_name, columns=['Book Title'])
    movie_id
    return movie_list_name

In [46]:
print(interact(movieRecommender))

interactive(children=(Dropdown(description='movie_name', options=('Eros (2004)', 'Emma (1996)', 'War of the Wo…

<function movieRecommender at 0x000001665CD71630>


# <h1> 3 Sistema de recomendación basado en contenido KNN, Con base en todo lo visto por el usuario

In [54]:
# Se importan los datos y se borran datos no relevantes
movies=pd.read_sql('select * from movies2', conn )
movies=movies.drop(columns=['index','level_0','(no genres listed)'],axis=1)
movies.head(3)

Unnamed: 0,movieId,title,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,...,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,year
0,1,Toy Story (1995),0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1995.0
1,2,Jumanji (1995),0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1995.0
2,3,Grumpier Old Men (1995),0,0,0,0,1,0,0,0,...,0,0,0,0,1,0,0,0,0,1995.0


In [48]:
# Se filtra una lista con los usuarios sin repetirlos
usuarios=pd.read_sql('select distinct (userId) as user_id from filtro_rat2',conn)
usuarios

Unnamed: 0,user_id
0,1
1,2
2,3
3,4
4,5
...,...
584,602
585,604
586,605
587,607


In [49]:
# Se crea un Id para un ejemplo y se utiliza books_dum2 para guardar la base que se necesita para ejecutar el código 
user_id=172
books_dum2=movies

In [50]:
# Crear la lista de películas recomendadas según las películas vistas por el usuario
def recomendar(user_id=list(usuarios['user_id'].value_counts().index)):
    
    ###seleccionar solo los ratings del usuario seleccionado
    ratings=pd.read_sql('select *from filtro_rat where userId=:user',conn, params={'user':user_id})
    
    ###convertir ratings del usuario a array
    l_books_r=ratings['movieId'].to_numpy()
    
    ###agregar la columna de isbn y titulo del libro a dummie para filtrar y mostrar nombre
    #books_dum2[['movieId','title']]=movies[['movieId','title']]
    
    ### filtrar libros calificados por el usuario
    books_r=books_dum2[books_dum2['movieId'].isin(l_books_r)]
    
    ## eliminar columna nombre e isbn
    books_r=books_r.drop(columns=['movieId','title'])
    books_r["indice"]=1 ### para usar group by y que quede en formato pandas tabla de centroide
    ##centroide o perfil del usuario
    centroide=books_r.groupby("indice").mean()
    
    
    ### filtrar libros no leídos
    books_nr=books_dum2[~books_dum2['movieId'].isin(l_books_r)]
    ## eliminbar nombre e isbn
    books_nr=books_nr.drop(columns=['movieId','title'])
    ### entrenar modelo 
    model=neighbors.NearestNeighbors(n_neighbors=11, metric='cosine')
    model.fit(books_nr)
    dist, idlist = model.kneighbors(centroide)
    
    ids=idlist[0] ### queda en un array anidado, para sacarlo
    recomend_b=movies.loc[ids][['title','movieId']]
    leidos=movies[movies['movieId'].isin(l_books_r)][['title','movieId']]
    recomend_b = recomend_b.drop_duplicates(subset='movieId')
    recomend_b = recomend_b.head(11)    
    
    return recomend_b

In [57]:
# Se imprime la lista de películas para el usuario seleccionado"
recomendar(user_id)

Unnamed: 0,title,movieId
9025,The Lost Room (2006),140737
9496,Sandy Wexler (2017),170897
9624,Dane Cook: Troublemaker (2014),179427
9217,London Has Fallen (2016),152079
8875,Afonya (1975),134041
9380,Over the Garden Wall (2013),163809
9148,A Perfect Day (2015),147410
9503,Planet Earth II (2016),171011
8657,Man on High Heels (2014),120919
8809,The Final Girls (2015),130686


In [52]:
print(interact(recomendar))


interactive(children=(Dropdown(description='user_id', options=(1, 419, 402, 403, 404, 405, 406, 407, 408, 409,…

<function recomendar at 0x000001665DB02950>
