In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Read the four CSV tables from ../data into module-level DataFrames.
# `movies` and `ratings` feed every recommender below; `links` and `tags`
# are loaded but not used by the cells visible in this notebook.
movies = pd.read_csv('../data/movies.csv')
ratings = pd.read_csv('../data/ratings.csv')

links = pd.read_csv('../data/links.csv')
tags = pd.read_csv('../data/tags.csv')


In [3]:
def recommender(n, min_ratings=10):
    """Return the ``n`` best-rated titles among those with enough ratings.

    Joins the module-level ``movies`` and ``ratings`` frames, aggregates the
    ratings per title and ranks the titles by their mean rating.

    Parameters
    ----------
    n : int
        Number of rows to keep from the top of the ranking (``iloc[:n]``).
    min_ratings : int, default 10
        Minimum number of ratings (inclusive) a title needs to be ranked.
        The default reproduces the previously hard-coded threshold.

    Returns
    -------
    pd.DataFrame
        Columns ``title``, ``popularity`` (number of ratings) and ``quality``
        (mean rating), sorted by ``quality`` descending, with a fresh
        0-based index.

    Notes
    -----
    Ratings are grouped by ``title`` only, so two different movies that share
    a title are pooled into one row.
    """
    ranked = (
        movies
            # No explicit key: pandas joins on the columns both frames share.
            .merge(ratings, how='inner')
            .groupby('title')
            .agg(popularity=('rating', 'count'),
                 quality=('rating', 'mean'))
            # Keep only titles with at least `min_ratings` ratings.
            .loc[lambda stats: stats['popularity'] >= min_ratings]
            .sort_values('quality', ascending=False)
            .reset_index()
    )
    return ranked.iloc[:n]


In [4]:
recommender(5)

Unnamed: 0,title,popularity,quality
0,Secrets & Lies (1996),11,4.590909
1,Guess Who's Coming to Dinner (1967),11,4.545455
2,Paths of Glory (1957),12,4.541667
3,"Streetcar Named Desire, A (1951)",20,4.475
4,"Celebration, The (Festen) (1998)",12,4.458333


In [5]:
# Attach the movie columns to every rating that has a matching movie.
# No join key is given, so pandas joins on the columns both frames share.
df_2 = pd.merge(movies, ratings, how='inner')

# User x title rating table, with unrated cells set to 0 instead of NaN.
def get_sparse_matrix(dense_matrix):
    """Pivot long-format ratings into a user-by-title table.

    Despite the names, ``dense_matrix`` is the long table (one row per
    rating, with ``userId``, ``title`` and ``rating`` columns) and the result
    is an ordinary DataFrame in which most cells are 0.

    Rows are ``userId`` values, columns are ``title`` values and each cell
    holds the ``rating`` for that pair; duplicate pairs are averaged, which is
    ``pivot_table``'s default aggregation. Pairs without a rating become 0.
    """
    user_by_title = dense_matrix.pivot_table(
        index='userId',
        columns='title',
        values='rating',
    )
    return user_by_title.fillna(0)

In [29]:
get_sparse_matrix(df_2).head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
def get_user_prefered_item(dense_matrix: pd.DataFrame, user):
    """Return the title that ``user`` rated highest.

    Parameters
    ----------
    dense_matrix : pd.DataFrame
        Long-format ratings with at least ``userId``, ``title`` and
        ``rating`` columns. The frame is not modified.
    user
        Value to match against the ``userId`` column.

    Returns
    -------
    The ``title`` value of the user's top-rated row. When several rows share
    the top rating, which one is returned depends on the sort's tie order.

    Raises
    ------
    KeyError
        If ``user`` has no rows in ``dense_matrix``.
    """
    # Boolean filtering already yields a new frame, so the whole input does
    # not need to be copied first.
    user_ratings = dense_matrix.loc[dense_matrix['userId'] == user]

    if user_ratings.empty:
        # Still a KeyError (as before), but one that says what went wrong.
        raise KeyError(f'no ratings found for user {user!r}')

    return (
        user_ratings
            .sort_values('rating', ascending=False)
            ['title']
            .iloc[0]
    )

get_user_prefered_item(df_2, 2)

'The Jinx: The Life and Deaths of Robert Durst (2015)'

In [8]:
def item_based_recommender(dense_matrix: pd.DataFrame, title: str, n=5):
    """Return up to ``n`` titles whose ratings correlate most with ``title``.

    The long-format ratings are pivoted with ``get_sparse_matrix`` (rebuilt
    on every call) and each title's column of user ratings is correlated with
    the column of ``title``.

    Parameters
    ----------
    dense_matrix : pd.DataFrame
        Long-format ratings accepted by ``get_sparse_matrix``.
    title : str
        Title to find neighbours for; must be a column of the pivoted table.
    n : int, default 5
        Maximum number of titles to return.

    Returns
    -------
    list
        Titles ordered from most to least correlated. ``title`` itself is
        never included, and titles whose correlation is undefined (NaN) are
        skipped, so the list may be shorter than ``n``.

    Raises
    ------
    KeyError
        If ``title`` is not present in the pivoted table.
    """
    ratings_by_user = get_sparse_matrix(dense_matrix)
    similarity = ratings_by_user.corrwith(ratings_by_user[title])

    return (
        similarity
            # Remove the query title by label instead of assuming it sorts
            # first: another title with an identical rating vector ties at
            # the top and could otherwise push the query title into the result.
            .drop(labels=title)
            .dropna()
            .sort_values(ascending=False)
            .head(n)
            .index
            .to_list()
    )
    

item_based_recommender(df_2, 'Pirates of the Caribbean: At World\'s End (2007)')

["Pirates of the Caribbean: Dead Man's Chest (2006)",
 'Pirates of the Caribbean: The Curse of the Black Pearl (2003)',
 'Charlie and the Chocolate Factory (2005)',
 'Brothers Grimm, The (2005)',
 "Ocean's Twelve (2004)"]

In [9]:
# Demo: look up user 6's highest-rated title, print it, then list the titles
# whose ratings correlate most with it (the list is the cell's displayed value).
user = 6
pref_item = get_user_prefered_item(df_2, user)
print(pref_item)
item_based_recommender(df_2, pref_item)

Shawshank Redemption, The (1994)


['Pulp Fiction (1994)',
 'Forrest Gump (1994)',
 'Usual Suspects, The (1995)',
 "Schindler's List (1993)",
 'Silence of the Lambs, The (1991)']

In [10]:
df_2

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483
...,...,...,...,...,...,...
100831,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy,184,4.0,1537109082
100832,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy,184,3.5,1537109545
100833,193585,Flint (2017),Drama,184,3.5,1537109805
100834,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation,184,3.5,1537110021


In [11]:
# Add one rating of my own so the recommenders can be tried on a new user.
# Only the columns listed here are set; the other df_2 columns are left as
# NaN for this row. The user id is a string, unlike the ids loaded from CSV.
my_rating = pd.DataFrame({
    'title': ['Toy Story (1995)'],
    'genres': ['Adventure|Animation|Children|Comedy|Fantasy'],
    'userId': ['koog'],
    'rating': [5.0],
})

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat builds the same combined frame and leaves df_2 untouched.
df_me = pd.concat([df_2, my_rating], ignore_index=True)


In [12]:
# Same demo for the hand-added 'koog' user, run against df_me (df_2 plus that
# user's single rating). Note that `user` is rebound here to a string id.
user = 'koog'
pref_item = get_user_prefered_item(df_me, user)
print(pref_item)
item_based_recommender(df_me, pref_item)

Toy Story (1995)


['Toy Story 2 (1999)',
 'Groundhog Day (1993)',
 'Independence Day (a.k.a. ID4) (1996)',
 'Willy Wonka & the Chocolate Factory (1971)',
 'Mission: Impossible (1996)']