In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
%matplotlib inline
from sklearn.linear_model import BayesianRidge

In [2]:
# Load the raw inputs; every path is relative to the notebook's working directory.
# genome: long-form table with movieId / tagId / relevance columns (pivoted wide in a later cell).
genome = pd.read_csv('genome-scores.csv')
# movies: movieId, title and one flag column per genre.
# NOTE(review): read as ISO-8859-1, presumably because the titles are not valid UTF-8 -- confirm.
movies = pd.read_csv('movies_updated.csv', encoding = "ISO-8859-1")
# ratings: one row per (userId, movieId) rating, with a timestamp column.
ratings = pd.read_csv('ratings_updated.csv')
# users: per-user summary (userId, rated_count, avg_rating).
users = pd.read_csv('users.csv')
# cf_top10: one row per userId with recommended movieIds in the columns '0'..'9'.
cf_top10 = pd.read_csv('CF_top10.csv')

In [3]:
# Reshape the genome scores from long form (movieId, tagId, relevance) to a
# wide matrix: one row per movieId, one column per tagId, relevance as values.
# The guard keeps this cell idempotent: once pivoted, 'tagId' is no longer a
# column, so re-running the cell is a no-op instead of raising a KeyError.
if 'tagId' in genome.columns:
    genome = genome.pivot(index='movieId', columns='tagId', values='relevance')

In [6]:
def get_recommendations(user_number, genre='Romance'):
    """Recommend unseen movies of one genre for a user via a per-user regression.

    A ``BayesianRidge`` model is fitted on the movies the user has rated,
    using the columns of the module-level ``genome`` frame as features and the
    user's rating as the target. The model then scores every movie flagged
    with ``genre`` in ``movies`` that the user has not rated, and the five
    highest predictions are returned.

    Movies without a complete row in ``genome`` are excluded from both the
    training data and the candidates, because the regressor rejects missing
    feature values.

    Parameters
    ----------
    user_number
        Value matched against ``ratings['userId']``.
    genre : str, default 'Romance'
        Name of a flag column in ``movies``; rows where it equals 1 are the
        candidate pool.

    Returns
    -------
    pandas.DataFrame
        At most five rows ordered by descending ``yhat``: ``movieId``,
        ``yhat`` (predicted rating), then the remaining columns of ``movies``.

    Raises
    ------
    KeyError
        If ``genre`` is not a column of ``movies``.
    ValueError
        If the user has no rated movie with genome features, or if no unseen
        movie of the genre has genome features.
    """
    # Work on fresh frames throughout (no inplace=True) so the global
    # ratings/movies/genome frames are never touched through a slice.
    user_ratings = ratings.loc[ratings['userId'] == user_number]

    # Training set: the user's rated movies joined to their genome features.
    # The inner join plus dropna removes movies with no (or partial) features.
    training = (
        user_ratings
        .drop(['userId', 'timestamp'], axis=1)
        .join(genome, on='movieId', how='inner')
        .set_index('movieId')
        .dropna()
    )
    if training.empty:
        raise ValueError(
            'user {!r} has no rated movies with genome features'.format(user_number)
        )

    # Candidate pool: movies of the requested genre the user has not rated.
    in_genre = movies.loc[movies[genre] == 1, 'movieId']
    unseen = in_genre[~in_genre.isin(user_ratings['movieId'])]
    candidates = (
        unseen.to_frame()
        .join(genome, on='movieId', how='inner')
        .set_index('movieId')
        .dropna()
    )
    if candidates.empty:
        raise ValueError(
            'no unseen {!r} movies with genome features for user {!r}'.format(
                genre, user_number
            )
        )

    # Fit on the user's own ratings, then score the candidates.
    model = BayesianRidge()
    model.fit(training.drop('rating', axis=1), training['rating'])
    predictions = model.predict(candidates)

    # Keep the predictions in their own frame so the feature matrix is not mutated.
    scored = pd.DataFrame({'movieId': candidates.index, 'yhat': predictions})
    top = scored.sort_values('yhat', ascending=False).head()

    # Attach title and genre flags for display.
    return top.merge(movies, on='movieId', how='left')

In [27]:
def get_CF10(user_number):
    """Return the stored top-10 list for a user, with movie details attached.

    Looks the user up in the module-level ``cf_top10`` frame, unpivots the
    columns '0' through '9' into a single ``movieId`` column (in that column
    order), and left-joins the result onto ``movies``.
    """
    # the ten slot columns are named '0' .. '9'
    slot_columns = [str(slot) for slot in range(10)]

    # rows of the top-10 table belonging to this user
    is_user = cf_top10['userId'] == user_number
    user_rows = cf_top10.loc[is_user]

    # wide -> long, keeping only the movie ids
    long_form = pd.melt(user_rows, value_vars=slot_columns, value_name='movieId')
    top_ids = long_form[['movieId']]

    # enrich with the movie metadata
    return top_ids.merge(movies, how='left', on='movieId')

In [35]:
def user_ratings_history(user_number):
    """Return every rating a user has given, joined with the movie details.

    Filters the module-level ``ratings`` frame to ``user_number``, left-joins
    ``movies`` on ``movieId``, and drops the ``userId`` and ``movieId``
    columns from the result.
    """
    mask = ratings['userId'] == user_number
    user_rows = ratings.loc[mask]
    with_details = user_rows.merge(movies, how='left', on='movieId')
    return with_details.drop(columns=['userId', 'movieId'])

In [31]:
# Preview the per-user summary table to pick a userId to try below.
users.head()

Unnamed: 0,userId,rated_count,avg_rating
0,129562,13,2.692308
1,65137,13,3.961538
2,11415,14,3.214286
3,136748,14,3.714286
4,59147,14,4.464286


In [38]:
# Rating history of user 11415, with title and genre flags for each rated movie.
user_ratings_history(11415)

Unnamed: 0,rating,timestamp,title,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,4.0,1204997180,Anaconda (1997),1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,4.5,1204997927,Heaven (1998),0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3.5,1204997207,Stigmata (1999),0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4.0,1204997720,"Guardian, The (1990)",0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
4,2.5,1204997284,Alive (1993),0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0.5,1204997269,Return to Me (2000),0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
6,5.0,1204998479,Gladiator (2000),1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0.5,1204997294,Romeo and Juliet (1968),0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8,0.5,1204997479,Prelude to a Kiss (1992),0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
9,5.0,1204998565,"Beautiful Mind, A (2001)",0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [39]:
# Stored top-10 list (from CF_top10.csv) for user 11415.
get_CF10(11415)

Unnamed: 0,movieId,title,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,7153,"Lord of the Rings: The Return of the King, The...",1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0
1,5952,"Lord of the Rings: The Two Towers, The (2002)",0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
2,4993,"Lord of the Rings: The Fellowship of the Ring,...",0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
3,318,"Shawshank Redemption, The (1994)",0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0
4,2028,Saving Private Ryan (1998),1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
5,109487,Interstellar (2014),0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
6,110,Braveheart (1995),1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
7,260,Star Wars: Episode IV - A New Hope (1977),1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
8,356,Forrest Gump (1994),0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,0
9,99114,Django Unchained (2012),1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1


In [46]:
# Genome-based regression picks for user 11415, restricted to Adventure titles.
get_recommendations(11415, genre='Adventure')

Unnamed: 0,movieId,yhat,title,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,48304,4.825097,Apocalypto (2006),0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,5952,4.810298,"Lord of the Rings: The Two Towers, The (2002)",0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,91529,4.787809,"Dark Knight Rises, The (2012)",1,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,7454,4.712749,Van Helsing (2004),1,1,0,0,0,0,0,...,1,0,1,0,0,0,0,0,0,0
4,40732,4.704625,"Descent, The (2005)",0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
