# Knowledge Based Recommendation System

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read in the datasets: movie metadata into `movies`, user ratings into `reviews`.
# Both paths are relative, so they resolve against the current working directory.
movies = pd.read_csv('data/movies.csv')
reviews = pd.read_csv('data/ratings.csv')

In [3]:
# Preview the first rows of the movie table
movies.head()

Unnamed: 0,movie_id,title,genres,year,Romance,Musical,Documentary,Horror,Adventure,Sci-Fi,...,History,Action,Talk-Show,Game-Show,Reality-TV,News,Music,Sport,Mystery,century
0,8,Edison Kinetoscopic Record of a Sneeze,Documentary|Short,1894,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1800
1,10,La sortie des usines Lumière,Documentary|Short,1895,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1800
2,12,The Arrival of a Train,Documentary|Short,1896,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1800
3,91,Le manoir du diable,Short|Horror,1896,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1800
4,131,Une nuit terrible,Short|Comedy|Horror,1896,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1800


In [4]:
# Preview the first rows of the ratings table
reviews.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,114508,8,2013-10-05 21:00:50
1,2,499549,9,2013-08-17 15:26:38
2,2,1305591,8,2013-08-17 12:28:27
3,2,1428538,1,2013-06-15 14:38:09
4,3,75314,1,2020-07-23 01:42:04


# Create a Ranked dataframe of movies

In [11]:
# Summarise the reviews per movie: mean rating, number of ratings, and the
# largest timestamp value (the latest rating, given sortable timestamps).
# Named aggregation yields flat column names directly, so no rename is needed.
df_agg = reviews.groupby('movie_id').agg(
    avg_rating=('rating', 'mean'),
    num_ratings=('rating', 'count'),
    last_rating=('timestamp', 'max'),
)

# Attach the summary to the movie metadata (joined on movie_id) and rank:
# best average first, ties broken by rating count, then by latest rating.
rank_movie_recs = (
    movies
    .set_index('movie_id')
    .join(df_agg)
    .sort_values(by=['avg_rating', 'num_ratings', 'last_rating'], ascending=False)
)

rank_movie_recs.head()

Unnamed: 0_level_0,title,genres,year,Romance,Musical,Documentary,Horror,Adventure,Sci-Fi,Western,...,Game-Show,Reality-TV,News,Music,Sport,Mystery,century,avg_rating,num_ratings,last_rating
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4921860,MSG 2 the Messenger,Comedy|Drama|Fantasy|Horror,2015,0,0,0,1,0,0,0,...,0,0,0,0,0,0,2000,10.0,48,2016-08-14 17:16:50
5262972,Avengers: Age of Ultron Parody,Short|Comedy,2015,0,0,0,0,0,0,0,...,0,0,0,0,0,0,2000,10.0,28,2016-01-08 00:44:43
6662050,Five Minutes,Short|Comedy,2017,0,0,0,0,0,0,0,...,0,0,0,0,0,0,2000,10.0,22,2019-04-20 22:29:19
2737018,Selam,Drama|Romance,2013,1,0,0,0,0,0,0,...,0,0,0,0,0,0,2000,10.0,10,2015-05-10 22:56:01
5804314,Let There Be Light,Drama,2017,0,0,0,0,0,0,0,...,0,0,0,0,0,0,2000,10.0,7,2019-12-25 16:27:47


## Popular Movies

In [12]:
def popular_recommendations(n_top, ranked_movies):
    '''
    Return the titles of the first n_top rows of an already ranked movie table.

    INPUT:
    n_top - an integer of the number recommendations you want back
    ranked_movies - a pandas dataframe of movies already sorted from best to
                    worst; it must contain a "title" column

    OUTPUT:
    top_movies - a list of the n_top recommended movies by movie title in order best to worst
                 (shorter when ranked_movies has fewer than n_top rows)
    '''
    # Slice positionally *before* converting, so only n_top titles are
    # materialised instead of a list of every title in the table.
    top_movies = ranked_movies["title"].iloc[:n_top].tolist()
    return top_movies

In [13]:
# Five most popular titles overall (the ranking is the same for every user).
# NOTE(review): the variable name says "20_for_1" although only 5 titles are
# requested; the name is kept as-is in case later cells refer to it.
recs_20_for_1 = popular_recommendations(n_top=5, ranked_movies=rank_movie_recs)
recs_20_for_1

['MSG 2 the Messenger',
 'Avengers: Age of Ultron Parody',
 'Five Minutes',
 'Selam',
 'Let There Be Light']

## Filtered Recommendations

Filtering based on genre and year; the filtered movies keep their rating-based rank order.

In [27]:
def popular_recs_filtered(n_top, ranked_movies, years=None, genres=None):
    '''
    Return the titles of the first n_top ranked movies that pass the optional
    year and genre filters.

    INPUT:
    n_top - an integer of the number recommendations you want back
    ranked_movies - a pandas dataframe of the already ranked movies based on avg rating, count, and time;
                    it must contain a "title" column, a "year" column when years is given,
                    and one 0/1 column per requested genre
    years - a list of years (strings or ints); a movie is kept when its year is
            any of them. A single year may also be passed on its own.
            None (the default) disables the year filter
    genres - a list of strings with genres of movies; a movie is kept only when
             it is flagged (== 1) for every listed genre. A single genre name
             may also be passed on its own. None (the default) disables the genre filter

    OUTPUT:
    top_movies - a list of the n_top recommended movies by movie title in order best to worst
                 (shorter when fewer movies pass the filters)

    RAISES:
    KeyError - if a requested genre is not a column of ranked_movies
    ValueError - if a requested year cannot be converted to an integer
    '''
    candidates = ranked_movies

    # Step 1a: keep movies released in any of the requested years
    if years is not None:
        # A bare '2015' would otherwise be iterated character by character
        # (2, 0, 1, 5) and silently match nothing; treat it as a one-item list.
        if isinstance(years, (str, int)):
            years = [years]
        wanted_years = [int(year) for year in years]
        candidates = candidates[candidates['year'].isin(wanted_years)]

    # Step 1b: keep movies that carry every requested genre flag
    if genres is not None:
        # Same guard as for years: a bare 'History' means a single genre.
        if isinstance(genres, str):
            genres = [genres]
        genre_cols = list(genres)
        if genre_cols:  # an empty list leaves the table unfiltered
            has_all_genres = candidates[genre_cols].eq(1).all(axis=1)
            candidates = candidates[has_all_genres]

    # Step 2: slice positionally before converting so only n_top titles are built
    top_movies = candidates["title"].iloc[:n_top].tolist()

    return top_movies

In [28]:
# Top 20 movies released in 2015-2018 that are flagged with the 'History' genre
recs_20_for_filtered = popular_recs_filtered(
    20,
    rank_movie_recs,
    years=['2015', '2016', '2017', '2018'],
    genres=['History'],
)

recs_20_for_filtered

['Aloko Udapadi',
 "42nd Street Memories: The Rise and Fall of America's Most Notorious Street",
 'Russkie evrei. Film tretiy. Posle 1948 goda',
 'Russkie evrei. Film vtoroy. 1918-1948',
 'Jian jun da ye',
 'VHS Massacre',
 'Big Sonia',
 'Death of a Nation',
 'Das schweigende Klassenzimmer',
 'Farzand',
 'Panic: The Untold Story of the 2008 Financial Crisis',
 'Ashes in the Snow',
 'Sustainable',
 'What Was Ours',
 'Shock Room',
 'Catching the Sun',
 'Four Blood Moons',
 'Desert Migration',
 'A Billion Lives',
 "Hillary's America: The Secret History of the Democratic Party"]