## Movies Recommendation System
&nbsp;
### Content-Based Filtering

In [None]:
# importing libraries 
import pandas as pd
import numpy as np

In [None]:
# Load the three MovieLens 100k tables; column names are supplied
# explicitly through `names` for every file.

# user table (pipe-delimited)
user_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv(
    'ml-100k/u.user',
    sep='|',
    names=user_cols,
    encoding='latin-1',
)

# ratings table (tab-delimited)
ratings_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv(
    'ml-100k/u.data',
    sep='\t',
    names=ratings_cols,
    encoding='latin-1',
)

# movie table (pipe-delimited); only the first five columns are read here
movies_cols = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url']
movies = pd.read_csv(
    'ml-100k/u.item',
    sep='|',
    names=movies_cols,
    usecols=range(5),
    encoding='latin-1',
)

In [None]:
# Load the genre columns (columns 5 through 23 of u.item), one column per genre.
# NOTE(review): presumably each column is a 0/1 flag per movie -- confirm against the data.
genres_list = [
    'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
    'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery',
    'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]
genre = pd.read_csv(
    'ml-100k/u.item',
    sep='|',
    names=genres_list,
    usecols=range(5, 24),
    encoding='latin-1',
)

In [None]:
# Remove, in place, the columns that are not used in the rest of the notebook
movies.drop(columns=['video_release_date', 'imdb_url'], inplace=True)
ratings.drop(columns='unix_timestamp', inplace=True)

In [None]:
# Combine the three tables into one frame: movies are joined with ratings,
# and the result with users, each time on the columns the two frames share
dataset = movies.merge(ratings).merge(users)

In [None]:
# top 20 most rated movies: rank titles by how many ratings they received.
# Sorting the individual rows by rating value would only show 20 arbitrary
# rows that happen to carry the highest rating, so the ratings are
# aggregated per title first; the mean rating is shown alongside for context.
(
    dataset.groupby('title')['rating']
    .agg(['count', 'mean'])
    .sort_values('count', ascending=False)
    .head(20)
)

In [None]:
# Total per genre column, largest first
genre_totals = genre.sum()
genre_totals.sort_values(ascending=False)

In [None]:
# Compute the pairwise cosine similarity between the rows of `genre`.
# The genre columns are already numeric, so the frame is passed to
# scikit-learn as-is, without any further vectorisation step.
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(X=genre)

In [None]:
# Lookup tables used to turn similarity scores into recommendations

# movie titles, in the row order of `movies`
titles = movies['title']

# Series keyed by movie title whose values are the matching row labels of `movies`
indices = pd.Series(data=movies.index, index=titles)

# creating the recommendation function
def movie_recommendation(title, top_n=20):
    """Return the titles of the movies whose genres are most similar to `title`.

    Parameters
    ----------
    title : str
        Movie title, spelled exactly as in the index of `indices`.
    top_n : int, default 20
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Up to `top_n` entries of `titles`, ordered from most to least
        similar. The queried movie itself is never part of the result.

    Raises
    ------
    KeyError
        If `title` is not present in `indices`.
    """
    try:
        # A title that is listed more than once yields several positions,
        # a unique title yields a scalar; normalise both to a 1-D array.
        own_positions = np.atleast_1d(indices[title])
    except KeyError:
        raise KeyError(f"unknown movie title: {title!r}") from None

    # similarity of the queried movie to every movie in the catalogue
    scores = cosine_sim[own_positions[0]]

    # Best score first; the stable sort keeps tied movies in catalogue order.
    ranked = np.argsort(-scores, kind='stable')

    # Drop the queried movie by position instead of assuming it sorts first:
    # other movies with an identical genre vector tie with it at the top, so
    # skipping "the first entry" could discard the wrong movie.
    ranked = ranked[~np.isin(ranked, own_positions)]

    return titles.iloc[ranked[:max(top_n, 0)]]

In [None]:
# Example: the 20 movies closest in genre to Toy Story
movie_recommendation('Toy Story (1995)').head(20)