# Table of Contents
1. Loading Data
1. Preprocessing
1. Content Based Filtering

In [None]:
# Importing important libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Loading Data

In [None]:
# Read the Netflix titles CSV into a DataFrame and preview its first rows.
movies = pd.read_csv(
    filepath_or_buffer='../input/netflix-shows/netflix_titles.csv',
)
movies.head(n=5)

In [None]:
# Print a concise summary of the DataFrame: its columns, non-null counts and dtypes.
movies.info()

## Preprocessing

In [None]:
# Normalise the comma-separated `listed_in` genre string into a '|'-separated
# string with no stray whitespace around any individual genre name.
def _cleanGenres(value):
    """Return `value` with each comma-separated part stripped and joined by '|'.

    Non-string values (NaN for a missing entry, or an already-split list) are
    returned unchanged, so the cell neither crashes on missing data nor
    corrupts the column when it is re-run.
    """
    if not isinstance(value, str):
        return value
    # Strip every part on its own: stripping only the whole string leaves
    # padding next to the separators (e.g. "Dramas,  Comedies" or "Dramas,Comedies").
    return '|'.join(part.strip() for part in value.split(','))


movies['listed_in'] = movies['listed_in'].apply(_cleanGenres)
movies.head(5)

In [None]:
# Split the '|'-separated `listed_in` string into a list of genres to simplify later use.
# Only string entries are split: `.str.split` on values that are already lists
# yields NaN, so this guard keeps the cell safe to re-run. Missing values stay as they are.
movies['listed_in'] = movies['listed_in'].apply(
    lambda genres: genres.split('|') if isinstance(genres, str) else genres
)
movies.head(5)

In [None]:
# One-hot encode the genres: one indicator column per genre
# (1 = the title is listed under that genre, 0 = it is not).

# `listed_in` holds a list of genres per title. Joining each list back into a
# '|'-separated string lets pandas build every indicator column in a single
# vectorised call instead of a Python loop over all rows.
genreFlags = movies['listed_in'].str.join('|').str.get_dummies(sep='|')

# Attach the indicators to the catalogue columns. `join` returns a new frame,
# so `movies` itself is left untouched. Only the genre columns are 0-filled
# (by get_dummies); missing metadata such as director or cast stays NaN
# rather than being overwritten with 0.
moviesDetails = movies.join(genreFlags)
moviesDetails.head(5)

## Content-Based Filtering

In [None]:
# Titles the user has rated, together with the rating given to each
userInput = [
    {'title': title, 'ratings': score}
    for title, score in (
        ('Black Panther', 4.6),
        ('Thor: Ragnarok', 3.5),
        ('Avengers: Infinity War', 5.0),
        ("Marvel's The Defenders", 2.0),
        ("Men in Black", 4.4),
    )
]
inputMovies = pd.DataFrame(userInput)

# Bar chart of the user's ratings, one bar per title
plt.bar(x=inputMovies['title'], height=inputMovies['ratings'])

# Tilt the tick labels so the long titles stay readable
plt.xticks(rotation=45)
plt.xlabel("Title")
plt.ylabel("Ratings")
plt.title("User Preferences")

In [None]:
# Attach each rated title's show_id to the user's ratings.

# Catalogue rows whose title is one of the titles the user rated
inputId = movies[movies['title'].isin(inputMovies['title'].tolist())]

# With no `on=` given, merge joins on the columns both frames share and keeps
# only matching rows, so rated titles that are absent from the catalogue drop out.
inputMovies = pd.merge(inputId, inputMovies)

# Drop the catalogue metadata that the recommender does not need.
# `columns=` is used because passing the axis positionally
# (`drop(label, 1)`) raises a TypeError from pandas 2.0 onwards.
inputMovies = inputMovies.drop(columns=[
    'type',
    'director',
    'cast',
    'country',
    'date_added',
    'release_year',
    'rating',
    'duration',
    'listed_in',
    'description',
])

inputMovies

In [None]:
# Pick out the genre-encoded rows for the titles the user rated
isRated = moviesDetails['show_id'].isin(inputMovies['show_id'].tolist())
userMovies = moviesDetails.loc[isRated]
userMovies

In [None]:
# Renumber the rows from 0 so they can be aligned by position with the rows of
# `inputMovies` later on (presumably the "issues" the original note referred to),
# then keep only the genre indicator columns.
userMovies = userMovies.reset_index(drop=True)

# `columns=` is used because passing the axis positionally
# (`drop(label, 1)`) raises a TypeError from pandas 2.0 onwards.
userGenre = userMovies.drop(columns=[
    'show_id',
    'title',
    'type',
    'director',
    'cast',
    'country',
    'date_added',
    'release_year',
    'rating',
    'duration',
    'listed_in',
    'description',
])
userGenre

In [None]:
# Display the user's ratings; these are the weights used to build the user profile
inputMovies['ratings']

In [None]:
# Build the user profile with a dot product:
# (genres x rated titles) . (rating per rated title) -> one weight per genre
userProfile = userGenre.T.dot(inputMovies['ratings'])
userProfile.head(10)

In [None]:
# Build the genre table for every title in the catalogue: index the rows by
# show_id and drop every column that is not a genre indicator.
detailedTable = moviesDetails.set_index(moviesDetails['show_id'])

# `columns=` is used because passing the axis positionally
# (`drop(label, 1)`) raises a TypeError from pandas 2.0 onwards.
detailedTable = detailedTable.drop(columns=[
    'show_id',
    'title',
    'type',
    'director',
    'cast',
    'country',
    'date_added',
    'release_year',
    'rating',
    'duration',
    'listed_in',
    'description',
])
detailedTable.head()

In [None]:
# (number of titles, number of genre columns)
detailedTable.shape

In [None]:
# Score every title: weight its genre indicators by the user profile, add the
# weights up per title, and normalise by the total weight of the profile.
weightedGenres = detailedTable.mul(userProfile, axis='columns')
recommendedMovies = weightedGenres.sum(axis='columns').div(userProfile.sum())
recommendedMovies.head()

In [None]:
# Sort the scores in descending order so the strongest recommendations come first
recommendedMovies = recommendedMovies.sort_values(ascending=False)
recommendedMovies.head()

In [None]:
# Final recommendation table

# The 20 first entries of the sorted scores, i.e. the best-scoring show_ids, best first
topScores = recommendedMovies.head(20)
rankOf = {showId: position for position, showId in enumerate(topScores.index)}

# A boolean filter returns the matching rows in catalogue order, which throws
# the ranking away; reorder the matches by their position in `topScores` so
# the table reads from the strongest recommendation downwards.
candidates = movies.loc[movies['show_id'].isin(topScores.index)]
rankOrder = candidates['show_id'].map(rankOf).to_numpy().argsort(kind='stable')
final_Table = candidates.iloc[rankOrder]
final_Table.title.to_frame()

# **Advantages and Disadvantages of Content-Based Recommendation System**

**Advantages**
1. Learns the user's preferences
1. Highly personalized for the user

**Disadvantages**
1. Doesn't take into account what other users think of an item, so low-quality items may be recommended
1. Extracting useful features from the item data is not always straightforward
1. Determining what characteristics of the item the user dislikes or likes is not always obvious