## In this notebook we will implement Collaborative Filtering, following the steps below

* Import the required libraries
* Load the popular MovieLens dataset
* Merge the Ratings and Movies data 
* Calculate the Pearson correlation between movies
* Calculate similarity for romantic and action movies

## Item Based Filtering

* In this technique we will measure the similarity between the items that users have rated or interacted with, and other items in the store

In [None]:
!python -m pip install pip --upgrade --user -q
!python -m pip install numpy pandas seaborn matplotlib scipy statsmodels sklearn keras tensorflow opencv-python scikit-image --user -q

In [None]:
# Ask the running kernel to shut down and restart (the True argument requests
# a restart) -- presumably so the packages installed above become importable.
# NOTE(review): a restart in the middle of the notebook interrupts "Run All";
# execution has to be resumed manually from the next cell.
import IPython

kernel_app = IPython.Application.instance()
kernel_app.kernel.do_shutdown(True)

In [None]:
import pandas as pd

# Read the three CSV files from the current working directory.
# movies.csv and tags.csv are decoded as Latin-1; ratings.csv uses the
# pandas default encoding.
movies = pd.read_csv("movies.csv", encoding="Latin1")
Ratings = pd.read_csv("ratings.csv")
Tags = pd.read_csv("tags.csv", encoding="Latin1")

# Preview the first rows of the movie table.
movies.head()

In [None]:
# Join every rating to its movie record. No key is given, so pandas joins on
# the columns the two frames have in common (presumably `movieId` -- confirm
# against the CSV headers). Genres and timestamps are not used later on.
ratings = movies.merge(Ratings).drop(columns=['genres', 'timestamp'])

print(ratings.shape)

ratings.head()

In [None]:
# Reshape the long rating table into a user x title matrix: one row per user,
# one column per movie title, NaN where no rating exists.
UserRatings = ratings.pivot_table(values='rating', index=['userId'], columns=['title'])
print("Before: ", UserRatings.shape)

# Keep only titles that have at least 10 ratings, then fill the remaining
# gaps with 0 so that every cell holds a number.
UserRatings = UserRatings.dropna(axis='columns', thresh=10).fillna(0)
print("After: ",UserRatings.shape)

## Pearson Correlation

In [None]:
# Pairwise Pearson correlation between the columns of UserRatings, i.e. a
# title x title similarity matrix.
corrMatrix = UserRatings.corr(method="pearson")

# Show the first ten rows of the matrix.
corrMatrix.head(n=10)

In [None]:
def get_similar(movie_name, rating, neutral_rating=2.5, corr_matrix=None):
    """Score every movie by its similarity to one movie the user has rated.

    The column of the correlation matrix belonging to ``movie_name`` is scaled
    by ``rating - neutral_rating``. A rating above the neutral point gives a
    positive weight, so positively correlated movies score high. A rating
    below it gives a negative weight, which flips the ordering so that movies
    correlated with a disliked title score low.

    Parameters
    ----------
    movie_name : str
        Column label of the rated movie in the correlation matrix.
    rating : float
        Rating the user gave to ``movie_name``.
    neutral_rating : float, default 2.5
        Rating treated as "neither liked nor disliked".
    corr_matrix : pandas.DataFrame, optional
        Item-item correlation matrix to look up. When omitted, the
        notebook-level ``corrMatrix`` is used.

    Returns
    -------
    pandas.Series
        Weighted similarity scores, sorted from highest to lowest.

    Raises
    ------
    KeyError
        If ``movie_name`` is not a column of the correlation matrix.
    """
    if corr_matrix is None:
        # Resolved at call time so the notebook can rebuild corrMatrix freely.
        corr_matrix = corrMatrix
    weight = rating - neutral_rating
    weighted_scores = corr_matrix[movie_name] * weight
    return weighted_scores.sort_values(ascending=False)

## Similar Romantic Movies

In [None]:
# Sample user profile: (title, rating) pairs used to seed the recommendations.
romantic_movies = [
    ("Father of the Bride Part II (1995)", 5),
    ("Alice in Wonderland (2010)", 3),
    ("Aliens (1986)", 1),
    ("2001: A Space Odyssey (1968)", 2),
]

# One row of weighted similarity scores per rated movie, built in one step.
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every iteration.
# reset_index(drop=True) reproduces the 0..n-1 row labels that
# append(..., ignore_index=True) used to give.
similar_movies = pd.DataFrame(
    [get_similar(movie, rating) for movie, rating in romantic_movies]
).reset_index(drop=True)

similar_movies.head(10)

In [None]:
# Add up the scores of each title across all rated movies and list the 20
# highest totals for the romantic profile.
# NOTE(review): titles the user has already rated are not filtered out, so
# they can appear in this ranking.
similar_movies.sum(axis=0).sort_values(ascending=False).iloc[:20]

## Similar Action Movies

In [None]:
# Sample user profile: (title, rating) pairs used to seed the recommendations.
action_movies = [
    ("Skyfall (2012)", 5),
    ("Mission: Impossible III (2006)", 4),
    ("Toy Story 3 (2010)", 2),
    ("2 Fast 2 Furious (Fast and the Furious 2, The) (2003)", 4),
]

# One row of weighted similarity scores per rated movie, built in one step.
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every iteration.
# reset_index(drop=True) reproduces the 0..n-1 row labels that
# append(..., ignore_index=True) used to give.
similar_movies = pd.DataFrame(
    [get_similar(movie, rating) for movie, rating in action_movies]
).reset_index(drop=True)

similar_movies.head(10)

In [None]:
# Add up the scores of each title across all rated movies and list the 20
# highest totals for the action profile.
# NOTE(review): titles the user has already rated are not filtered out, so
# they can appear in this ranking.
similar_movies.sum(axis=0).sort_values(ascending=False).iloc[:20]