**Item-Based Collaborative Filtering**

*Method 1 (using neighbour indices and distances)*

In [None]:
#modules required
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Switch the working directory to the Google Drive folder that holds the dataset,
# so the relative CSV path used by the read_csv calls resolves.
# NOTE(review): this line only changes directory; it assumes Drive is already
# mounted at /content/drive — confirm before running on a fresh runtime.
%cd '/content/drive/My Drive/Colab Notebooks/Amazon movies'

In [None]:
# Read the ratings CSV from the current working directory into a DataFrame
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='Amazon - Movies and TV Ratings.csv')
df.head(n=5)

In [None]:
# Use user_id as the row index (replacing the auto-generated integer index)
# and substitute 0 for every missing value, in a single chained expression.
df = df.set_index('user_id').fillna(0)
df.head()


In [None]:
# Swap the axes so that each row is a movie and each column is a user.
movies = df.transpose()
movies.head()

In [None]:
# Label the row index (the former column headers) as 'Movie_name'.
movies.index.name = 'Movie_name'
movies.head()

In [None]:
from sklearn.neighbors import NearestNeighbors

In [None]:
# Brute-force nearest-neighbour search over the movie rows using the cosine
# metric. The model is fitted on the movie matrix and then queried with the
# same matrix, asking for 3 neighbours per movie.
rating_matrix = movies.values
knn = NearestNeighbors(metric='cosine', algorithm='brute').fit(rating_matrix)
distances, indices = knn.kneighbors(rating_matrix, n_neighbors=3)

In [None]:
# Neighbour positions returned by kneighbors: one row per movie, holding the
# positional indices (rows of `movies`) of the 3 neighbours found for it.
print(indices)

In [None]:
# Cosine distances returned by kneighbors, lined up entry-for-entry with `indices`.
print(distances)

In [None]:
# Report the neighbours the kNN model returned for one movie.
target_movie = 'Movie1'
# Positional row of the target movie in `movies`; `indices` and `distances`
# were computed from movies.values, so they share this row order.
index_for_movie = movies.index.get_loc(target_movie)
# Pair each neighbour index with its distance and filter out the movie itself.
# Filtering (rather than list.index / list.remove) avoids a ValueError when
# tied distances leave the movie out of its own neighbour list; in that case
# every returned neighbour is kept.
neighbour_pairs = [
    (neighbour_idx, neighbour_dist)
    for neighbour_idx, neighbour_dist in zip(
        indices[index_for_movie].tolist(),
        distances[index_for_movie].tolist(),
    )
    if neighbour_idx != index_for_movie
]
# Positional indices of the similar movies, nearest first
sim_movies = [neighbour_idx for neighbour_idx, _ in neighbour_pairs]
# Distances from 'Movie1' to those movies, in the same order
movie_distances = [neighbour_dist for _, neighbour_dist in neighbour_pairs]
print('The Nearest Movies to Movie1:', sim_movies)
print('The Distance from Movie1:', movie_distances)

*Method 2 (using `corrwith()`)*

In [None]:
# Method 2 starts again from the raw CSV: user_id stays a regular column here,
# and every missing value is replaced with 0.
df = pd.read_csv('Amazon - Movies and TV Ratings.csv').fillna(0)
df.head()

In [None]:
# Reshape wide -> long: one row per (user, movie) pair, with the movie column
# headers collected under 'Movie_name' and the cell values under 'Ratings'.
# melted_df is only used for the per-movie statistics and plots below.
melted_df = (
    df.melt(id_vars=['user_id'], var_name='Movie_name', value_name='Ratings')
      .set_index('user_id')
)
# Keep only real ratings. Missing ratings reach this point as 0 (or NaN), and
# leaving them in makes groupby(...).count() return the same number (the number
# of users) for every movie and drags every mean rating toward 0.
# NOTE(review): assumes genuine ratings are strictly positive — confirm against the data.
melted_df = melted_df[melted_df['Ratings'] > 0]
melted_df.head()


In [None]:
# Five movies with the highest mean rating
(
    melted_df
    .groupby('Movie_name')['Ratings']
    .mean()
    .sort_values(ascending=False)
    .head()
)

In [None]:
# Five movies with the most rating entries (count of non-null 'Ratings' values)
(
    melted_df
    .groupby('Movie_name')['Ratings']
    .count()
    .sort_values(ascending=False)
    .head()
)

In [None]:
# Per-movie mean rating as a one-column table ('Ratings'), indexed by movie name
plot_1 = melted_df.groupby('Movie_name')['Ratings'].mean().to_frame()
plot_1.head()

In [None]:
# Add, for each movie, the number of rating entries it has; the Series is
# aligned to plot_1 on the movie-name index.
plot_1['No. of ratings'] = melted_df.groupby('Movie_name')['Ratings'].count()
plot_1.head()

In [None]:
# Distribution of the number of ratings per movie (70 bins, wide figure)
fig, ax = plt.subplots(figsize=(15, 4))
plot_1['No. of ratings'].hist(bins=70, ax=ax)

In [None]:
# Distribution of the per-movie mean rating (70 bins, wide figure)
fig, ax = plt.subplots(figsize=(15, 4))
plot_1['Ratings'].hist(bins=70, ax=ax)

In [None]:
# Mean rating against number of ratings per movie, with marginal distributions
sns.jointplot(
    data=plot_1,
    x='Ratings',
    y='No. of ratings',
    alpha=0.5,
)


In [None]:
# Pull out the 'Movie1' column: one value per row (user) of df
movie1_ratings = df.loc[:, 'Movie1']
movie1_ratings.head()

In [None]:
# Correlate every movie column with the 'Movie1' ratings (corrwith's default
# method is Pearson).
# user_id is an identifier, not a rating, so it is dropped first: otherwise it
# is either correlated against Movie1 as if it were a movie or, when it holds
# strings, can make corrwith raise on pandas versions that no longer skip
# non-numeric columns by default. errors='ignore' keeps the cell working if
# user_id is not a column.
# NOTE(review): df had its missing values filled with 0 when it was loaded, so
# unrated entries take part in the correlation as a rating of 0 — confirm this
# is intended.
similar_to_movie1 = df.drop(columns=['user_id'], errors='ignore').corrwith(movie1_ratings)

In [None]:
# Turn the correlation Series into a one-column table named 'Correlation'
corr_movie1 = similar_to_movie1.to_frame(name='Correlation')
corr_movie1.head()

In [None]:
# Attach each movie's rating count, matched on the index (left join)
rating_counts = plot_1[['No. of ratings']]
corr_movie1 = corr_movie1.join(rating_counts, how='left')
corr_movie1.head()

In [None]:
# Keep movies with more than 100 ratings, then show the five with the highest
# correlation to Movie1.
enough_ratings = corr_movie1['No. of ratings'] > 100
corr_movie1.loc[enough_ratings].sort_values(by='Correlation', ascending=False).head()