In [None]:
from datetime import datetime
import pandas as pd
import numpy as np
import seaborn as sns
import os
import random
import matplotlib
import matplotlib.pyplot as plt

In [None]:
import warnings
# Suppress every warning for the rest of the session. Note that this also hides
# deprecation notices from pandas / matplotlib / seaborn.
warnings.filterwarnings("ignore")

In [None]:
# Load the ratings table from the MovieLens input directory and preview the first rows.
df_ratings=pd.read_csv('../input/movielens-dataset/ratings.csv')
df_ratings.head()

In [None]:
# Load the movie table (it carries the 'title' and 'genres' columns used below) and preview it.
df_movies=pd.read_csv("../input/movielens-dataset/movies.csv")
df_movies.head()

In [None]:
# One-hot encode the '|'-separated genres string: one 0/1 indicator column per genre.
df_genres=df_movies['genres'].str.get_dummies(sep='|')
df_genres.head()

In [None]:
# Attach the genre indicator columns to the movie table, matching rows by index
# (df_genres was derived from df_movies, so both frames share the same index).
df_movies = df_movies.merge(df_genres, left_index=True, right_index=True)
df_movies.head()

In [None]:
# The raw genres string is redundant once the indicator columns exist, so remove it.
df_movies = df_movies.drop(columns=['genres'])
df_movies.head()

In [None]:
# Keep an untouched copy of the raw title in 'year'; the release year is parsed
# out of that copy in the following cell.
df_movies['year'] = df_movies['title']
# Remove only a genuine trailing "(YYYY)" suffix (plus surrounding whitespace).
# Blindly slicing off the last 7 characters mangled titles that have no year
# or that end with stray whitespace.
df_movies['title'] = df_movies['title'].str.replace(r'\s*\(\d{4}\)\s*$', '', regex=True)
df_movies.head()

In [None]:
# 'year' currently holds the raw title text. Pull the four digits out of a
# trailing "(YYYY)" instead of slicing fixed character positions, so titles
# with trailing whitespace still parse and titles without a year become
# missing (NaN) rather than an arbitrary 4-character fragment.
# The year is kept as a string, as before.
df_movies['year'] = df_movies['year'].str.extract(r'\((\d{4})\)\s*$', expand=False)
df_movies.head()

In [None]:
# Join every rating with its movie's metadata on the shared movieId key
# (merge's default join type is inner, so only matching ids are kept).
movie_data = df_ratings.merge(df_movies, on='movieId')

movie_data.head()

In [None]:
# Bar chart of how often each rating value occurs, with the count written
# next to the top of every bar.
plt.figure(figsize=(12, 8))
ax = sns.countplot(data=movie_data, x="rating")
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(str(bar_height), (bar.get_x() * 1.01, bar_height * 1.01))
# Re-use the current tick positions as the tick label text.
ax.set_yticklabels(list(ax.get_yticks()))
plt.title("Count Ratings in movie data", fontsize=20)
plt.xlabel("Ratings", fontsize=20)
plt.ylabel("Number of Ratings", fontsize=20)
plt.tick_params(labelsize=15)
plt.grid()

In [None]:
# Mean rating per title (first rows only). Grouping is by title text, so
# different movies that share a title are pooled together.
movie_data.groupby('title')['rating'].mean().head()

In [None]:
# Titles with the highest mean rating, regardless of how many ratings back that mean.
movie_data.groupby('title')['rating'].mean().sort_values(ascending=False).head()

In [None]:
# Titles that received the largest number of ratings.
movie_data.groupby('title')['rating'].count().sort_values(ascending=False).head()

In [None]:
# Mean rating per title, held as a one-column DataFrame ('rating') so that
# further per-title statistics can be added alongside it.
ratings_mean_count = movie_data.groupby('title')['rating'].mean().to_frame()
ratings_mean_count.head()

In [None]:
# Number of ratings per title; assignment aligns on the shared title index.
ratings_mean_count['rating_counts'] = movie_data.groupby('title')['rating'].count()
ratings_mean_count.head()

In [None]:
# Histogram (50 bins) of how many ratings each title received.
plt.figure(figsize=(20,5))
# Draw an edge around every bar so adjacent bins are visually separated.
plt.rcParams['patch.force_edgecolor'] = True
ratings_mean_count['rating_counts'].hist(bins=50)
plt.show()

In [None]:
# Histogram (50 bins) of the per-title mean rating.
plt.figure(figsize=(20,5))
# Draw an edge around every bar so adjacent bins are visually separated.
plt.rcParams['patch.force_edgecolor'] = True
ratings_mean_count['rating'].hist(bins=50)
plt.show()

In [None]:
# Joint plot of per-title mean rating (x) against number of ratings (y);
# alpha=0.4 keeps overlapping points readable.
plt.rcParams['patch.force_edgecolor'] = True
sns.jointplot(x='rating', y='rating_counts', data=ratings_mean_count, alpha=0.4,height=8)
plt.show()

In [None]:
# Wide user-by-title matrix of ratings. pivot_table's default aggregation (mean)
# applies when one user has several ratings under the same title.
user_movie_rating = pd.pivot_table(
    movie_data,
    index='userId',
    columns='title',
    values='rating',
)
user_movie_rating.head()

In [None]:
# Replace missing entries (user did not rate the title) with 0 so the matrix has no NaN.
user_movie_rating = user_movie_rating.fillna(0)
user_movie_rating.head()

In [None]:
def recommend(sample, min_rating_counts):
    """Return up to ten titles whose rating pattern best correlates with ``sample``.

    Reads the module-level ``user_movie_rating`` (user x title matrix) and
    ``ratings_mean_count`` (per-title statistics with a ``rating_counts`` column).

    Parameters
    ----------
    sample : str
        Title to find similar movies for.
    min_rating_counts : int or float
        Only titles with strictly more ratings than this are considered.

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``Correlation`` and ``rating_counts``, sorted by
        descending correlation, indexed from 1, at most 10 rows. The queried
        title itself is never included. The frame is empty when ``sample`` has
        no column in ``user_movie_rating`` or when no title passes the filter.
    """
    result_columns = ['title', 'Correlation', 'rating_counts']

    # A title without any ratings has no column in the matrix; report "nothing
    # to recommend" instead of raising KeyError.
    if sample not in user_movie_rating.columns:
        return pd.DataFrame(columns=result_columns)

    inputmovie_ratings = user_movie_rating[sample]
    movies_like_input = user_movie_rating.corrwith(inputmovie_ratings)

    corr_inputmovie = movies_like_input.to_frame(name='Correlation').dropna()
    # The queried movie correlates perfectly with itself; it is not a recommendation.
    corr_inputmovie = corr_inputmovie.drop(index=sample, errors='ignore')

    corr_inputmovie = corr_inputmovie.join(ratings_mean_count['rating_counts'])
    popular_enough = corr_inputmovie['rating_counts'] > min_rating_counts
    corr_inputmovie = (
        corr_inputmovie[popular_enough]
        .sort_values('Correlation', ascending=False)
        .reset_index()
    )
    # Present ranks starting at 1 rather than 0.
    corr_inputmovie.index = corr_inputmovie.index + 1
    return corr_inputmovie.head(10)
    

##### In the available data, the median of the `rating_counts` column is 3.
For many randomly chosen movies no recommendation can be made, because the minimum number of ratings is not satisfied.

In [None]:
import random
# Sample only from titles that actually appear in the ratings data: a title that
# exists in df_movies but was never rated has no column in user_movie_rating,
# and recommend() would fail on it.
movie = random.choice(movie_data['title'].unique())
print("Recommendation for movie :  {}\n".format(movie))
# Use the median number of ratings per title as the popularity threshold.
suggestion = recommend(movie, ratings_mean_count.rating_counts.median())
if suggestion.empty:
    print("Not enough ratings to Recommend other Movies")
else:
    print(suggestion['title'])
# The second argument of recommend() controls how many ratings a title needs
# before it can be suggested; pass a different value to loosen or tighten it.