### Import required Libraries

In [1]:
import os
from google.colab import drive

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

### Dataset

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Drive folder that this notebook reads its CSV inputs from and writes its CSV output to.
dataset_path = "/content/drive/My Drive/Machine Learning Datasets/Movie_Recommendation"

In [4]:
# The two matrix loads below are left disabled; only ratings.csv and movies.csv are read here.
# URM = pd.read_csv(dataset_path + '/UserRatingMatrix.csv', index_col = 'userId')
# ICM = pd.read_csv(dataset_path + '/ItemContentMatrix.csv', index_col = 'movieId')
# Ratings table (default integer index) and movie table indexed by movieId.
ratings = pd.read_csv(dataset_path + '/ratings.csv')
movies = pd.read_csv(dataset_path + '/movies.csv', index_col = 'movieId')

In [5]:
# Show column names, dtypes and non-null counts of the ratings table.
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92464 entries, 0 to 92463
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   movieId    92464 non-null  int64  
 1   userId     92464 non-null  int64  
 2   rating     92464 non-null  float64
 3   timestamp  92464 non-null  int64  
 4   tmdbId     92464 non-null  int64  
dtypes: float64(1), int64(4)
memory usage: 3.5 MB


In [6]:
# Number of distinct users that appear in the ratings table
# (dropna = False keeps the count identical to len(unique())).
ratings['userId'].nunique(dropna = False)

610

# <center>Non-Personalized Recommenders</center>
<p align = "justify">Non-personalized recommenders are useful when a new user starts using a platform, such as a user who has just joined the platform supported by our recommendation system. Because no rating history exists for that user yet, he/she could simply be shown items at random. But can we do better?
</p>
Yes, we can! Here are a few ways to do so.

    -> A simple way is to recommend the most popular items, regardless of their ratings. The user is recommended the items that were rated the most times.
    -> A better approach is to recommend the best-rated items. The average rating of every movie is calculated, using only the non-zero ratings.
    -> A support term is added to the denominator of the average, which lowers the score of movies rated by only a few users and so gives more weight to movies rated by many users.


In [7]:
# Preview the first five rows of the ratings table.
ratings.head()

Unnamed: 0,movieId,userId,rating,timestamp,tmdbId
0,1,1,4.0,964982703,862
1,3,1,4.0,964981247,15602
2,6,1,4.0,964982224,949
3,47,1,5.0,964983815,807
4,50,1,5.0,964982931,629


In [8]:
def best_rated_with_support(ratings_df, support_factor):
    """Rank movies by a support-damped average rating.

    For every ``tmdbId`` in ``ratings_df`` the score is::

        sum(rating) / (count(rating) + support_factor)

    With ``support_factor == 0`` this is the plain mean. A larger value pulls
    the score of sparsely rated movies towards zero, so movies rated by many
    users rank higher than movies with only a handful of high ratings.

    Parameters
    ----------
    ratings_df : pandas.DataFrame
        Table with at least the columns ``tmdbId`` and ``rating``.
    support_factor : int or float
        Non-negative constant added to each movie's rating count.

    Returns
    -------
    pandas.Series
        Scores indexed by ``tmdbId``, named ``rating``, sorted from highest
        to lowest.

    Raises
    ------
    ValueError
        If ``support_factor`` is negative.
    """
    # A negative support could make the denominator zero or negative, which
    # yields inf or sign-flipped scores and silently corrupts the ranking.
    if support_factor < 0:
        raise ValueError("support_factor must be non-negative, got {}".format(support_factor))

    rating_by_movie = ratings_df.groupby('tmdbId')['rating']
    damped_scores = rating_by_movie.sum() / (rating_by_movie.count() + support_factor)
    return damped_scores.sort_values(ascending = False)

In [9]:
# Score every movie; a support of 10 is added to each movie's rating count.
best_rated_movies = best_rated_with_support(ratings, support_factor = 10)

In [10]:
# Show the five highest scores (the Series is returned sorted in descending order).
best_rated_movies.head()

tmdbId
278    4.296774
550    4.075342
680    4.051495
629    4.050725
238    4.044503
Name: rating, dtype: float64

In [11]:
# Compare the number of scored movies with the number of rows in the movie table.
best_rated_movies.shape[0], movies.shape[0]

(9588, 9588)

In [12]:
# Align the scores with the movie table on tmdbId, rank by score, and drop
# any row that is missing a value in either source.
# NOTE: this rebinds `best_rated_movies` from the score Series to the merged
# DataFrame, so re-running this cell alone concatenates the movie columns again.
best_rated_movies = (
    pd.concat([best_rated_movies, movies.set_index('tmdbId')], axis = 1)
    .sort_values('rating', ascending = False)
    .dropna()
)

In [14]:
# Show the five top-ranked rows of the merged, rating-sorted table.
best_rated_movies.head()

Unnamed: 0_level_0,rating,title,genres
tmdbId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
278,4.296774,"Shawshank Redemption, The (1994)",Crime|Drama
550,4.075342,Fight Club (1999),Action|Crime|Drama|Thriller
680,4.051495,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
629,4.050725,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
238,4.044503,"Godfather, The (1972)",Crime|Drama


### Conclusion
    -> When a new user joins the platform our recommender supports, we can recommend the top 20 movies ranked by their support-adjusted average rating.

In [15]:
# Write the ranked table to the dataset folder as BestRatedMovies.csv (the index is written too).
best_rated_movies.to_csv(dataset_path + '/BestRatedMovies.csv')

<center>END</center>