## Importing the pandas library

In [None]:
import pandas as pd

### Reading the files

In [None]:
# Load the movie catalogue and the user ratings from CSV files in the working directory.
# low_memory=False turns off chunked parsing so each column's dtype is inferred from the whole file.
moviesmeta, ratingsmeta = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ("movies.csv", "ratings.csv")
)

### Viewing the files

In [None]:
# Preview the first five rows of the movies table.
moviesmeta.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
# Preview the first five rows of the ratings table.
ratingsmeta.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
# Print the movies table's column names, non-null counts, dtypes and memory usage.
moviesmeta.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9742 entries, 0 to 9741
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  9742 non-null   int64 
 1   title    9742 non-null   object
 2   genres   9742 non-null   object
dtypes: int64(1), object(2)
memory usage: 228.5+ KB


In [None]:
# Print the ratings table's column names, non-null counts, dtypes and memory usage.
ratingsmeta.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100836 non-null  int64  
 1   movieId    100836 non-null  int64  
 2   rating     100836 non-null  float64
 3   timestamp  100836 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.1 MB


### Merging the two datasets


In [None]:
# Attach each rating to its movie record. The join key and join type are written out
# explicitly: without `on`, pandas joins on every column name the two frames share, so the
# result would silently change if both tables ever gained another same-named column.
df = moviesmeta.merge(ratingsmeta, on="movieId", how="inner")

In [None]:
# Preview the first five rows of the merged frame.
df.head(n=5)

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483


### Dropping the unwanted column

In [None]:
# Drop the timestamp column.
# errors="ignore" keeps this cell re-runnable: `df` is reassigned from itself, so on a second
# execution the column is already gone and a plain drop would raise KeyError.
df = df.drop(columns=['timestamp'], errors='ignore')

In [None]:
# Preview the first five rows after removing the timestamp column.
df.head(n=5)

Unnamed: 0,movieId,title,genres,userId,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5


### Calculating the mean rating

In [None]:
# Mean of the `rating` column across every row of the merged frame.
meanvote = df.loc[:, 'rating'].mean()
print(meanvote)

3.501556983616962


### Getting the minimum rating at the 70th percentile

In [None]:
# 70th-percentile value of the `rating` column; used below as the cut-off for qualifying rows.
minrating = df.loc[:, 'rating'].quantile(q=0.70)
print(minrating)

4.0


### Getting the dataset with rating >= minimum required rating

In [None]:
# Keep only the rows whose rating is at or above the threshold.
# Filter first and copy the result: this duplicates only the surviving rows (instead of the
# whole frame) and makes q_movies an explicit, independent copy that later cells can add
# columns to.
q_movies = df.loc[df['rating'] >= minrating].copy()
q_movies.shape

(48580, 5)

### Function to calculate the weighted average rating

In [None]:
def weighted_rating(x, minrating=minrating, meanvote=meanvote):
  """Return the weighted rating ``v/(v+m) * R + m/(v+m) * C`` for one row.

  ``m`` is ``minrating`` and ``C`` is ``meanvote``. ``R`` and ``v`` come from the row:

  * if the row carries ``vote_average`` / ``vote_count`` entries, those are used as
    ``R`` and ``v``;
  * otherwise the row is treated as a single rating: ``R = x['rating']`` and ``v = 1``.

  The previous implementation set ``R`` to ``meanvote``, which made the expression
  collapse to ``meanvote`` for every row, and used the rating value as the vote count.

  Args:
    x: a row (pandas Series or mapping) containing at least ``'rating'``, or the
      optional ``'vote_average'`` / ``'vote_count'`` entries.
    minrating: the ``m`` term, weighting the pull toward ``meanvote``.
    meanvote: the ``C`` term, the value the score is shrunk toward.

  Returns:
    The weighted rating as a number.
  """
  # Prefer aggregated values when the row provides them; fall back to the row's own rating.
  voters = x['vote_count'] if 'vote_count' in x else 1
  avg_vote = x['vote_average'] if 'vote_average' in x else x['rating']
  total = voters + minrating
  return (voters / total * avg_vote) + (minrating / total * meanvote)

In [None]:
# Preview the first five qualifying rows.
q_movies.head(n=5)

Unnamed: 0,movieId,title,genres,userId,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5
6,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,19,4.0


### Creating a `score` column and assigning the weighted average values

In [None]:
# Call weighted_rating once per row; axis="columns" passes each row to the function as a Series.
score = q_movies.apply(weighted_rating, axis="columns")

### Adding the score column to the dataset

In [None]:
# Attach the per-row scores as a `score` column (values are aligned on the row index).
q_movies = q_movies.assign(score=score)

### Sorting the values by rating in descending order

In [None]:
# Order the rows from highest to lowest rating.
q_movies = q_movies.sort_values(by='rating', ascending=False)

### Setting the display precision to 2 decimal places

In [None]:
# Show floats with two decimal places in rendered frames (display only; stored values are unchanged).
# The fully qualified option name is required: the bare key 'precision' is not a complete
# option name and is rejected by current pandas releases.
pd.set_option('display.precision', 2)

## Viewing the recommended list

In [None]:
# Show the title, rating and score of the first ten rows in the current sort order.
q_movies.loc[:, ['title', 'rating', 'score']].head(n=10)

Unnamed: 0,title,rating,score
24120,Monty Python and the Holy Grail (1975),5.0,3.5
20987,Sunset Blvd. (a.k.a. Sunset Boulevard) (1950),5.0,3.5
36805,Mutiny on the Bounty (1935),5.0,3.5
20941,Gone with the Wind (1939),5.0,3.5
20945,Gone with the Wind (1939),5.0,3.5
56166,Blazing Saddles (1974),5.0,3.5
20949,Gone with the Wind (1939),5.0,3.5
56157,Romeo and Juliet (1968),5.0,3.5
89078,Inglourious Basterds (2009),5.0,3.5
20955,Gone with the Wind (1939),5.0,3.5
