### Importing libraries and Downloading datasets

In [1]:
#Loading the libraries
import numpy as np
import pandas as pd

In [2]:
#Downloading the MovieLens 25M archive; -O saves it as 'ml-25m.zip' in the working directory
!wget -O ml-25m.zip https://files.grouplens.org/datasets/movielens/ml-25m.zip
#Unzipping the archive: -o overwrites existing files without prompting and -j discards the
#archive's folder paths, so the CSV files are extracted straight into the working directory
!unzip -o -j ml-25m.zip

--2021-08-11 18:24:04--  https://files.grouplens.org/datasets/movielens/ml-25m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 261978986 (250M) [application/zip]
Saving to: ‘ml-25m.zip’


2021-08-11 18:24:08 (72.0 MB/s) - ‘ml-25m.zip’ saved [261978986/261978986]

Archive:  ml-25m.zip
  inflating: tags.csv                
  inflating: links.csv               
  inflating: README.txt              
  inflating: ratings.csv             
  inflating: genome-tags.csv         
  inflating: genome-scores.csv       
  inflating: movies.csv              


### Reading datasets

In [3]:
#Reading 'ratings.csv' file and discarding the 'timestamp' column, which is not used afterwards
#(the column is named via `columns=` because passing the axis positionally, as in drop('timestamp', 1),
#is no longer accepted by pandas 2.x)
ratings_data = pd.read_csv('ratings.csv').drop(columns='timestamp')

In [4]:
#Loading 'movies.csv' into a DataFrame (later cells use its movieId, title and genres columns)
movies_data = pd.read_csv(filepath_or_buffer='movies.csv')

### Data Preprocessing

In [5]:
#Extracting 'year' from 'title' column and saving it in another column.
#The pattern captures the four digits of the first "(YYYY)" group found in the title; titles without
#such a group get NaN. A raw string is used so that \( and \d are not treated as (invalid) string
#escapes, and a single pass replaces the former two-step extraction with the same result.
movies_data['year'] = movies_data['title'].str.extract(r'\((\d{4})\)', expand=False)

In [6]:
#Replacing all null values with 0 (so a title without an extracted year ends up with year 0).
#fillna is used instead of replace(np.NaN, ...) because the np.NaN alias no longer exists in NumPy 2.x.
movies_data = movies_data.fillna(0)

#Typecasting the attributes into relevant datatypes (commas are stripped first, as before);
#vectorised string operations replace the Python-level loops over every row
movies_data['year'] = movies_data['year'].astype(str).str.replace(',', '', regex=False).astype('int64')
movies_data['genres'] = movies_data['genres'].astype(str).str.replace(',', '', regex=False)

### Top-N Rated Movies Recommender System (Function Implementation)

---

Note:
*   The dataset only contains movies released up to 2019, so recommendations are limited to movies released in or before 2019.
*   The dataset has the following genres: [Action, Adventure, Animation, Children's, Comedy, Crime, Documentary, Drama, Fantasy, Film-noir, Horror, Musical, Mystery, Romance, Sci-fi, Thriller, War, Western]. Choose exactly one of them.

In [7]:
#Function definition for Top-N Rated Movie Recommendations (given year and genre)
def get_top_rated_recommendations(top_n, from_year, to_year, genre):
  """Return the top-N best-rated movies of one genre released within a year range.

  Works on the notebook-level frames `movies_data` (columns used: movieId, title,
  genres, year) and `ratings_data` (columns used: movieId, rating).

  A movie is only eligible when BOTH its average rating and its number of ratings
  are strictly above the respective averages over all movies matching the filters.
  Eligible movies are sorted by average rating (descending) and the first `top_n`
  are returned.

  Parameters
  ----------
  top_n : int
      Maximum number of movies to return.
  from_year, to_year : int
      Inclusive release-year range; each bound must lie within 1850-2019.
  genre : str
      One of the genres in `genre_list` below; matched case-insensitively.

  Returns
  -------
  pandas.DataFrame or None
      Frame with columns ['title', 'ratings_avg', 'ratings_count'] and an index
      starting at 1. For an invalid year or genre a message is printed and
      None is returned.
  """

  #Year bounds accepted by the function (same values as in the message printed below)
  min_year, max_year = 1850, 2019

  #List of genres available in dataset
  #NOTE(review): the spelling stored in movies_data may differ from this list
  #(e.g. "Children" vs "Children's") - verify against the data
  genre_list = ['Action', 'Adventure', 'Animation', "Children's", 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
                'Film-noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-fi', 'Thriller', 'War', 'Western']

  #Rejecting the request unless BOTH year bounds lie inside the supported range
  #(the previous `and` check only triggered when both bounds were out of range at once)
  if not (min_year <= from_year <= max_year and min_year <= to_year <= max_year):
    print("Please input year between 1850-2019 (both inclusive) !!!")
    return None

  #Capitalizing the 'genre' argument as the list above is in Capitalized format
  genre = genre.capitalize()
  if genre not in genre_list:
    print("Please input genre from the above mentioned list !!!")
    return None

  #Filtering the 'movies' dataset for given year-range (both ends inclusive)
  movies_by_year = movies_data[movies_data['year'].between(from_year, to_year)]

  #Filtering the above filtered 'movies' dataset by genre provided by user
  movies_by_genre = movies_by_year[movies_by_year['genres'].str.contains(genre, case=False)]

  #Removing movies which has 'no genres listed' from movies_by_genre
  #(the literal now carries its closing parenthesis; without it the comparison could never match)
  movies_by_genre = movies_by_genre[movies_by_genre['genres'] != '(no genres listed)']

  #Merging 'movies' dataset with 'ratings' dataset
  movies_ratings = pd.merge(ratings_data, movies_by_genre, on='movieId')

  #Average rating and number of ratings per movie title, computed in a single pass
  ratings_avg_count = movies_ratings.groupby('title')['rating'].agg(ratings_avg='mean', ratings_count='count')

  #Averages over all matching movies; they act as the eligibility thresholds
  avg_rating = ratings_avg_count['ratings_avg'].mean()
  avg_count = ratings_avg_count['ratings_count'].mean()

  #Keeping only movies strictly above both thresholds
  above_average = ratings_avg_count[(ratings_avg_count['ratings_avg'] > avg_rating)
                                    & (ratings_avg_count['ratings_count'] > avg_count)]

  #Selecting top-N rated movies (sorted descending by average rating) and
  #turning the 'title' index back into a regular column
  recommendations = above_average.sort_values(by='ratings_avg', ascending=False).head(top_n).reset_index()

  #Setting the columns title and index from 1 for recommendations dataframe
  recommendations.columns = ['title', 'ratings_avg', 'ratings_count']
  recommendations.index += 1

  #Printing the statement for movie recommendations to users
  print(f'Top-{top_n} Rated {genre} Movies from {from_year} to {to_year} recommended for you:' )

  #Returning top-N rated movie recommendations to function call
  return recommendations

In [8]:
#Example call: request the 10 best-rated 'action' movies released from 2011 to 2015
#(the arguments are the number of recommendations wanted, from_year, to_year and genre)
get_top_rated_recommendations(top_n=10, from_year=2011, to_year=2015, genre='action')

Top-10 Rated Action Movies from 2011 to 2015 recommended for you:


Unnamed: 0,title,ratings_avg,ratings_count
1,Django Unchained (2012),4.004882,20687
2,"Dark Knight Rises, The (2012)",3.971349,19912
3,Edge of Tomorrow (2014),3.94008,12425
4,Guardians of the Galaxy (2014),3.932247,16324
5,Harry Potter and the Deathly Hallows: Part 2 (...,3.906986,13455
6,Big Hero 6 (2014),3.879613,10379
7,Rush (2013),3.867236,3574
8,Mad Max: Fury Road (2015),3.855034,13479
9,Headhunters (Hodejegerne) (2011),3.807249,1214
10,Kingsman: The Secret Service (2015),3.798285,9620
