In [1]:
import pandas as pd

# Load Movies and Ratings dataset
movies = pd.read_csv('C:\\Users\\King\\Desktop\\DSML Internship\\Assignments\\Final Certification Project\\movies.csv')
ratings = pd.read_csv('C:\\Users\\King\\Desktop\\DSML Internship\\Assignments\\Final Certification Project\\ratings.csv')


In [2]:
# Understanding the distribution of features
print(movies.head())
print(ratings.head())

# Finding unique users and movies
unique_users = ratings['userId'].nunique()
unique_movies = ratings['movieId'].nunique()
print("Unique Users:", unique_users)
print("Unique Movies:", unique_movies)

# Average rating and total movies at the genre level
movie_genre = movies['genres'].str.split('|', expand=True).stack().reset_index(level=1, drop=True)
movie_genre.name = 'genre'
genres_count = movie_genre.value_counts()
print("Genre-wise Movie Count:\n", genres_count)

# Unique genres considered
unique_genres = movie_genre.unique()
print("Unique Genres:", unique_genres)

   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931
Unique Users: 610
Unique Movies: 9724
Genre-wise Movie Count:
 genre
Drama                 4361
Comedy                3756
Thriller              1894
Action                1828


In [3]:
def popularity_based_recommender(genre, threshold, n):
    genre_movies = movies[movies['genres'].str.contains(genre)]
    genre_movie_ratings = pd.merge(genre_movies, ratings, on='movieId')
    genre_movie_ratings_count = genre_movie_ratings.groupby('title').size()
    genre_movie_ratings_avg = genre_movie_ratings.groupby('title')['rating'].mean()
    
    popular_movies = pd.DataFrame({'Num Reviews': genre_movie_ratings_count, 'Average Movie Rating': genre_movie_ratings_avg})
    popular_movies = popular_movies[popular_movies['Num Reviews'] >= threshold].sort_values(by=['Average Movie Rating'], ascending=False).head(n)
    
    return popular_movies

# Example usage:
genre_input = 'Comedy'
min_reviews_threshold = 100
num_recommendations = 5
output_popularity = popularity_based_recommender(genre_input, min_reviews_threshold, num_recommendations)
print(output_popularity)


                                                    Num Reviews  \
title                                                             
Princess Bride, The (1987)                                  142   
Pulp Fiction (1994)                                         307   
Amelie (Fabuleux destin d'Amélie Poulain, Le) (...          120   
Forrest Gump (1994)                                         329   
Monty Python and the Holy Grail (1975)                      136   

                                                    Average Movie Rating  
title                                                                     
Princess Bride, The (1987)                                      4.232394  
Pulp Fiction (1994)                                             4.197068  
Amelie (Fabuleux destin d'Amélie Poulain, Le) (...              4.183333  
Forrest Gump (1994)                                             4.164134  
Monty Python and the Holy Grail (1975)                          4.161765  


In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

def content_based_recommender(movie_title, n):
    tfidf = TfidfVectorizer(stop_words='english')
    movies['genres'] = movies['genres'].fillna('')
    tfidf_matrix = tfidf.fit_transform(movies['genres'])
    
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
    idx = movies[movies['title'] == movie_title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:n+1]
    
    movie_indices = [i[0] for i in sim_scores]
    return movies['title'].iloc[movie_indices]

# Example usage:
input_movie_title = 'Toy Story (1995)'
num_recommendations_content = 5
output_content_based = content_based_recommender(input_movie_title, num_recommendations_content)
print(output_content_based)


1706                                       Antz (1998)
2355                                Toy Story 2 (1999)
2809    Adventures of Rocky and Bullwinkle, The (2000)
3000                  Emperor's New Groove, The (2000)
3568                             Monsters, Inc. (2001)
Name: title, dtype: object


In [5]:
from sklearn.metrics.pairwise import cosine_similarity

def collaborative_based_recommender(user_id, n, k):
    user_movies = ratings[ratings['userId'] == user_id]['movieId']
    similar_users = ratings[ratings['movieId'].isin(user_movies)]['userId'].unique()
    similar_users = similar_users[similar_users != user_id][:k]
    
    similar_users_ratings = ratings[ratings['userId'].isin(similar_users)]
    similar_users_ratings_grouped = similar_users_ratings.groupby('movieId')['rating'].mean().sort_values(ascending=False)
    
    movie_ids = similar_users_ratings_grouped.index.tolist()
    movie_titles = movies[movies['movieId'].isin(movie_ids)]['title'][:n]
    
    return movie_titles

# Example usage:
user_input = 1
num_recommendations_collab = 5
threshold_similar_users = 100
output_collaborative = collaborative_based_recommender(user_input, num_recommendations_collab, threshold_similar_users)
print(output_collaborative)


0                      Toy Story (1995)
1                        Jumanji (1995)
2               Grumpier Old Men (1995)
3              Waiting to Exhale (1995)
4    Father of the Bride Part II (1995)
Name: title, dtype: object


In [6]:
import ipywidgets as widgets
from IPython.display import display

# Create widgets for inputs
genre_input_widget = widgets.Text(description='Genre:')
threshold_input_widget = widgets.IntText(description='Min Reviews Threshold:')
num_recommendations_widget = widgets.IntText(description='Num Recommendations:')
output_widget = widgets.Output()

# Define function for handling button click
def on_button_click(b):
    with output_widget:
        output_widget.clear_output()
        genre = genre_input_widget.value
        threshold = threshold_input_widget.value
        n = num_recommendations_widget.value
        
        recommendations = popularity_based_recommender(genre, threshold, n)
        print(recommendations)

# Create button for triggering recommendation
button = widgets.Button(description='Get Recommendations')
button.on_click(on_button_click)

# Display widgets and button
display(genre_input_widget, threshold_input_widget, num_recommendations_widget, button)
display(output_widget)


Text(value='', description='Genre:')

IntText(value=0, description='Min Reviews Threshold:')

IntText(value=0, description='Num Recommendations:')

Button(description='Get Recommendations', style=ButtonStyle())

Output()