# Importing Libraries and Loading Dataset

In [1]:
import pandas as pd
import numpy as np

# Load datasets
movies = pd.read_csv('Movies.csv')
ratings = pd.read_csv('Ratings.csv')

# Displaying dataset for Movies Data

In [2]:
# Display the first few rows of each dataframe
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


# Displaying Dataset for Ratings Data

In [3]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


# Exploratory Data Analysis 

In [4]:
# Basic statistics of ratings
print(ratings.describe())

# Number of unique users and movies
unique_users = ratings['userId'].nunique()
unique_movies = ratings['movieId'].nunique()

print(f'Unique users: {unique_users}')
print(f'Unique movies: {unique_movies}')

# Average rating per movie
average_rating = ratings.groupby('movieId')['rating'].mean()
print(average_rating.head())

# Total movies at genre level and unique genres
movies['genres'] = movies['genres'].str.split('|')
genres_exploded = movies.explode('genres')
genre_counts = genres_exploded['genres'].value_counts()

print(genre_counts)

              userId        movieId         rating     timestamp
count  105339.000000  105339.000000  105339.000000  1.053390e+05
mean      364.924539   13381.312477       3.516850  1.130424e+09
std       197.486905   26170.456869       1.044872  1.802660e+08
min         1.000000       1.000000       0.500000  8.285650e+08
25%       192.000000    1073.000000       3.000000  9.711008e+08
50%       383.000000    2497.000000       3.500000  1.115154e+09
75%       557.000000    5991.000000       4.000000  1.275496e+09
max       668.000000  149532.000000       5.000000  1.452405e+09
Unique users: 668
Unique movies: 10325
movieId
1    3.907328
2    3.353261
3    3.189655
4    2.818182
5    3.250000
Name: rating, dtype: float64
genres
Drama                 5220
Comedy                3515
Thriller              2187
Romance               1788
Action                1737
Crime                 1440
Adventure             1164
Horror                1001
Sci-Fi                 860
Mystery            

# Designed Recommendation Modules as Popularity-based Recommender System

In [5]:
def popularity_based_recommendation(genre, min_reviews, num_recommendations):
    # Filter movies by genre
    genre_movies = genres_exploded[genres_exploded['genres'] == genre]
    
    # Merge with ratings
    genre_ratings = pd.merge(genre_movies, ratings, on='movieId')
    
    # Group by movie and calculate average rating and review count
    movie_stats = genre_ratings.groupby('title').agg(
        average_rating=('rating', 'mean'),
        num_reviews=('rating', 'count')
    ).reset_index()
    
    # Filter movies by minimum reviews
    popular_movies = movie_stats[movie_stats['num_reviews'] >= min_reviews]
    
    # Sort by average rating and get top N movies
    top_movies = popular_movies.sort_values(by='average_rating', ascending=False).head(num_recommendations)
    
    return top_movies

# Example usage
print(popularity_based_recommendation('Comedy', 100, 5))

                                       title  average_rating  num_reviews
2093  Monty Python and the Holy Grail (1975)        4.301948          154
995                             Fargo (1996)        4.271144          201
2498              Princess Bride, The (1987)        4.163743          171
2523                     Pulp Fiction (1994)        4.160000          325
1069                     Forrest Gump (1994)        4.138264          311


# Content-based Recommender System

In [6]:
def content_based_recommendation(movie_title, num_recommendations):
    # Get the genres of the given movie
    movie_genres = movies[movies['title'] == movie_title]['genres'].values[0]
    
    # Filter movies with similar genres
    similar_movies = movies[movies['genres'].apply(lambda x: any(genre in x for genre in movie_genres))]
    
    # Exclude the input movie from recommendations
    similar_movies = similar_movies[similar_movies['title'] != movie_title]
    
    # Return top N recommendations
    return similar_movies.head(num_recommendations)['title']

# Example usage
print(content_based_recommendation('Toy Story (1995)', 5))

1                        Jumanji (1995)
2               Grumpier Old Men (1995)
3              Waiting to Exhale (1995)
4    Father of the Bride Part II (1995)
6                        Sabrina (1995)
Name: title, dtype: object


# Collaborative Filtering Recommender System

In [7]:
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

def collaborative_filtering_recommendation(user_id, num_recommendations, num_similar_users):
    # Create user-item matrix
    user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    user_item_sparse = csr_matrix(user_item_matrix)
    
    # Compute similarity between users
    user_similarity = cosine_similarity(user_item_sparse)
    
    # Get similar users
    user_idx = user_id - 1
    similar_users = np.argsort(-user_similarity[user_idx])[:num_similar_users]
    
    # Aggregate ratings of similar users
    similar_users_ratings = user_item_matrix.iloc[similar_users].mean(axis=0)
    
    # Exclude movies already rated by the user
    user_ratings = user_item_matrix.iloc[user_idx]
    recommendations = similar_users_ratings[user_ratings == 0]
    
    # Get top N recommendations
    top_recommendations = recommendations.sort_values(ascending=False).head(num_recommendations)
    
    # Map movie IDs to titles
    top_recommendations = top_recommendations.index.map(movies.set_index('movieId')['title'])
    
    return top_recommendations

# Example usage
print(collaborative_filtering_recommendation(1, 5, 100))

Index(['Indiana Jones and the Last Crusade (1989)', 'Toy Story (1995)',
       'Die Hard (1988)', 'Aliens (1986)', 'Memento (2000)'],
      dtype='object', name='movieId')


# GUI Interface

In [8]:
import ipywidgets as widgets
from IPython.display import display

# Define widgets
genre_widget = widgets.Text(value='Action', description='Genre:')
min_reviews_widget = widgets.IntSlider(value=100, min=0, max=500, step=10, description='Min Reviews:')
num_recommendations_widget = widgets.IntSlider(value=5, min=1, max=20, step=1, description='Num Recommendations:')
output_widget = widgets.Output()

# Define update function
def update_recommendations(change):
    with output_widget:
        output_widget.clear_output()
        genre = genre_widget.value
        min_reviews = min_reviews_widget.value
        num_recommendations = num_recommendations_widget.value
        recommendations = popularity_based_recommendation(genre, min_reviews, num_recommendations)
        display(recommendations)

# Attach update function to widget changes
genre_widget.observe(update_recommendations, names='value')
min_reviews_widget.observe(update_recommendations, names='value')
num_recommendations_widget.observe(update_recommendations, names='value')

# Display widgets
display(genre_widget, min_reviews_widget, num_recommendations_widget, output_widget)

# Initialize output
update_recommendations(None)

Text(value='Action', description='Genre:')

IntSlider(value=100, description='Min Reviews:', max=500, step=10)

IntSlider(value=5, description='Num Recommendations:', max=20, min=1)

Output()

In [10]:
import ipywidgets as widgets
from IPython.display import display

# Define widgets
genre_widget = widgets.Text(value='Drama', description='Genre:')
min_reviews_widget = widgets.IntSlider(value=100, min=0, max=500, step=10, description='Min Reviews:')
num_recommendations_widget = widgets.IntSlider(value=5, min=1, max=20, step=1, description='Num Recommendations:')
output_widget = widgets.Output()

# Define update function
def update_recommendations(change):
    with output_widget:
        output_widget.clear_output()
        genre = genre_widget.value
        min_reviews = min_reviews_widget.value
        num_recommendations = num_recommendations_widget.value
        recommendations = popularity_based_recommendation(genre, min_reviews, num_recommendations)
        display(recommendations)

# Attach update function to widget changes
genre_widget.observe(update_recommendations, names='value')
min_reviews_widget.observe(update_recommendations, names='value')
num_recommendations_widget.observe(update_recommendations, names='value')

# Display widgets
display(genre_widget, min_reviews_widget, num_recommendations_widget, output_widget)

# Initialize output
update_recommendations(None)

Text(value='Drama', description='Genre:')

IntSlider(value=100, description='Min Reviews:', max=500, step=10)

IntSlider(value=5, description='Num Recommendations:', max=20, min=1)

Output()

In [12]:
import ipywidgets as widgets
from IPython.display import display

# Define widgets
genre_widget = widgets.Text(value='Horror', description='Genre:')
min_reviews_widget = widgets.IntSlider(value=100, min=0, max=500, step=10, description='Min Reviews:')
num_recommendations_widget = widgets.IntSlider(value=5, min=1, max=20, step=1, description='Num Recommendations:')
output_widget = widgets.Output()

# Define update function
def update_recommendations(change):
    with output_widget:
        output_widget.clear_output()
        genre = genre_widget.value
        min_reviews = min_reviews_widget.value
        num_recommendations = num_recommendations_widget.value
        recommendations = popularity_based_recommendation(genre, min_reviews, num_recommendations)
        display(recommendations)

# Attach update function to widget changes
genre_widget.observe(update_recommendations, names='value')
min_reviews_widget.observe(update_recommendations, names='value')
num_recommendations_widget.observe(update_recommendations, names='value')

# Display widgets
display(genre_widget, min_reviews_widget, num_recommendations_widget, output_widget)

# Initialize output
update_recommendations(None)

Text(value='Horror', description='Genre:')

IntSlider(value=100, description='Min Reviews:', max=500, step=10)

IntSlider(value=5, description='Num Recommendations:', max=20, min=1)

Output()

In [16]:
import ipywidgets as widgets
from IPython.display import display

# Define widgets
genre_widget = widgets.Text(value='Romance', description='Genre:')
min_reviews_widget = widgets.IntSlider(value=100, min=0, max=500, step=10, description='Min Reviews:')
num_recommendations_widget = widgets.IntSlider(value=5, min=1, max=20, step=1, description='Num Recommendations:')
output_widget = widgets.Output()

# Define update function
def update_recommendations(change):
    with output_widget:
        output_widget.clear_output()
        genre = genre_widget.value
        min_reviews = min_reviews_widget.value
        num_recommendations = num_recommendations_widget.value
        recommendations = popularity_based_recommendation(genre, min_reviews, num_recommendations)
        display(recommendations)

# Attach update function to widget changes
genre_widget.observe(update_recommendations, names='value')
min_reviews_widget.observe(update_recommendations, names='value')
num_recommendations_widget.observe(update_recommendations, names='value')

# Display widgets
display(genre_widget, min_reviews_widget, num_recommendations_widget, output_widget)

# Initialize output
update_recommendations(None)

Text(value='Romance', description='Genre:')

IntSlider(value=100, description='Min Reviews:', max=500, step=10)

IntSlider(value=5, description='Num Recommendations:', max=20, min=1)

Output()