# In [48]:
# Step 1: Import libraries and load dataset
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import ipywidgets as widgets
from IPython.display import display
from collections import defaultdict

# Load the data
# Both CSV files are read relative to the current working directory.
# Columns accessed later in this file: movies -> movieId, title, genres;
# ratings -> userId, movieId, rating.
movies = pd.read_csv('Movies.csv')
ratings = pd.read_csv('Ratings.csv')

# Step 2: Exploratory Data Analysis

# Understanding the distribution of features
# NOTE(review): the result of describe() is not assigned or printed here, so
# it is only visible if this happens to be the last expression of a cell.
ratings.describe()

# Finding unique users and movies
unique_users = ratings['userId'].nunique()
unique_movies = movies['movieId'].nunique()

# Average rating and Total movies at genre level
# One row per rating, enriched with the rated movie's title and genres
# (pd.merge defaults to an inner join on movieId).
movie_genre_ratings = pd.merge(ratings, movies, on='movieId')
# NOTE(review): both statistics below are keyed by the full pipe-delimited
# `genres` string (e.g. a combination of several genres), not by individual
# genre, and value_counts() counts rating rows rather than distinct movies.
# Confirm whether per-genre figures were intended.
genre_avg_ratings = movie_genre_ratings.groupby('genres')['rating'].mean()
total_movies_by_genre = movie_genre_ratings['genres'].value_counts()

# Unique genres considered
# Split each movie's genres on '|' and flatten to the distinct genre labels.
unique_genres = movies['genres'].str.split('|', expand=True).stack().unique()

# Step 3: Design recommendation modules

# Popularity-based recommender system
def popularity_recommender(genre, threshold, N, data=None):
    """Return the top-N best-rated movies of a genre that have enough ratings.

    Movies are ranked by their *average* rating (ties broken by the number of
    ratings), so every movie appears at most once in the result.

    Args:
        genre: Genre label to look for inside the pipe-delimited ``genres``
            column. Matched as a literal substring, not as a regex.
        threshold: Minimum number of ratings a movie must have to qualify.
        N: Maximum number of movies to return.
        data: Optional frame with one row per rating and the columns
            ``movieId``, ``title``, ``genres`` and ``rating``. Defaults to the
            module-level ``movie_genre_ratings``.

    Returns:
        A DataFrame with one row per recommended movie and the columns
        ``movieId``, ``title``, ``genres``, ``rating`` (mean rating) and
        ``num_ratings``, best movie first. Empty if nothing qualifies.
    """
    if data is None:
        data = movie_genre_ratings

    # regex=False: genre labels may contain regex metacharacters such as
    # parentheses; na=False: rows without genre information never match.
    in_genre = data[data['genres'].str.contains(genre, regex=False, na=False)]

    ratings_by_movie = in_genre.groupby('movieId')['rating']
    stats = pd.DataFrame({
        'rating': ratings_by_movie.mean(),
        'num_ratings': ratings_by_movie.count(),
    })

    # One descriptive row per movie, indexed like `stats` so they can be joined.
    movie_info = (
        in_genre.drop_duplicates('movieId')
        .set_index('movieId')[['title', 'genres']]
    )
    candidates = movie_info.join(stats)
    popular = candidates[candidates['num_ratings'] >= threshold]

    ranked = popular.sort_values(['rating', 'num_ratings'], ascending=False)
    return ranked.head(max(N, 0)).reset_index()

# Content-based recommender system
# Treat every pipe-delimited genre label as a single token. The default token
# pattern would split labels such as "Sci-Fi" or "Film-Noir" into separate,
# unrelated word tokens and distort the genre similarities.
tfidf_vectorizer = TfidfVectorizer(token_pattern=r'[^|]+')
# Movies without genre information get an empty document (an all-zero row).
tfidf_matrix = tfidf_vectorizer.fit_transform(movies['genres'].fillna(''))

def content_based_recommender(movie_title, N, movies_df=None, feature_matrix=None):
    """Return the titles of the N movies most similar to ``movie_title``.

    Similarity is the dot product between the query movie's feature row and
    every other movie's feature row. Only that single row of similarities is
    computed, not the full movie-by-movie matrix.

    Args:
        movie_title: Exact title of the query movie.
        N: Maximum number of recommendations to return.
        movies_df: Optional frame with a ``title`` column whose row order
            matches ``feature_matrix``. Defaults to the module-level ``movies``.
        feature_matrix: Optional (n_movies, n_features) dense or sparse
            matrix. Defaults to the module-level ``tfidf_matrix``.

    Returns:
        A list of up to N titles, most similar first. The query movie itself
        is never included.

    Raises:
        IndexError: If ``movie_title`` is not present in ``movies_df``.
    """
    if movies_df is None:
        movies_df = movies
    if feature_matrix is None:
        feature_matrix = tfidf_matrix

    # Work with row positions so a non-default DataFrame index cannot
    # misalign the lookup into the feature matrix.
    matches = np.flatnonzero((movies_df['title'] == movie_title).to_numpy())
    if matches.size == 0:
        raise IndexError(f"movie title not found: {movie_title!r}")
    movie_idx = int(matches[0])

    row_scores = feature_matrix[movie_idx] @ feature_matrix.T
    if hasattr(row_scores, 'toarray'):  # sparse result -> dense
        row_scores = row_scores.toarray()
    scores = np.asarray(row_scores).ravel()

    # Stable sort keeps ties in catalogue order. The query movie is removed
    # explicitly because other movies can tie with it at the top score.
    ranked = np.argsort(-scores, kind='stable')
    ranked = ranked[ranked != movie_idx][:max(N, 0)]
    return movies_df['title'].iloc[ranked].tolist()

# Collaborative-based recommender system
def collaborative_recommender(user_id, k, N, ratings_df=None, movies_df=None):
    """Recommend movies that the k most similar users rated and ``user_id`` has not.

    A neighbour's similarity is the sum of the ratings they gave to movies
    the target user has also rated. Candidates are collected neighbour by
    neighbour (most similar first), each neighbour's highest-rated movies
    first, skipping movies the target user already rated and duplicates.

    Args:
        user_id: The user to recommend for.
        k: Number of most similar users to draw candidates from.
        N: Maximum number of movies to return.
        ratings_df: Optional frame with ``userId``, ``movieId`` and ``rating``
            columns. Defaults to the module-level ``ratings``.
        movies_df: Optional frame with ``movieId`` and ``title`` columns.
            Defaults to the module-level ``movies``.

    Returns:
        A list of up to N distinct titles in recommendation order. Fewer than
        N titles are returned when the neighbours do not offer enough unseen
        movies; the list is empty for users without ratings or neighbours.
    """
    if ratings_df is None:
        ratings_df = ratings
    if movies_df is None:
        movies_df = movies
    if k <= 0 or N <= 0:
        return []

    is_target = ratings_df['userId'] == user_id
    seen = set(ratings_df.loc[is_target, 'movieId'])
    if not seen:
        return []

    # Accumulate each *other* user's ratings over the co-rated movies. The
    # target user is excluded so they cannot occupy a neighbour slot.
    co_rated = ratings_df[~is_target & ratings_df['movieId'].isin(seen)]
    overlap_score = co_rated.groupby('userId')['rating'].sum()
    # mergesort is stable, so ties are resolved deterministically by userId.
    neighbours = overlap_score.sort_values(ascending=False, kind='mergesort').index[:k]

    title_by_id = dict(zip(movies_df['movieId'], movies_df['title']))

    # A dict keeps insertion order and gives O(1) duplicate detection.
    picked = {}
    for neighbour in neighbours:
        neighbour_ratings = ratings_df[ratings_df['userId'] == neighbour]
        best_first = neighbour_ratings.sort_values(
            'rating', ascending=False, kind='mergesort'
        )
        for movie_id in best_first['movieId']:
            if movie_id in seen or movie_id in picked:
                continue
            if movie_id not in title_by_id:  # rated movie missing from catalogue
                continue
            picked[movie_id] = title_by_id[movie_id]
            if len(picked) == N:
                return list(picked.values())

    return list(picked.values())

# Step 4: Create a GUI interface

# Display settings shared by both sliders, declared once so they stay in sync.
_SLIDER_DISPLAY = dict(
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)

# Genre selector, populated with the distinct genre labels found in the data.
genre_dropdown = widgets.Dropdown(options=unique_genres, description='Genre:', disabled=False)

# Minimum-ratings threshold passed to the popularity recommender.
threshold_slider = widgets.FloatSlider(
    value=10, min=1, max=500, step=1, description='Threshold:', **_SLIDER_DISPLAY
)

# Number of recommendations requested from each recommender.
N_slider = widgets.IntSlider(
    value=5, min=1, max=20, step=1, description='Top N:', **_SLIDER_DISPLAY
)

# Area into which the recommendation lists are printed.
recommendation_output = widgets.Output()

def generate_recommendations(change):
    """Reprint all three recommendation lists into the output area.

    Intended as a widget observer callback: ``change`` is accepted but not
    used; the current values of the genre, threshold and top-N widgets are
    read directly. The content-based list is seeded with the first movie in
    ``movies`` and the collaborative list uses user 1 with five neighbours.
    """
    with recommendation_output:
        recommendation_output.clear_output()

        selected_genre = genre_dropdown.value
        min_ratings = threshold_slider.value
        top_n = N_slider.value

        popular = popularity_recommender(selected_genre, min_ratings, top_n)
        print("Popularity-based Recommendations:")
        for title in popular['title']:
            print(title)

        seed_title = movies['title'].iloc[0]  # Change the movie title as needed
        similar_titles = content_based_recommender(seed_title, top_n)
        print("\nContent-based Recommendations:")
        for title in similar_titles:
            print(title)

        target_user = 1  # Change the user ID as needed
        neighbour_titles = collaborative_recommender(target_user, k=5, N=top_n)
        print("\nCollaborative-based Recommendations:")
        for title in neighbour_titles:
            print(title)

# Refresh the recommendations whenever any control's value changes.
for _control in (genre_dropdown, threshold_slider, N_slider):
    _control.observe(generate_recommendations, names='value')

# Render the controls, followed by the area the recommendations print into.
display(genre_dropdown, threshold_slider, N_slider, recommendation_output)

# Cell output (widget representations as rendered by the notebook):
# Dropdown(description='Genre:', options=('Adventure', 'Animation', 'Children', 'Comedy', 'Fantasy', 'Romance', …
#
# FloatSlider(value=10.0, continuous_update=False, description='Threshold:', max=500.0, min=1.0, readout_format=…
#
# IntSlider(value=5, continuous_update=False, description='Top N:', max=20, min=1)
#
# Output()