In [6]:
%pip install ipywidgets
%pip install requests beautifulsoup4 pandas


In [12]:

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ipywidgets as widgets
from IPython.display import display

# Load the dataset
movies = pd.read_csv('./movies.csv')

# Clean column names and data
movies.columns = movies.columns.str.strip()
movies['Worldwide Gross'] = movies['Worldwide Gross'].str.replace('$', '').str.replace(',', '').astype(float)
movies['Genre'] = movies['Genre'].str.strip()

In [15]:
# Create a combined feature for recommendation
movies['combined_features'] = movies['Genre'] + ' ' + movies['Lead Studio'] + ' ' + movies['Year'].astype(str)

# Clean and prepare text data (using raw string)
movies['combined_features'] = movies['combined_features'].str.lower().str.replace(r'[^\w\s]', '')

# Handle missing values
movies.fillna('', inplace=True)

In [16]:
# Initialize TF-IDF Vectorizer
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['combined_features'])

# Compute cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Create mapping between title and index
indices = pd.Series(movies.index, index=movies['Film']).drop_duplicates()

def get_recommendations(title, cosine_sim=cosine_sim):
    # Get the index of the movie that matches the title
    idx = indices[title]
    
    # Get the pairwise similarity scores
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]
    
    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]
    
    # Return the top 10 most similar movies
    return movies[['Film', 'Genre', 'Lead Studio', 'Audience score %', 'Rotten Tomatoes %']].iloc[movie_indices]

In [20]:
# Get unique values for filters
genres = sorted(movies['Genre'].unique())
studios = sorted(movies['Lead Studio'].unique())
years = sorted(movies['Year'].unique(), reverse=True)

# Create widgets
genre_dropdown = widgets.Dropdown(
    options=genres,
    description='Genre:',
    disabled=False
)

studio_dropdown = widgets.Dropdown(
    options=studios,
    description='Studio:',
    disabled=False
)

year_slider = widgets.IntSlider(
    value=2011,
    min=min(years),
    max=max(years),
    step=1,
    description='Year:'
)

rating_slider = widgets.IntSlider(
    value=50,
    min=0,
    max=100,
    step=1,
    description='Min Audience Score:'
)

submit_button = widgets.Button(description="Get Recommendations")

# Display widgets
display(genre_dropdown)
display(studio_dropdown)
display(year_slider)
display(rating_slider)
display(submit_button)

# Recommendation function
def on_submit_button_clicked(b):
    filtered_movies = movies.copy()
    
    # Apply filters
    if genre_dropdown.value:
        filtered_movies = filtered_movies[filtered_movies['Genre'] == genre_dropdown.value]
    
    if studio_dropdown.value:
        filtered_movies = filtered_movies[filtered_movies['Lead Studio'] == studio_dropdown.value]
    
    filtered_movies = filtered_movies[filtered_movies['Year'] <= year_slider.value]
    filtered_movies = filtered_movies[filtered_movies['Audience score %'] >= rating_slider.value]
    
    # Sort by weighted score (audience + critic ratings)
    filtered_movies['score'] = filtered_movies['Audience score %'] * 0.7 + filtered_movies['Rotten Tomatoes %'] * 0.3
    
    if len(filtered_movies) == 0:
        print("No movies match all your criteria. Here are some popular movies you might like:")
        display(movies.sort_values(['Audience score %', 'Rotten Tomatoes %'], ascending=False).head(10)[['Film', 'Genre', 'Lead Studio']])
    else:
        print(f"Found {len(filtered_movies)} matching movies. Here are the top recommendations:")
        display(filtered_movies.sort_values('score', ascending=False).head(10)[['Film', 'Genre', 'Lead Studio', 'Audience score %', 'Rotten Tomatoes %']])

submit_button.on_click(on_submit_button_clicked)

Dropdown(description='Genre:', options=('Action', 'Animation', 'Comdy', 'Comedy', 'Drama', 'Fantasy', 'Romance…

Dropdown(description='Studio:', options=('20th Century Fox', 'CBS', 'Disney', 'Fox', 'Independent', 'Lionsgate…

IntSlider(value=2011, description='Year:', max=2011, min=2007)

IntSlider(value=50, description='Min Audience Score:')

Button(description='Get Recommendations', style=ButtonStyle())