In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
import ipywidgets as widgets
from IPython.display import display

In [2]:
# Load the movie catalogue from movies.csv (a relative path, so it is resolved
# against the current working directory). Later cells read its movieId, title
# and genres columns.
movies = pd.read_csv("movies.csv")

In [3]:
# Load the ratings table from ratings.csv (a relative path, so it is resolved
# against the current working directory). Later cells read its userId, movieId
# and rating columns.
ratings = pd.read_csv("ratings.csv")

In [4]:
# Data cleaning
def clean_title(title):
    """Return ``title`` with every character other than ASCII letters, digits and spaces removed."""
    disallowed = re.compile("[^a-zA-Z0-9 ]")
    return disallowed.sub("", title)

In [5]:
# Add a clean_title column: each title passed through clean_title(), which keeps
# only letters, digits and spaces. This is the text the TF-IDF index is built on.
movies["clean_title"] = movies["title"].apply(clean_title)

In [6]:
# Preview the frame to confirm the clean_title column was added.
movies

Unnamed: 0,movieId,title,genres,clean_title
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,Jumanji 1995
2,3,Grumpier Old Men (1995),Comedy|Romance,Grumpier Old Men 1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,Waiting to Exhale 1995
4,5,Father of the Bride Part II (1995),Comedy,Father of the Bride Part II 1995
...,...,...,...,...
62418,209157,We (2018),Drama,We 2018
62419,209159,Window of the Soul (2001),Documentary,Window of the Soul 2001
62420,209163,Bad Poems (2018),Comedy|Drama,Bad Poems 2018
62421,209169,A Girl Thing (2001),(no genres listed),A Girl Thing 2001


In [7]:
# TF-IDF vectorization
# ngram_range=(1, 2) indexes single words and adjacent word pairs, so a query
# can match on two-word phrases as well as on individual words.
vectorizer = TfidfVectorizer(ngram_range=(1, 2))
# Learn the vocabulary from the cleaned titles and keep the resulting
# title-by-term matrix; search() compares query vectors against it.
tfidf = vectorizer.fit_transform(movies["clean_title"])

In [8]:
# Function to search for movies
def search(title, top_n=5):
    """Return the movies whose cleaned titles are most similar to ``title``.

    The query is cleaned with ``clean_title``, vectorized with the fitted
    module-level ``vectorizer`` and compared against the module-level ``tfidf``
    matrix by cosine similarity.

    Args:
        title: Free-text movie title to look up.
        top_n: Maximum number of matches to return (default 5).

    Returns:
        Rows of ``movies`` ordered from most to least similar, so the first
        row is the best match. At most ``top_n`` rows are returned.
    """
    title = clean_title(title)
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # Never ask for more rows than exist; argpartition raises on an
    # out-of-range kth.
    top_n = min(top_n, similarity.size)
    if top_n <= 0:
        return movies.iloc[0:0]

    # argpartition only guarantees *which* indices are the top_n largest, not
    # their order, so rank the selected candidates explicitly.
    candidates = np.argpartition(similarity, -top_n)[-top_n:]
    ranked = candidates[np.argsort(similarity[candidates])[::-1]]
    return movies.iloc[ranked]

In [9]:
# Text box for the title to search for, pre-filled with an example query
movie_input = widgets.Text(description="Movie Title:", value="Toy Story", disabled=False)


In [10]:
# Output area that the search handler clears and redraws with its results
movie_list = widgets.Output()


In [11]:
# Function to display movie search results
def on_type(data):
    """Refresh ``movie_list`` with search results for the newly typed text.

    ``data`` is the change notification delivered by ``observe``; only its
    ``"new"`` entry is read. Text of five characters or fewer just clears the
    output without searching.
    """
    with movie_list:
        movie_list.clear_output()
        typed = data["new"]
        if len(typed) <= 5:
            return
        display(search(typed))

In [12]:
# Call on_type whenever the text box's value changes
movie_input.observe(on_type, names='value')

In [13]:
# Render the search box followed by its results area
display(movie_input, movie_list)

Text(value='Toy Story', description='Movie Title:')

Output()

In [14]:
# Function to find similar movies
def find_similar_movies(movie_id, min_rating=4, min_share=0.10, top_n=10):
    """Recommend movies favoured by the users who rated ``movie_id`` highly.

    Reads the module-level ``ratings`` and ``movies`` frames.

    Args:
        movie_id: ``movieId`` of the movie to base the recommendations on.
        min_rating: A rating counts as a "like" only when it is strictly
            greater than this value (default 4).
        min_share: Keep only movies liked by more than this fraction of the
            users who liked ``movie_id`` (default 0.10).
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        DataFrame with ``score``, ``title`` and ``genres`` columns, highest
        score first. ``score`` is the share of similar users who liked a movie
        divided by the share of all candidate-movie likers who liked it, so
        values above 1 mean the movie is liked disproportionately often by the
        similar users.
    """
    # Filter to "liked" ratings once instead of rebuilding the mask per step.
    liked = ratings[ratings["rating"] > min_rating]

    # Users who liked the target movie, and what else those users liked.
    similar_users = liked.loc[liked["movieId"] == movie_id, "userId"].unique()
    similar_likes = liked.loc[liked["userId"].isin(similar_users), "movieId"]
    similar_share = similar_likes.value_counts() / len(similar_users)
    similar_share = similar_share[similar_share > min_share]

    # How popular the same candidate movies are among everyone who liked any of them.
    candidate_likes = liked[liked["movieId"].isin(similar_share.index)]
    overall_share = candidate_likes["movieId"].value_counts() / len(candidate_likes["userId"].unique())

    rec_percentages = pd.concat([similar_share, overall_share], axis=1)
    rec_percentages.columns = ["similar", "all"]
    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
    rec_percentages = rec_percentages.sort_values("score", ascending=False)

    top_recs = rec_percentages.head(top_n)
    return top_recs.merge(movies, left_index=True, right_on="movieId")[["score", "title", "genres"]]


In [15]:
# Text box for the title to get recommendations for, pre-filled with an example
movie_name_input = widgets.Text(description="Movie Title:", value="Toy Story", disabled=False)

In [24]:
def find_similar_movies_by_genre(movie_id, ratings, movies):
    """Return the movies that share at least one genre with ``movie_id``.

    Genres are compared as whole ``|``-separated tokens rather than by passing
    the raw genre string to a regex match, so characters such as ``|``, ``(``
    and ``)`` in the genre text cannot change what is matched.

    Args:
        movie_id: ``movieId`` of the reference movie.
        ratings: Unused; kept so existing callers keep working.
        movies: DataFrame with ``movieId`` and ``genres`` columns, where
            ``genres`` is a ``|``-separated string.

    Returns:
        Rows of ``movies`` (original order, reference movie excluded) that have
        at least one genre in common with the reference movie. An empty frame
        with the same columns is returned when ``movie_id`` is not present.
    """
    def _genre_set(value):
        # A missing genre value is treated as "no genres" so it never matches.
        return set() if pd.isna(value) else set(str(value).split("|"))

    target_rows = movies.loc[movies["movieId"] == movie_id, "genres"]
    if target_rows.empty:
        # Unknown movie: nothing to compare against.
        return movies.iloc[0:0]

    target_genres = _genre_set(target_rows.iloc[0])
    shares_genre = movies["genres"].map(
        lambda value: not target_genres.isdisjoint(_genre_set(value))
    ).astype(bool)
    return movies[shares_genre & (movies["movieId"] != movie_id)]

In [25]:
def display_recommendations(title, search_function=search, threshold=4):
    """Display genre-based and rating-based recommendations for ``title``.

    Args:
        title: Free-text movie title typed by the user.
        search_function: Callable that maps a title to a DataFrame of candidate
            movies with a ``movieId`` column; the first row is taken as the
            intended movie. Defaults to ``search``. (The previous default,
            ``find_similar_movies``, expects a movie id and returns a frame
            without ``movieId``, so it could not work here.)
        threshold: Passed through to ``find_similar_movies_with_threshold``.

    Returns:
        None. Results are printed/displayed as a side effect.
    """
    results = search_function(title)
    if len(results) == 0:
        # Nothing matched; avoid an IndexError on results.iloc[0].
        print(f"No movie found matching {title!r}.")
        return
    movie_id = results.iloc[0]["movieId"]

    # Display genre-based recommendations
    genre_based_recommendations = find_similar_movies_by_genre(movie_id, ratings, movies)
    print("Genre-Based Recommendations:")
    display(genre_based_recommendations[["title", "genres"]])

    # Display other recommendations
    # NOTE(review): find_similar_movies_with_threshold is not defined in the
    # visible part of this notebook — confirm it exists before calling this.
    other_recommendations = find_similar_movies_with_threshold(movie_id, ratings, movies, threshold)
    print("Other Recommendations:")
    display(other_recommendations[["title", "genres"]])

    # Plot movie ratings
    # NOTE(review): plot_movie_ratings is likewise not defined in the visible
    # part of this notebook — confirm it exists.
    plot_movie_ratings(movies, ratings)

In [16]:
# Output area that the recommendation handler clears and redraws
recommendation_list = widgets.Output()

In [17]:
# Function to display movie recommendations
def on_type(data):
    """Refresh ``recommendation_list`` with recommendations for the newly typed text.

    ``data`` is the change notification delivered by ``observe``; only its
    ``"new"`` entry is read. The first row returned by ``search`` is taken as
    the intended movie and its ``movieId`` is passed to
    ``find_similar_movies``. Text of five characters or fewer just clears the
    output.
    """
    # NOTE(review): a handler named on_type is also defined earlier in this
    # file for the search box; this definition rebinds that module-level name.
    with recommendation_list:
        recommendation_list.clear_output()
        typed = data["new"]
        if len(typed) <= 5:
            return
        first_match_id = search(typed).iloc[0]["movieId"]
        display(find_similar_movies(first_match_id))

In [18]:
# Call on_type whenever the recommendation text box's value changes
movie_name_input.observe(on_type, names='value')

In [19]:
# Render the recommendation text box followed by its results area
display(movie_name_input, recommendation_list)

Text(value='Toy Story', description='Movie Title:')

Output()