In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the movie table; '\n' is passed explicitly as the line terminator.
movies = pd.read_csv('movies_dataset.csv', lineterminator='\n')
# TfidfVectorizer rejects NaN documents, so rows with a missing title are
# vectorised as empty strings instead. Row order and count are unchanged,
# so every matrix derived from `title` still lines up with `movies`.
title = movies['title'].fillna('')

In [3]:
# TF-IDF features built from the unigrams and bigrams of each title.
vectorizer = TfidfVectorizer(ngram_range=(1, 2))
tfidf_matrix = vectorizer.fit_transform(title)

In [4]:
# Project the TF-IDF features onto 20 latent components. TruncatedSVD's
# default solver is randomized, so the seed is pinned to make the latent
# space (and everything computed from it) identical on every fresh run.
svd = TruncatedSVD(n_components=20, random_state=42)
latent_matrix = svd.fit_transform(tfidf_matrix)

In [5]:
# Pairwise cosine similarity between all rows of the latent matrix; with a
# single argument the rows are compared against themselves.
cosine_sim = cosine_similarity(latent_matrix)

In [6]:
def recommend_movies(movie_title, num_movies=5):
    """Return the titles most similar to ``movie_title``.

    The title is matched case-insensitively against ``movies['title']``; if
    several rows match, the first one is used. Similarities are read from
    the matching row of the precomputed ``cosine_sim`` matrix.

    Args:
        movie_title: Exact title to look up (case-insensitive).
        num_movies: Maximum number of recommendations to return.

    Returns:
        A Series of up to ``num_movies`` titles, most similar first. The
        queried movie itself is never part of the result.

    Raises:
        IndexError: If no row carries the requested title.
    """
    is_match = (movies['title'].str.lower() == movie_title.lower()).to_numpy()
    positions = np.flatnonzero(is_match)
    if positions.size == 0:
        raise IndexError(f"No movie titled {movie_title!r} found in the dataset.")
    # Use the row *position* (not the index label) so the lookup into
    # cosine_sim and the final .iloc always refer to the same row.
    idx = positions[0]

    # Stable descending order: equal scores keep their original row order.
    ranked = np.argsort(-np.asarray(cosine_sim[idx]), kind='stable')
    # Remove the queried movie explicitly rather than assuming it is ranked
    # first -- with tied scores another row can sort ahead of it.
    ranked = ranked[ranked != idx][:num_movies]
    return movies['title'].iloc[ranked]

In [7]:
def searching(title):
    """Render every movie whose title contains ``title`` into ``movie_list``.

    The text is matched literally and case-insensitively; rows with a
    missing title never match. Nothing is returned: the matching rows are
    shown as an HTML table inside ``movie_list``, or a short message is
    printed there when nothing matches.

    Args:
        title: Substring to look for in ``movies['title']``.
    """
    # regex=False: the text is typed by a user, so characters such as "(" or
    # "*" must be matched literally. With the default regex=True a half-typed
    # pattern like "Toy Story (" raises re.error.
    results = movies[
        movies['title'].str.contains(title, case=False, na=False, regex=False)
    ]

    # Display the results
    with movie_list:
        if not results.empty:
            display(HTML(results.to_html(index=False)))
        else:
            print("No matching movies found.")

In [8]:
import ipywidgets as widgets
from IPython.display import display
from IPython.display import HTML

# Text box for the title to search, pre-filled with a sample query.
movie_input = widgets.Text(value="Toy Story", description="Movie Title:")
# Output area into which the search results are rendered.
movie_list = widgets.Output()

def on_type(data):
    """Refresh the search results when the text box value changes.

    Clears ``movie_list`` and, when the new text is longer than one
    character, runs ``searching`` on it.

    Args:
        data: Change dictionary passed by ``movie_input.observe``; only its
            ``'new'`` entry (the current text) is read.
    """
    with movie_list:
        movie_list.clear_output()
        title = data['new']
        if len(title) > 1:
            # searching() renders into movie_list itself and returns None;
            # passing that return value to display() would append a stray
            # "None" after every result.
            searching(title)
# Call on_type on every change of the text box's value, then show the text
# box followed by the results area.
movie_input.observe(on_type, names='value')
display(movie_input, movie_list)

Text(value='Toy Story', description='Movie Title:')

Output()