In [None]:
import pandas as pd
import numpy as np
import re

In [None]:
# Load the movies table; later cells read its movieId, title and genres columns.
# NOTE(review): the path looks like a mounted Google Drive folder (Colab) — adjust when running elsewhere.
movies=pd.read_csv('/content/drive/MyDrive/ml-25m/ml-25m/movies.csv')

In [None]:
# Bare last expression: the notebook renders the movies table as this cell's output.
movies

In [None]:
def title_cleaner(title):
  """Return `title` with every character other than ASCII letters, digits and spaces removed.

  Punctuation, parentheses and non-ASCII letters are dropped (not replaced),
  so "Toy Story (1995)" becomes "Toy Story 1995".
  """
  return re.sub("[^a-zA-Z0-9 ]", "", title)

In [None]:
# Add a cleaned copy of every title (see title_cleaner) for text matching,
# then print the new column as a quick sanity check.
movies['clean_title'] = movies['title'].map(title_cleaner)
print(movies['clean_title'])

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Build a TF-IDF model over the cleaned titles; ngram_range=(1,2) indexes
# single words as well as adjacent word pairs.
vectorizer= TfidfVectorizer(ngram_range=(1,2))
# Fit on the cleaned titles and keep the resulting matrix; search_movie compares
# query vectors against it.
tfidf=vectorizer.fit_transform(movies['clean_title'])
print(f"TF-IDF Matrix Shape: {tfidf.shape}")  # Check the shape of the TF-IDF matrix


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
def search_movie(title, top_n=5):
  """Return the movies whose cleaned titles best match `title`, best match first.

  The query is cleaned with title_cleaner, vectorized with the fitted
  `vectorizer`, and compared to every row of `tfidf` by cosine similarity.

  Parameters:
    title: free-text query string.
    top_n: maximum number of rows to return (default 5).

  Returns:
    A slice of `movies` with at most `top_n` rows, ordered by descending
    similarity, so `.iloc[0]` is the closest match.
  """
  title=title_cleaner(title)
  query_vec= vectorizer.transform([title])
  similarity=cosine_similarity(query_vec,tfidf).flatten()

  # Never ask for more rows than exist; argpartition raises if kth is out of bounds.
  k=min(top_n, similarity.size)
  if k<=0:
    return movies.iloc[[]]

  # argpartition only guarantees that the k largest scores end up in the last
  # k slots, in unspecified order, so rank those k explicitly afterwards.
  top=np.argpartition(similarity, -k)[-k:]
  top=top[np.argsort(similarity[top])[::-1]]
  return movies.iloc[top]

In [None]:
import ipywidgets as widg
from IPython.display import display

# Text box for the title query. The Text widget's trait is `disabled` and it
# expects a real boolean.
movie_input=widg.Text( value="Toy story",description="Movie Title",disabled=False)

# Output area that on_type clears and refills on every change.
movie_list=widg.Output()

def on_type(data):
  """Refresh the search results when the text box value changes.

  `data` is the change dict handed over by observe(); only its "new" entry
  (the current text) is read. Queries of 5 characters or fewer just clear
  the output area.
  """
  with movie_list:
    movie_list.clear_output()
    title=data["new"]
    if len(title)>5:
      display(search_movie(title))

# Call on_type only for changes to the widget's `value` trait.
movie_input.observe(on_type,names='value')
display(movie_input,movie_list)


In [None]:
# Print the cleaned titles again (repeats the print in the title-cleaning cell).
print(movies['clean_title'])

In [None]:
# Load the ratings table; get_recommendations reads its userId, movieId and rating columns.
# NOTE(review): the path looks like a mounted Google Drive folder (Colab) — adjust when running elsewhere.
ratings=pd.read_csv('/content/drive/MyDrive/ml-25m/ml-25m/ratings.csv')


In [None]:
# get_recommendations filters with `ratings["movieId"] == movie_id`, so movie_id
# must be ONE id. Assigning the whole ratings["movieId"] column made that
# comparison true for every row. Take the first search hit for "Toy story",
# the same pick the recommendation widget makes.
movie_id = search_movie("Toy story").iloc[0]["movieId"]

# #def find_similar_movies(movie_id):
# movie = movies[movies["movieId"] == movie_id]

In [None]:
# A cell only auto-displays its final expression, so the dtypes must be
# displayed explicitly or their output is silently dropped.
display(ratings.dtypes)
ratings.head()

In [None]:
import pandas as pd

def get_recommendations(movie_id,threshold=0.10):
    """Recommend movies favoured by users who rated `movie_id` above 4.

    Steps:
      1. Collect the users who rated `movie_id` above 4 ("fans").
      2. For every movie, compute the share of fans who rated it above 4 and
         keep only movies whose share exceeds `threshold`.
      3. For those movies, compute the share of high raters among all users
         who rated any of them above 4.
      4. Score each movie as fan share divided by overall share, so movies
         that fans like more than the wider group rank first.

    Parameters:
      movie_id: value compared against ratings["movieId"].
      threshold: minimum fan share for a movie to be considered (default 0.10).

    Returns:
      DataFrame with columns "score", "title" and "genres" for the (up to)
      ten highest-scoring movies, joined to `movies` on movieId.
    """
    # Every step only counts ratings above 4, so build that mask once.
    liked = ratings["rating"] > 4

    # Users who liked the query movie.
    fans = ratings.loc[(ratings["movieId"] == movie_id) & liked, "userId"].unique()

    # Fraction of fans that liked each movie, filtered by the threshold.
    fan_share = ratings.loc[ratings["userId"].isin(fans) & liked, "movieId"].value_counts() / len(fans)
    fan_share = fan_share[fan_share > threshold]

    # Same fraction, measured over everyone who liked any of the candidate movies.
    candidate_likes = ratings[ratings["movieId"].isin(fan_share.index) & liked]
    overall_share = candidate_likes["movieId"].value_counts() / candidate_likes["userId"].unique().size

    # Put both shares side by side, indexed by movieId, and derive the score.
    table = pd.concat([fan_share, overall_share], axis=1)
    table.columns = ["similar", "all"]
    table["score"] = table["similar"] / table["all"]

    best = table.sort_values("score", ascending=False).head(10)
    return best.merge(movies, left_index=True, right_on="movieId")[["score", "title", "genres"]]


# Run the recommender once for the module-level movie_id and print the resulting table.
# NOTE(review): get_recommendations compares ratings["movieId"] to movie_id, so
# movie_id should hold a single movie id here — confirm the value assigned in the earlier cell.
rec_percentages = get_recommendations(movie_id)
print(rec_percentages)



In [None]:
# Text box for the title to base recommendations on. The Text widget's trait
# is `disabled`.
movie_name_input= widg.Text(value="toy story", description="Movie Title",disabled=False)
# Output area that on_type clears and refills on every change.
recommendation= widg.Output()
def on_type(data):
    """Show recommendations for the first search hit of the typed title.

    `data` is the change dict handed over by observe(); only its "new" entry
    (the current text) is read. Queries of 5 characters or fewer just clear
    the output area.
    """
    with recommendation:
        recommendation.clear_output()
        title = data["new"]
        if len(title) > 5:
            results = search_movie(title)
            # The first row of the search results is treated as the intended movie.
            movie_id = results.iloc[0]["movieId"]
            display(get_recommendations(movie_id))

# Call on_type only for changes to the widget's `value` trait.
movie_name_input.observe(on_type, names='value')

display(movie_name_input, recommendation)