In [5]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split 
import tkinter as tk 
from tkinter import messagebox

In [6]:
# ---- DATA LOADING AND PREPROCESSING ----
# Read the ratings and the movie metadata CSV files into DataFrames.
# NOTE(review): both paths are absolute and machine-specific, so this cell only
# runs on the original author's computer; consider a configurable data directory.
ratings = pd.read_csv(r"C:\Users\mehre\OneDrive\Documents\ratings.csv")
movies = pd.read_csv(r"C:\Users\mehre\OneDrive\Documents\movies.csv")

In [7]:
# Preview the first rows of the ratings table (userId, movieId, rating, timestamp).
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [8]:
# Preview the first rows of the movie metadata table (movieId, title, genres).
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [9]:
# Count missing values per column of the ratings table.
ratings.isnull().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [10]:
# Count rows of the movies table that are exact duplicates of an earlier row.
movies.duplicated().sum()

np.int64(0)

In [11]:
# Build the full user x movie rating matrix: one row per userId, one column per
# movieId, NaN wherever the user has not rated the movie.
# NOTE: DataFrame.pivot raises ValueError if a (userId, movieId) pair occurs
# more than once in `ratings`.
user_movie_matrix = ratings.pivot(index = 'userId', columns = 'movieId', values = 'rating')

In [12]:
user_movie_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


In [13]:
# Hold out 20% of the individual rating rows as a test set; random_state is
# fixed so the split is reproducible.
train, test = train_test_split(ratings, test_size = 0.2, random_state = 42)
# User x movie matrix built from the training ratings only (NaN = no training
# rating for that user/movie pair).
train_matrix = train.pivot(index = 'userId', columns = 'movieId', values ='rating')

In [14]:
train_matrix.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,191005,193565,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [15]:
# Item-item cosine similarity computed on the training matrix. The matrix is
# transposed so each row is a movie, and missing ratings are replaced by 0
# before the similarity is computed. The result is labelled by movieId on both
# axes.
item_similarity = pd.DataFrame(cosine_similarity(train_matrix.fillna(0).T), index=train_matrix.columns,
    columns=train_matrix.columns)
# NOTE(review): this displays train_matrix again; item_similarity.head() was
# presumably intended here -- confirm.
train_matrix.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,191005,193565,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [16]:
# Predict user's rating for a movie
def predict_rating(user_Id, movie_Id, train_matrix, item_similarity, k=10):
    """Predict one user's rating for one movie with item-based k-nearest neighbours.

    The prediction is the similarity-weighted average of the user's own
    ratings for the (at most) ``k`` rated movies most similar to ``movie_Id``:
    ``sum(sim * rating) / sum(|sim|)``.

    Parameters
    ----------
    user_Id
        Row label of the user in ``train_matrix``.
    movie_Id
        Row label of the target movie in ``item_similarity``.
    train_matrix : pandas.DataFrame
        Users x movies rating matrix; NaN marks "not rated".
    item_similarity : pandas.DataFrame
        Movies x movies similarity matrix labelled by movie id on both axes.
    k : int, default 10
        Maximum number of most-similar rated movies used as neighbours.

    Returns
    -------
    float or None
        The predicted rating, or ``None`` when no prediction can be made:
        the user or movie is unknown, the user has no usable ratings, or all
        neighbour similarities are zero.

    Notes
    -----
    Only the expected "no data" situations return ``None``. Any other error
    (for example a wrong argument type) propagates instead of being silently
    swallowed, so real bugs stay visible.
    """
    # Unknown user or unknown target movie: nothing to base a prediction on.
    if user_Id not in train_matrix.index or movie_Id not in item_similarity.index:
        return None

    user_ratings = train_matrix.loc[user_Id]  # Ratings by the user
    rated_movies = user_ratings[user_ratings.notna()]  # Movies the user has already rated

    # Only movies that the similarity matrix knows about can act as neighbours;
    # dropping the others avoids a label-lookup failure below.
    rated_movies = rated_movies[rated_movies.index.isin(item_similarity.columns)]
    if rated_movies.empty:
        return None

    # Similarities between target movie and rated movies
    sim_scores = item_similarity.loc[movie_Id, rated_movies.index]

    # Top k similar movies (ranked by absolute similarity)
    top_idx = sim_scores.abs().sort_values(ascending=False).index[:k]
    top_sims = sim_scores[top_idx]
    top_ratings = rated_movies[top_idx]

    # A zero denominator means no neighbour carries any signal.
    weight_total = top_sims.abs().sum()
    if weight_total == 0:
        return None

    return (top_sims * top_ratings).sum() / weight_total


In [17]:
def recommend_movies_for_user(user_id, n=5):
    """Return the titles of the ``n`` unrated movies with the highest predicted rating.

    Candidate movies are those the user has no rating for in the notebook-level
    ``user_movie_matrix``. Each candidate is scored with ``predict_rating``
    using the notebook-level ``train_matrix`` and ``item_similarity``.

    Parameters
    ----------
    user_id
        Row label of the user in ``user_movie_matrix``.
    n : int, default 5
        Maximum number of titles to return.

    Returns
    -------
    list of str
        Titles ordered from highest to lowest predicted rating. The list is
        empty when the user is unknown or no candidate could be scored.
    """
    if user_id not in user_movie_matrix.index:
        return []

    user_ratings = user_movie_matrix.loc[user_id]
    unrated_movies = user_ratings[user_ratings.isna()].index

    scores = []
    for movie_id in unrated_movies:
        # Score every unrated movie; skip the ones that cannot be predicted.
        pred = predict_rating(user_id, movie_id, train_matrix, item_similarity)
        if pred is not None and not np.isnan(pred):
            scores.append((movie_id, pred))

    top = sorted(scores, key=lambda pair: pair[1], reverse=True)[:n]

    # Build the movieId -> title lookup once instead of filtering the whole
    # `movies` frame for every recommendation. Duplicated ids keep their first
    # title, which is the row a filter followed by `.values[0]` would select.
    title_by_id = movies.drop_duplicates(subset='movieId').set_index('movieId')['title']

    # A rated movie that is missing from the metadata table gets a placeholder
    # label instead of raising IndexError.
    return [title_by_id.get(mid, f"Unknown title (movieId {mid})") for mid, _ in top]

In [18]:
def on_recommend():
    """Button callback: read the user ID from the entry box and show recommendations.

    Shows a warning dialog when the entry text is not an integer. Otherwise
    writes either the top-5 recommendation list or a "not found" message to
    ``result_text``.
    """
    user_input = entry.get()

    # Guard only the integer parse: a ValueError raised deeper inside the
    # recommendation code is a real bug and must not be reported to the user
    # as an invalid User ID.
    try:
        user_id = int(user_input)
    except ValueError:
        messagebox.showwarning("Input Error", "Please enter a valid integer User ID.")
        return

    recs = recommend_movies_for_user(user_id, n=5)
    if not recs:
        result_text.set("User not found or no recommendations available.")
    else:
        result_text.set("Top 5 Movie Recommendations:\n\n• " + '\n• '.join(recs))

# ---- GUI ----
# Build a small Tk window: a User ID entry, a button that triggers
# on_recommend, a label that shows the result, and a hint with the valid ID range.
#
# Fonts are given as (family, size) tuples. A parenthesised number such as
# `(14)` is just the integer 14, which Tk interprets as a font *family name*,
# so the intended size would never be applied.
win = tk.Tk()
win.title('Movie Recommender')
win.geometry('600x500')
win.config(padx=20, pady=20)

label = tk.Label(win, text='Enter a User ID:', font=("Helvetica", 14))
label.pack(pady=10)

entry = tk.Entry(win, width=20, font=("Helvetica", 12))
entry.pack()

button = tk.Button(win, text='Get Recommendations', command=on_recommend, font=("Helvetica", 12))
button.pack(pady=10)

result_text = tk.StringVar()
# Wrap at 560px: the 600px-wide window loses 20px of padding on each side, so
# wrapping at the full 600px would let long titles run past the visible area.
result_label = tk.Label(win, textvariable=result_text, wraplength=560, justify="left",
                        font=("Helvetica", 12))
result_label.pack(pady=20)

# Show the range of user IDs present in the rating matrix as an input hint.
min_user = user_movie_matrix.index.min()
max_user = user_movie_matrix.index.max()
info_label = tk.Label(win, text=f"Valid User IDs: {min_user} to {max_user}",
                      font=("Helvetica", 10), fg='gray')
info_label.pack()

# Blocks until the window is closed.
win.mainloop()