In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the data frame
df = pd.read_csv(r"C:\Users\HP\Desktop\Internship final project\ratings_small.csv")

# Create a pivot table with users as rows and movies as columns
pivot_table = df.pivot_table(index='userId', columns='movieId', values='rating', fill_value=0)

# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(pivot_table)

# Function to get top N similar users based on cosine similarity
def get_similar_users(user_id, top_n):
    user_index = pivot_table.index.get_loc(user_id)
    user_similarity = cosine_sim[user_index]
    similar_users_indices = user_similarity.argsort()[::-1][1:top_n+1]
    similar_users = pivot_table.iloc[similar_users_indices].index
    return similar_users

# Function to get movie recommendations for a user
def get_movie_recommendations(user_id, top_n):
    similar_users = get_similar_users(user_id, top_n)
    user_movies = pivot_table.loc[user_id]
    similar_users_movies = pivot_table.loc[similar_users].sum()
    recommendations = similar_users_movies[similar_users_movies == 0].index
    return recommendations

# Function to display recommended movies
def display_movie_recommendations(movie_recommendations):
    print("Top 5 recommended movies:")
    for movie_id in movie_recommendations[:5]:
        print("Movie ID:", movie_id)

# Split the data into train and test sets
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# Function to evaluate the model using RMSE
def evaluate_model():
    pivot_table_train = train_data.pivot_table(index='userId', columns='movieId', values='rating', fill_value=0)
    pivot_table_test = test_data.pivot_table(index='userId', columns='movieId', values='rating', fill_value=0)
    
    cosine_sim_train = cosine_similarity(pivot_table_train)
    
    errors = []
    for _, row in test_data.iterrows():
        user_id = row['userId']
        movie_id = row['movieId']
        rating = row['rating']
        
        user_index = pivot_table_train.index.get_loc(user_id)
        user_similarity = cosine_sim_train[user_index]
        similar_users_indices = user_similarity.argsort()[::-1][1:top_n+1]
        similar_users = pivot_table_train.iloc[similar_users_indices].index
        similar_users_movies = pivot_table_train.loc[similar_users]
        predicted_rating = similar_users_movies[movie_id].mean()
        
        errors.append((predicted_rating - rating) ** 2)
    
    rmse = mean_squared_error(test_data['rating'], pivot_table_test.mean(axis=1), squared=False)
    return rmse

# Example usage
user_id = 1
top_n = 5

# Prompt the user to enter a movie they like
liked_movie = input("Enter a movie you like: ")

# Get movie recommendations for the user
recommendations = get_movie_recommendations(user_id, top_n)

# Filter recommendations based on the liked movie
filtered_recommendations = [str(movie) for movie in recommendations if liked_movie.lower() in str(movie).lower()]

# Display the recommended movies
display_movie_recommendations(filtered_recommendations)

Enter a movie you like: 5
Top 5 recommended movies:
Movie ID: 5
Movie ID: 15
Movie ID: 25
Movie ID: 35
Movie ID: 45


KeyError: 4736.0