In [3]:
import pandas as pd
import numpy as np

ratings_df = pd.read_csv('ratings.csv')

user_item_matrix = pd.pivot_table(ratings_df, values='rating', index='userId', columns='movieId')
user_item_matrix = user_item_matrix.fillna(0)

def cosine_similarity_matrix(matrix):
    norm = np.linalg.norm(matrix, axis=1)
    return np.dot(matrix, matrix.T) / (np.outer(norm, norm) + 1e-8)

user_similarity = cosine_similarity_matrix(user_item_matrix.values)

def get_top_k_similar_users(user_id, k=7):
    user_similarity_scores = user_similarity[user_id]
    similar_users_indices = np.argsort(user_similarity_scores)[::-1]
    similar_users_indices = similar_users_indices[1:]
    return similar_users_indices[:k]

def predict_rating(user_id, movie_id, k=7):
    similar_users_indices = get_top_k_similar_users(user_id, k)
    numerator = 0
    denominator = 0

    for i in similar_users_indices:
        if user_item_matrix.iloc[i, movie_id] > 0:
            similarity_score = user_similarity[user_id][i]
            numerator += similarity_score * user_item_matrix.iloc[i, movie_id]
            denominator += similarity_score

    if denominator == 0:
        return 0.0

    predicted_rating = numerator / denominator
    predicted_rating = round(predicted_rating * 2) / 2
    return predicted_rating

user_id = 1
movie_id = 123
predicted_rating = predict_rating(user_id, movie_id)
print(f"Predicted rating for movieId {movie_id} by userId {user_id}: {predicted_rating}")


Predicted rating for movieId 123 by userId 1: 4.0
