In [215]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors

def display(data):
    data_with_nans = data.copy()
    data_with_nans[data_with_nans == 0] = np.nan
    print(data_with_nans)

def find_similar_users(user_id, data, k, usermap, userinversemap):
    similar_users = []
    distances = []
    k += 1
    
    user = data.loc[user_id].values.reshape(1, -1)
    knn = NearestNeighbors(n_neighbors=k, algorithm="brute", metric="cosine")
    knn.fit(data)
    neighbors = knn.kneighbors(user, return_distance=True)
    
    for i in range(k):
        neighbor = neighbors[1].item(i)
        similar_users.append(userinversemap[neighbor])
        distances.append(neighbors[0][0][i])

    return distances[1:], similar_users[1:]

def predict(user_id, movie_id, k):
    ratings = pd.read_csv("ratings.csv")
    data = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    usermap = dict(zip(np.unique(ratings["userId"]), list(range(len(ratings["userId"].unique())))))
    userinversemap = dict(zip(list(range(len(ratings["userId"].unique()))), np.unique(ratings["userId"])))
    distances, similar_users = find_similar_users(user_id, data, k, usermap, userinversemap)
    
    numerator = []
    denominator = []
    if data.loc[user_id][movie_id] != 0:
        print("Rating already exists")
        return data.loc[user_id][movie_id]
    for i in range(k):
        other_user_rating = data.loc[similar_users[i]][movie_id]
        if other_user_rating == 0: continue
        numerator.append(other_user_rating * distances[i])
        denominator.append(distances[i])  
    if len(numerator) == 0 or len(denominator) == 0:
        print("Insufficient ratings")
        return None
    return sum(numerator) / sum(denominator)

user_id = 1
movie_id = 69
k = 10
prediction = predict(user_id, movie_id, k)
print(f"Predicted Rating: {prediction}")

Predicted Rating: 4.24321190179887
