In [82]:
import numpy as np
import pandas as pd

In [83]:
# Read the four CSV tables from ../dataset (relative to the notebook's working
# directory) into DataFrames.
movies = pd.read_csv("../dataset/movies.csv")
ratings = pd.read_csv("../dataset/ratings.csv")
links = pd.read_csv("../dataset/links.csv")
tags = pd.read_csv("../dataset/tags.csv")

In [84]:
# Show the movies DataFrame as this cell's output.
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [85]:
# Show the ratings DataFrame as this cell's output.
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [86]:
# Sanity check: count the rows of "ratings" whose (userId, movieId) pair repeats an earlier row.
# A count of 0 means every user rated each movie at most once.
duplicate_pair_count = ratings.duplicated(subset=["userId", "movieId"]).sum()
print("Duplicated (userId, movieId) tuples in \"ratings\" dataset:", duplicate_pair_count)

Duplicated (userId, movieId) tuples in "ratings" dataset: 0


In [87]:
# Build the user x movie rating matrix: one row per userId, one column per movieId,
# each cell holding the rating. Pairs with no rating are filled with 0.
user_rates = pd.pivot_table(
    ratings,
    values="rating",
    index="userId",
    columns="movieId",
    fill_value=0,
)

In [88]:
def svd(A):
    """Singular value decomposition of ``A`` via the eigendecomposition of ``A.T @ A``.

    Parameters
    ----------
    A : array-like of shape (m, n)
        Real 2-D matrix. NumPy arrays and pandas DataFrames are both accepted;
        the input is converted to a float ndarray so the results are always
        plain arrays that can be indexed positionally.

    Returns
    -------
    U : ndarray of shape (m, n)
        Left singular vectors, one column per singular value. Columns that
        belong to a (numerically) zero singular value are all zeros.
    S : ndarray of shape (n,)
        Singular values in descending order; numerically zero values are
        reported as exactly 0.
    VT : ndarray of shape (n, n)
        Transposed right singular vectors (rows are eigenvectors of ``A.T @ A``).

    ``(U * S) @ VT`` reconstructs ``A`` up to floating-point error.
    """
    A = np.asarray(A, dtype=float)

    # A.T @ A is symmetric, so use the symmetric solver: unlike np.linalg.eig it
    # always returns real eigenvalues and orthonormal eigenvectors.
    eigvals, eigvecs = np.linalg.eigh(A.T @ A)

    # Put the largest eigenvalue first so the leading columns carry the most
    # important factors.
    order = np.argsort(eigvals)[::-1]
    eigvals = eigvals[order]
    V = eigvecs[:, order]

    # Round-off can push zero eigenvalues slightly below zero; clip them so the
    # square root never produces NaN.
    eigvals = np.clip(eigvals, 0.0, None)

    # Eigenvalues below this cutoff are indistinguishable from zero. Dividing by
    # their square roots would blow U up to inf/NaN, so they are zeroed instead.
    cutoff = eigvals.max(initial=0.0) * max(A.shape) * np.finfo(float).eps
    keep = eigvals > cutoff

    S = np.where(keep, np.sqrt(eigvals), 0.0)

    U = A @ V
    U[:, keep] /= S[keep]
    U[:, ~keep] = 0.0

    return U, S, V.T

In [89]:
def cosine_similarity(x, y):
    """Return the cosine of the angle between vectors ``x`` and ``y``.

    Parameters
    ----------
    x, y : array-like
        1-D vectors of equal length.

    Returns
    -------
    float
        ``dot(x, y) / (||x|| * ||y||)``. If either vector has zero norm the
        angle is undefined and 0.0 is returned instead of NaN, so that sorting
        the scores is not polluted by NaN values.
    """
    norm_product = np.linalg.norm(x) * np.linalg.norm(y)
    if norm_product == 0:
        return 0.0
    return np.dot(x, y) / norm_product

In [90]:
def eig(matrix, num_iterations=1000, tolerance=1e-10):
    """Run the unshifted QR iteration on a square matrix.

    Each step factors the current iterate as ``Q @ R`` and replaces it with
    ``R @ Q``, multiplying the Q factors together along the way. Iteration stops
    after ``num_iterations`` steps, or earlier once the sum of absolute
    off-diagonal entries of the iterate drops below ``tolerance``.

    Parameters
    ----------
    matrix : ndarray of shape (n, n)
        Matrix to iterate on; the caller's array is not modified.
    num_iterations : int
        Maximum number of QR steps.
    tolerance : float
        Threshold on the off-diagonal absolute sum used as the stopping test.

    Returns
    -------
    eigenvalues : ndarray of shape (n,)
        Diagonal of the final iterate.
    eigenvectors : ndarray of shape (n, n)
        Product of all Q factors, in iteration order.
    """
    accumulated_q = np.eye(matrix.shape[0])
    iterate = matrix
    step = 0

    while step < num_iterations:
        step += 1

        # One QR step: factor, then multiply the factors back in reverse order.
        ortho, upper = np.linalg.qr(iterate)
        iterate = upper @ ortho
        accumulated_q = accumulated_q @ ortho

        # Stop once everything off the diagonal has (numerically) vanished.
        off_diagonal = iterate - np.diag(np.diagonal(iterate))
        if np.sum(np.abs(off_diagonal)) < tolerance:
            break

    return np.diag(iterate), accumulated_q

In [None]:
# Factorise the rating matrix. svd() is handed a plain float ndarray rather than
# the labelled DataFrame because the recommendation code indexes U positionally
# (U[i, :]), which needs NumPy arrays, not label-indexed pandas objects.
U, S, VT = svd(user_rates.to_numpy(dtype=float))
print(U.shape)

In [None]:
# Tunable settings for the recommender.
N_LATENT_FACTORS = 50    # strongest singular directions used to describe a user
N_NEIGHBOURS = 20        # most similar users whose ratings drive the recommendation
N_RECOMMENDATIONS = 10   # number of movies to return

# Choose a target user for recommendation
target_user_id = int(input("Enter a user ID to get recommendations: "))
if target_user_id not in user_rates.index:
    raise ValueError(f"Unknown user ID: {target_user_id}")

# userId is a row *label* of user_rates, not a row position: translate it before
# indexing the NumPy factor matrices.
target_user_index = user_rates.index.get_loc(target_user_id)

# Work on plain arrays so positional indexing is well defined.
U_arr, S_arr, VT_arr = np.asarray(U), np.asarray(S), np.asarray(VT)

# Keep only the strongest latent factors. With every factor retained the user rows
# of U are (numerically) orthogonal to one another, so all cosine similarities
# would be ~0 and the neighbour ranking would be noise. NaNs, if any, sort last.
factor_strength = np.where(np.isnan(S_arr), -np.inf, S_arr)
latent_factors = np.argsort(factor_strength)[::-1][:N_LATENT_FACTORS]
user_factors = U_arr[:, latent_factors]

user_latent_feature_vector = user_factors[target_user_index, :]

# Calculate cosine similarity between the target user and all other users
user_similarity_scores = np.array(
    [cosine_similarity(user_latent_feature_vector, user_factors[i, :]) for i in range(user_factors.shape[0])]
)

# Find the indices of users most similar to the target user
similar_users_indices = np.argsort(user_similarity_scores)[::-1]

# Exclude the target user from the similar users
similar_users_indices = similar_users_indices[similar_users_indices != target_user_index]
nearest_users_indices = similar_users_indices[:N_NEIGHBOURS]

# Low-rank reconstruction of the nearest neighbours' ratings (neighbours x movies)
recommended_movies = np.dot(
    user_factors[nearest_users_indices, :] * S_arr[latent_factors],
    VT_arr[latent_factors, :],
)

# Score each movie by the similarity-weighted sum of the neighbours' ratings;
# negatively correlated neighbours get no vote.
neighbour_weights = np.clip(user_similarity_scores[nearest_users_indices], 0.0, None)
movie_scores = neighbour_weights @ recommended_movies

# Exclude movies the target user has already rated. The mask is positional
# (one flag per column of user_rates), so it lines up with movie_scores.
movies_already_rated = user_rates.iloc[target_user_index].to_numpy() > 0
movie_scores[movies_already_rated] = -np.inf

# Best-scoring column positions first; drop any masked-out entries that slipped in
# when fewer than N_RECOMMENDATIONS unrated movies exist.
top_movie_positions = np.argsort(movie_scores)[::-1][:N_RECOMMENDATIONS]
top_movie_positions = top_movie_positions[np.isfinite(movie_scores[top_movie_positions])]

# Translate column positions back to real movieId labels for the lookup in "movies".
recommended_movie_ids = user_rates.columns[top_movie_positions].to_numpy()

In [None]:
# Show the rows of "movies" whose movieId appears in recommended_movie_ids.
is_recommended = movies["movieId"].isin(recommended_movie_ids)
movies.loc[is_recommended]