In [1]:
import numpy as np
import pandas as pd

In [2]:
movies = pd.read_csv("../dataset/movies.csv")
ratings = pd.read_csv("../dataset/ratings.csv")
links = pd.read_csv("../dataset/links.csv")
tags = pd.read_csv("../dataset/tags.csv")

In [3]:
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [4]:
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [5]:
# Here we make sure that a the (userId, movieId) tuples are all distinct in the "ratings" dataset
print("Duplicated (userId, movieId) tuples in \"ratings\" dataset:", ratings.duplicated(subset=["userId", "movieId"]).sum())

Duplicated (userId, movieId) tuples in "ratings" dataset: 0


In [6]:
# Creating the user-movie rating pivot table
# The rows are the users, columns are the movies and the values are the ratings
user_rates = ratings.pivot_table(index='userId', columns='movieId', values='rating', fill_value=0)

In [7]:
def svd(A):
    # Get eigenvectors of A^T
    eigvals, eigvecs = np.linalg.eig(ATA)

    # Sort the eigenvalues in descending order
    sort_indices_ATA = np.argsort(eigvals_ATA)[::-1]
    sort_indices_AAT = np.argsort(eigvals_AAT)[::-1]

    # Sort eigenvalues and corresponding eigenvectors
    eigvals_ATA = eigvals_ATA[sort_indices_ATA]
    U = U[:, sort_indices_ATA]
    eigvals_AAT = eigvals_AAT[sort_indices_AAT]
    V = V[:, sort_indices_AAT]

    # Compute singular values from eigenvalues
    S = np.sqrt(eigvals_ATA)

    # Compute the right singular vectors (transpose)
    Vt = V.T

    return U, S, Vt

In [8]:
def cosine_similarity(x, y):
    return np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y))

In [9]:
def eig(matrix, num_iterations=1000, tolerance=1e-10):
    n = matrix.shape[0]
    eigenvectors = np.eye(n)

    for _ in range(num_iterations):
        # QR decomposition
        q, r = np.linalg.qr(matrix)

        # Update matrix with RQ decomposition
        matrix = r @ q

        # Accumulate eigenvectors
        eigenvectors = eigenvectors @ q

        # Check for convergence
        off_diagonal_sum = np.sum(np.abs(matrix - np.diag(np.diagonal(matrix))))
        if off_diagonal_sum < tolerance:
            break

    # Extract eigenvalues and eigenvectors
    eigenvalues = np.diag(matrix)
    
    return eigenvalues, eigenvectors

In [10]:
# Calcualting the SVD
U, S, VT = np.linalg.svd(user_rates, full_matrices=False)

print(f"{U=}")
print(f"{S=}")
print(f"{VT=}")

U=array([[-5.55541517e-02,  6.16738477e-02, -1.08974491e-02, ...,
         3.01873377e-03, -2.89230819e-04,  4.31423480e-04],
       [-5.86629527e-03, -1.77377186e-02, -4.42345417e-03, ...,
        -3.26516243e-03, -8.86828015e-03,  8.86330337e-04],
       [-1.35323055e-03,  2.06861278e-03,  1.71517331e-03, ...,
        -2.29472620e-03, -2.14465926e-03, -1.61906293e-03],
       ...,
       [-1.16114423e-01,  1.18470415e-02, -9.76290702e-03, ...,
         8.84377554e-04, -7.65989186e-04,  1.37856876e-03],
       [-7.57943540e-03,  1.37846340e-02, -3.97412421e-02, ...,
         1.21896501e-02, -3.21030684e-03, -1.75729437e-02],
       [-1.38864880e-01, -2.02184449e-01,  9.26753579e-02, ...,
        -7.72222925e-05, -3.38393669e-04,  7.05394146e-04]])
S=array([534.41989777, 231.23661142, 191.1508762 , 170.42250831,
       154.552948  , 147.33575651, 135.65556768, 122.66302989,
       121.44217651, 113.11144323, 109.60313933, 107.93266172,
       105.97376877, 102.05675293,  99.87323589,  

In [11]:
U2, S2, VT2 = svd(user_rates)

print(f"{U2=}")
print(f"{S2=}")
print(f"{VT2=}")

U2=array([[-7.04498985e-02+0.00000000e+00j, -2.75911949e-02+0.00000000e+00j,
         7.84438842e-02+0.00000000e+00j, ...,
         9.96755166e-03+0.00000000e+00j,  2.77998146e-03-8.40194220e-03j,
         2.77998146e-03+8.40194220e-03j],
       [-3.85393459e-02+0.00000000e+00j, -2.06662722e-03+0.00000000e+00j,
         5.68447103e-02+0.00000000e+00j, ...,
         3.82182366e-03+0.00000000e+00j,  7.47425200e-03-3.77575031e-03j,
         7.47425200e-03+3.77575031e-03j],
       [-1.59129220e-02+0.00000000e+00j, -2.47146155e-02+0.00000000e+00j,
         1.80051145e-02+0.00000000e+00j, ...,
        -6.44513631e-03+0.00000000e+00j, -4.24211329e-03+1.54915154e-02j,
        -4.24211329e-03-1.54915154e-02j],
       ...,
       [-6.46836073e-05+0.00000000e+00j,  5.97586244e-04+0.00000000e+00j,
        -8.71093879e-05+0.00000000e+00j, ...,
        -1.84271331e-03+0.00000000e+00j, -2.12914541e-03-2.81047498e-05j,
        -2.12914541e-03+2.81047498e-05j],
       [-6.46836073e-05+0.00000000e+00j, 

In [12]:
# Choose a target user for recommendation
target_user_index = int(input("Enter a user ID to get recommendations: "))

user_latent_feature_vector = U[target_user_index, :]

# Calculate cosine similarity between the target user and all other users
user_similarity_scores = np.array([cosine_similarity(user_latent_feature_vector, U[i, :]) for i in range(U.shape[0])])

# Find the indices of users most similar to the target user
similar_users_indices = np.argsort(user_similarity_scores)[::-1]

# Exclude the target user from the similar users
similar_users_indices = similar_users_indices[similar_users_indices != target_user_index]

# Recommend movies based on the most similar users
# print(U[similar_users_indices, :], U[similar_users_indices, :].shape)
# print(S, S.shape)
A = U[similar_users_indices, :] * S
recommended_movies = np.dot(A, VT)

# Exclude movies the target user has already rated
movies_already_rated = user_rates.iloc[target_user_index][user_rates.iloc[target_user_index] > 0].index
recommended_movies[:, movies_already_rated] = 0

recommended_movie_ids = recommended_movies[target_user_index, :].argsort()[::-1][:10]

Enter a user ID to get recommendations:  0


In [13]:
movies[movies["movieId"].isin(recommended_movie_ids)]

Unnamed: 0,movieId,title,genres
39,43,Restoration (1995),Drama
42,46,How to Make an American Quilt (1995),Drama|Romance
218,254,Jefferson in Paris (1995),Drama
239,277,Miracle on 34th Street (1994),Drama
273,314,"Secret of Roan Inish, The (1994)",Children|Drama|Fantasy|Mystery
525,613,Jane Eyre (1996),Drama|Romance
