# Computational Linear Algebra
### Getting movie recommendations with Singular Value Decomposition (SVD)
Juan Camilo Sarmiento - Equinox - Plan Semilla - Data Science

Load movie data from npz files:

In [1]:
import numpy as np
import os

movie_titles_npz=np.load(os.path.join(os.getcwd(),"Movies",'movie_titles.npz'))
movie_recommend_npz=np.load(os.path.join(os.getcwd(),"Movies",'movie_recommend.npz'))

print("movie_titles.npz keys:",movie_titles_npz.files)
print("movie_recommend.npz keys:",movie_recommend_npz.files)

movie_titles.npz keys: ['titles']
movie_recommend.npz keys: ['format', 'shape', 'data', 'row', 'col']


Getting movie titles data:

In [2]:
title_array=movie_titles_npz["titles"]
title_array.shape,title_array

((3706,),
 array(["One Flew Over the Cuckoo's Nest (1975)",
        'James and the Giant Peach (1996)', 'My Fair Lady (1964)', ...,
        'Far and Away (1992)', 'Texas Chainsaw Massacre, The (1974)',
        'Contact (1997)'], dtype='<U75'))

Getting movie recommend data:

In [3]:
format_array=movie_recommend_npz["format"]
format_array.shape,format_array

((), array('coo', dtype='<U3'))

In [4]:
shape_array=movie_recommend_npz["shape"]
shape_array.shape,shape_array

((2,), array([6040, 3706]))

In [5]:
data_array=movie_recommend_npz["data"]
data_array.shape,data_array

((1000209,), array([5, 5, 5, ..., 4, 4, 5], dtype=int8))

In [6]:
row_array=movie_recommend_npz["row"]
row_array.shape,row_array

((1000209,), array([   0,    0,    0, ..., 6039, 6039, 6039]))

In [7]:
col_array=movie_recommend_npz["col"]
col_array.shape,col_array

((1000209,), array([   0,   47,  144, ..., 3493, 3508, 3575]))

In [8]:
import scipy
sparse_matrix=scipy.sparse.load_npz(os.path.join(os.getcwd(),"Movies",'movie_recommend.npz'))
sparse_matrix

<6040x3706 sparse matrix of type '<class 'numpy.int8'>'
	with 1000209 stored elements in COOrdinate format>

In [9]:
sparse_matrix = sparse_matrix.toarray()
sparse_matrix.shape,sparse_matrix

((6040, 3706),
 array([[5, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [3, 0, 0, ..., 0, 0, 0]], dtype=int8))

Now onto the movie recommendation system:

In [33]:
def recommender(matrix,percent):
    matrix=matrix.astype(np.float64)
    U,S,V=np.linalg.svd(matrix,full_matrices=False)
    S=np.diag(S)
    i=int(S.shape[0]*percent)
    recommender_mtx=np.matmul(U[:,:i],np.matmul(S[:i,:i],V[:i,:]))
    return recommender_mtx,U,S,V

def recommend(usr_codes,recommend_qty,titles,sparse_mtx):
    ratings,_,_,V=recommender(sparse_mtx,0.3)
    scores = np.dot(ratings[usr_codes], V.T)
    ranking_indx=np.argsort(-scores,axis=1)
    recommendations=[]
    for i in range(len(usr_codes)):
        recommended=titles[ranking_indx[i,:recommend_qty]]
        print(f"Top {recommend_qty} movies for user {usr_codes[i]}:")
        for movie in recommended:
            print(movie)
        recommendations.append((usr_codes[i],recommended))
    return recommendations
    
    
recommendations=recommend([0,1,2],3,title_array,sparse_matrix)

Top 3 movies for user 0:
Bug's Life, A (1998)
Sound of Music, The (1965)
Miracle on 34th Street (1947)
Top 3 movies for user 1:
Airplane! (1980)
Bambi (1942)
Wallace & Gromit: The Best of Aardman Animation (1996)
Top 3 movies for user 2:
Bug's Life, A (1998)
Girl, Interrupted (1999)
Star Wars: Episode I - The Phantom Menace (1999)
