In [47]:
import pandas as pd
import numpy as np
import implicit

In [208]:
ratings = pd.read_csv("./data/ml-latest-small/ratings.csv")
movies = pd.read_csv("./data/ml-latest-small/movies.csv")

In [209]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [210]:
ratings.dtypes

userId         int64
movieId        int64
rating       float64
timestamp      int64
dtype: object

In [211]:
M = movies.shape[0]
U = ratings.userId.unique().shape[0]
M, U


(9742, 610)

In [34]:
id_to_movie = movies[["movieId"]].to_dict()['movieId']

In [35]:
movie_to_idx = { m:i   for i,m in id_to_movie.items() }
 

In [227]:
A = np.zeros((M,U), dtype=np.float32)
A.dtype

#from scipy.sparse import csc_matrix
#A = csc_matrix((M, U), dtype=np.float32) #.toarray()

dtype('float32')

In [152]:
for idx, rec in ratings.iterrows():
    
    
    uid = rec.userId.astype(np.int32)-1
    mid =  movie_to_idx[rec.movieId.astype(np.int32)]
    A[mid][uid] = rec.rating.astype(np.float32) 
    

In [153]:
np.save(  "./data/ml-latest-small/A.npy", A)

In [154]:
A[0:10, 0:10]

array([[4. , 0. , 0. , 0. , 4. , 0. , 4.5, 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 4. , 0. , 4. , 0. , 0. ],
       [4. , 0. , 0. , 0. , 0. , 5. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 3. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 5. , 0. , 0. , 0. , 0. ],
       [4. , 0. , 0. , 0. , 0. , 4. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 4. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 3. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 3. , 0. , 2. , 0. , 0. ]], dtype=float32)

In [105]:
model = implicit.als.AlternatingLeastSquares(factors=32, iterations=10)


In [155]:
from scipy import sparse
 
B=sparse.csr_matrix(A)

In [156]:
model.fit(B, show_progress=False)

In [175]:
user_items = B.T.tocsr()


In [193]:
ratings.groupby(by="userId").count().reset_index()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,232,232,232
1,2,29,29,29
2,3,39,39,39
3,4,216,216,216
4,5,44,44,44
...,...,...,...,...
605,606,1115,1115,1115
606,607,187,187,187
607,608,831,831,831
608,609,37,37,37


In [222]:
userId = 99

ids = ratings[(ratings.userId==userId+1) & (ratings.rating>4)].movieId.values

movies[movies.movieId.isin(ids)]


Unnamed: 0,movieId,title,genres
15,16,Casino (1995),Crime|Drama
16,17,Sense and Sensibility (1995),Drama|Romance
27,28,Persuasion (1995),Drama|Romance
84,95,Broken Arrow (1996),Action|Adventure|Thriller
140,168,First Knight (1995),Action|Drama|Romance
225,261,Little Women (1994),Drama
326,368,Maverick (1994),Adventure|Comedy|Western
472,539,Sleepless in Seattle (1993),Comedy|Drama|Romance
485,553,Tombstone (1993),Action|Drama|Western
514,597,Pretty Woman (1990),Comedy|Romance


In [223]:
recommendations = model.recommend(userId, user_items)
recommendations

[(35, 1.0334817),
 (958, 0.95087004),
 (383, 0.8994977),
 (2103, 0.8491739),
 (815, 0.84259),
 (436, 0.80875254),
 (577, 0.79066527),
 (2195, 0.7899339),
 (1178, 0.7620723),
 (6, 0.71828675)]

In [224]:
recs = [r for r,_ in recommendations]
movies[movies.index.isin(recs)]

Unnamed: 0,movieId,title,genres
6,7,Sabrina (1995),Comedy|Romance
35,39,Clueless (1995),Comedy|Romance
383,440,Dave (1993),Comedy|Romance
436,500,Mrs. Doubtfire (1993),Comedy|Drama
577,708,"Truth About Cats & Dogs, The (1996)",Comedy|Romance
815,1073,Willy Wonka & the Chocolate Factory (1971),Children|Comedy|Fantasy|Musical
958,1259,Stand by Me (1986),Adventure|Drama
1178,1569,My Best Friend's Wedding (1997),Comedy|Romance
2103,2797,Big (1988),Comedy|Drama|Fantasy|Romance
2195,2918,Ferris Bueller's Day Off (1986),Comedy


In [225]:
movie = 815
# find related items
related = model.similar_items(movie)
rel = [ r for r,_ in related]
sorted(rel)
related

[(815, 1.0),
 (116, 0.69577485),
 (642, 0.68469185),
 (75, 0.6845335),
 (551, 0.67997414),
 (618, 0.6775492),
 (622, 0.67616403),
 (875, 0.66427827),
 (592, 0.65973806),
 (575, 0.6569229)]

In [226]:
movies[movies.index.isin(rel)]

Unnamed: 0,movieId,title,genres
75,83,Once Upon a Time... When We Were Colored (1995),Drama|Romance
116,141,"Birdcage, The (1996)",Comedy
551,661,James and the Giant Peach (1996),Adventure|Animation|Children|Fantasy|Musical
575,706,Sunset Park (1996),Drama
592,733,"Rock, The (1996)",Action|Adventure|Thriller
618,783,"Hunchback of Notre Dame, The (1996)",Animation|Children|Drama|Musical|Romance
622,788,"Nutty Professor, The (1996)",Comedy|Fantasy|Romance|Sci-Fi
642,828,"Adventures of Pinocchio, The (1996)",Adventure|Children
815,1073,Willy Wonka & the Chocolate Factory (1971),Children|Comedy|Fantasy|Musical
875,1167,Dear God (1996),Comedy
