# Movielens 영화 추천 실습
 
데이터 셋 : Movielens 

- 유저가 영화에 대해 평점을 매긴 데이터가 데이터 크기 별로 있습니다. MovieLens 1M Dataset 사용을 권장합니다.
- 별점 데이터는 대표적인 explicit 데이터입니다. 하지만 implicit 데이터로 간주하고 테스트해 볼 수 있습니다.
- 별점을 시청횟수로 해석해서 생각하겠습니다.
- 또한 유저가 3점 미만으로 준 데이터는 선호하지 않는다고 가정하고 제외하겠습니다.


# 1) 데이터 준비와 전처리
- Movielens 데이터 (이 노트북에서는 `./data/ml_small/` 경로의 데이터를 사용합니다.)
- ratings.csv : 인덱싱된 사용자-영화-평점 데이터
- movies.csv : 영화 id-제목-장르 데이터

In [1]:
import pandas as pd
import os

In [2]:
# Load the movie metadata and index it by movie id (index name: "id").
movie_file_path='./data/ml_small/movies.csv'
movies = pd.read_csv(movie_file_path).set_index('movieId').rename_axis('id')

# Titles normally look like "Name (YYYY)".  Fixed-width slicing silently
# corrupts rows that have no year or carry trailing whitespace, so the
# year suffix is parsed with a regex instead.  Rows without a year keep
# their full (lower-cased) title and get a missing value in `year`.
year_suffix = r'\s*\((?:\d{4}[-–])?(\d{4})\)\s*$'
movies['title_low'] = (
    movies['title']
    .str.replace(year_suffix, '', regex=True)
    .str.strip()
    .str.lower()
)
movies['year'] = movies['title'].str.extract(year_suffix, expand=False)
movies.head()


Unnamed: 0_level_0,title,genres,title_low,year
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,toy story,1995
2,Jumanji (1995),Adventure|Children|Fantasy,jumanji,1995
3,Grumpier Old Men (1995),Comedy|Romance,grumpier old men,1995
4,Waiting to Exhale (1995),Comedy|Drama|Romance,waiting to exhale,1995
5,Father of the Bride Part II (1995),Comedy,father of the bride part ii,1995


In [3]:
# Load the user-movie rating records.
rating_file_path='./data/ml_small/ratings.csv'
ratings = pd.read_csv(rating_file_path)
orginal_data_size = ratings.shape[0]

# Keep only ratings of 3 or higher; lower ratings are treated as "not liked".
liked_mask = ratings['rating'].ge(3)
ratings = ratings.loc[liked_mask]
filtered_data_size = ratings.shape[0]

# Report how much of the data survived the filter.
print(f'orginal_data_size: {orginal_data_size}, filtered_data_size: {filtered_data_size}')
print(f'Ratio of Remaining Data is {filtered_data_size / orginal_data_size:.2%}')

ratings.head()


orginal_data_size: 100836, filtered_data_size: 81763
Ratio of Remaining Data is 81.09%


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


# 2) 데이터 분석
- ratings에 있는 유니크한 영화 개수
- ratings에 있는 유니크한 사용자 수
- 가장 인기 있는 영화 30개(인기순)

In [4]:
# Preview the first rows of the filtered ratings table.
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [5]:
# Preview the first rows of the movie metadata table.
movies.head()

Unnamed: 0_level_0,title,genres,title_low,year
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,toy story,1995
2,Jumanji (1995),Adventure|Children|Fantasy,jumanji,1995
3,Grumpier Old Men (1995),Comedy|Romance,grumpier old men,1995
4,Waiting to Exhale (1995),Comedy|Drama|Romance,waiting to exhale,1995
5,Father of the Bride Part II (1995),Comedy,father of the bride part ii,1995


In [6]:
# Number of distinct movies that have at least one remaining rating,
# followed by the distinct movie ids themselves.
movie_id_column = ratings['movieId']
num_movie = movie_id_column.nunique()
id_uniq_movie = movie_id_column.unique()

print('movie num : ', num_movie)
print('movie id : ', id_uniq_movie)


movie num :  8452
movie id :  [     1      3      6 ... 160836 163937 163981]


In [7]:
# Number of distinct users that have at least one remaining rating,
# followed by the distinct user ids themselves.
user_id_column = ratings['userId']
num_user = user_id_column.nunique()
id_uniq_user = user_id_column.unique()

print('user num : ', num_user)
print('user id : ', id_uniq_user)


user num :  609
user id :  [  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
 235 236 237 238 239 240

In [8]:
# The 30 movies with the most ratings, most-rated first.
cnt_movie = ratings.groupby('movieId')['userId'].count()
many_rating = cnt_movie.sort_values(ascending=False).head(30)

# Title lookup keyed by movie id.
title_movie = movies['title']
title_many_rating = title_movie[many_rating.index.tolist()]

print('rank / num_rating / id  :  name')
for rank, (id_movie, num_rating) in enumerate(many_rating.items(), start=1):
    title = title_movie[id_movie]
    print(f'{rank} / {num_rating} / {id_movie}  :  {title}')


rank / num_rating / id  :  name
1 / 315 / 356  :  Forrest Gump (1994)
2 / 313 / 318  :  Shawshank Redemption, The (1994)
3 / 287 / 296  :  Pulp Fiction (1994)
4 / 263 / 593  :  Silence of the Lambs, The (1991)
5 / 257 / 2571  :  Matrix, The (1999)
6 / 234 / 260  :  Star Wars: Episode IV - A New Hope (1977)
7 / 218 / 110  :  Braveheart (1995)
8 / 216 / 480  :  Jurassic Park (1993)
9 / 210 / 589  :  Terminator 2: Judgment Day (1991)
10 / 206 / 2959  :  Fight Club (1999)
11 / 205 / 527  :  Schindler's List (1993)
12 / 199 / 1  :  Toy Story (1995)
13 / 199 / 1196  :  Star Wars: Episode V - The Empire Strikes Back (1980)
14 / 197 / 50  :  Usual Suspects, The (1995)
15 / 191 / 47  :  Seven (a.k.a. Se7en) (1995)
16 / 191 / 1198  :  Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)
17 / 186 / 2858  :  American Beauty (1999)
18 / 185 / 1210  :  Star Wars: Episode VI - Return of the Jedi (1983)
19 / 185 / 150  :  Apollo 13 (1995)
20 / 183 / 4993  :  Lord of the Rings

In [9]:
# Rating counts of the 1000 most-rated movies, highest first.
print(cnt_movie.sort_values(ascending=False).head(1000))

movieId
356      315
318      313
296      287
593      263
2571     257
        ... 
6953      20
3504      20
54995     20
2431      20
41997     20
Name: userId, Length: 1000, dtype: int64


In [10]:
# Summary statistics of the per-movie rating counts.
des_cnt_movie = cnt_movie.describe()
print(des_cnt_movie)


count    8452.000000
mean        9.673805
std        21.179015
min         1.000000
25%         1.000000
50%         2.000000
75%         8.000000
max       315.000000
Name: userId, dtype: float64


In [11]:
# Movies with at least `base_cnt` ratings, ranked by mean rating.
base_cnt = 100
# ">=" so that a movie with exactly `base_cnt` ratings is included,
# matching the stated "100 or more" criterion.
movie_cnt_100 = cnt_movie[cnt_movie >= base_cnt]
id_movie_cnt_100 = movie_cnt_100.index.tolist()

# Restrict the ratings to those movies before averaging.
rating_movie = ratings[['movieId', 'rating']]
isin_cnt_100 = rating_movie['movieId'].isin(id_movie_cnt_100)
rating_movie = rating_movie.loc[isin_cnt_100]

mean_rating_movie = rating_movie.groupby('movieId')['rating'].mean()
many_rating_mean = mean_rating_movie.sort_values(ascending=False).head(30)

# The second column is the mean rating, so the header is labelled accordingly.
print('rank / mean_rating / id  :  name')
for rank, (id_movie, mean_rating) in enumerate(many_rating_mean.items(), start=1):
    title = title_movie[id_movie]
    print(f'{rank} / {mean_rating} / {id_movie}  :  {title}')

rank / num_rating / id  :  name
1 / 4.474719101123595 / 858  :  Godfather, The (1972)
2 / 4.466453674121405 / 318  :  Shawshank Redemption, The (1994)
3 / 4.422330097087379 / 2959  :  Fight Club (1999)
4 / 4.421951219512195 / 527  :  Schindler's List (1993)
5 / 4.402723735408561 / 2571  :  Matrix, The (1999)
6 / 4.395299145299146 / 260  :  Star Wars: Episode IV - A New Hope (1977)
7 / 4.376306620209059 / 296  :  Pulp Fiction (1994)
8 / 4.358870967741935 / 1221  :  Godfather: Part II, The (1974)
9 / 4.346733668341709 / 1196  :  Star Wars: Episode V - The Empire Strikes Back (1980)
10 / 4.340740740740741 / 1197  :  Princess Bride, The (1987)
11 / 4.337301587301587 / 1136  :  Monty Python and the Holy Grail (1975)
12 / 4.336633663366337 / 1208  :  Apocalypse Now (1979)
13 / 4.336283185840708 / 4973  :  Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
14 / 4.334951456310679 / 48516  :  Departed, The (2006)
15 / 4.322335025380711 / 50  :  Usual Suspects, The (1995)
16 / 4.31937172774869

# 3) 내가 선호하는 영화를 5가지 골라서 ratings에 추가해 줍시다.

In [12]:
# Replace these with your own favorite movies if you like.  The names must
# match the dataset titles (without the year); matching is done on the
# lower-cased `title_low` column.
my_favorite = [
    name.lower()
    for name in (
        'Fantastic Beasts and Where to Find Them',
        "Miss Peregrine's Home for Peculiar Children",
        'Now You See Me',
        'Push',
        'Edge of Tomorrow',
    )
]

# Rows of `movies` whose lower-cased title is one of the favorites.
isin_favorite = movies['title_low'].isin(my_favorite)
movie_favorite = movies.loc[isin_favorite]
id_favorite = movie_favorite.index.tolist()

title_favor = movies.loc[id_favorite, 'title']
genores_favor = movies.loc[id_favorite, 'genres']
year_favor = movies.loc[id_favorite, 'year']
print(year_favor)


id
66171     2009
102903    2013
111759    2014
135143    2016
162600    2016
Name: year, dtype: object


In [13]:
# Assume a new user 'lee' gave the top rating (5.0) to every favorite movie.
# The column lengths follow len(id_favorite) so the frame still builds when
# the title lookup matched more or fewer than five movies.
num_favorite = len(id_favorite)
my_playlist = pd.DataFrame({'userId': ['lee'] * num_favorite,
                            'movieId': id_favorite,
                            'rating': [5.0] * num_favorite})

# Add the rows only once so that re-running the cell does not duplicate them.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; missing columns (timestamp) are filled with NaN.
if not ratings['userId'].isin(['lee']).any():
    ratings = pd.concat([ratings, my_playlist])

ratings.tail(10)       # check that the rows were added

Unnamed: 0,userId,movieId,rating,timestamp
100831,610,166534,4.0,1493848000.0
100832,610,168248,5.0,1493850000.0
100833,610,168250,5.0,1494273000.0
100834,610,168252,5.0,1493846000.0
100835,610,170875,3.0,1493846000.0
0,lee,66171,5.0,
1,lee,102903,5.0,
2,lee,111759,5.0,
3,lee,135143,5.0,
4,lee,162600,5.0,


In [14]:
# Recompute the unique ids now that the extra user's rows are included.
num_movie = ratings['movieId'].nunique()
id_uniq_movie = ratings['movieId'].unique()

num_user = ratings['userId'].nunique()
id_uniq_user = ratings['userId'].unique()

# Map raw user / movie ids to contiguous 0-based indices ("idx" = index),
# plus the reverse lookups.
user_to_idx = {user_id: idx for idx, user_id in enumerate(id_uniq_user)}
movie_to_idx = {movie_id: idx for idx, movie_id in enumerate(id_uniq_movie)}
idx_to_user = {idx: user_id for user_id, idx in user_to_idx.items()}
idx_to_movie = {idx: movie_id for movie_id, idx in movie_to_idx.items()}


# Sanity check on the number of users.
print(num_user)

# Replace the id columns by their indices.  Any id that failed to map becomes
# NaN and is dropped, so a length mismatch means the indexing went wrong and
# the column is left untouched.
temp_user = ratings['userId'].map(user_to_idx.get).dropna()
if len(temp_user) != len(ratings):
    print('userId column indexing Fail!!')
else:
    print('userId column indexing OK!!')
    ratings['userId'] = temp_user

temp_movie = ratings['movieId'].map(movie_to_idx.get).dropna()
if len(temp_movie) != len(ratings):
    print('movieId column indexing Fail!!')
else:
    print('movieId column indexing OK!!')
    ratings['movieId'] = temp_movie

ratings.tail()


610
userId column indexing OK!!
movieId column indexing OK!!


Unnamed: 0,userId,movieId,rating,timestamp
0,609,5398,5.0,
1,609,1205,5.0,
2,609,934,5.0,
3,609,2630,5.0,
4,609,7730,5.0,


# 4) CSR matrix를 직접 만들어 봅시다.

In [15]:
# Build the interaction matrix in CSR form: one row per user index, one
# column per movie index, with the rating as the stored value.
from scipy.sparse import csr_matrix

row_idx = ratings.userId
col_idx = ratings.movieId
csr_data = csr_matrix((ratings.rating, (row_idx, col_idx)), shape=(num_user, num_movie))
csr_data

<610x8452 sparse matrix of type '<class 'numpy.float64'>'
	with 81768 stored elements in Compressed Sparse Row format>

# 5) als_model = AlternatingLeastSquares 모델을 직접 구성하여 훈련시켜 봅시다.

In [16]:
from implicit.als import AlternatingLeastSquares
import os
import numpy as np

# Settings recommended by the implicit library; unrelated to the learning content itself.
# NOTE(review): BLAS thread-count variables are usually read when the BLAS
# library is loaded — confirm they still take effect when set after the imports.
os.environ['OPENBLAS_NUM_THREADS']='1'
os.environ['KMP_DUPLICATE_LIB_OK']='True'
os.environ['MKL_NUM_THREADS']='1'

In [17]:
# Declare the implicit AlternatingLeastSquares model.
als_model = AlternatingLeastSquares(
    factors=100,
    regularization=0.01,
    use_gpu=False,
    iterations=15,
    dtype=np.float32,
)

# The model is fitted on the transposed (item x user) matrix.
# NOTE(review): which orientation `fit` expects depends on the installed
# implicit version — confirm before upgrading the library.
csr_data_transpose = csr_data.transpose()

# Train the model.
als_model.fit(csr_data_transpose)

  0%|          | 0/15 [00:00<?, ?it/s]

In [18]:
# Matrix indices of user 'lee' and of movie id 66171.
lee_ = user_to_idx['lee']
push_ = movie_to_idx[66171]

# Learned latent factors for that user and that movie.
lee_vector = als_model.user_factors[lee_]
push_vector = als_model.item_factors[push_]


In [19]:
# Display the latent factor vector of user 'lee'.
lee_vector

array([ 0.20323844, -0.18163265,  0.17587239, -0.15222   , -0.23944832,
       -0.11188185,  0.20121768, -0.44527727, -0.3868005 ,  0.13478814,
       -0.04973844,  0.10945023,  0.22972889,  0.08853662, -0.14852041,
       -0.08795793, -0.1622832 ,  0.28801635, -0.30504078, -0.03942215,
       -0.07657857,  0.21878928, -0.06833711, -0.14209823,  0.12099177,
        0.05953527,  0.06758741, -0.41765785,  0.10996599,  0.07181422,
        0.1830653 ,  0.28313577,  0.01823657,  0.18926835, -0.22161275,
       -0.01572936, -0.20487498,  0.41761178,  0.06006008,  0.25397012,
       -0.01610625, -0.17220761,  0.00096108,  0.1488047 ,  0.08473587,
        0.37826842, -0.14140202,  0.10400569,  0.08605842,  0.3420907 ,
       -0.13854726,  0.09161616,  0.09797567,  0.0233429 ,  0.10880789,
       -0.06249427,  0.22272912, -0.03510672, -0.16204299,  0.06148349,
       -0.01236563, -0.02954807, -0.02076169, -0.36398396,  0.02087549,
       -0.37408665, -0.13437353, -0.08800321, -0.16833836, -0.15

In [20]:
# Display the latent factor vector of movie id 66171.
push_vector

array([ 3.3642817e-04,  1.6046561e-03,  2.8286427e-03, -6.7132670e-03,
       -3.0363693e-03, -2.3193036e-04,  3.4794237e-03, -3.8458619e-03,
       -3.4056224e-03, -2.8407935e-03, -2.5056563e-03,  2.0908210e-03,
       -4.6098153e-03,  1.9499486e-03,  3.4929148e-03, -3.6319990e-03,
       -1.6362444e-03,  1.9109226e-03, -5.9878882e-03, -1.2392376e-03,
        1.1287010e-03,  5.6741619e-04,  8.9022606e-06,  1.2584621e-03,
        4.7289836e-03,  8.1806872e-03,  8.3212845e-04, -2.9592281e-03,
       -2.5770785e-03,  4.2406456e-03,  4.5853849e-05,  9.4546815e-03,
        8.0844117e-03, -7.6944255e-03, -6.4557814e-04, -5.2434523e-03,
        1.3099622e-03,  5.0497884e-03, -5.3944588e-03, -1.5886193e-04,
        6.8183843e-05,  6.1822194e-03,  6.3185109e-04,  1.8301293e-03,
        5.1703481e-03,  8.2411179e-03, -2.4169321e-04,  1.7638642e-03,
        5.6522801e-03,  3.4323223e-03,  1.6139909e-03, -3.8351936e-03,
        5.7026129e-03, -5.9456937e-04,  3.8901262e-03,  5.6926126e-04,
      

In [21]:
# Dot product of lee's user vector and the item vector of movie 66171.
np.dot(lee_vector, push_vector)

0.031059006

# 6) 내가 선호하는 5가지 영화 중 하나와 그 외의 영화 하나를 골라 훈련된 모델이 예측한 나의 선호도를 파악해 보세요.

In [22]:
# Predicted preference of 'lee' (user vector . item vector) for two of the
# favorite movies and for one movie outside the favorites.
#
# Favorite movie ids:
#   'Fantastic Beasts and Where to Find Them'      : 135143
#   "Miss Peregrine's Home for Peculiar Children"  : 162600
#   'Now You See Me'                               : 102903
#   'Push'                                         : 66171
#   'Edge of Tomorrow'                             : 111759
movie_factors = als_model.item_factors

edge_tomorrow = movie_to_idx[111759]
edge_tomorrow_vector = movie_factors[edge_tomorrow]
print(lee_vector.dot(edge_tomorrow_vector))

peregrine = movie_to_idx[162600]
peregrine_vector = movie_factors[peregrine]
print(lee_vector.dot(peregrine_vector))

# Not among the favorites — 'Million Dollar Arm' : 111795
million = movie_to_idx[111795]
million_vector = movie_factors[million]
print(lee_vector.dot(million_vector))


0.22130771
0.062417235
-0.0013084688


# 7) 내가 좋아하는 영화와 비슷한 영화를 추천받아 봅시다.

In [23]:
def get_similar_movie(id_movie, num, movie_to_idx, idx_to_movie):
    """Look up the movies the trained model considers similar to `id_movie`.

    Uses the module-level `als_model`.

    Args:
        id_movie: raw movie id to query.
        num: number of results requested from the model (passed as `N`).
        movie_to_idx: mapping from raw movie id to matrix index.
        idx_to_movie: mapping from matrix index back to raw movie id.

    Returns:
        A two-element list `[ids, scores]`: the raw movie ids of the similar
        items and, in the same order, the second value of each pair returned
        by `als_model.similar_items`.
    """
    neighbours = als_model.similar_items(movie_to_idx[id_movie], N=num)

    similar_ids = []
    similar_scores = []
    for pair in neighbours:
        similar_ids.append(idx_to_movie[pair[0]])
        similar_scores.append(pair[1])
    return [similar_ids, similar_scores]


In [24]:
# Movies similar to 'Fantastic Beasts and Where to Find Them' (id 135143).
similar_movie = get_similar_movie(135143, 15, movie_to_idx, idx_to_movie)

id_movie, vec_movie = similar_movie
for rank, (similar_id, score) in enumerate(zip(id_movie, vec_movie), start=1):
    name = movies['title'][similar_id]
    print(f'{rank} // {name} // {score}')


1 // Fantastic Beasts and Where to Find Them (2016) // 1.0000001192092896
2 // Rogue One: A Star Wars Story (2016) // 0.7115494012832642
3 // Wonder Woman (2017) // 0.6288707852363586
4 // Justice League (2017) // 0.5745062828063965
5 // Jumanji: Welcome to the Jungle (2017) // 0.5593062043190002
6 // Doctor Strange (2016) // 0.5498544573783875
7 // Black Panther (2017) // 0.5493570566177368
8 // The Hobbit: The Battle of the Five Armies (2014) // 0.5252147316932678
9 // Miss Peregrine's Home for Peculiar Children (2016) // 0.5152993202209473
10 // The Jungle Book (2016) // 0.5058378577232361
11 // Doctor Who: The Waters of Mars (2009) // 0.49976441264152527
12 // Guardians of the Galaxy 2 (2017) // 0.4996277093887329
13 // Doctor Who: The Doctor, the Widow and the Wardrobe (2011) // 0.4986324906349182
14 // Doctor Who: Voyage Of The Damned (2007) // 0.4977448582649231
15 // Doctor Who: The Husbands of River Song (2015) // 0.495137095451355


In [25]:
# Movies similar to 'Now You See Me' (id 102903).
similar_movie = get_similar_movie(102903, 15, movie_to_idx, idx_to_movie)

id_movie, vec_movie = similar_movie
for rank, (similar_id, score) in enumerate(zip(id_movie, vec_movie), start=1):
    name = movies['title'][similar_id]
    print(f'{rank} // {name} // {score}')


1 // Now You See Me (2013) // 1.0
2 // Now You See Me 2 (2016) // 0.7638814449310303
3 // Green Hornet, The (2011) // 0.6139853000640869
4 // Vacation (2015) // 0.5984287261962891
5 // Spotlight (2015) // 0.5466896891593933
6 // The Magnificent Seven (2016) // 0.5368114709854126
7 // Suicide Squad (2016) // 0.5319454073905945
8 // We're the Millers (2013) // 0.527251660823822
9 // Kingsman: The Golden Circle (2017) // 0.5247607231140137
10 // Untitled Spider-Man Reboot (2017) // 0.5226229429244995
11 // Sherlock: The Abominable Bride (2016) // 0.5198637843132019
12 // Ted 2 (2015) // 0.5187830328941345
13 // Jumanji: Welcome to the Jungle (2017) // 0.5186588168144226
14 // Let's Be Cops (2014) // 0.5184555053710938
15 // John Wick (2014) // 0.5023296475410461


In [26]:
# Movies similar to 'Edge of Tomorrow' (id 111759).
similar_movie = get_similar_movie(111759, 15, movie_to_idx, idx_to_movie)

id_movie, vec_movie = similar_movie
for rank, (similar_id, score) in enumerate(zip(id_movie, vec_movie), start=1):
    name = movies['title'][similar_id]
    print(f'{rank} // {name} // {score}')


1 // Edge of Tomorrow (2014) // 1.0
2 // Star Wars: Episode VII - The Force Awakens (2015) // 0.6497737169265747
3 // The Martian (2015) // 0.6167597770690918
4 // Deadpool (2016) // 0.5781443119049072
5 // Mad Max: Fury Road (2015) // 0.5766379237174988
6 // Looper (2012) // 0.539068877696991
7 // Interstellar (2014) // 0.5191696882247925
8 // Avengers, The (2012) // 0.5189666748046875
9 // Gravity (2013) // 0.5128878951072693
10 // Guardians of the Galaxy (2014) // 0.5052263736724854
11 // X-Men: Days of Future Past (2014) // 0.5027709007263184
12 // John Wick (2014) // 0.502395749092102
13 // Rogue One: A Star Wars Story (2016) // 0.5017278790473938
14 // The Revenant (2015) // 0.4798077642917633
15 // The Imitation Game (2014) // 0.47580814361572266


# 8) 내가 가장 좋아할 만한 영화들을 추천받아 봅시다.

In [27]:
# Matrix index of the user we want recommendations for.
user = user_to_idx['lee']
# recommend takes the user x item CSR matrix (not the transposed one used for fit).
# Ask for the top 20 items, skipping those the user has already rated.
# NOTE(review): the argument and return shapes of `recommend` depend on the
# installed implicit version — confirm before upgrading the library.
movie_recommended = als_model.recommend(user, csr_data, N=20, filter_already_liked_items=True)
movie_recommended


[(1589, 0.15705784),
 (944, 0.15674357),
 (937, 0.15595095),
 (1631, 0.14661705),
 (797, 0.13846019),
 (1230, 0.13518645),
 (936, 0.13088866),
 (1585, 0.13087769),
 (1223, 0.12839738),
 (198, 0.12659979),
 (84, 0.12140048),
 (249, 0.115463614),
 (1231, 0.11313163),
 (1232, 0.11277447),
 (1257, 0.10951104),
 (1240, 0.109484375),
 (1234, 0.10818784),
 (2477, 0.10049896),
 (1200, 0.100269705),
 (809, 0.09962631)]

In [28]:
# Translate the recommended matrix indices back to raw movie ids.
[idx_to_movie[item_idx] for item_idx, _score in movie_recommended]

[88125,
 166528,
 134130,
 115149,
 69844,
 122904,
 122886,
 81834,
 116797,
 3147,
 1270,
 122882,
 122918,
 122920,
 168252,
 142488,
 122926,
 54001,
 98809,
 81845]

In [29]:
# Titles of the recommended movies, in recommendation order.
[movies['title'][idx_to_movie[item_idx]] for item_idx, _score in movie_recommended]

['Harry Potter and the Deathly Hallows: Part 2 (2011)',
 'Rogue One: A Star Wars Story (2016)',
 'The Martian (2015)',
 'John Wick (2014)',
 'Harry Potter and the Half-Blood Prince (2009)',
 'Deadpool (2016)',
 'Star Wars: Episode VII - The Force Awakens (2015)',
 'Harry Potter and the Deathly Hallows: Part 1 (2010)',
 'The Imitation Game (2014)',
 'Green Mile, The (1999)',
 'Back to the Future (1985)',
 'Mad Max: Fury Road (2015)',
 'Guardians of the Galaxy 2 (2017)',
 'Captain America: Civil War (2016)',
 'Logan (2017)',
 'Spotlight (2015)',
 'Untitled Spider-Man Reboot (2017)',
 'Harry Potter and the Order of the Phoenix (2007)',
 'Hobbit: An Unexpected Journey, The (2012)',
 "King's Speech, The (2010)"]

In [30]:
# 'Rogue One: A Star Wars Story' : 166528
# Ask the model to explain this recommendation for the user; the second
# element of the result is inspected in the following cell.
rogue = movie_to_idx[166528]
explain = als_model.explain(user, csr_data, itemid=rogue)

In [31]:
# Pair each contributing movie's title with its contribution score.
[(movies['title'][idx_to_movie[item_idx]], score) for item_idx, score in explain[1]]


[('Fantastic Beasts and Where to Find Them (2016)', 0.06382360811976165),
 ('Edge of Tomorrow (2014)', 0.051636629396783144),
 ('Now You See Me (2013)', 0.02197486675704219),
 ("Miss Peregrine's Home for Peculiar Children (2016)", 0.0129939921058165),
 ('Push (2009)', 0.005548214155612838)]