In [108]:
import os
import pandas as pd
rating_file_path = 'C:/users/juwon/practice_aiffel/recommendata_iu/data/ml-1m/ratings.dat'
rating_cols = ['user_id', 'movie_id', 'ratings', 'timestamp']
# The file is split on the two-character separator '::' (handled by the python engine);
# column names are supplied explicitly through `names`.
ratings = pd.read_csv(
    rating_file_path,
    sep='::',
    names=rating_cols,
    engine='python',
    encoding='ISO-8859-1',
)
# Remember the unfiltered row count so the filtering ratio can be reported later.
original_data_size = ratings.shape[0]
ratings.head()

Unnamed: 0,user_id,movie_id,ratings,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


## 1. 데이터전처리

In [109]:
# Keep only ratings of 3 or higher.
is_liked = ratings['ratings'] >= 3
ratings = ratings.loc[is_liked]
filtered_data_size = ratings.shape[0]
kept_ratio = (filtered_data_size/original_data_size)*100

print('원본 데이터 크기: {}, 평점 3점이상인 데이터 크기: {}'.format(original_data_size, filtered_data_size))
print('원본 데이터에 대한 필터링된 데이터 비율 : {:.2f}%'.format(kept_ratio))

원본 데이터 크기: 1000209, 평점 3점이상인 데이터 크기: 836478
원본 데이터에 대한 필터링된 데이터 비율 : 83.63%


In [110]:
# Rename the rating column to 'counts'. The result is rebound instead of using
# inplace=True, so the cell produces a new frame rather than mutating the existing one.
ratings = ratings.rename(columns={'ratings':'counts'})
ratings.head()

Unnamed: 0,user_id,movie_id,counts,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [111]:
# Drop every column except the three used to build the user x movie matrix.
using_cols = ['user_id','movie_id','counts']
ratings = ratings.loc[:, using_cols]
ratings.head()

Unnamed: 0,user_id,movie_id,counts
0,1,1193,5
1,1,661,3
2,1,914,3
3,1,3408,4
4,1,2355,5


In [112]:
movie_file_path = 'C:/users/juwon/practice_aiffel/recommendata_iu/data/ml-1m/movies.dat'
movie_cols = ['movie_id','title','genre']
# Same '::'-separated layout as the ratings file; column names are supplied explicitly.
movies = pd.read_csv(
    movie_file_path,
    sep='::',
    names=movie_cols,
    engine='python',
    encoding='ISO-8859-1',
)
movies.head()

Unnamed: 0,movie_id,title,genre
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [113]:
# Show the distinct values first, then how many distinct values each column has.
for column in ('title', 'movie_id'):
    print(movies[column].unique())
for column in ('title', 'movie_id'):
    print(len(movies[column].unique()))

['Toy Story (1995)' 'Jumanji (1995)' 'Grumpier Old Men (1995)' ...
 'Tigerland (2000)' 'Two Family House (2000)' 'Contender, The (2000)']
[   1    2    3 ... 3950 3951 3952]
3883
3883


In [114]:
# Look up the row whose movie_id is 3952 to compare its id with its positional index.
movies[movies['movie_id']==3952]

Unnamed: 0,movie_id,title,genre
3882,3952,"Contender, The (2000)",Drama|Thriller


영화 개수(3883)와 movie_id의 마지막 값(3952)이 같지 않습니다. 즉 movie_id는 0부터 이어지는 연속된 인덱스가 아닙니다.

In [115]:
# Two lookup tables between movie titles and movie_id values.
# title_to_index: title -> movie_id; index_to_title: movie_id -> title.
title_to_index = dict(zip(movies['title'], movies['movie_id']))
index_to_title = dict(zip(movies['movie_id'], movies['title']))

In [116]:
# Inspect the title -> movie_id mapping.
# NOTE(review): this displays the entire dict; consider showing only a few entries.
title_to_index

{'Toy Story (1995)': 1,
 'Jumanji (1995)': 2,
 'Grumpier Old Men (1995)': 3,
 'Waiting to Exhale (1995)': 4,
 'Father of the Bride Part II (1995)': 5,
 'Heat (1995)': 6,
 'Sabrina (1995)': 7,
 'Tom and Huck (1995)': 8,
 'Sudden Death (1995)': 9,
 'GoldenEye (1995)': 10,
 'American President, The (1995)': 11,
 'Dracula: Dead and Loving It (1995)': 12,
 'Balto (1995)': 13,
 'Nixon (1995)': 14,
 'Cutthroat Island (1995)': 15,
 'Casino (1995)': 16,
 'Sense and Sensibility (1995)': 17,
 'Four Rooms (1995)': 18,
 'Ace Ventura: When Nature Calls (1995)': 19,
 'Money Train (1995)': 20,
 'Get Shorty (1995)': 21,
 'Copycat (1995)': 22,
 'Assassins (1995)': 23,
 'Powder (1995)': 24,
 'Leaving Las Vegas (1995)': 25,
 'Othello (1995)': 26,
 'Now and Then (1995)': 27,
 'Persuasion (1995)': 28,
 'City of Lost Children, The (1995)': 29,
 'Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)': 30,
 'Dangerous Minds (1995)': 31,
 'Twelve Monkeys (1995)': 32,
 'Wings of Courage (1995)': 33,
 'Babe (1995)

## 2. 데이터 분석
### 영화 개수

In [117]:
# Number of distinct movie_id values remaining in the filtered ratings.
ratings['movie_id'].nunique()

3628

### 사용자 수

In [118]:
# Number of distinct user_id values remaining in the filtered ratings.
ratings['user_id'].nunique()

6039

In [119]:
# The 'counts' column holds the former 'ratings' values (only values >= 3 were kept).
ratings.counts

0          5
1          3
2          3
3          4
4          5
          ..
1000203    3
1000205    5
1000206    5
1000207    4
1000208    4
Name: counts, Length: 836478, dtype: int64

In [120]:
# Attach title and genre to every rating row; 'movie_id' is the only column
# the two frames share, so it is spelled out as the join key.
data = ratings.merge(movies, on='movie_id')
data.head()

Unnamed: 0,user_id,movie_id,counts,title,genre
0,1,1193,5,One Flew Over the Cuckoo's Nest (1975),Drama
1,2,1193,5,One Flew Over the Cuckoo's Nest (1975),Drama
2,12,1193,4,One Flew Over the Cuckoo's Nest (1975),Drama
3,15,1193,4,One Flew Over the Cuckoo's Nest (1975),Drama
4,17,1193,5,One Flew Over the Cuckoo's Nest (1975),Drama


### 가장 인기 있는 영화 50개

In [121]:
# Count rating rows per title (one row per user rating), then list the 50 most-rated titles.
movie_count = data.groupby('title')['user_id'].count()
movie_count.sort_values(ascending=False).head(50)

title
American Beauty (1999)                                   3211
Star Wars: Episode IV - A New Hope (1977)                2910
Star Wars: Episode V - The Empire Strikes Back (1980)    2885
Star Wars: Episode VI - Return of the Jedi (1983)        2716
Saving Private Ryan (1998)                               2561
Terminator 2: Judgment Day (1991)                        2509
Silence of the Lambs, The (1991)                         2498
Raiders of the Lost Ark (1981)                           2473
Back to the Future (1985)                                2460
Matrix, The (1999)                                       2434
Jurassic Park (1993)                                     2413
Sixth Sense, The (1999)                                  2385
Fargo (1996)                                             2371
Braveheart (1995)                                        2314
Men in Black (1997)                                      2297
Schindler's List (1993)                                  2257
Pr

## 3. 내가 선호하는 영화 ratings에 추가하기

In [122]:
my_farvorite_title = [
    'Die Hard (1988)',
    'Toy Story (1995)',
    'Aliens (1986)',
    'Mission: Impossible (1996)',
    'Terminator 2: Judgment Day (1991)',
]
# Translate each favourite title into its movie_id (raises KeyError for an unknown title).
my_favorite_index = [title_to_index[title] for title in my_farvorite_title]

print(my_favorite_index)

[1036, 1, 1200, 648, 589]


In [123]:
# Check whether user_id 6041 already has rows before adding my own ratings under that id.
ratings[ratings['user_id']==6041]

Unnamed: 0,user_id,movie_id,counts


In [124]:
# My own ratings, stored under the new user_id 6041 (one row per favourite movie).
my_movie_list = pd.DataFrame({
    'user_id': [6041] * 5,
    'movie_id': my_favorite_index,
    'counts': [5, 3, 5, 4, 5],
})
# Append only when user 6041 is absent, so re-running the cell does not duplicate rows.
already_added = ratings['user_id'].eq(6041).any()
if not already_added:
    ratings = pd.concat([ratings, my_movie_list], ignore_index=True)
ratings.tail(10)

Unnamed: 0,user_id,movie_id,counts
836473,6040,1090,3
836474,6040,1094,5
836475,6040,562,5
836476,6040,1096,4
836477,6040,1097,4
836478,6041,1036,5
836479,6041,1,3
836480,6041,1200,5
836481,6041,648,4
836482,6041,589,5


## 4. CSR matrix 만들기

In [125]:
# Compute the counts here: previously this cell printed names that were only
# assigned in a later cell, which fails with NameError on a fresh kernel.
num_user = ratings['user_id'].nunique()
num_movie = ratings['movie_id'].nunique()
print(num_movie)
print(num_user)

3628
6040


In [126]:
# Largest user_id; user_id is used directly as the row index of the sparse matrix.
ratings['user_id'].max()

6041

In [127]:
# Largest movie_id; movie_id is used directly as the column index of the sparse matrix.
ratings['movie_id'].max()

3952

In [128]:
import implicit
from scipy.sparse import csr_matrix

num_user = ratings['user_id'].nunique()
num_movie = ratings['movie_id'].nunique()

# user_id / movie_id are used directly as row / column indices, so each dimension
# must be (largest id + 1). Deriving it from the data replaces the hand-tuned
# offsets (+2, +325), which only matched this exact dataset.
csr_shape = (int(ratings['user_id'].max()) + 1, int(ratings['movie_id'].max()) + 1)
csr_data = csr_matrix((ratings.counts, (ratings.user_id, ratings.movie_id)), shape=csr_shape)
csr_data

<6042x3953 sparse matrix of type '<class 'numpy.int64'>'
	with 836483 stored elements in Compressed Sparse Row format>

shape(m, n)의 m, n은 각각 user_id, movie_id의 최댓값보다 커야(최댓값 + 1 이상) 하므로 늘려주었습니다. id를 그대로 행/열 인덱스로 쓰기 때문에, 예를 들어 user_id 최댓값이 6041이면 행은 최소 6042개가 필요합니다.

## 5. als_model 훈련

In [129]:
from implicit.als import AlternatingLeastSquares
import os
import numpy as np
# This part is recommended by the implicit library; it is unrelated to the learning content.
# NOTE(review): numpy and implicit are already imported by the time these variables are set;
# thread-count variables are usually read when the BLAS library loads -- confirm they take effect.
os.environ['OPENBLAS_NUM_THREADS']='1'
os.environ['KMP_DUPLICATE_LIB_OK']='True'
os.environ['MKL_NUM_THREADS']='1'

In [130]:
# ALS matrix-factorisation model; every hyperparameter is kept exactly as originally chosen.
als_model = AlternatingLeastSquares(
    factors=100,
    regularization=0.01,
    use_gpu=False,
    iterations=15,
    dtype=np.float32,
)

In [131]:
# Train on csr_data, whose rows are indexed by user_id and columns by movie_id.
als_model.fit(csr_data)

  0%|          | 0/15 [00:00<?, ?it/s]

## 6. 좋아하는 영화의 선호도 살펴보기 및 다른 영화 선호도 비교해보기

### 6-1. 내가 좋아하는 영화 (1) Die Hard (1988)
좋아하는 영화중 하나인 다이하드를 뽑았습니다. 다이하드는 5점을 주었습니다.

In [132]:
my_index = 6041  # user_id under which my own ratings were added
die_hard_index = title_to_index['Die Hard (1988)']
# Index with my_index instead of repeating the literal 6041, so the two cannot drift apart.
my_vector = als_model.user_factors[my_index]
die_hard_vector = als_model.item_factors[die_hard_index]

In [133]:
# Learned factor vector for my user row.
my_vector

array([-0.5055238 ,  0.04609311,  1.0220786 , -1.3655561 ,  0.04517855,
        0.6435609 ,  0.21343748,  0.27859783,  0.01329405,  0.31527305,
        0.05103133, -0.8502435 , -0.14713119, -0.15494275,  0.05167307,
        0.14135706, -0.42836612, -0.00884452,  0.15697673,  0.4591675 ,
        0.22562136,  1.0516309 , -0.3747277 ,  0.015092  ,  0.72611904,
       -1.0676914 , -0.48740226,  0.613463  ,  0.5068877 ,  0.7401003 ,
       -0.52603936, -0.5140305 ,  0.00401326, -0.5489869 , -0.15387705,
       -0.23919941, -0.32429782,  0.3549385 ,  0.37651405,  0.8701103 ,
        0.00448804, -0.3023334 ,  0.5640671 ,  0.13297299,  0.57406443,
        0.31017435, -0.6567703 ,  0.7547007 , -0.22726846, -0.79568285,
       -0.33738732, -0.44089472,  0.36229488, -0.5711648 ,  0.08784909,
       -0.01066506, -0.8248472 , -0.26171628, -0.0412163 ,  0.34291855,
       -0.22186461, -0.3047603 , -0.3095872 , -0.19600187, -0.255825  ,
       -0.19266379,  0.5392211 , -0.06629176, -0.44161808, -0.31

In [135]:
# Learned factor vector for 'Die Hard (1988)'.
die_hard_vector

array([-0.01113901,  0.00815545,  0.03259899, -0.02491127,  0.01755629,
        0.01784285,  0.02957476,  0.02981156,  0.00318598,  0.01331869,
        0.01801275, -0.01886158,  0.01478144, -0.00297525, -0.01248569,
        0.02097216, -0.00831727,  0.00522774, -0.03071372,  0.02446727,
        0.00483368, -0.00396456, -0.01855403, -0.00674553,  0.00610365,
       -0.02091914, -0.01114127,  0.03389471,  0.00650847,  0.02329474,
       -0.0034259 ,  0.00454094,  0.02036073,  0.0008984 ,  0.01520158,
        0.00353549,  0.00494825,  0.01679995,  0.0060315 ,  0.00428806,
        0.02274961, -0.03012647,  0.03068925,  0.01325272,  0.01323565,
        0.01665109, -0.00404236,  0.00563695,  0.00683911,  0.01249928,
       -0.00221752, -0.01259591,  0.01255117, -0.01719219, -0.00038655,
        0.00737199,  0.00225386, -0.00903167,  0.02237509,  0.02138288,
       -0.00347483, -0.00859822, -0.025716  ,  0.00753908,  0.00082078,
       -0.00171565,  0.03061999,  0.00864804, -0.00855454,  0.00

In [136]:
# Dot product of the user and item vectors; this notebook reads it as the predicted preference.
np.dot(my_vector, die_hard_vector)

0.4683102

5점을 준 다이하드는 0.46이 나왔습니다.

### 6-1. 내가 좋아하는 영화 (2) Toy Story (1995)

In [141]:
# movie_id and learned item vector for 'Toy Story (1995)'.
toy_story_index = title_to_index['Toy Story (1995)']
toy_story_vector = als_model.item_factors[toy_story_index]
toy_story_vector

array([ 2.17692722e-02,  1.56727415e-02,  5.50077669e-03, -4.78384225e-03,
        5.71780745e-03,  1.49143497e-02,  1.02295531e-02, -3.45213106e-03,
        1.84725560e-02,  1.48558011e-02,  1.45173282e-03,  6.40645949e-03,
        1.70487817e-02, -1.97765622e-02, -1.10477563e-02, -1.28605617e-02,
        1.96205303e-02,  9.02649190e-05,  3.37136947e-02,  1.18604330e-02,
       -2.28261594e-02,  3.09183598e-02,  2.37328093e-02,  6.54985057e-03,
        2.31619924e-02, -2.16967836e-02,  2.29523778e-02, -2.66323891e-02,
        7.42655853e-03,  1.82500686e-02,  1.63335237e-03, -9.25127044e-03,
       -3.02525330e-02, -2.69136801e-02, -5.52290352e-03,  5.78481471e-03,
       -2.07169950e-02,  4.49878909e-03,  2.09062770e-02, -5.51299891e-03,
        1.02603687e-02,  6.42705755e-03, -2.29027984e-03,  1.46066202e-02,
        2.20475700e-02,  5.53748244e-03, -3.93880485e-03,  4.71316986e-02,
       -2.08011325e-02, -1.92951430e-02, -3.37383822e-02, -7.94399471e-04,
        1.48474553e-03, -

In [142]:
# Dot product of my user vector with the Toy Story item vector.
np.dot(my_vector,toy_story_vector)

0.38152924

좋아하는 영화들중 가장 낮은 3점을 받은 토이스토리의 선호도 입니다.

### 6-2. 다른 영화의 선호도 예측해보기 American Beauty (1999) 
이번엔 제가 좋아하는 영화가 아닌 다른 영화의 선호도를 구해보겠습니다.   
가장 인기있는 영화 American Beauty의 선호도를 예측해보겠습니다.   
제가 선호하는 영화들의 장르들과 연관성이 없는 영화입니다.

In [137]:
# movie_id and learned item vector for 'American Beauty (1999)', a movie I did not rate.
american_beauty_index = title_to_index['American Beauty (1999)']
american_beauty_vector= als_model.item_factors[american_beauty_index]
american_beauty_vector

array([ 3.41493785e-02,  1.62527785e-02,  1.93242300e-02,  2.14662589e-02,
        1.67219602e-02, -2.17389055e-02, -7.93290697e-03,  4.58158851e-02,
        3.05055128e-03,  3.81261967e-02,  2.73481440e-02,  1.67076662e-02,
        2.31786985e-02, -1.83856208e-02, -2.98272283e-03,  1.09088700e-02,
        5.26678860e-02,  4.09441181e-02,  3.07604223e-02, -3.18399468e-03,
       -5.07344194e-02, -9.12188273e-03, -1.12326248e-02,  3.81640941e-02,
        5.61089627e-03, -2.22034901e-02, -1.87908616e-02,  1.76185705e-02,
        6.01410009e-02,  6.10740967e-02,  2.26550307e-02, -3.58714344e-04,
       -3.49661000e-02,  1.44096538e-02, -1.55428769e-02,  3.40176746e-02,
       -9.32308380e-03, -3.68446857e-02,  6.22296287e-03,  1.96582898e-02,
       -8.28654133e-03,  5.74774249e-03,  2.61868257e-02, -1.12115182e-02,
       -2.40840726e-02,  3.48183922e-02, -2.40719784e-03,  1.24327373e-02,
        2.02420354e-02,  3.22112441e-02,  8.27378099e-05,  1.78328007e-02,
        6.16334495e-04,  

In [138]:
# Dot product of my user vector with the American Beauty item vector.
np.dot(my_vector, american_beauty_vector)

-0.04311883

이 영화의 내용은 유튜브를 통해 요약과 리뷰를 봤습니다. 영상은 끝까지 다봤지만 보고 싶은 생각은 들지 않았는데..   
어느정도 맞는 것 같습니다.

## 7. 내가 좋아하는 영화와 비슷한 영화 추천 받아보기

In [145]:
# Ask the model for the 20 items most similar to Die Hard; the raw result is shown as
# a pair of arrays (movie ids, scores).
similar_movie = als_model.similar_items(die_hard_index, N=20)
similar_movie

(array([1036, 1240, 2194, 1291,  457, 1200, 3527, 1198, 1610, 1954, 2947,
        1222, 2000, 1370,  377, 1242, 3551, 2887, 3505, 2353]),
 array([0.9999999 , 0.68795633, 0.6350702 , 0.6340137 , 0.56512874,
        0.5137062 , 0.5065532 , 0.50505376, 0.47885916, 0.45303157,
        0.45258546, 0.4448847 , 0.4422541 , 0.43499294, 0.4270668 ,
        0.42336994, 0.39911845, 0.39630458, 0.39268112, 0.39205882],
       dtype=float32))

In [155]:
def get_similar_movie(movie_index, N=20):
    """Return the movies the model considers most similar to `movie_index`.

    Args:
        movie_index: movie_id of the query movie.
        N: how many similar items to request from the model. Defaults to 20,
            the value that used to be hard-coded.

    Returns:
        A list of (title, score) tuples in the order returned by
        `als_model.similar_items`.

    Raises:
        KeyError: if a returned id is missing from `index_to_title`.
    """
    result = als_model.similar_items(movie_index, N=N)
    similar_ids, similar_scores = result[0], result[1]
    return [
        (index_to_title[movie_id], score)
        for movie_id, score in zip(similar_ids, similar_scores)
    ]

In [156]:
# Titles and similarity scores for the movies most similar to Die Hard.
get_similar_movie(die_hard_index)

[('Die Hard (1988)', 0.9999999),
 ('Terminator, The (1984)', 0.68795633),
 ('Untouchables, The (1987)', 0.6350702),
 ('Indiana Jones and the Last Crusade (1989)', 0.6340137),
 ('Fugitive, The (1993)', 0.56512874),
 ('Aliens (1986)', 0.5137062),
 ('Predator (1987)', 0.5065532),
 ('Raiders of the Lost Ark (1981)', 0.50505376),
 ('Hunt for Red October, The (1990)', 0.47885916),
 ('Rocky (1976)', 0.45303157),
 ('Goldfinger (1964)', 0.45258546),
 ('Full Metal Jacket (1987)', 0.4448847),
 ('Lethal Weapon (1987)', 0.4422541),
 ('Die Hard 2 (1990)', 0.43499294),
 ('Speed (1994)', 0.4270668),
 ('Glory (1989)', 0.42336994),
 ('Marathon Man (1976)', 0.39911845),
 ('Simon Sez (1999)', 0.39630458),
 ('No Way Out (1987)', 0.39268112),
 ('Enemy of the State (1998)', 0.39205882)]

In [157]:
# Titles and similarity scores for the movies most similar to Toy Story.
get_similar_movie(toy_story_index)

[('Toy Story (1995)', 0.99999994),
 ('Toy Story 2 (1999)', 0.80196095),
 ("Bug's Life, A (1998)", 0.6229349),
 ('Aladdin (1992)', 0.5574835),
 ('Groundhog Day (1993)', 0.54027814),
 ('Babe (1995)', 0.5135199),
 ('Lion King, The (1994)', 0.4616221),
 ('Pleasantville (1998)', 0.42792752),
 ("There's Something About Mary (1998)", 0.42597917),
 ('Beauty and the Beast (1991)', 0.4142559),
 ('Shakespeare in Love (1998)', 0.37460268),
 ('Mulan (1998)', 0.36842716),
 ('Hercules (1997)', 0.3653828),
 ('Tarzan (1999)', 0.36123607),
 ('Forrest Gump (1994)', 0.3470561),
 ("Wayne's World (1992)", 0.32743597),
 ('Full Monty, The (1997)', 0.32730252),
 ('Santa Clause, The (1994)', 0.32351482),
 ('George of the Jungle (1997)', 0.32029882),
 ('Mask, The (1994)', 0.30411103)]

토이스토리는 후속편(Toy Story 2)이 유사도 0.80으로 두 번째에 올랐는데, 다이하드의 경우 후속편(Die Hard 2)의 유사도가 0.43으로 절반에도 못 미쳤습니다.   
후속 시리즈가 많은 사람들에게 좋은 점수를 못받은 것 같습니다.

In [158]:
# Titles and similarity scores for the movies most similar to American Beauty.
get_similar_movie(american_beauty_index)

[('American Beauty (1999)', 1.0),
 ('Being John Malkovich (1999)', 0.7760144),
 ('Shakespeare in Love (1998)', 0.4634329),
 ('Election (1999)', 0.41932237),
 ('Fargo (1996)', 0.39744925),
 ('American Pie (1999)', 0.38473645),
 ('Silence of the Lambs, The (1991)', 0.38119775),
 ('Pulp Fiction (1994)', 0.37759003),
 ("Boys Don't Cry (1999)", 0.35091418),
 ('Schlafes Bruder (Brother of Sleep) (1995)', 0.345192),
 ('Airplane! (1980)', 0.34477803),
 ('Saving Private Ryan (1998)', 0.3446601),
 ('Stand by Me (1986)', 0.34336385),
 ('Paralyzing Fear: The Story of Polio in America, A (1998)', 0.33561844),
 ('Sixth Sense, The (1999)', 0.33478108),
 ('Clerks (1994)', 0.33112004),
 ('Braveheart (1995)', 0.3241012),
 ('Dry Cleaning (Nettoyage à sec) (1997)', 0.31000316),
 ('Jurassic Park (1993)', 0.30869552),
 ('Anywhere But Here (1999)', 0.30674624)]

쥬라기 공원이 있네요.. 가장 인기있는 영화는 장르에 구애받지 않기에 비슷한 영화들이 중구난방으로 섞인 것 같습니다.  

In [159]:
# Same similarity lookup for 'Aliens (1986)', another of my favourites.
aliens_index = title_to_index['Aliens (1986)']
get_similar_movie(aliens_index)

[('Aliens (1986)', 1.0),
 ('Alien (1979)', 0.7910093),
 ('Terminator, The (1984)', 0.7864213),
 ('Predator (1987)', 0.6661277),
 ('Blade Runner (1982)', 0.5431105),
 ('Die Hard (1988)', 0.5137062),
 ('Abyss, The (1989)', 0.46398994),
 ('Mad Max 2 (a.k.a. The Road Warrior) (1981)', 0.43217194),
 ('Star Wars: Episode V - The Empire Strikes Back (1980)', 0.4202924),
 ('Full Metal Jacket (1987)', 0.41858795),
 ('Thing, The (1982)', 0.41290167),
 ('Alien: Resurrection (1997)', 0.4038345),
 ('Alien³ (1992)', 0.40013427),
 ('Robocop (1987)', 0.38492194),
 ('Terminator 2: Judgment Day (1991)', 0.37566337),
 ('2001: A Space Odyssey (1968)', 0.36100465),
 ('Brazil (1985)', 0.35995537),
 ('Star Trek: The Wrath of Khan (1982)', 0.35368747),
 ('Follow the Bitch (1998)', 0.34433225),
 ('Mad Max (1979)', 0.3372887)]

## 8. 내가 좋아할만한 영화 추천 받아보기

In [160]:
# Top-20 recommendations for my user row; movies I already rated are filtered out.
# The raw result is shown as a pair of arrays (movie ids, scores).
movie_recommended = als_model.recommend(my_index, csr_data[my_index], N=20, filter_already_liked_items=True)
movie_recommended

(array([1240, 1214, 2571,  457, 2916, 3527, 3114, 1610, 1291, 1210, 3703,
        1196,  480,  588,  260,  377,  593, 1198, 1222,  541]),
 array([0.60472596, 0.4456514 , 0.423925  , 0.3900859 , 0.33295903,
        0.31903094, 0.26049775, 0.24925813, 0.23472068, 0.2323326 ,
        0.2193425 , 0.21874057, 0.21468642, 0.2036709 , 0.2008158 ,
        0.19940424, 0.19817525, 0.19355108, 0.19200751, 0.18992215],
       dtype=float32))

In [161]:
# Translate the recommended movie ids (first element of the result) into titles.
[index_to_title[movie_id] for movie_id in movie_recommended[0]]

['Terminator, The (1984)',
 'Alien (1979)',
 'Matrix, The (1999)',
 'Fugitive, The (1993)',
 'Total Recall (1990)',
 'Predator (1987)',
 'Toy Story 2 (1999)',
 'Hunt for Red October, The (1990)',
 'Indiana Jones and the Last Crusade (1989)',
 'Star Wars: Episode VI - Return of the Jedi (1983)',
 'Mad Max 2 (a.k.a. The Road Warrior) (1981)',
 'Star Wars: Episode V - The Empire Strikes Back (1980)',
 'Jurassic Park (1993)',
 'Aladdin (1992)',
 'Star Wars: Episode IV - A New Hope (1977)',
 'Speed (1994)',
 'Silence of the Lambs, The (1991)',
 'Raiders of the Lost Ark (1981)',
 'Full Metal Jacket (1987)',
 'Blade Runner (1982)']

In [177]:
def get_explain(title:str):
    """Print the model's explanation of the score of `title` for user `my_index`.

    The first element of the `als_model.explain` result is printed as-is. The
    second element is treated as a sequence of (movie_id, value) entries, and each
    is printed as a (title, value) tuple on its own line.

    Args:
        title: movie title, looked up in `title_to_index`.

    Raises:
        KeyError: if `title` or a returned movie id is unknown to the lookup tables.
    """
    movie_id = title_to_index[title]
    explanation = als_model.explain(my_index, csr_data, itemid=movie_id)
    total_score = explanation[0]
    contributions = explanation[1]
    # Resolve all titles before printing anything, so a failed lookup prints nothing.
    named_contributions = [(index_to_title[entry[0]], entry[1]) for entry in contributions]

    print(total_score)
    for named_entry in named_contributions:
        print(named_entry)

In [179]:
# Which of my rated movies contribute to the top recommendation, 'Terminator, The (1984)'.
get_explain('Terminator, The (1984)')

0.5946554512157957
('Aliens (1986)', 0.22387795709741787)
('Die Hard (1988)', 0.17432573142993807)
('Terminator 2: Judgment Day (1991)', 0.1611711700751539)
('Mission: Impossible (1996)', 0.021666096175957644)
('Toy Story (1995)', 0.013614496437328195)


In [178]:
# Which of my rated movies contribute to the second recommendation, 'Alien (1979)'.
get_explain('Alien (1979)')

0.43797501851095677
('Aliens (1986)', 0.28853147324418327)
('Terminator 2: Judgment Day (1991)', 0.07829696169930637)
('Die Hard (1988)', 0.06117936805328246)
('Mission: Impossible (1996)', 0.011474030636144085)
('Toy Story (1995)', -0.0015068151219594125)


## 회고

als모델을 사용하여 영화 추천시스템을 구현했다. 데이터는 영화의 별점을 담은 explicit 데이터였지만 시청횟수로 바꾸고 implicit한 데이터로 해석하였다. 구현을 위한 모델로는 MF(행렬분해)모델을 사용하였다.
내가 좋아하는 영화들을 넣어서 비교해봤을 때 만점을 준 영화는 내적값이 0.46으로 학습이 잘 된 편은 아니지만, 평가하지 않은 영화들 중 전혀 다른 장르의 영화에는 훨씬 낮은 수치가 나왔다. 0.46을 '매우 선호'로 해석한다면 다른 값들에 대해서도 어느 정도 신뢰성 있게 해석할 수 있다. 전혀 다른 장르인 American Beauty에서 음수 값이 나왔기 때문이다. 이번 구현을 통해서 실전에서는 수치 자체보다는 수치에 대한 해석이 더 중요할 것 같다고 생각했다.