### Item-based Filtering
- 협업 필터링 방식 - 사용자 기반, 아이템 기반(주로 아이템 기반 필터링을 사용함)
- 사용자 기반 협업 필터링
    - 사용자의 행동 양식을 기반으로 추천(평점, 상품 구매 이력 등)
    - x축에 아이템, y축에 사용자를 표시
    - 어떤 사용자와 비슷한 성향의 고객들이 구매한 상품을 추천
- 아이템 기반 협업 필터링(Item based collaborative Filtering):
    - x축에 사용자, y축에 아이템을 표시하고 유사한 아이템을 추천해 주는 방식
    - 주로 코사인 유사도를 사용하여 계산함
    - 어떤 상품과 비슷한 상품을 추천

In [1]:
import pandas as pd

df = pd.read_csv('Data/ratings_small.csv')
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [2]:
df = df.pivot_table('rating', index = 'userId', columns = 'movieId')
df

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,4.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,4.0,...,,,,,,,,,,
5,,,4.0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,,,,,,4.0,,,,,...,,,,,,,,,,
668,,,,,,,,,,,...,,,,,,,,,,
669,,,,,,,,,,,...,,,,,,,,,,
670,4.0,,,,,,,,,,...,,,,,,,,,,


In [4]:
#영화 제목을 가져오기 위해 데이터프레임 조인
df_ratings= pd.read_csv('Data/ratings_small.csv')
df_movies = pd.read_csv('Data/tmdb_5000_movies.csv')
df_movies.head(3)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466


In [5]:
# 필드명 id를 movieId로 변경
df_movies.rename(columns = {'id': 'movieId'}, inplace = True)

# ratings의 movieId와 movies의 movieId가 같은 행끼리 조인
df_ratings_movies = pd.merge(df_ratings, df_movies, on = 'movieId')
# 영화코드와 영화제목을 결합

In [6]:
df_ratings_movies.head(3)

Unnamed: 0,userId,movieId,rating,timestamp,budget,genres,homepage,keywords,original_language,original_title,...,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,1,2105,4.0,1260759139,11000000,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 10749, ""...",,"[{""id"": 3687, ""name"": ""graduation""}, {""id"": 61...",en,American Pie,...,"[{""iso_3166_1"": ""US"", ""name"": ""United States o...",1999-07-09,235483004,95.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,There's nothing like your first piece.,American Pie,6.4,2296
1,4,2105,4.0,949896114,11000000,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 10749, ""...",,"[{""id"": 3687, ""name"": ""graduation""}, {""id"": 61...",en,American Pie,...,"[{""iso_3166_1"": ""US"", ""name"": ""United States o...",1999-07-09,235483004,95.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,There's nothing like your first piece.,American Pie,6.4,2296
2,15,2105,4.0,1052896867,11000000,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 10749, ""...",,"[{""id"": 3687, ""name"": ""graduation""}, {""id"": 61...",en,American Pie,...,"[{""iso_3166_1"": ""US"", ""name"": ""United States o...",1999-07-09,235483004,95.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,There's nothing like your first piece.,American Pie,6.4,2296


In [7]:
# x축 사용자아이디, y축 영화제목의 피벗테이블을 만들고 NaN은 0으로 채움
df2 = df_ratings_movies.pivot_table('rating', index = 'userId', columns = 'title').fillna(0)
df2.head(3)

title,10 Things I Hate About You,12 Angry Men,1408,15 Minutes,16 Blocks,"20,000 Leagues Under the Sea",2001: A Space Odyssey,2046,21 Grams,25th Hour,...,Willy Wonka & the Chocolate Factory,World Trade Center,X-Men Origins: Wolverine,Y Tu Mamá También,You Only Live Twice,"You, Me and Dupree",Young Frankenstein,Zodiac,eXistenZ,xXx
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
#아이템 기반 협업 필터링에서는 row가 아이템이어야 하므로 x,y축을 바꿈
df2 = df2.transpose()
df2.head(3)

userId,1,2,3,4,5,6,7,8,9,10,...,662,663,664,665,666,667,668,669,670,671
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10 Things I Hate About You,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
12 Angry Men,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
#코사인 유사도 계산
from sklearn.metrics.pairwise import cosine_similarity
movie_sim = cosine_similarity(df2, df2)
print(movie_sim.shape)

(856, 856)


In [11]:
movie_sim

array([[1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.01412134],
       [0.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.16380117,
        0.10537865],
       [0.        , 0.        , 0.        , ..., 0.16380117, 1.        ,
        0.0441038 ],
       [0.01412134, 0.        , 0.        , ..., 0.10537865, 0.0441038 ,
        1.        ]])

In [13]:
# 코사인유사도 벡터를 데이터프레임으로 변환
df_movie_sim = pd.DataFrame(data = movie_sim, index = df2.index, columns = df2.index)
df_movie_sim.head(3)

title,10 Things I Hate About You,12 Angry Men,1408,15 Minutes,16 Blocks,"20,000 Leagues Under the Sea",2001: A Space Odyssey,2046,21 Grams,25th Hour,...,Willy Wonka & the Chocolate Factory,World Trade Center,X-Men Origins: Wolverine,Y Tu Mamá También,You Only Live Twice,"You, Me and Dupree",Young Frankenstein,Zodiac,eXistenZ,xXx
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10 Things I Hate About You,1.0,0.0,0.0,0.182153,0.0,0.022069,0.085323,0.0,0.0,0.10349,...,0.059856,0.0,0.161801,0.088076,0.0,0.0,0.097588,0.0,0.0,0.014121
12 Angry Men,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1408,0.0,0.0,1.0,0.447214,0.0,0.173381,0.028245,0.0,0.0,0.0,...,0.146955,0.0,0.148968,0.140265,0.0,0.0,0.191675,0.0,0.0,0.0


In [14]:
#킹콩 영화와 코사인 유사도가 비슷한 5개의 영화 추천
df_movie_sim["King Kong"].sort_values(ascending=False)[1:5]

title
Fantasia     0.648886
2046         0.648886
Liar Liar    0.486664
Rendition    0.486664
Name: King Kong, dtype: float64

In [23]:
df2.index

Index(['10 Things I Hate About You', '12 Angry Men', '1408', '15 Minutes',
       '16 Blocks', '20,000 Leagues Under the Sea', '2001: A Space Odyssey',
       '2046', '21 Grams', '25th Hour',
       ...
       'Willy Wonka & the Chocolate Factory', 'World Trade Center',
       'X-Men Origins: Wolverine', 'Y Tu Mamá También', 'You Only Live Twice',
       'You, Me and Dupree', 'Young Frankenstein', 'Zodiac', 'eXistenZ',
       'xXx'],
      dtype='object', name='title', length=856)

In [26]:
m = 'Avatar'
m.isin.list(df2.index)

AttributeError: 'str' object has no attribute 'isin'

In [24]:
df_movie_sim["Avatar"].sort_values(ascending=False)[1:5]

KeyError: 'Avatar'