# 사용자 기반 협업필터링을 이용한 영화 추천 시스템
- **목표** : 사용자 기반 협업필터링을 이용하여 영화 추천 알고리즘을 구현한다.

# 1. IMPORT

In [1]:
import pandas as pd
import numpy as np

from pprint import pprint

import matplotlib.pyplot as plt
from matplotlib import font_manager, rc

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import pickle
import random
from collections import Counter

# 2. DATA LOAD
### 1) CSV 파일을 load

In [2]:
# movie_customer 데이터 불러오기
df_mc = pd.read_csv('movie_customer.csv', encoding = 'euc-kr')
print(df_mc.shape)
df_mc.head()

(3277, 8)


Unnamed: 0,customer_id,gender,age,age_group,married,kids_under12,area,subs_start_year
0,C5001,F,51,50대,M,N,용산구,2015
1,C5002,M,22,20대,S,N,강남구,2015
2,C5003,F,33,30대,M,Y,서대문구,2015
3,C5004,F,24,20대,M,N,서대문구,2015
4,C5005,F,35,30대,M,N,마포구,2015


In [3]:
# movie_down 데이터 불러오기
df_md = pd.read_csv('movie_down.csv', encoding = 'euc-kr')
print(df_md.shape)
df_md.head()

(116464, 6)


Unnamed: 0,customer_id,item_id,down_date,down_year,down_month,down_weekday
0,C5001,I-1038,2015-01-08,2015,2015M01,Thu
1,C5001,I-1003,2015-01-17,2015,2015M01,Sat
2,C5001,I-1017,2015-01-17,2015,2015M01,Sat
3,C5001,I-1028,2015-01-19,2015,2015M01,Mon
4,C5001,I-1036,2015-01-20,2015,2015M01,Tue


In [4]:
# movie_inventory 데이터 불러오기
df_mi = pd.read_csv('movie_inventory.csv', encoding = 'euc-kr')
print(df_mi.shape)
df_mi.head()

(106, 32)


Unnamed: 0,item_id,contract_year,movie_id,title,release_year,release_date,runtime,mpa_rating,mpa_rating_origin,imdb_score,...,director,Genre_1,Genre_2,Genre_3,actor_1,actor_2,actor_3,contract_price,studio_score,price_class
0,I-1001,2015,tt0121766,Star Wars: Episode III - Revenge of the Sith,2005,05-19-2005,140,PG-13,PG-13,7.5,...,George Lucas,Action,Adventure,Fantasy,Ewan McGregor,Natalie Portman,Hayden Christensen,2700,10,10
1,I-1002,2015,tt0330373,Harry Potter and the Goblet of Fire,2005,11-18-2005,157,PG-13,PG-13,7.7,...,Mike Newell,Adventure,Family,Fantasy,Eric Sykes,Timothy Spall,David Tennant,2700,10,10
2,I-1003,2015,tt0363771,"The Chronicles of Narnia: The Lion, the Witc",2005,12-09-2005,143,PG,PG,6.9,...,Andrew Adamson,Adventure,Family,Fantasy,Georgie Henley,Skandar Keynes,William Moseley,2295,10,1
3,I-1004,2015,tt0383574,Pirates of the Caribbean: Dead Man's Chest,2006,07-07-2006,151,PG-13,PG-13,7.3,...,Gore Verbinski,Action,Adventure,Fantasy,Johnny Depp,Orlando Bloom,Keira Knightley,2295,10,1
4,I-1005,2015,tt0413300,Spider-Man 3,2007,05-04-2007,139,PG-13,PG-13,6.2,...,Sam Raimi,Action,Adventure,Sci-Fi,Tobey Maguire,Kirsten Dunst,James Franco,2295,10,1


In [5]:
# movie_mata 데이터 불러오기
df_mm = pd.read_csv('movie_meta.csv', encoding = 'euc-kr')
print(df_mm.shape)
df_mm.head()

(1668, 40)


Unnamed: 0,movie_id,title,release_year,release_date,runtime,mpa_rating,mpa_rating_origin,imdb_score,votes,reviews_users,...,contract_price,studio_score,price_class,writer_1,writer_2,actor_1,actor_2,actor_3,description,storyline
0,tt0472033,9,2009,09-09-2009,79,PG-13,PG-13,7.1,127563,267,...,2195,6,1,Pamela Pettler,Shane Acker,Christopher Plummer,Martin Landau,John C. Reilly,A rag doll that awakens in a postapocalyptic f...,In a world destroyed in a war between man and ...
1,tt0478087,21,2008,03-28-2008,123,PG-13,PG-13,6.8,216963,302,...,2295,10,1,Peter Steinfeld,Allan Loeb,Jim Sturgess,Kevin Spacey,Kate Bosworth,"""21"" is the fact-based story about six MIT stu...","Ben Campbell is a young, highly intelligent, s..."
2,tt0453562,42,2013,04-12-2013,128,PG-13,PG-13,7.5,79204,234,...,2700,10,10,Brian Helgeland,,Chadwick Boseman,Harrison Ford,Nicole Beharie,"In 1947, Jackie Robinson becomes the first Afr...","In 1946, Jackie Robinson is a Negro League bas..."
3,tt0416449,300,2006,03-09-2007,117,R,R,7.7,702777,2142,...,2700,10,10,Zack Snyder,Kurt Johnstad,Gerard Butler,Lena Headey,Dominic West,King Leonidas of Sparta and a force of 300 men...,In the Battle of Thermopylae of 480 BC an alli...
4,tt0450385,1408,2007,06-22-2007,104,PG-13,PG-13,6.8,245370,622,...,2145,4,1,Matt Greenberg,Scott Alexander,John Cusack,Tony Shalhoub,Len Cariou,A man who specialises in debunking paranormal ...,The cynical and skeptical writer Mike Enslin w...


### 2) movie_down과 movie_customer 결합

In [6]:
# movie_down과 movie_customer 결합
# customer_id를 기준
# 왼쪽 데이터 고유값 기준 (left)
df_mc_md = pd.merge(df_md, df_mc, how = 'left', on = ['customer_id'])
print(df_mc_md.shape)
df_mc_md

(116464, 13)


Unnamed: 0,customer_id,item_id,down_date,down_year,down_month,down_weekday,gender,age,age_group,married,kids_under12,area,subs_start_year
0,C5001,I-1038,2015-01-08,2015,2015M01,Thu,F,51,50대,M,N,용산구,2015
1,C5001,I-1003,2015-01-17,2015,2015M01,Sat,F,51,50대,M,N,용산구,2015
2,C5001,I-1017,2015-01-17,2015,2015M01,Sat,F,51,50대,M,N,용산구,2015
3,C5001,I-1028,2015-01-19,2015,2015M01,Mon,F,51,50대,M,N,용산구,2015
4,C5001,I-1036,2015-01-20,2015,2015M01,Tue,F,51,50대,M,N,용산구,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
116459,C8277,I-1087,2018-09-21,2018,2018M09,Fri,M,37,30대,S,N,강남구,2018
116460,C8277,I-1090,2018-09-29,2018,2018M09,Sat,M,37,30대,S,N,강남구,2018
116461,C8277,I-1088,2018-10-24,2018,2018M10,Wed,M,37,30대,S,N,강남구,2018
116462,C8277,I-1091,2018-10-25,2018,2018M10,Thu,M,37,30대,S,N,강남구,2018


In [7]:
# movie_down과 movie_customer 결합 데이터의 칼럼 이름들 출력
df_mc_md.columns

Index(['customer_id', 'item_id', 'down_date', 'down_year', 'down_month',
       'down_weekday', 'gender', 'age', 'age_group', 'married', 'kids_under12',
       'area', 'subs_start_year'],
      dtype='object')

### 3) movie_down, movie_customer와 movie_inventory 결합

In [8]:
# movie_down, movie_customer와 movie_inventory 결합
# item_id를 기준
# 왼쪽 데이터 고유값 기준 (left)
df_mc_md_mi = pd.merge(df_mc_md, df_mi, how = 'left', on = ['item_id'])
print(df_mc_md_mi.shape)
df_mc_md_mi

(116464, 44)


Unnamed: 0,customer_id,item_id,down_date,down_year,down_month,down_weekday,gender,age,age_group,married,...,director,Genre_1,Genre_2,Genre_3,actor_1,actor_2,actor_3,contract_price,studio_score,price_class
0,C5001,I-1038,2015-01-08,2015,2015M01,Thu,F,51,50대,M,...,Francis Lawrence,Action,Adventure,Mystery,Jennifer Lawrence,Liam Hemsworth,Jack Quaid,2600,6,10
1,C5001,I-1003,2015-01-17,2015,2015M01,Sat,F,51,50대,M,...,Andrew Adamson,Adventure,Family,Fantasy,Georgie Henley,Skandar Keynes,William Moseley,2295,10,1
2,C5001,I-1017,2015-01-17,2015,2015M01,Sat,F,51,50대,M,...,Michael Bay,Action,Adventure,Sci-Fi,Shia LaBeouf,Megan Fox,Josh Duhamel,2145,4,1
3,C5001,I-1028,2015-01-19,2015,2015M01,Mon,F,51,50대,M,...,Michael Bay,Action,Adventure,Sci-Fi,Shia LaBeouf,Rosie Huntington-Whiteley,Josh Duhamel,2145,4,1
4,C5001,I-1036,2015-01-20,2015,2015M01,Tue,F,51,50대,M,...,Pierre Coffin,Animation,Adventure,Comedy,Steve Carell,Kristen Wiig,Benjamin Bratt,2295,10,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116459,C8277,I-1087,2018-09-21,2018,2018M09,Fri,M,37,30대,S,...,Kyle Balda,Animation,Adventure,Comedy,Steve Carell,Kristen Wiig,Trey Parker,2295,10,1
116460,C8277,I-1090,2018-09-29,2018,2018M09,Sat,M,37,30대,S,...,Andy Muschietti,Horror,,,Jaeden Martell,Jeremy Ray Taylor,Sophia Lillis,2195,6,1
116461,C8277,I-1088,2018-10-24,2018,2018M10,Wed,M,37,30대,S,...,Jon Watts,Action,Adventure,Sci-Fi,Tom Holland,Michael Keaton,Robert Downey Jr.,2700,10,10
116462,C8277,I-1091,2018-10-25,2018,2018M10,Thu,M,37,30대,S,...,Taika Waititi,Action,Adventure,Comedy,Chris Hemsworth,Tom Hiddleston,Cate Blanchett,2700,10,10


In [9]:
# 모두 합친 데이터 셋 이름을 df1으로 변경
df1 = df_mc_md_mi 

In [10]:
# df1 칼럼이름 출력
df1.columns

Index(['customer_id', 'item_id', 'down_date', 'down_year', 'down_month',
       'down_weekday', 'gender', 'age', 'age_group', 'married', 'kids_under12',
       'area', 'subs_start_year', 'contract_year', 'movie_id', 'title',
       'release_year', 'release_date', 'runtime', 'mpa_rating',
       'mpa_rating_origin', 'imdb_score', 'votes', 'reviews_users',
       'reviews_critics', 'budget', 'income_usa', 'income_ww',
       'theater_opening', 'theater_total', 'country_1', 'country_2',
       'country_3', 'studio', 'director', 'Genre_1', 'Genre_2', 'Genre_3',
       'actor_1', 'actor_2', 'actor_3', 'contract_price', 'studio_score',
       'price_class'],
      dtype='object')

In [11]:
# df1 열과 행 확인
df1.shape

(116464, 44)

In [12]:
# df1 마지막 5행 확인
df1.tail(5)

Unnamed: 0,customer_id,item_id,down_date,down_year,down_month,down_weekday,gender,age,age_group,married,...,director,Genre_1,Genre_2,Genre_3,actor_1,actor_2,actor_3,contract_price,studio_score,price_class
116459,C8277,I-1087,2018-09-21,2018,2018M09,Fri,M,37,30대,S,...,Kyle Balda,Animation,Adventure,Comedy,Steve Carell,Kristen Wiig,Trey Parker,2295,10,1
116460,C8277,I-1090,2018-09-29,2018,2018M09,Sat,M,37,30대,S,...,Andy Muschietti,Horror,,,Jaeden Martell,Jeremy Ray Taylor,Sophia Lillis,2195,6,1
116461,C8277,I-1088,2018-10-24,2018,2018M10,Wed,M,37,30대,S,...,Jon Watts,Action,Adventure,Sci-Fi,Tom Holland,Michael Keaton,Robert Downey Jr.,2700,10,10
116462,C8277,I-1091,2018-10-25,2018,2018M10,Thu,M,37,30대,S,...,Taika Waititi,Action,Adventure,Comedy,Chris Hemsworth,Tom Hiddleston,Cate Blanchett,2700,10,10
116463,C8277,I-1016,2018-11-15,2018,2018M11,Thu,M,37,30대,S,...,Todd Phillips,Comedy,,,Bradley Cooper,Ed Helms,Zach Galifianakis,2700,10,10


In [13]:
# 고객 아이디(customer_id), 영화 제목(title) 의 상관관계 확인
# 표 안의 값들은 영화를 다운로드 받은 수
df2 = pd.crosstab(df1.customer_id, df1.title)
df2

title,American Sniper,Ant-Man,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Captain America: Civil War,Captain America: The Winter Soldier,Coco,Daddy's Home,Deadpool,...,Toy Story 3,Trainwreck,Transformers,Transformers: Dark of the Moon,Transformers: Revenge of the Fallen,Trolls,Up,Wonder Woman,X-Men: Apocalypse,Zootopia
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C5001,1,1,0,0,0,1,1,1,0,0,...,0,0,0,1,1,2,1,1,0,0
C5002,3,0,0,0,0,0,1,0,1,1,...,0,3,0,0,0,0,0,0,0,0
C5003,1,1,2,0,0,0,0,1,2,0,...,0,2,0,0,0,1,2,1,1,0
C5004,0,1,1,1,0,1,0,0,0,0,...,0,0,1,1,4,1,0,1,0,1
C5005,0,1,1,0,0,0,0,1,1,2,...,0,2,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C8273,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
C8274,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
C8275,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2,0,0
C8276,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0


# 3. 협업 필터링 생성

### 1) 유틸리티행렬 생성

#### (1) LabelEndoder를 이용하여 user와 title을 수치화 한다.

In [14]:
# 문자형 변수들을 숫자형 변수로 변경하는 함수 생성
def execute_labeling(column_name):
    Lec = LabelEncoder()
    df_copy = df1.copy()
    df_label = pd.DataFrame(Lec.fit_transform(df_copy[column_name].values), columns = [column_name+'_label'])
    return df_label

# customer_id를 숫자형 변수로 변경
df_user = execute_labeling('customer_id')

# 숫자형 변수로 바뀐 customer_id의 중복을 배제한 개수 출력
print(len(df_user['customer_id_label'].value_counts()))


df_copy = df1.copy()    # df1을 복사한 df_copy 생성
df_copy = df_copy.drop('customer_id', axis = True)    # customer_id (문자형 변수) 컬럼 제거
df_label = pd.concat([df_user, df_copy], axis = 1)    # 라벨링한 컬럼과 기존 데이터프레임 합치기

# df_label의 마지막 5행 출력
df_label.tail(5)

3277


Unnamed: 0,customer_id_label,item_id,down_date,down_year,down_month,down_weekday,gender,age,age_group,married,...,director,Genre_1,Genre_2,Genre_3,actor_1,actor_2,actor_3,contract_price,studio_score,price_class
116459,3276,I-1087,2018-09-21,2018,2018M09,Fri,M,37,30대,S,...,Kyle Balda,Animation,Adventure,Comedy,Steve Carell,Kristen Wiig,Trey Parker,2295,10,1
116460,3276,I-1090,2018-09-29,2018,2018M09,Sat,M,37,30대,S,...,Andy Muschietti,Horror,,,Jaeden Martell,Jeremy Ray Taylor,Sophia Lillis,2195,6,1
116461,3276,I-1088,2018-10-24,2018,2018M10,Wed,M,37,30대,S,...,Jon Watts,Action,Adventure,Sci-Fi,Tom Holland,Michael Keaton,Robert Downey Jr.,2700,10,10
116462,3276,I-1091,2018-10-25,2018,2018M10,Thu,M,37,30대,S,...,Taika Waititi,Action,Adventure,Comedy,Chris Hemsworth,Tom Hiddleston,Cate Blanchett,2700,10,10
116463,3276,I-1016,2018-11-15,2018,2018M11,Thu,M,37,30대,S,...,Todd Phillips,Comedy,,,Bradley Cooper,Ed Helms,Zach Galifianakis,2700,10,10


#### (2) 유저와 영화 제목이 column이고 각 원소가 평점인 DataFrame 생성

In [15]:
movie_list = list(df1['title'].unique())      # df1의 영화 제목(title) 칼럼에 있는 값들을 칼럼으로 한 movie_list 생성
movie_list = sorted(movie_list, key=str)      # movie_list의 칼럼들을 문자순으로 정렬
df_matrix = pd.DataFrame(columns = ['customer_id'] + movie_list)     # 무비 리스트에 customer_id 컬럼 생성
df_matrix    #결과확인

Unnamed: 0,customer_id,American Sniper,Ant-Man,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Captain America: Civil War,Captain America: The Winter Soldier,Coco,Daddy's Home,...,Toy Story 3,Trainwreck,Transformers,Transformers: Dark of the Moon,Transformers: Revenge of the Fallen,Trolls,Up,Wonder Woman,X-Men: Apocalypse,Zootopia


In [16]:
%%time

# df2 데이터들과 customer_id 데이터들을 df_matrix에 추가
for i in range(len(df2.index)):
    df_matrix.loc[i] = df2.iloc[i] # 행채우기
    df_matrix['customer_id'][i] = i

CPU times: user 5.54 s, sys: 15.5 ms, total: 5.55 s
Wall time: 5.54 s


#### (3) pickle

In [17]:
# 유틸리티행렬 pickle 저장
file = df_matrix
directory = open('utilitymatrix', 'wb')
pickle.dump(file, directory)
directory.close()

# customer_id(문자형) 채워진 user_name 생성
user_name = list(df1['customer_id'].unique())
user_name = sorted(user_name, key = str)
user_name = pd.DataFrame(user_name)
user_name.columns = ['user_name']

# user_name pickle 저장
file_user = user_name
directory_user = open('user_name', 'wb')
pickle.dump(file_user, directory_user)
directory_user.close()

# pickle로 저장한 user_name data frame open
f = open('user_name', 'rb')
user_name = pickle.load(f)
user_name = user_name

# pickle로 저장한 유틸리티 행렬 DATA LOAD
f = open('utilitymatrix', 'rb')
utilitymatrix = pickle.load(f)

df = utilitymatrix
df

Unnamed: 0,customer_id,American Sniper,Ant-Man,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Captain America: Civil War,Captain America: The Winter Soldier,Coco,Daddy's Home,...,Toy Story 3,Trainwreck,Transformers,Transformers: Dark of the Moon,Transformers: Revenge of the Fallen,Trolls,Up,Wonder Woman,X-Men: Apocalypse,Zootopia
0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,1.0,1.0,2.0,1.0,1.0,0.0,0.0
1,1.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,2.0,...,0.0,2.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0
3,3.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,4.0,1.0,0.0,1.0,0.0,1.0
4,4.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,...,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3272,3272.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3273,3273.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3274,3274.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
3275,3275.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0


In [18]:
# df에 user_name 칼럼 열 추가
df_concat = pd.concat([user_name, df], axis = 1)
df_concat

Unnamed: 0,user_name,customer_id,American Sniper,Ant-Man,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Captain America: Civil War,Captain America: The Winter Soldier,Coco,...,Toy Story 3,Trainwreck,Transformers,Transformers: Dark of the Moon,Transformers: Revenge of the Fallen,Trolls,Up,Wonder Woman,X-Men: Apocalypse,Zootopia
0,C5001,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,...,0.0,0.0,0.0,1.0,1.0,2.0,1.0,1.0,0.0,0.0
1,C5002,1.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,C5003,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,...,0.0,2.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0
3,C5004,3.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,1.0,4.0,1.0,0.0,1.0,0.0,1.0
4,C5005,4.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3272,C8273,3272.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3273,C8274,3273.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3274,C8275,3274.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
3275,C8276,3275.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0


#### (4) 고객과 성향이 같은 데이터만 출력하는 함수 생성

In [19]:
# 연령대가 일치하는 데이터만 남기는 함수 생성
def user_age(user_name, df_raw, df_all):
    group_age = df_raw[df_raw['customer_id'] == user_name]['age_group'].values[0]
    li = []
    for i in df_all.loc[:,'user_name']:
        li.append(i)  
    
    new_li = []
    for i in li:
        if df_raw[df_raw['customer_id'] == i].reset_index().loc[0, 'age_group'] != group_age:
            index = df_all[df_all['user_name'] == i].index[0]
            df_all.drop(index, inplace = True)
    return df_all

In [20]:
# 성별이 일치하는 데이터만 남기는 함수 생성
def gender(user_name, df_raw, df_all):
    gender = df_raw[df_raw['customer_id'] == user_name]['gender'].values[0]
    li = []
    for i in df_all.loc[:,'user_name']:
        li.append(i)
    
    new_li = []
    for i in li:
        if df_raw[df_raw['customer_id'] == i].reset_index().loc[0, 'gender'] != gender:
            index = df_all[df_all['user_name'] == i].index[0]
            df_all.drop(index, inplace = True)
    return df_all

In [21]:
# 결혼여부가 일치하는 데이터만 남기는 함수 생성
def married(user_name, df_raw, df_all):
    married = df_raw[df_raw['customer_id'] == user_name]['married'].values[0]
    li = []
    for i in df_all.loc[:,'user_name']:
        li.append(i)
    
    new_li = []
    for i in li:
        if df_raw[df_raw['customer_id'] == i].reset_index().loc[0, 'married'] != married:
            index = df_all[df_all['user_name'] == i].index[0]
            df_all.drop(index, inplace = True)
    return df_all

In [22]:
# 아이 유무가 일치하는 데이터만 남기는 함수 생성
def kids_under12(user_name, df_raw, df_all):
    kids_under12 = df_raw[df_raw['customer_id'] == user_name]['kids_under12'].values[0]
    li = []
    for i in df_all.loc[:,'user_name']:
        li.append(i)
    
    new_li = []
    for i in li:
        if df_raw[df_raw['customer_id'] == i].reset_index().loc[0, 'kids_under12'] != kids_under12:
            index = df_all[df_all['user_name'] == i].index[0]
            df_all.drop(index, inplace = True)
    return df_all

In [23]:
# 고객의 선호 장르를 찾는 함수 생성
def find_genre(Genre1, Genre2, Genre3):
    Genre1 = weight_Counter(Genre1, 2)    # Genre_1은 가중치 2
    Genre2 = weight_Counter(Genre2, 1)    # Genre_2는 가중치 1
    Genre3 = weight_Counter(Genre3, 0.5)  # Genre_3은 가중치 0.5
    
    m = 0
    user_key = 0
    genre = Genre1 + Genre2 + Genre3
    for key, value in genre.items():
        if value > m:
            user_key = key
            m = value
    return user_key      # 고객의 선호 장르(가장 많이 본 장르) 반환

In [24]:
# 장르에 가중치를 부여하는 함수 생성
def weight_Counter(origin, num):
    counter = Counter(origin)
    for key in counter:
        counter[key] *= num
    return counter

# print(Counter(Genre1))
# print(weight_Counter(Genre1, 2))

In [25]:
# 'C5001'고객의 선호 장르 분석 예시
Genre1 = df1[df1['customer_id'] == 'C5001']['Genre_1']
Genre2 = df1[df1['customer_id'] == 'C5001']['Genre_2']
Genre3 = df1[df1['customer_id'] == 'C5001']['Genre_3']
find_genre(Genre1, Genre2, Genre3)

'Action'

In [26]:
# 장르가 일치하는 데이터만 남기는 함수 생성
def genre(user_name, df_raw, df_all):
    Genre1 = df_raw[df_raw['customer_id'] == 'C5001']['Genre_1']
    Genre2 = df_raw[df_raw['customer_id'] == 'C5001']['Genre_2']
    Genre3 = df_raw[df_raw['customer_id'] == 'C5001']['Genre_3']
    user_key = find_genre(Genre1, Genre2, Genre3)
    li = []
    for i in df_all.loc[:,'user_name']:
        li.append(i)

    new_li = []
    for i in li:
        df_tmp1 = df_raw[df_raw['customer_id'] == i].reset_index().loc[:, 'Genre_1']
        df_tmp2 = df_raw[df_raw['customer_id'] == i].reset_index().loc[:, 'Genre_2']
        df_tmp3 = df_raw[df_raw['customer_id'] == i].reset_index().loc[:, 'Genre_3']
        other_key = find_genre(df_tmp1, df_tmp2, df_tmp3)

        if user_key != other_key:
            df_all.drop(df_all[df_all['user_name'] == i].index, inplace = True)
            
    return df_all

# 'C5001'고객의 선호 장르와 같은 고객들의 데이터만 남겨봄
genre('C5011',df1,df_concat)


Unnamed: 0,user_name,customer_id,American Sniper,Ant-Man,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Captain America: Civil War,Captain America: The Winter Soldier,Coco,...,Toy Story 3,Trainwreck,Transformers,Transformers: Dark of the Moon,Transformers: Revenge of the Fallen,Trolls,Up,Wonder Woman,X-Men: Apocalypse,Zootopia
0,C5001,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,...,0.0,0.0,0.0,1.0,1.0,2.0,1.0,1.0,0.0,0.0
1,C5002,1.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,C5003,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,...,0.0,2.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0
3,C5004,3.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,1.0,4.0,1.0,0.0,1.0,0.0,1.0
5,C5006,5.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3270,C8271,3270.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3271,C8272,3271.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3272,C8273,3272.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3274,C8275,3274.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0


# 4. 'C5001' 고객에 대해 추천 알고리즘 분석

In [28]:
# 'C5001'고객의 연령대와 같은 고객들의 데이터만 남김
df_user_age = user_age('C5001', df1, df_concat)
df_user_age

Unnamed: 0,user_name,customer_id,American Sniper,Ant-Man,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Captain America: Civil War,Captain America: The Winter Soldier,Coco,...,Toy Story 3,Trainwreck,Transformers,Transformers: Dark of the Moon,Transformers: Revenge of the Fallen,Trolls,Up,Wonder Woman,X-Men: Apocalypse,Zootopia
0,C5001,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,...,0.0,0.0,0.0,1.0,1.0,2.0,1.0,1.0,0.0,0.0
10,C5011,10.0,1.0,1.0,0.0,0.0,1.0,1.0,3.0,0.0,...,0.0,1.0,2.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0
21,C5022,21.0,4.0,0.0,3.0,2.0,2.0,0.0,2.0,0.0,...,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
26,C5027,26.0,1.0,0.0,0.0,2.0,1.0,0.0,2.0,1.0,...,0.0,1.0,1.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0
32,C5033,32.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3252,C8253,3252.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3257,C8258,3257.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0
3261,C8262,3261.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3264,C8265,3264.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
# 'C5001'고객의 성별과 같은 고객들의 데이터만 남김
df_user_gender = gender('C5001', df1, df_user_age)
df_user_gender

Unnamed: 0,user_name,customer_id,American Sniper,Ant-Man,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Captain America: Civil War,Captain America: The Winter Soldier,Coco,...,Toy Story 3,Trainwreck,Transformers,Transformers: Dark of the Moon,Transformers: Revenge of the Fallen,Trolls,Up,Wonder Woman,X-Men: Apocalypse,Zootopia
0,C5001,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,...,0.0,0.0,0.0,1.0,1.0,2.0,1.0,1.0,0.0,0.0
10,C5011,10.0,1.0,1.0,0.0,0.0,1.0,1.0,3.0,0.0,...,0.0,1.0,2.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0
26,C5027,26.0,1.0,0.0,0.0,2.0,1.0,0.0,2.0,1.0,...,0.0,1.0,1.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0
43,C5044,43.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,1.0,1.0,1.0,0.0,3.0,1.0,0.0,0.0
63,C5064,63.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3219,C8220,3219.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3230,C8231,3230.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
3252,C8253,3252.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3257,C8258,3257.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0


In [30]:
# 'C5001'고객의 선호 장르와 같은 고객들의 데이터만 남김
df_user_genre = genre('C5001', df1, df_user_gender)
df_user_genre

Unnamed: 0,user_name,customer_id,American Sniper,Ant-Man,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Captain America: Civil War,Captain America: The Winter Soldier,Coco,...,Toy Story 3,Trainwreck,Transformers,Transformers: Dark of the Moon,Transformers: Revenge of the Fallen,Trolls,Up,Wonder Woman,X-Men: Apocalypse,Zootopia
0,C5001,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,...,0.0,0.0,0.0,1.0,1.0,2.0,1.0,1.0,0.0,0.0
10,C5011,10.0,1.0,1.0,0.0,0.0,1.0,1.0,3.0,0.0,...,0.0,1.0,2.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0
26,C5027,26.0,1.0,0.0,0.0,2.0,1.0,0.0,2.0,1.0,...,0.0,1.0,1.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0
43,C5044,43.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,1.0,1.0,1.0,0.0,3.0,1.0,0.0,0.0
63,C5064,63.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3219,C8220,3219.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3230,C8231,3230.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
3252,C8253,3252.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3257,C8258,3257.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0


In [31]:
# 남은 데이터에서 'user_name','customer_id' 컬럼 열 제거
df_final = df_user_genre.drop(['user_name','customer_id'], axis = 1)
df_final

Unnamed: 0,American Sniper,Ant-Man,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Captain America: Civil War,Captain America: The Winter Soldier,Coco,Daddy's Home,Deadpool,...,Toy Story 3,Trainwreck,Transformers,Transformers: Dark of the Moon,Transformers: Revenge of the Fallen,Trolls,Up,Wonder Woman,X-Men: Apocalypse,Zootopia
0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,2.0,1.0,1.0,0.0,0.0
10,1.0,1.0,0.0,0.0,1.0,1.0,3.0,0.0,1.0,0.0,...,0.0,1.0,2.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0
26,1.0,0.0,0.0,2.0,1.0,0.0,2.0,1.0,0.0,0.0,...,0.0,1.0,1.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0
43,0.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,...,0.0,0.0,1.0,1.0,1.0,0.0,3.0,1.0,0.0,0.0
63,1.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3219,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3230,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
3252,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3257,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0


In [32]:
# 남은 데이터의 컬럼 열들을 합침
P_score = [0 for i in range(len(df_final.columns))]
df_z = df_final.reset_index()
df_z.drop('index', axis = 1, inplace = True)
for i in range(len(df_z.index)):
    P_score += df_z.loc[i,:].values

In [33]:
# 각 열의 데이터들을 합친 결과
P_score

array([ 57., 116., 154.,  91.,  97.,  83.,  53.,  50.,  64.,  79.,   7.,
       115.,  79., 168.,  50., 103.,  57.,  69.,  73.,  57.,  48.,  95.,
       113.,  48.,  85., 157., 239.,  72.,  10.,  73., 143., 109., 148.,
        37.,  73.,  71., 138.,  60.,  84., 102., 114., 108., 144.,  84.,
        64.,  44.,  91., 112.,  52., 136.,  61., 257.,  93.,  77., 108.,
        61.,  74.,  64., 244., 177.,  12., 128.,  65.,  70.,  73.,  93.,
        59.,  64.,  10.,  44.,  45.,  62.,  11.,  51.,  50., 256., 220.,
       238.,  78.,  50.,  37.,  11., 136.,  69.,  10., 119.,  80.,  13.,
        95., 137., 112.,  97.,  57.,  81.])

In [34]:
# 영화 제목들을 컬럼으로 하는 df_matrix_f 생성
movie_list = list(df1['title'].unique())
movie_list = sorted(movie_list, key=str)
df_matrix_f = pd.DataFrame(columns = movie_list)
df_matrix_f   #결과확인

Unnamed: 0,American Sniper,Ant-Man,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Captain America: Civil War,Captain America: The Winter Soldier,Coco,Daddy's Home,Deadpool,...,Toy Story 3,Trainwreck,Transformers,Transformers: Dark of the Moon,Transformers: Revenge of the Fallen,Trolls,Up,Wonder Woman,X-Men: Apocalypse,Zootopia


In [35]:
# df_matrix_f 에 P-score를 결합
%%time

df_matrix_f.loc[0] = P_score

CPU times: user 1.65 ms, sys: 0 ns, total: 1.65 ms
Wall time: 1.56 ms


In [36]:
# 결과 확인
df_matrix_f

Unnamed: 0,American Sniper,Ant-Man,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Captain America: Civil War,Captain America: The Winter Soldier,Coco,Daddy's Home,Deadpool,...,Toy Story 3,Trainwreck,Transformers,Transformers: Dark of the Moon,Transformers: Revenge of the Fallen,Trolls,Up,Wonder Woman,X-Men: Apocalypse,Zootopia
0,57.0,116.0,154.0,91.0,97.0,83.0,53.0,50.0,64.0,79.0,...,10.0,119.0,80.0,13.0,95.0,137.0,112.0,97.0,57.0,81.0


In [37]:
# 'C5001'고객이 이미 본 영화들을 seen_movies에 저장
seen_movies = df2.loc['C5001'][df2.loc['C5001'] > 0].index
seen_movies

Index(['American Sniper', 'Ant-Man', 'Captain America: Civil War',
       'Captain America: The Winter Soldier', 'Coco', 'Despicable Me 2',
       'Fifty Shades of Grey', 'Frozen', 'Gravity', 'Guardians of the Galaxy',
       'Guardians of the Galaxy Vol. 2', 'Harry Potter and the Goblet of Fire',
       'Harry Potter and the Half-Blood Prince', 'I Am Legend', 'Jason Bourne',
       'Jurassic World', 'Justice League', 'Mad Max: Fury Road', 'Minions',
       'Moana', 'Monsters University', 'Shrek the Third', 'Sing',
       'Spider-Man: Homecoming', 'Spy', 'Star Trek', 'Suicide Squad',
       'The Chronicles of Narnia: The Lion, the Witc', 'The Dark Knight Rises',
       'The Good Dinosaur', 'The Hunger Games: Catching Fire',
       'The Hunger Games: Mockingjay - Part 1', 'The Martian', 'The Revenant',
       'The SpongeBob Movie: Sponge Out of Water',
       'Transformers: Dark of the Moon', 'Transformers: Revenge of the Fallen',
       'Trolls', 'Up', 'Wonder Woman'],
      dtype='obj

In [38]:
# df_matrix_f에서 'C5001' 고객이 이미 본 영화 제목 칼럼 열들을 제거 
for i in df_matrix_f.columns:
    if i in seen_movies:
        df_matrix_f.drop(i, axis = 1, inplace = True)
df_matrix_f

Unnamed: 0,Avatar,Avengers: Age of Ultron,Batman v Superman: Dawn of Justice,Daddy's Home,Deadpool,Despicable Me,Despicable Me 3,Doctor Strange,Dunkirk,Finding Dory,...,The Peanuts Movie,The Secret Life of Pets,The Twilight Saga: Eclipse,The Twilight Saga: New Moon,Thor: Ragnarok,Toy Story 3,Trainwreck,Transformers,X-Men: Apocalypse,Zootopia
0,154.0,91.0,97.0,64.0,79.0,7.0,79.0,168.0,50.0,57.0,...,238.0,50.0,11.0,136.0,69.0,10.0,119.0,80.0,57.0,81.0


In [39]:
# 'C5001' 고객이 가장 선호할 만한 영화 3개를 추천
cnt = 0
while True:
    m_score = max(df_matrix_f.iloc[0,:])
    li = []
    for i in range(len(df_matrix_f.columns)):

        if df_matrix_f.iloc[0,i] == m_score:
            print(df_matrix_f.columns[i])
            li.append(df_matrix_f.columns[i])
            cnt += 1
    df_matrix_f.drop(li, axis = 1, inplace = True)
    
    if cnt >= 3:
        break

Spectre
The Hunger Games: Mockingjay - Part 2
Straight Outta Compton


# 5. 영화 추천 알고리즘 생성

In [46]:
# 영화 추천 알고리즘 함수 생성
def recommend(user_id, df, df_concat):
    df_age = user_age(user_id, df, df_concat)
    df_g = gender(user_id, df, df_age)
    df_m = married(user_id, df, df_g)
    df_under_12 = kids_under12(user_id, df, df_m)
    df_gen = genre(user_id, df, df_under_12)
    
    df_final = df_gen.drop(['user_name','customer_id'], axis = 1)
    
    P_score = [0 for i in range(len(df_final.columns))]
    df_z = df_final.reset_index()
    df_z.drop('index', axis = 1, inplace = True)
    for i in range(len(df_z.index)):
        P_score += df_z.loc[i,:].values
    
    movie_list = list(df1['title'].unique())
    movie_list = sorted(movie_list, key=str)
    df_matrix_f = pd.DataFrame(columns = movie_list)

    df_matrix_f.loc[0] = P_score
    
    seen_movies = df2.loc[user_id][df2.loc[user_id] > 0].index
    
    print('{}님은'.format(user_id))
    for i in seen_movies:
        print(i)
    print('를 보셨습니다.')
    print()
    
    for i in df_matrix_f.columns:
        if i in seen_movies:
            df_matrix_f.drop(i, axis = 1, inplace = True)
    
    print('{}님을 위한 추천 영화 입니다.'.format(user_id))
    
    cnt = 0
    li = []
    while True:
        m_score = max(df_matrix_f.iloc[0,:])
        for i in range(len(df_matrix_f.columns)):

            if df_matrix_f.iloc[0,i] == m_score:
                li.append(df_matrix_f.columns[i])
                cnt += 1
        if cnt >= 3:
            total = [i for i in range(cnt)]
            choose = random.sample(total, 3)
            for i in range(3):
                print(li[choose[i]])
            break
            
        for i in li:
            if i in df_matrix_f.columns:
                df_matrix_f.drop(i, axis = 1, inplace = True)

# 6. 결과 확인

In [47]:
# 'C5001' 고객의 추천 영화 3개 출력
recommend('C5001', df1, df_concat)

C5001님은
American Sniper
Ant-Man
Captain America: Civil War
Captain America: The Winter Soldier
Coco
Despicable Me 2
Fifty Shades of Grey
Frozen
Gravity
Guardians of the Galaxy
Guardians of the Galaxy Vol. 2
Harry Potter and the Goblet of Fire
Harry Potter and the Half-Blood Prince
I Am Legend
Jason Bourne
Jurassic World
Justice League
Mad Max: Fury Road
Minions
Moana
Monsters University
Shrek the Third
Sing
Spider-Man: Homecoming
Spy
Star Trek
Suicide Squad
The Chronicles of Narnia: The Lion, the Witc
The Dark Knight Rises
The Good Dinosaur
The Hunger Games: Catching Fire
The Hunger Games: Mockingjay - Part 1
The Martian
The Revenant
The SpongeBob Movie: Sponge Out of Water
Transformers: Dark of the Moon
Transformers: Revenge of the Fallen
Trolls
Up
Wonder Woman
를 보셨습니다.

C5001님을 위한 추천 영화 입니다.
Trainwreck
Doctor Strange
Inside Out


In [48]:
# 'C5040' 고객의 추천 영화 3개 출력
recommend('C5040', df1, df_concat)

C5040님은
American Sniper
Avengers: Age of Ultron
Batman v Superman: Dawn of Justice
Captain America: The Winter Soldier
Daddy's Home
Dunkirk
Fifty Shades of Grey
Harry Potter and the Half-Blood Prince
Harry Potter and the Order of the Phoenix
Hidden Figures
I Am Legend
Indiana Jones and the Kingdom of the Crystal
Inside Out
Iron Man
Mad Max: Fury Road
Mission: Impossible - Rogue Nation
Pirates of the Caribbean: At World's End
San Andreas
Skyfall
Spider-Man: Homecoming
Star Trek
Straight Outta Compton
Suicide Squad
The Blind Side
The Dark Knight
The Dark Knight Rises
The Hangover
The Hangover Part II
The Hobbit: The Battle of the Five Armies
The Hunger Games
The Hunger Games: Mockingjay - Part 1
The SpongeBob Movie: Sponge Out of Water
The Twilight Saga: Eclipse
Thor: Ragnarok
Transformers
Zootopia
를 보셨습니다.

C5040님을 위한 추천 영화 입니다.
Finding Dory
The Revenant
Despicable Me 3


In [49]:
# 'C8079' 고객의 추천 영화 3개 출력
recommend('C8079', df1, df_concat)

C8079님은
Ant-Man
Get Out
Harry Potter and the Goblet of Fire
Hidden Figures
Hotel Transylvania 2
Jason Bourne
Jumanji: Welcome to the Jungle
Spectre
Spider-Man: Homecoming
Star Trek Beyond
The Blind Side
The Fate of the Furious
The Peanuts Movie
Trolls
Wonder Woman
를 보셨습니다.

C8079님을 위한 추천 영화 입니다.
The Dark Knight
Transformers: Dark of the Moon
Shrek the Third


In [50]:
# 'C6789' 고객의 추천 영화 3개 출력
recommend('C6789', df1, df_concat)

C6789님은
Avatar
Daddy's Home
Deadpool
Doctor Strange
Dunkirk
Fifty Shades of Grey
Finding Dory
Hidden Figures
Hotel Transylvania 2
Inside Out
Iron Man 2
It
Jason Bourne
Mad Max: Fury Road
Minions
Mission: Impossible - Rogue Nation
Moana
Pitch Perfect 2
San Andreas
Spectre
Spider-Man: Homecoming
Spy
Straight Outta Compton
The Blind Side
The Hunger Games: Mockingjay - Part 2
The Martian
The Peanuts Movie
The Twilight Saga: New Moon
Trainwreck
X-Men: Apocalypse
Zootopia
를 보셨습니다.

C6789님을 위한 추천 영화 입니다.
The Chronicles of Narnia: The Lion, the Witc
Despicable Me 3
Avengers: Age of Ultron


In [51]:
# 'C7082' 고객의 추천 영화 3개 출력
recommend('C7082', df1, df_concat)

C7082님은
Avengers: Age of Ultron
Coco
Daddy's Home
Deadpool
Despicable Me 3
Dunkirk
Hidden Figures
Jurassic World
Minions
Monsters University
Sing
Spider-Man: Homecoming
Trainwreck
Transformers: Dark of the Moon
Zootopia
를 보셨습니다.

C7082님을 위한 추천 영화 입니다.
It
The Dark Knight Rises
Trolls
