In [3]:
import numpy as np
import pandas as pd
from math import sqrt

# import jtplot module in notebook
from jupyterthemes import jtplot

# Plot styling through jupyterthemes' jtplot helper. The calls below are issued
# in sequence: three style() calls with different keyword sets, then reset().
# NOTE(review): the final jtplot.reset() is described below as resetting the
# default matplotlib rcParams, which would appear to discard the styling
# configured by the preceding style() calls -- confirm this is intended.
# NOTE(review): each style() call passes only some keywords; presumably the
# omitted ones fall back to jtplot defaults rather than keeping earlier
# values -- verify against the jupyterthemes documentation.

# choose which theme to inherit plotting style from
# onedork | grade3 | oceans16 | chesterish | monokai | solarizedl | solarizedd
jtplot.style(theme='onedork')

# set "context" (paper, notebook, talk, poster)
# scale font-size of ticklabels, legend, etc.
# remove spines from x and y axes and make grid dashed
jtplot.style(context='talk', fscale=1.4, spines=False, gridlines='--')

# turn on X- and Y-axis tick marks (default=False)
# turn off the axis grid lines (default=True)
# and set the default figure size
jtplot.style(ticks=True, grid=False, figsize=(6, 4.5))

# reset default matplotlib rcParams
jtplot.reset()

In [4]:
# Movie ratings keyed by critic name, then by movie title.
# A title missing from a critic's row means that critic has not rated it.
# Key order inside each row is kept exactly as authored, because the
# similarity functions accumulate floats in dict-iteration order.
critics = {
    '조용필': {'택시운전사': 2.5, '겨울왕국': 3.5, '리빙라스베가스': 3.0,
               '넘버3': 3.5, '사랑과전쟁': 2.5, '세계대전': 3.0},
    'BTS': {'택시운전사': 1.0, '겨울왕국': 4.5, '리빙라스베가스': 0.5,
            '넘버3': 1.5, '사랑과전쟁': 4.5, '세계대전': 5.0},
    '강감찬': {'택시운전사': 3.0, '겨울왕국': 3.5, '리빙라스베가스': 1.5,
               '넘버3': 5.0, '세계대전': 3.0, '사랑과전쟁': 3.5},
    '을지문덕': {'택시운전사': 2.5, '겨울왕국': 3.0,
                 '넘버3': 3.5, '세계대전': 4.0},
    '김유신': {'겨울왕국': 3.5, '리빙라스베가스': 3.0, '세계대전': 4.5,
               '넘버3': 4.0, '사랑과전쟁': 2.5},
    '유성룡': {'택시운전사': 3.0, '겨울왕국': 4.0, '리빙라스베가스': 2.0,
               '넘버3': 3.0, '세계대전': 3.5, '사랑과전쟁': 2.0},
    '이황': {'택시운전사': 3.0, '겨울왕국': 4.0, '세계대전': 3.0,
             '넘버3': 5.0, '사랑과전쟁': 3.5},
    '이이': {'겨울왕국': 4.5, '사랑과전쟁': 1.0, '넘버3': 4.0},
}


In [5]:
def sim_pearson(data, name1, name2):
    """Return the Pearson correlation of two critics over their shared movies.

    Parameters
    ----------
    data : dict
        Mapping of critic name -> {movie title: rating}.
    name1, name2 : hashable
        Keys of ``data`` identifying the two critics to compare.

    Returns
    -------
    float
        Correlation coefficient computed only from movies both critics rated.
        Returns 0.0 when the correlation is undefined: the critics share no
        movie, or at least one of them gave identical ratings to every shared
        movie (zero variance). Previously these cases raised
        ZeroDivisionError.

    Raises
    ------
    KeyError
        If ``name1`` or ``name2`` is not a key of ``data``.
    """
    ratings1 = data[name1]
    ratings2 = data[name2]

    sumX = 0     # sum of name1's ratings
    sumY = 0     # sum of name2's ratings
    sumPowX = 0  # sum of squares of name1's ratings
    sumPowY = 0  # sum of squares of name2's ratings
    sumXY = 0    # sum of products of paired ratings
    count = 0    # number of movies rated by both critics

    for movie, x in ratings1.items():
        if movie in ratings2:  # only movies both critics have rated
            y = ratings2[movie]
            sumX += x
            sumY += y
            sumPowX += pow(x, 2)
            sumPowY += pow(y, 2)
            sumXY += x * y
            count += 1

    if count == 0:
        # No overlap: nothing to correlate.
        return 0.0

    covariance = sumXY - (sumX * sumY) / count
    variance_product = (
        (sumPowX - pow(sumX, 2) / count) * (sumPowY - pow(sumY, 2) / count)
    )
    if variance_product <= 0:
        # Zero variance on either side (or a tiny negative value caused by
        # floating-point rounding) makes the coefficient undefined.
        return 0.0

    return covariance / sqrt(variance_product)
 

In [6]:
def top_match(data, name, n, sim_f=sim_pearson):
    """Return the ``n`` critics most similar to ``name``.

    Every other key of ``data`` is scored with ``sim_f(data, name, other)``.
    The result is a list of ``(score, other)`` tuples ordered from highest to
    lowest (ties on score fall back to comparing the names), truncated to at
    most ``n`` entries.
    """
    scored = [
        (sim_f(data, name, other), other)
        for other in data
        if name != other
    ]
    # Ascending sort followed by a full reversal, then keep the first n.
    return sorted(scored)[::-1][:n]

In [103]:
def sim_distance(data, user1, user2):
    """Euclidean-distance-based similarity over movies both users rated.

    Sums the squared rating differences for every movie that appears in both
    ``data[user1]`` and ``data[user2]`` and returns ``1 / (1 + sqrt(sum))``.
    Identical ratings give 1.0 and the value shrinks toward 0 as ratings
    diverge. When the two users share no movie the sum is 0, so the result
    is also 1.0.
    """
    squared_gap = sum(
        (score - data[user2][title]) ** 2
        for title, score in data[user1].items()
        if title in data[user2]  # only movies both users have rated
    )
    return 1 / (1 + sqrt(squared_gap))

In [127]:
def getRec(data, name, sim_f=None):
    """Print the unseen movie with the highest predicted rating for ``name``.

    For every movie that ``name`` has not rated but some other critic has,
    the predicted rating is the similarity-weighted average of the other
    critics' ratings. The movie with the highest prediction is printed.

    Parameters
    ----------
    data : dict
        Mapping of critic name -> {movie title: rating}.
    name : hashable
        Key of ``data`` for the critic to recommend to.
    sim_f : callable, optional
        Similarity function called as ``sim_f(data, name, other)``.
        Defaults to ``sim_distance`` (looked up when the function is called).

    Returns
    -------
    None
        The recommendation is printed, not returned. If there is nothing to
        recommend, a short notice is printed instead.

    Raises
    ------
    KeyError
        If ``name`` is not a key of ``data``.
    """
    if sim_f is None:
        sim_f = sim_distance

    seen = data[name]

    # Candidates are all movies rated by anyone that `name` has not rated,
    # collected in first-seen order so the outcome is deterministic (the
    # earlier version used one hard-coded critic's list and a set).
    candidates = []
    for ratings in data.values():
        for movie in ratings:
            if movie not in seen and movie not in candidates:
                candidates.append(movie)

    similarity = {}  # cache: each critic's similarity is computed at most once
    predictions = []
    for movie in candidates:
        sum_score = 0
        total = 0
        for person, ratings in data.items():
            # Membership test, not truthiness: a rating of 0.0 still counts.
            if movie not in ratings:
                continue
            if person not in similarity:
                similarity[person] = sim_f(data, name, person)
            weight = similarity[person]
            if weight <= 0:
                # Non-positive weights would distort or zero the average.
                continue
            sum_score += ratings[movie] * weight
            total += weight
        if total > 0:
            predictions.append((movie, sum_score / total))

    if not predictions:
        print("추천할 영화가 없습니다.")
        return

    best_movie, best_score = max(predictions, key=lambda item: item[1])
    print("추천영화:{0}, 예상평점: {1}".format(best_movie, best_score))
                

Unnamed: 0,조용필,BTS,강감찬,을지문덕,김유신,유성룡,이황,이이
겨울왕국,3.5,4.5,3.5,3.0,3.5,4.0,4.0,4.5
넘버3,3.5,1.5,5.0,3.5,4.0,3.0,5.0,4.0
리빙라스베가스,3.0,0.5,1.5,,3.0,2.0,,
사랑과전쟁,2.5,4.5,3.5,,2.5,2.0,3.5,1.0
세계대전,3.0,5.0,3.0,4.0,4.5,3.5,3.0,
택시운전사,2.5,1.0,3.0,2.5,,3.0,3.0,


In [129]:
# Print the top recommendation for the critic '이이' based on the critics ratings.
getRec(critics,'이이')

추천영화:세계대전, 예상평점: 3.6796317295600414
