In [1]:
# Data Load & Treat
import pandas as pd
import numpy as np

In [2]:
# Read From Data
# Load the three input tables (game descriptions, ownership/rating records,
# play records) from the pickles under ./resources/input, in that order.
# NOTE(review): unpickling can execute arbitrary code — only load these files
# if they come from a trusted source.
boardgame_description, boardgame_own_history, boardgame_play_history = [
    pd.read_pickle(pickle_path)
    for pickle_path in (
        './resources/input/boardgame.pkl',
        './resources/input/user_own_data.pkl',
        './resources/input/user_play_data.pkl',
    )
]

In [3]:
# Data Preprocessing
# Collapse the raw play log into one row per user holding the list of board
# games recorded for that user. Steps, in order:
#   1. pull the 'played_at' field out of each record stored in 'values';
#   2. drop the 'uid', 'values' and 'play_history' columns;
#   3. move the index into a column and rename the resulting 'index' column
#      to 'user_id';
#   4. discard rows whose 'played_at' equals '0000-00-00';
#   5. gather each user's 'boardgame' entries into a Python list.
# The result is a one-column ('boardgame') frame indexed by 'user_id'.
boardgame_play_history = (
    boardgame_play_history
    .assign(played_at=lambda plays: plays['values'].apply(lambda record: record['played_at']))
    .drop(columns=['uid', 'values', 'play_history'])
    .reset_index()
    .rename(columns={'index': 'user_id'})
    .loc[lambda plays: plays['played_at'] != '0000-00-00']
    .groupby('user_id')['boardgame']
    .apply(list)
    .to_frame()
)

In [4]:
# Data Preprocessing
# Turn the raw ownership records into a clean (user, game, rating) table.
def _parse_rating(record):
    """Return the record's 'rating' as a float, or NaN when it is 'N/A'."""
    raw_rating = record['rating']
    return np.nan if raw_rating == 'N/A' else float(raw_rating)


boardgame_own_history = (
    boardgame_own_history
    # Numeric rating extracted from each record stored in 'values'.
    .assign(rating=lambda owned: owned['values'].apply(_parse_rating))
    # Despite its name, 'boardgame_list' is a count: the number of non-null
    # ratings each board game has (computed before any rows are removed).
    .assign(boardgame_list=lambda owned: owned.groupby('boardgame')['rating'].transform('count'))
    # Keep only games with more than 20 ratings.
    .loc[lambda owned: owned['boardgame_list'] > 20]
    # Remove rows without a rating, then keep the first row per (uid, boardgame).
    .dropna(subset=['rating'])
    .drop_duplicates(['uid', 'boardgame'])
)

In [5]:
# Recommendation System Module
# Item Based RS
from modeling.item_based import DescriptionBasedRS, Item2VecRS

# CF Based RS
from modeling.collaborative_filtering import DeepLearningRS

In [6]:
# Build the description-based recommender from the board-game table.
# reset_index() moves the frame's index into a regular column before the frame
# is handed to the model.
# NOTE(review): 'description' and 'index' are presumably the text column and the
# item-label column DescriptionBasedRS expects — confirm in modeling.item_based.
dbrs = DescriptionBasedRS(boardgame_description.reset_index(), 'description', 'index')
# Last expression of the cell, so its result (the similarity lookup for
# 'Gloomhaven') is rendered as the cell output.
dbrs.most_similar_object('Gloomhaven')

Unnamed: 0_level_0,name,description,similarity
label_encoder,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Gloomhaven,Gloomhaven is a game of Euro-inspired tactica...,1.0
4408,Combat!,Combat! is a solitaire game on man to man comb...,0.317133
8876,Death Over The Kingdom,In Death Over the Kingdom you have two games i...,0.310003
4492,No Retreat! Italian Front: 1943-45,(from GMT website:)<br/><br/>Following on the ...,0.304381
3244,Tranquility,Jump on board the good ship Tranquility and se...,0.299989
1199,Traders of Osaka,"In Traders of Carthage, players are merchants ...",0.298014
6115,Scout!,SCOUT! is a ladder-climbing in which cards hav...,0.295283
6232,RONE,RONE: Races of New Era is a strategic post-apo...,0.292171
3628,No Retreat!: The North African Front,(from GMT website:)<br/><br/>Following on the ...,0.289788
1674,Timeline Challenge,Timeline is a card game played using 110 cards...,0.288974


In [7]:
# Build the Item2Vec recommender from the per-user play lists.
# reset_index() turns the 'user_id' index into a column; 'boardgame' is the
# column that holds each user's list of games.
# NOTE(review): iteration = 10 presumably sets the number of training passes
# (the progress bar in the output has max=10) — confirm in modeling.item_based.
# NOTE(review): no random seed is passed; if training is stochastic, the
# neighbours shown may differ between runs — confirm.
i2v = Item2VecRS(boardgame_play_history.reset_index(), 'boardgame', iteration = 10)
# Last expression of the cell, so its result (the similarity lookup for
# 'Gloomhaven') is rendered as the cell output.
i2v.most_similar_object('Gloomhaven')

소실된 자료: 21


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




Unnamed: 0_level_0,name,Dimension_0,Dimension_1,Dimension_2,Dimension_3,Dimension_4,similarity
label_encoder,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1214,Gloomhaven,0.009487,-0.026568,-0.019721,0.012839,0.00906,1.0
1547,Legendary: A Marvel Deck Building Game,0.014993,-0.024829,-0.017272,0.014218,0.009975,0.985358
2837,Theurgy,0.00309,-0.015472,-0.017224,0.006542,0.010644,0.95827
2034,Polis,0.001915,-0.015476,-0.01916,0.01008,0.005127,0.956257
707,Cranium Whoonu,0.015384,-0.021809,-0.019093,0.009492,0.000657,0.95121
1747,Monopoly Deal Card Game,0.005863,-0.016888,-0.024142,0.014564,0.012511,0.949409
2147,Red7,0.005265,-0.026588,-0.025492,0.026784,0.018501,0.946496
1449,Junk Orbit,0.010492,-0.026313,-0.010913,0.018347,0.002715,0.94646
814,Dice Throne: Season One,0.003059,-0.02543,-0.014431,0.003314,0.013566,0.937628
2446,Star Trek Chrono-Trek,0.005687,-0.024964,-0.012748,0.00154,0.004258,0.93726


In [8]:
# Build the collaborative-filtering recommender from the cleaned ownership table,
# naming the columns to use: 'uid' for users, 'boardgame' for items and 'rating'
# for the values.
# NOTE(review): iteration = 10 presumably sets the number of training passes
# (the progress bar in the output has max=10) — confirm in
# modeling.collaborative_filtering.
dl = DeepLearningRS(boardgame_own_history, user_col = 'uid', item_col = 'boardgame', value_col = 'rating', iteration = 10)
# Last expression of the cell, so its result (the preference lookup for user
# '1awesomeguy') is rendered as the cell output.
# NOTE(review): this hard-codes a real-looking user id into the notebook and its
# output — consider whether that is acceptable to share.
dl.most_prefer_object('1awesomeguy')

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




user,item,1awesomeguy
0,Torres,0.327265
1,Elysium,0.303778
2,Root,0.284608
3,Lord of the Rings: The Confrontation,0.276217
4,YINSH,0.268965
5,Skull,0.267984
6,Mysterium,0.266461
7,For Sale,0.258628
8,Broom Service,0.256692
9,Carcassonne: The River,0.249965


### 평가 숫자가 많으면 평균 rating은 비슷하게 나타난다?
#### Issues to Overcome
+ 모집단 평균을 반영하지 못함
    + 극단적인 예시로, 모집단 평균이 6인데도 불구하고 prediction value가 0인 경우 존재
    + 이는 그냥 그 사람들이 그 물건을 싫어하는 것?

## Deep Learning Based RS
+ Input Layer: user one hot encoding vector
+ Output Layer: user rating vector


> Layer
1. Input Layer
2. Hidden Layer -> Hidden Layer -> ... -> Hidden Layer
3. Output Layer
    + Multiply the output by the non-zero-outcome filter (mask) matrix
4. Calculate cost: `MSELoss`