In [1]:
# Data Load & Treat
import pandas as pd
import numpy as np

In [2]:
# Read From Data
boardgame_description = pd.read_pickle('../resources/input/boardgame.pkl')
boardgame_own_history = pd.read_pickle('../resources/input/user_own_data.pkl')
boardgame_play_history = pd.read_pickle('../resources/input/user_play_data.pkl')

In [3]:
# Data Preprocessing
boardgame_play_history['played_at'] = boardgame_play_history['values'].apply(lambda x: x['played_at'])

boardgame_play_history = boardgame_play_history.drop(columns = ['uid', 'values', 'play_history'])
boardgame_play_history = boardgame_play_history.reset_index().rename(columns = {'index': 'user_id'})
boardgame_play_history = boardgame_play_history[boardgame_play_history['played_at'] != '0000-00-00']

boardgame_play_history = pd.DataFrame(boardgame_play_history.groupby('user_id')['boardgame'].apply(lambda x: list(x)))

In [4]:
# Data Preprocessing
boardgame_own_history['rating'] = boardgame_own_history['values'].apply(lambda x: float(x['rating']) if x['rating']!= 'N/A' else np.nan)
boardgame_own_history['boardgame_list'] = boardgame_own_history.groupby('boardgame')['rating'].transform('count')

boardgame_own_history = boardgame_own_history[boardgame_own_history['boardgame_list'] > 20]
boardgame_own_history = boardgame_own_history.dropna(subset = ['rating'])
boardgame_own_history = boardgame_own_history.drop_duplicates(['uid', 'boardgame'])

In [5]:
# Recommendation System Module
# Item Based RS
from DescriptionBasedRS import DescriptionBasedRS
from Item2VecRS import Item2VecRS

# CF Based RS
from DeepLearningRS import DeepLearningRS

In [6]:
dbrs = DescriptionBasedRS(boardgame_description.reset_index(), 'description', 'index')
dbrs.most_similar_object('Gloomhaven')

Unnamed: 0_level_0,name,description,similarity
label_encoder,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Gloomhaven,Gloomhaven is a game of Euro-inspired tactica...,1.0
4408,Combat!,Combat! is a solitaire game on man to man comb...,0.317133
8876,Death Over The Kingdom,In Death Over the Kingdom you have two games i...,0.310003
4492,No Retreat! Italian Front: 1943-45,(from GMT website:)<br/><br/>Following on the ...,0.304381
3244,Tranquility,Jump on board the good ship Tranquility and se...,0.299989
1199,Traders of Osaka,"In Traders of Carthage, players are merchants ...",0.298014
6115,Scout!,SCOUT! is a ladder-climbing in which cards hav...,0.295283
6232,RONE,RONE: Races of New Era is a strategic post-apo...,0.292171
3628,No Retreat!: The North African Front,(from GMT website:)<br/><br/>Following on the ...,0.289788
1674,Timeline Challenge,Timeline is a card game played using 110 cards...,0.288974


In [7]:
i2v = Item2VecRS(boardgame_play_history.reset_index(), 'boardgame', iteration = 10)
i2v.most_similar_object('Gloomhaven')

소실된 자료: 21


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




Unnamed: 0_level_0,name,Dimension_0,Dimension_1,Dimension_2,Dimension_3,Dimension_4,similarity
label_encoder,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1214,Gloomhaven,0.020913,-0.003462,-0.006009,-0.006349,-0.003373,1.0
2934,TransAmerica,0.020771,-0.000816,-0.008917,-0.007755,0.001298,0.965155
2317,Shadowrift,0.027443,-0.002786,-0.012914,-0.003159,-0.008185,0.964934
3155,Western Legends,0.021614,-0.006087,-0.006891,0.000502,-0.002561,0.949295
2437,Square Mile,0.008079,-0.001755,-0.001789,-0.005633,-0.002526,0.947954
1547,Legendary: A Marvel Deck Building Game,0.021753,-0.002508,-0.009239,-0.015914,-0.004055,0.946691
598,Claim,0.018149,0.000108,-0.006065,-0.012811,-0.003708,0.942441
549,Chai,0.02436,-0.0043,-0.003193,-0.01524,0.002144,0.931978
1912,Outfoxed!,0.013692,-0.000721,-0.008736,-0.002489,0.00065,0.93044
3008,Unfair Expansion: Alien B-movie Dinosaur Western,0.024013,0.005008,-0.005642,-0.003219,-0.004273,0.92827


In [6]:
dl = DeepLearningRS(boardgame_own_history, user_col = 'uid', item_col = 'boardgame', value_col = 'rating', iteration = 10)
dl.most_prefer_object('1awesomeguy')

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))




user,item,1awesomeguy
0,Jambo,0.286873
1,Twilight Struggle,0.244202
2,Food Chain Magnate,0.24284
3,Claustrophobia,0.240303
4,Codenames: Duet,0.233673
5,Merchants & Marauders,0.233636
6,Fresco,0.228808
7,Terraforming Mars,0.227729
8,Ticket to Ride: Märklin,0.226388
9,In the Year of the Dragon,0.226255


### 평가 숫자가 많으면 평균 rating은 비슷하게 나타난다?
#### Ovecomming Issue
+ 모집단 평균을 반영하지 못함
    + 극단적인 예시로, 모집단 평균이 6인데도 불구하고 prediction value가 0인 경우 존재
    + 이는 그냥 그 사람들이 그 물건을 싫어하는 것?

## Deep Learning Based RS
+ Input Layer: user one hot encoding vector
+ Output Layer: user rating vector


> Layer
1. Input Layer
2. Hidden Layer -> Hidden Layer -> ... -> Hidden Layer
3. Output Layer
    + Multiply Non-Zero outcome filter matrix
4. Calculate cost: `MSELoss`