In [1]:
# Data Load & Treat
import pandas as pd
import numpy as np

In [2]:
# Read From Data
# Load the three pickled inputs in a fixed order: boardgame descriptions,
# per-user ownership records, and per-user play records.
# NOTE(review): unpickling can execute arbitrary code — these files are
# presumably produced by this project; confirm they come from a trusted source.
input_paths = (
    './resources/input/boardgame.pkl',
    './resources/input/user_own_data.pkl',
    './resources/input/user_play_data.pkl',
)
boardgame_description, boardgame_own_history, boardgame_play_history = map(pd.read_pickle, input_paths)

In [3]:
# Data Preprocessing
# Count the non-null 'values' entries for every (uid, boardgame) pair and
# flatten the result back into ordinary columns; the count is stored in 'values'.
plays_by_user_game = boardgame_play_history.groupby(['uid', 'boardgame'])
boardgame_play_amount = plays_by_user_game[['values']].count().reset_index()

In [4]:
# Data Preprocessing
# Pull the 'played_at' field out of each raw record so plays can be filtered on it.
play_dates = boardgame_play_history['values'].apply(lambda record: record['played_at'])

# Attach the date, discard the raw columns, and expose the frame's index as a
# 'user_id' column (reset_index names it 'index' — presumably the index is
# unnamed; verify against the pickle).
play_log = (
    boardgame_play_history
    .assign(played_at = play_dates)
    .drop(columns = ['uid', 'values', 'play_history'])
    .reset_index()
    .rename(columns = {'index': 'user_id'})
)

# '0000-00-00' is treated as a placeholder date; rows carrying it are excluded.
play_log = play_log[play_log['played_at'] != '0000-00-00']

# One row per user_id, holding the list of boardgames from that user's remaining rows.
boardgame_play_history = play_log.groupby('user_id')['boardgame'].apply(list).to_frame()

In [5]:
# Data Preprocessing
def _rating_or_nan(record):
    """Return record['rating'] as a float, or NaN when it is the 'N/A' placeholder."""
    raw_rating = record['rating']
    if raw_rating != 'N/A':
        return float(raw_rating)
    return np.nan


boardgame_own_history['rating'] = boardgame_own_history['values'].apply(_rating_or_nan)

# Despite its name, 'boardgame_list' holds a number: how many non-NaN ratings
# each boardgame has (computed before the duplicate rows are removed below).
ratings_per_game = boardgame_own_history.groupby('boardgame')['rating'].transform('count')
boardgame_own_history['boardgame_list'] = ratings_per_game

# Keep boardgames with more than 20 ratings, then drop unrated rows and keep
# the first row for each (uid, boardgame) pair.
has_enough_ratings = boardgame_own_history['boardgame_list'] > 20
boardgame_own_history = (
    boardgame_own_history[has_enough_ratings]
    .dropna(subset = ['rating'])
    .drop_duplicates(['uid', 'boardgame'])
)
boardgame_own_history = boardgame_own_history.drop_duplicates(['uid', 'boardgame'])

In [6]:
# Recommendation System Module
# Item Based RS
from modeling.item_based import DescriptionBasedRS, Item2VecRS

# CF Based RS
from modeling.collaborative_filtering import DeepLearningRS, ImplicitFeedbackRS

In [None]:
# Description-based recommender: built from the description table with its
# index turned into an 'index' column (presumably the boardgame name — verify
# against DescriptionBasedRS), then queried for objects similar to 'Gloomhaven'.
description_table = boardgame_description.reset_index()
dbrs = DescriptionBasedRS(description_table, 'description', 'index')
dbrs.most_similar_object('Gloomhaven')

In [None]:
# Item2Vec recommender: fed the per-user boardgame lists (index moved back
# into a column), then queried for objects similar to 'Gloomhaven'.
play_sequences = boardgame_play_history.reset_index()
i2v = Item2VecRS(play_sequences, 'boardgame', iteration = 10)
i2v.most_similar_object('Gloomhaven')

In [20]:
# Rating-based recommender built from the filtered ownership ratings.
# NOTE(review): the progress bar saved in this cell's output reports max=3000,
# which does not match iteration = 10000 below — the stored output appears to
# come from an earlier run; re-run the cell to refresh it.
rating_model_options = dict(user_col = 'uid', item_col = 'boardgame', value_col = 'rating', iteration = 10000)
dl = DeepLearningRS(boardgame_own_history, **rating_model_options)
dl.most_prefer_object('1awesomeguy')

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3000.0), HTML(value='')))




user,item,1awesomeguy
0,Mansions of Madness: Second Edition,13.047338
1,Shadows over Camelot,12.801888
2,Dungeon Petz,12.529935
3,Five Tribes,12.490986
4,Modern Art,12.157318
5,Caverna: The Cave Farmers,12.08019
6,Commands & Colors: Ancients,11.639956
7,The Pillars of the Earth,11.510962
8,Sherlock Holmes Consulting Detective: The Tham...,11.314445
9,The Manhattan Project,11.297258


In [8]:
# Implicit-feedback recommender: uses the per-(uid, boardgame) play counts
# stored in the 'values' column as the feedback signal.
IF = ImplicitFeedbackRS(
    boardgame_play_amount,
    user_col = 'uid',
    item_col = 'boardgame',
    value_col = 'values',
    iteration = 10000,
)
IF.most_prefer_object('1awesomeguy')

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10000.0), HTML(value='')))




user,item,1awesomeguy
0,Leaving Earth,1.172474
1,Ludus Gladiatorius,1.153663
2,The Three Musketeers: The Queen's Pendants,1.10843
3,Dust Tactics,1.090361
4,1-48Combat,1.073837
5,Sooty Saves Sixpence,1.039146
6,Zombies all around us,1.037263
7,Planet Apocalypse,1.032443
8,Dungeon Hack: The Dice Game,1.031934
9,Zombies at the door,1.028518


### 평가 숫자가 많으면 평균 rating은 비슷하게 나타난다?
#### Issues to Overcome
+ 모집단 평균을 반영하지 못함
    + 극단적인 예시로, 모집단 평균이 6인데도 불구하고 prediction value가 0인 경우 존재
    + 이는 그냥 그 사람들이 그 물건을 싫어하는 것?

## Deep Learning Based RS
+ Input Layer: user one hot encoding vector
+ Output Layer: user rating vector


> Layer
1. Input Layer
2. Hidden Layer -> Hidden Layer -> ... -> Hidden Layer
3. Output Layer
    + Multiply the output by the non-zero outcome filter (mask) matrix
4. Calculate cost: `MSELoss`