In [13]:
from sklearn.pipeline import Pipeline
from steps.feature import ImpactClassifier
from steps.feature import ImpactWeigher
from steps.filter import ChampionReleaseFilter
from steps.reader import PatchHistoryReader, ReleaseReader, PlayHistoryReader
from steps.transform import ImpactAggregator
import joblib

# Load the static inputs used by this and later cells. The reader classes are
# project-local (steps.reader); the return shapes are presumed from the method
# names `to_dict` / `to_df` — TODO confirm against their definitions.
cr_dict = ReleaseReader('data/champions.csv').to_dict()
skins_df = ReleaseReader('data/skins.csv').to_df()
play_df = PlayHistoryReader('data/playhistory.jsonl').to_df()
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code — only load model artifacts that come from a trusted source.
impact_model = joblib.load('model/impact_classifier.pickle')

# Patch-processing pipeline; the steps are applied in the order listed.
pipe = Pipeline([
    ('crf', ChampionReleaseFilter(cr_dict)),
    ('iclf', ImpactClassifier(impact_model)),
    ('iwg', ImpactWeigher()),
    ('iagg', ImpactAggregator())
])

# The pipeline is used for transform only; fit() is never called on it.
# NOTE(review): presumably every step is stateless or wraps a pre-trained model —
# confirm. Newer scikit-learn releases may warn about transforming with an
# unfitted Pipeline; verify against the installed version.
raw_df = PatchHistoryReader('data/patches.jsonl').to_df()
df = pipe.transform(raw_df)

In [201]:
from bisect import bisect_right

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin


class HistoryCombiner(BaseEstimator, TransformerMixin):
    """Combine per-patch champion changes with play statistics and skin releases.

    ``transform`` takes a frame ``X`` keyed by ``date`` (patch date) and
    ``champion`` and outer-joins two things onto it:

    * play metrics from ``play_df``, aggregated per (patch date, champion) with
      ``play_metrics_agg``;
    * the number of non-'Original' skins from ``skins_df`` released per
      (patch date, champion).

    Every play record / skin release is attributed to the first patch date that
    is strictly later than its own date (see ``upper_bound``).

    Parameters
    ----------
    play_df : pandas.DataFrame
        Play history. Must contain ``date`` and ``champion``; ``fill_play``
        additionally expects ``popularity``, ``winrate`` and ``banrate`` to be
        present after aggregation.
    skins_df : pandas.DataFrame
        Skin releases. Must contain ``skin``, ``release`` and ``champion``.
    play_metrics_agg : str or callable, default 'mean'
        Aggregation passed to ``DataFrameGroupBy.agg`` for the play metrics.
    """

    # Play-metric columns whose isolated gaps are interpolated by ``fill_play``.
    _PLAY_COLUMNS = ('popularity', 'winrate', 'banrate')

    def __init__(self, play_df, skins_df, play_metrics_agg='mean'):
        # scikit-learn estimators must store constructor arguments untouched:
        # ``sklearn.base.clone`` raises RuntimeError when ``__init__`` modifies a
        # parameter. The 'Original' skin filter therefore lives in
        # ``group_skins``; ``self.skins_df`` holds the frame exactly as passed.
        self.play_df = play_df
        self.skins_df = skins_df
        self.play_metrics_agg = play_metrics_agg

    def fit(self, X, y=None):
        """No-op; the transformer has nothing to learn. Returns ``self``."""
        return self

    def transform(self, X, y=None):
        """Join play metrics and skin counts onto ``X`` and clean the result.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain ``date`` and ``champion``. A ``patch`` column, if
            present, is dropped. ``diff``, ``buff`` and ``nerf`` are zero-filled
            for rows that only exist on the play/skin side of the join.
        y : ignored

        Returns
        -------
        pandas.DataFrame
            Sorted by (``champion``, ``date``), restricted to dates between the
            earliest raw play record and the latest date in ``X``, with a
            cumulative ``total_skins`` column. Rows that still contain any NaN
            after gap filling are dropped, and the index is reset.
        """
        dates = np.sort(X['date'].unique())
        play_df = self.group_play(dates)
        skins_df = self.group_skins(dates)
        X_ = (
            X.join(play_df, on=['date', 'champion'], how='outer')
            .join(skins_df, on=['date', 'champion'], how='outer')
            .fillna({'diff': 0, 'buff': 0, 'nerf': 0, 'skin': 0}, axis=0)
            # Tolerate inputs that never carried a 'patch' column.
            .drop(columns='patch', errors='ignore')
            .sort_values(by=['champion', 'date'])
        )
        # Lower bound comes from the raw play history, upper bound from X.
        min_date, max_date = self.play_df['date'].min(), X['date'].max()
        in_range = (min_date <= X_['date']) & (X_['date'] <= max_date)
        # reset_index gives fill_play / with_total_skins a fresh frame to mutate.
        X_ = X_[in_range].reset_index(drop=True)
        return self.with_total_skins(self.fill_play(X_))\
            .dropna(axis=0).reset_index(drop=True)

    def group_skins(self, dates):
        """Count non-'Original' skins per (patch date, champion).

        Returns a Series named ``skin`` indexed by (``date``, ``champion``).
        """
        released = self.skins_df[self.skins_df['skin'] != 'Original']
        return self.with_patch_date(dates, released, 'release')\
            .groupby(['date', 'champion'])['skin']\
            .count()

    def group_play(self, dates):
        """Aggregate play history per (patch date, champion).

        Returns a frame indexed by (``date``, ``champion``) produced by
        ``groupby(...).agg(self.play_metrics_agg)``.
        """
        return self.with_patch_date(dates, self.play_df, 'date')\
            .groupby(['date', 'champion'])\
            .agg(self.play_metrics_agg)

    def with_patch_date(self, dates, df, key):
        """Return a copy of ``df`` whose ``date`` column is the patch date.

        ``df[key]`` is mapped through ``upper_bound``; the result overwrites (or
        creates) the ``date`` column of the copy. ``df`` itself is not modified.
        """
        bounds_df = df.copy()
        bounds_df['date'] = df[key]\
            .apply(self.upper_bound, args=(dates,))
        return bounds_df

    def upper_bound(self, x, dates):
        """Return the first element of sorted ``dates`` strictly greater than ``x``.

        Returns ``None`` when no such element exists (including empty ``dates``).
        A value equal to a patch date therefore maps to the *next* patch date.
        """
        # bisect_right performs the same ``x < dates[mid]`` comparisons as a
        # hand-written upper-bound binary search.
        pos = bisect_right(dates, x)
        return dates[pos] if pos < len(dates) else None

    def fill_play(self, X):
        """Fill isolated gaps in the play metrics by averaging the neighbours.

        A missing value is replaced by the mean of the rows directly above and
        below it, but only when both of those rows belong to the same champion
        as the row being filled. If either neighbour is itself missing, the
        value stays NaN. ``X`` is expected to be ordered so that each
        champion's rows are contiguous (``transform`` sorts by champion, date).

        Neighbours are located by position, so the result does not depend on
        the index of ``X``. ``X`` is modified in place and returned.
        """
        champion = X['champion']
        # True only for rows that have a same-champion row on both sides.
        interior = champion.eq(champion.shift(1)) & champion.eq(champion.shift(-1))
        for col in self._PLAY_COLUMNS:
            values = X[col]
            # Both shifts read the original column, so a filled value is never
            # used as the neighbour of another gap.
            neighbour_mean = (values.shift(1) + values.shift(-1)) / 2
            X[col] = values.mask(values.isna() & interior, neighbour_mean)
        return X

    def with_total_skins(self, X):
        """Add ``total_skins``: the running sum of ``skin`` within each champion.

        The sum follows the current row order of ``X``. ``X`` is modified in
        place and returned.
        """
        X['total_skins'] = X.groupby('champion')['skin'].cumsum()
        return X


In [202]:
# Join the aggregated patch frame `df` with play history and skin releases.
# HistoryCombiner.fit is a no-op (it just returns self), so transform is called directly.
hdf = HistoryCombiner(play_df, skins_df).transform(df)
hdf

Unnamed: 0,date,champion,diff,buff,nerf,popularity,winrate,banrate,skin,total_skins
0,2014-03-18,Aatrox,0.0,0.0,0.0,0.041186,0.5147,0.008651,0.0,0.0
1,2014-04-03,Aatrox,0.0,0.0,0.0,0.044511,0.5064,0.002803,0.0,0.0
2,2014-04-21,Aatrox,0.0,0.0,0.0,0.041942,0.5036,0.004218,0.0,0.0
3,2014-05-08,Aatrox,0.0,0.0,0.0,0.035388,0.4981,0.005171,0.0,0.0
4,2014-05-22,Aatrox,0.0,0.0,0.0,0.026381,0.4941,0.006110,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
25079,2022-01-05,Zyra,0.0,0.0,0.0,0.040819,0.5108,0.019420,0.0,6.0
25080,2022-01-20,Zyra,0.0,0.0,0.0,0.031985,0.4984,0.010657,0.0,6.0
25081,2022-02-02,Zyra,0.0,0.0,0.0,0.033227,0.5162,0.011589,0.0,6.0
25082,2022-02-16,Zyra,0.0,0.0,0.0,0.034053,0.5159,0.011444,0.0,6.0
