# receive and transform data

The data should be received, then transformed (and then saved locally).

In [1]:
import sys
import yaml
sys.path.append('/home/joel/projects/driftlon/analysis')
sys.path.append('/home/joel/projects/driftlon')
sys.path.append('/home/joel/projects/driftlon/data_fetcher')
from analysis_utils import DataFetcher, transform_data_to_np
from get_from_db import DBReader
from write_to_db import DBWriter
import numpy as np
import pandas as pd
import tqdm

Load the MongoDB config and create the database clients, then load the data.

In [2]:
# Read only the `mongodb` section of the project config. The file is opened in a
# `with` block so the handle is closed as soon as parsing is done (the bare
# `open(...)` call used before left it open for the lifetime of the kernel).
# NOTE(review): the absolute path ties this notebook to one machine.
with open('/home/joel/projects/driftlon/config.yml', 'r') as config_file:
    mongo_config = yaml.safe_load(config_file)['mongodb']

# All three helpers are constructed from the same (address, username, password) triple.
mongo_credentials = (mongo_config['address'], mongo_config['username'], mongo_config['password'])
db_reader = DBReader(*mongo_credentials)
db_writer = DBWriter(*mongo_credentials)
fetcher = DataFetcher(*mongo_credentials)

In [3]:
# Alternative kept for reference: fetch an unfiltered batch instead.
# raw_data, target = fetcher.get_raw_data_batch(10, offset=10000)

# Fetch one filtered batch and unpack its three parts.
# Presumably 1000 is the batch size and `offset` the starting position - TODO confirm
# against `DataFetcher.get_filtered_data_batch`.
filtered_batch = fetcher.get_filtered_data_batch(1000, offset=3000)
raw_filtered_data, targets, player_ids = filtered_batch

### Convert to pandas

In [4]:
# Run every raw game through `transform_data_to_np`, then transpose the per-game
# results so that the first components end up in `data` and the second ones in
# `feature_names` (presumably (array, names) pairs - verify against analysis_utils).
transformed_games = [transform_data_to_np(raw_game) for raw_game in raw_filtered_data]
data, feature_names = list(zip(*transformed_games))

In [5]:
# Build the working DataFrame directly from the raw filtered records
# (not from the numpy-transformed `data` computed in the previous cell).
data_pd = pd.DataFrame(raw_filtered_data)
# The trailing `;` suppresses the rendered preview of the first rows.
data_pd.head();

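Load the config files: `meta_fields` lists the columns that get dropped, and `non_numeric_fields` is used further down to separate numeric from categorical columns.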

In [6]:
non_numeric_fields_path = './non_numeric_fields.yaml'

# Read the whole file first, then parse it once the handle is closed.
with open(non_numeric_fields_path, 'r') as file_:
    non_numeric_yaml = file_.read()

# BaseLoader builds only basic Python objects and leaves every scalar as a string.
non_numeric_fields = yaml.load(non_numeric_yaml, Loader=yaml.BaseLoader)
    

In [7]:
meta_fields_path = './meta_fields.yaml'

# Read the whole file first, then parse it once the handle is closed.
with open(meta_fields_path, 'r') as file_:
    meta_yaml = file_.read()

# BaseLoader builds only basic Python objects and leaves every scalar as a string.
meta_fields = yaml.load(meta_yaml, Loader=yaml.BaseLoader)

Drop the columns listed in `meta_fields`; they should not be used.

In [8]:
# Remove every column listed in `meta_fields`; `data_pd` itself is left untouched
# because `drop` returns a new frame.
dropped_pd = data_pd.drop(columns=meta_fields)
# The trailing `;` suppresses the rendered preview.
dropped_pd.head();

build pipelines for categorical and numeric data

Then use an imputer to fill in MISSING values.

Then add the player's SHARE of the team's kills/deaths/dmg etc.

(Then add the user's dmg/kills/deaths etc. PER MIN.)

In [9]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

from custom_transformer import TeamShareAdder

In [10]:
# Split the remaining columns into two lists, preserving column order:
# anything named in `non_numeric_fields` is categorical, the rest is numeric.
remaining_columns = list(dropped_pd.columns)
numeric_fields = [col for col in remaining_columns if col not in non_numeric_fields]
cat_fields = [col for col in remaining_columns if col in non_numeric_fields]

In [11]:
def aggregation_config_builder(stats):
    """Pair each stat name with its player-level and team-level column names.

    Args:
        stats: iterable of stat names, e.g. ``['kills', 'deaths']``.

    Returns:
        A list of ``('stats_<stat>', 'team_<stat>')`` tuples, one per entry of
        ``stats`` and in the same order.
    """
    column_pairs = []
    for stat in stats:
        column_pairs.append((f'stats_{stat}', f'team_{stat}'))
    return column_pairs

# Stats for which a (player column, team column) pair is built; the resulting
# config is handed to `TeamShareAdder` when the numeric pipeline is assembled.
team_share_stats = [
    'totalDamageDealtToChampions',
    'kills',
    'deaths',
    'assists',
    'totalDamageTaken',
    'goldEarned',
]
aggregation_config = aggregation_config_builder(team_share_stats)

In [12]:
# Numeric branch: first fill missing values with the column median, then run the
# custom `TeamShareAdder` step (given the numeric column names, the stat/team
# column pairs and the MongoDB config).
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('team_share_adder', TeamShareAdder(numeric_fields, aggregation_config, mongo_config)),
]
num_pipeline = Pipeline(numeric_steps)
# Imputer-only variant kept for reference:
# num_pipeline = Pipeline([('imputer', SimpleImputer(strategy='median'))])

In [13]:
# Categorical branch: a single one-hot encoding step with default settings.
one_hot_step = ('one_hot', OneHotEncoder())
cat_pipeline = Pipeline(steps=[one_hot_step])

In [14]:
# Route the numeric columns through `num_pipeline` and the lane column through
# `cat_pipeline`.
# NOTE(review): only 'timeline_lane' is encoded here even though `cat_fields` was
# computed above; any other categorical column is not passed to a transformer -
# confirm this is intentional.
full_pipeline = ColumnTransformer(
    transformers=[
        ('num', num_pipeline, numeric_fields),
        ('cat', cat_pipeline, ['timeline_lane']),
    ]
)

In [15]:
# Fit the column transformer on the cleaned frame and transform that same frame
# in a single call. The trailing `;` is harmless here: an assignment renders no
# output anyway.
data_post_pipeline = full_pipeline.fit_transform(dropped_pd);

In [16]:
# Wrap the pipeline output in a DataFrame. No column names are passed, so the
# columns get default integer labels (0..94 in the preview rendered below).
post_df = pd.DataFrame(data_post_pipeline)
# Last expression of the cell: renders the first five rows.
post_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,85,86,87,88,89,90,91,92,93,94
0,8.0,8.0,23.0,3.0,1.0,3.0,935.0,0.0,0.0,0.0,...,0.181818,0.489362,407.609195,0.340446,0.194152,0.0,0.0,0.0,0.0,1.0
1,10.0,3.0,3.0,7.0,1.0,2.0,1343.0,0.0,0.0,0.0,...,0.142857,0.103448,1018.296296,0.235488,0.251449,0.0,0.0,0.0,0.0,1.0
2,2.0,5.0,2.0,0.0,1.0,0.0,470.0,0.0,0.0,0.0,...,0.416667,0.060606,466.125,0.173754,0.259257,0.0,0.0,0.0,1.0,0.0
3,9.0,4.0,6.0,5.0,2.0,2.0,653.0,1.0,0.0,0.0,...,0.129032,0.206897,323.3,0.174682,0.251183,0.0,0.0,0.0,0.0,1.0
4,4.0,7.0,7.0,0.0,1.0,0.0,343.0,0.0,0.0,0.0,...,0.35,0.233333,305.571429,0.217337,0.200435,0.0,0.0,0.0,0.0,1.0


save the result

In [17]:
# Materialise the (position, row) pairs up front: a list has a length, so tqdm
# can show the total and an ETA, which a bare `enumerate` would not provide.
indexed_games = list(enumerate(post_df.itertuples()))

# Write each processed row together with the target and player id found at the
# same position in `targets` / `player_ids`.
for i, game in tqdm.tqdm(indexed_games):
    db_writer.write_processed_game(game, targets[i], player_ids[i])

100%|██████████| 1000/1000 [01:22<00:00, 12.09it/s]
