### General information

This kernel is intented to help mlcourse.ai participants with doing EDA, Feature Engineering and building models.

* At first I'll do basic EDA of the data;
* After this I'll build a baseline model to see how good model can be on the basic data;
* Then I'll create new features based on the main features and train a model again to see whether there is an improvement;
* After this I'll try to extract new features from json files and see whether it helps;

In [10]:
# Libraries
import numpy as np
import pandas as pd
pd.set_option('max_columns', None)
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import collections
import datetime
import json
import ast
import time
from scipy import stats
import os
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn import metrics
from sklearn import linear_model
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_selection import GenericUnivariateSelect, SelectPercentile, SelectKBest, f_classif, mutual_info_classif, RFE
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold, cross_val_score, GridSearchCV, RepeatedStratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer

import xgboost as xgb
import lightgbm as lgb
import eli5
from eli5.sklearn import PermutationImportance
import shap
from tqdm import tqdm_notebook
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore')
from catboost import CatBoostClassifier
from itertools import combinations


# import json
import altair as alt
from  altair.vega import v3
from IPython.display import HTML
from IPython.display import display_html
!pip install ujson
import ujson as json

import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
from plotly import tools
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

import warnings
warnings.filterwarnings("ignore")
init_notebook_mode(connected=True)

SEED = 42



In [2]:

class ColumnDataProcessor:
    to_scale = True

    def replaceNaNValues(self, A):
        A[np.isnan(A)] = 0
        A[np.isinf(A)] = 0
        return A

    def add_team_features(self, df, c, r_columns, d_columns):
        drop_features = []
        # df['r_total_' + c] = df[r_columns].sum(1)
        # df['d_total_' + c] = df[d_columns].sum(1)
        # df['total_' + c + '_ratio'] = df['r_total_' + c] / df['d_total_' + c]
        # df['total_' + c + '_ratio'] = self.replaceNaNValues(df['total_' + c + '_ratio'])
        # drop_features = drop_features + ['r_total_' + c, 'd_total_' + c]

        df['r_std_' + c] = df[r_columns].std(1)
        df['d_std_' + c] = df[d_columns].std(1)
        df['std_' + c + '_ratio'] = df['r_std_' + c] / df['d_std_' + c]
        df['std_' + c + '_ratio'] = self.replaceNaNValues(df['std_' + c + '_ratio'])
        drop_features = drop_features + ['r_std_' + c, 'd_std_' + c]

        df['r_mean_' + c] = df[r_columns].mean(1)
        df['d_mean_' + c] = df[d_columns].mean(1)
        df['mean_' + c + '_ratio'] = df['r_mean_' + c] / df['d_mean_' + c]
        df['mean_' + c + '_ratio'] = self.replaceNaNValues(df['mean_' + c + '_ratio'])
        # drop_features = drop_features + ['r_mean_' + c, 'd_mean_' + c]

        df = df.drop(r_columns, axis=1)
        df = df.drop(d_columns, axis=1)
        df = df.drop(drop_features, axis=1)
        return df

    # As we see coordinate features (x and y) are quite important. However, I think we need to combine them into one
    # feature.Simplest idea is the distance from the left bottom corner. So, short distances mean near own base,
    # long distances - near the enemy base
    def make_coordinate_features(self, df):
        for team in 'r', 'd':
            players = [f'{team}{i}' for i in range(1, 6)]  # r1, r2...
            for player in players:
                df[f'{player}_distance'] = np.sqrt(df[f'{player}_x'] ** 2 + df[f'{player}_y'] ** 2)
                df.drop(columns=[f'{player}_x', f'{player}_y'], inplace=True)
        return df

    # def hot_feat_hero_id(self, df):
    #     for team in ['r', 'd']:
    #         for i in range(1, 6):
    #             df = pd.concat([df, pd.get_dummies(df[f'{team}{i}_hero_id'], prefix=f'{team}{i}_hero_id')], axis=1)
    #
    #     return df

    def hero_id_subset_analyzer(self, text):
        # it takes a string of hero ids (like '1 2 5 4 3') as input
        ids = set()
        for i in range(1, 4):  # we need all subset of lenght 1-3. I think longer combinations are not relevant
            hero_ids = text.split(' ')  # '1 2 5 4 3'-> ['1', '2', '5', '4', '3']
            hero_ids.sort()  # sort them as '1 2 5 4 3' and '3 1 4 5 3' should produce the same set of tokens
            combs = set(
                combinations(hero_ids, i))  # all combinations of length i e.g for 2 are: (1,2), (1,3)... (2,5)... etc
            ids = ids.union(combs)
        ids = {"_".join(item) for item in ids}  # convert from lists to string e.g. (1,2) -> '1_2'
        return ids

    def replace_hero_ids(self, train, test):
        vectorizer = TfidfVectorizer(self.hero_id_subset_analyzer, ngram_range=(1, 1), max_features=1000,
                                     tokenizer=lambda s: s.split())
        train = self.replace_hero_ids_df(train, vectorizer)
        test = self.replace_hero_ids_df(test, vectorizer, train=False)
        return train, test

    def replace_hero_ids_df(self, df, vectorizer, train=True):

        # ngram range is (1,1) as all combinations are created by analyser
        # 1000 features - I think it's enough to cover all heroes + popular combos

        for team in 'r', 'd':
            players = [f'{team}{i}' for i in range(1, 6)]  # r1, r2,...
            hero_columns = [f'{player}_hero_id' for player in players]  # r1_hero_id,....

            # combine all hero id columns into one
            df_hero_id_as_text = df[hero_columns].apply(lambda row: ' '.join([str(i) for i in row]), axis=1).tolist()

            if train:
                new_cols = pd.DataFrame(vectorizer.fit_transform(df_hero_id_as_text).todense(),
                                        columns=vectorizer.get_feature_names())
            else:
                new_cols = pd.DataFrame(vectorizer.transform(df_hero_id_as_text).todense(),
                                        columns=vectorizer.get_feature_names())

            # add index to vectorized dataset - needed for merge?
            new_cols['match_id_hash'] = df.index.values
            new_cols = new_cols.set_index('match_id_hash').add_prefix(f'{team}_hero_')  # e.g.r_hero_10_21

            # df = pd.merge(df, new_cols)
            df = pd.merge(df, new_cols, on='match_id_hash')
            df.drop(columns=hero_columns, inplace=True)

        return df

    def prepare_data(self, train, target, test, features_list):
        print('prepare_data.. Start')

        r_heroes = [f'r{i}_hero_id' for i in range(1, 6)]
        d_heroes = [f'd{i}_hero_id' for i in range(1, 6)]

        train = self.make_coordinate_features(train)
        test = self.make_coordinate_features(test)
        # As the distance is also a numeric feature convert it into the team features
        features_list = features_list + ['distance']

        print('prepare_data.. Adding team features')
        for c in features_list:
            r_columns = [f'r{i}_{c}' for i in range(1, 6)]
            d_columns = [f'd{i}_{c}' for i in range(1, 6)]

            train = self.add_team_features(train, c, r_columns, d_columns)
            test = self.add_team_features(test, c, r_columns, d_columns)

            if self.to_scale:
                features_to_scale = \
                    ['std_' + c + '_ratio', 'mean_' + c + '_ratio', 'r_mean_' + c, 'd_mean_' + c]
                #    ['total_' + c + '_ratio', 'std_' + c + '_ratio', 'mean_' + c + '_ratio']  # + r_heroes + d_heroes
                scaler = MinMaxScaler()
                train[features_to_scale] = scaler.fit_transform(train[features_to_scale])
                test[features_to_scale] = scaler.transform(test[features_to_scale])

        print('prepare_data.. Replace heroes id')

        train, test = self.replace_hero_ids(train, test)
        # train = self.hot_feat_hero_id(train)
        # test = self.hot_feat_hero_id(test)

        feat_to_drop = ['game_time', 'game_mode', 'lobby_type', 'objectives_len', 'chat_len']  # + r_heroes + d_heroes
        print('prepare_data.. Drop extra columns: {}'.format(feat_to_drop))
        train = train.drop(feat_to_drop, axis=1)
        test = test.drop(feat_to_drop, axis=1)

        return self.prepare_data_simple(train, target, test)

    def prepare_data_simple(self, train, targets, test):
        X = train.reset_index(drop=True)
        y = targets['radiant_win']
        X_test = test.reset_index(drop=True)

        # for col in train.columns:
        #     if train[col].isnull().any():
        #         print(col, train[col].isnull().sum())
        #
        # for col in test.columns:
        #     if test[col].isnull().any():
        #         print(col, test[col].isnull().sum())

        print("\n\nPrepared data frame: ")
        print(X.describe())
        print('Dimensions: train {}, test {}'.format(X.shape, X_test.shape))

        return X, y, X_test


class CSVDataPrepare:

    def read_data_frame(self):
        PATH_TO_DATA = '../input/'

        # Train dataset
        df_train_features = pd.read_csv(os.path.join(PATH_TO_DATA, 'train_features.csv'), index_col='match_id_hash')
        df_train_targets = pd.read_csv(os.path.join(PATH_TO_DATA, 'train_targets.csv'), index_col='match_id_hash')
        # Test dataset
        df_test_features = pd.read_csv(os.path.join(PATH_TO_DATA, 'test_features.csv'), index_col='match_id_hash')
        # Check if there is missing data
        print("Original data frame (CSV): ")
        # print('df_train_features.isnull() {}'.format(df_train_features.isnull().values.any()))
        # print('df_test_features.isnull() {}'.format(df_test_features.isnull().values.any()))
        print(df_train_features.shape)
        # print(df_train_features.index.values)
        return df_train_features, df_train_targets, df_test_features

    def prepareDataOld(self, train, target, test):
        # Let's combine train and test datasets in one dataset.
        # This allows for addding new features for both datasets at the same time.
        df_full_features = pd.concat([train, test])

        # Index to split the training and test data sets
        idx_split = train.shape[0]

        # That is,
        # df_train_features == df_full_features[:idx_split]
        # df_test_features == df_full_features[idx_split:]

        df_full_features.drop(['game_time', 'game_mode', 'lobby_type', 'objectives_len', 'chat_len'],
                              inplace=True, axis=1)

        # Clearly the hero_id is a categorical feature, so let's one-hot encode it. Note that according to wiki there are
        # 117 heroes, however in our dataset there are 116 heroes with ids 1, 2, ..., 114, 119, 120.
        # You will get the same result for all teams and players, here I use r1.
        np.sort(np.unique(df_full_features['r1_hero_id'].values.flatten()))

        for t in ['r', 'd']:
            for i in range(1, 6):
                df_full_features = pd.get_dummies(df_full_features, columns=[f'{t}{i}_hero_id'])
        #         df_full_features = pd.concat([df_full_features,
        #           pd.get_dummies(df_full_features[f'{t}{i}_hero_id'], prefix=f'{t}{i}_hero_id')], axis=1)

        # Finally let's scale the player-features that have relatively large values, such as gold, lh, xp etc.
        player_features = set(f[3:] for f in train.columns[5:])
        features_to_scale = []
        for t in ['r', 'd']:
            for i in range(1, 6):
                for f in player_features - {'hero_id', 'firstblood_claimed', 'teamfight_participation'}:
                    features_to_scale.append(f'{t}{i}_{f}')
        df_full_features_scaled = df_full_features.copy()
        df_full_features_scaled[features_to_scale] = MinMaxScaler().fit_transform(
            df_full_features_scaled[features_to_scale])

        df_full_features_scaled.head()
        df_full_features_scaled.max().sort_values(ascending=False).head(12)

        # Let's construct X and y arrays.
        X_train = df_full_features_scaled[:idx_split]
        X_test = df_full_features_scaled[idx_split:]
        y_train = target['radiant_win'].map({True: 1, False: 0})

        print(X_train.head())
        print(X_train.describe())

        # splitting whole dataset on train and test
        # X_train = data.loc[:test_index].drop(["y"], axis=1)
        # y_train = data.loc[:test_index]["y"]
        # X_test = data.loc[test_index:].drop(["y"], axis=1)
        # y_test = data.loc[test_index:]["y"]

        return X_train, X_test, y_train

    def prepareValidationTensors(self, X_train, X_test, y_train, test_size=0.2):
        # Perform a train/validation split
        X_train_part, X_valid, y_train_part, y_valid = train_test_split(X_train, y_train,
                                                                        test_size=test_size,
                                                                        random_state=SEED)

        # Convert to pytorch tensors
        X_train_tensor = torch.from_numpy(X_train_part.values).float()
        X_valid_tensor = torch.from_numpy(X_valid.values).float()
        y_train_tensor = torch.from_numpy(y_train_part.values).float()
        y_valid_tensor = torch.from_numpy(y_valid.values).float()
        X_test_tensor = torch.from_numpy(X_test.values).float()

        # Create the train and validation dataloaders
        train_dataset = data.TensorDataset(X_train_tensor, y_train_tensor)
        valid_dataset = data.TensorDataset(X_valid_tensor, y_valid_tensor)

        dataloaders = {'train': data.DataLoader(train_dataset, batch_size=1000, shuffle=True, num_workers=2),
                       'valid': data.DataLoader(valid_dataset, batch_size=1000, shuffle=False, num_workers=2)}
        return dataloaders, X_train_tensor, X_valid_tensor, y_train_tensor, y_valid_tensor, X_test_tensor

    # My idea behind this FE is the following: Let's take gold, for example. Gold earned by each player can't give
    # us a lot of information. But what is we take total gold by the team? Maybe teams with more gold earned usually
    # win. What if we take mean and std of players' gold in a team? Maybe teams where players tend to have similar
    # parameters are more likely to win. Let's try creating these features.
    FEATURES_LIST = ['kills', 'deaths', 'assists', 'denies', 'gold', 'lh', 'xp', 'health', 'max_health', 'max_mana',
                     'level', 'stuns', 'creeps_stacked', 'camps_stacked', 'rune_pickups',
                     'firstblood_claimed', 'teamfight_participation', 'towers_killed', 'roshans_killed', 'obs_placed',
                     'sen_placed']

    def prepare_data(self, train, target, test):
        engineering = ColumnDataProcessor()
        train, target, test = engineering.prepare_data(train, target, test, self.FEATURES_LIST)

        return train, target, test


class JsonDataPrepare:
    MATCH_FEATURES = [
        ('game_time', lambda m: m['game_time']),
        ('game_mode', lambda m: m['game_mode']),
        ('lobby_type', lambda m: m['lobby_type']),
        ('objectives_len', lambda m: len(m['objectives'])),
        ('chat_len', lambda m: len(m['chat'])),
    ]

    PLAYER_FIELDS = [
        'hero_id',

        'kills',
        'deaths',
        'assists',
        'denies',

        'gold',
        'lh',
        'xp',
        'health',
        'max_health',
        'max_mana',
        'level',

        'x',
        'y',

        'stuns',
        'creeps_stacked',
        'camps_stacked',
        'rune_pickups',
        'firstblood_claimed',
        'teamfight_participation',
        'towers_killed',
        'roshans_killed',
        'obs_placed',
        'sen_placed',
    ]

    def extract_features_csv(self, match):
        row = [
            ('match_id_hash', match['match_id_hash']),
        ]

        for field, f in self.MATCH_FEATURES:
            row.append((field, f(match)))

        for slot, player in enumerate(match['players']):
            if slot < 5:
                player_name = 'r%d' % (slot + 1)
            else:
                player_name = 'd%d' % (slot - 4)

            for field in self.PLAYER_FIELDS:
                column_name = '%s_%s' % (player_name, field)
                row.append((column_name, player[field]))
            row.append((f'{player_name}_ability_level', len(player['ability_upgrades'])))
            row.append((f'{player_name}_max_hero_hit', player['max_hero_hit']['value']))
            row.append((f'{player_name}_purchase_count', len(player['purchase_log'])))
            row.append((f'{player_name}_count_ability_use', sum(player['ability_uses'].values())))
            row.append((f'{player_name}_damage_dealt', sum(player['damage'].values())))
            row.append((f'{player_name}_damage_received', sum(player['damage_taken'].values())))

        return collections.OrderedDict(row)

    def extract_targets_csv(self, match, targets):
        return collections.OrderedDict([('match_id_hash', match['match_id_hash'])] + [
            (field, targets[field])
            for field in ['game_time', 'radiant_win', 'duration', 'time_remaining', 'next_roshan_team']
        ])

    def read_matches(self, matches_file):
        MATCHES_COUNT = {
            'test_matches.jsonl': 10000,
            'train_matches.jsonl': 39675,
        }
        _, filename = os.path.split(matches_file)
        total_matches = MATCHES_COUNT.get(filename)

        with open(matches_file) as fin:
            for line in tqdm_notebook(fin, total=total_matches):
                yield json.loads(line)

    def read_data_frame(self):
        PATH_TO_DATA = '../input/'
        df_new_features = []
        df_new_targets = []

        for match in self.read_matches(os.path.join(PATH_TO_DATA, 'train_matches.jsonl')):
            # match_id_hash = match['match_id_hash']
            features = self.extract_features_csv(match)
            targets = self.extract_targets_csv(match, match['targets'])

            df_new_features.append(features)
            df_new_targets.append(targets)

        df_new_features = pd.DataFrame.from_records(df_new_features).set_index('match_id_hash')
        df_new_targets = pd.DataFrame.from_records(df_new_targets).set_index('match_id_hash')

        test_new_features = []
        for match in self.read_matches(os.path.join(PATH_TO_DATA, 'test_matches.jsonl')):
            # match_id_hash = match['match_id_hash']
            features = self.extract_features_csv(match)

            test_new_features.append(features)

        test_new_features = pd.DataFrame.from_records(test_new_features).set_index('match_id_hash')

        print("Original data frame (JSON): ")
        print(df_new_features.shape)

        return df_new_features, df_new_targets, test_new_features

    FEATURES_LIST = ['kills', 'deaths', 'assists', 'denies', 'gold', 'lh', 'xp', 'health', 'max_health', 'max_mana',
                     'level', 'stuns', 'creeps_stacked', 'camps_stacked', 'rune_pickups',
                     'firstblood_claimed', 'teamfight_participation', 'towers_killed', 'roshans_killed', 'obs_placed',
                     'sen_placed', 'ability_level', 'max_hero_hit', 'purchase_count', 'count_ability_use',
                     'damage_dealt', 'damage_received']

    def prepare_data(self, train, target, test):
        engineering = ColumnDataProcessor()
        train, target, test = engineering.prepare_data(train, target, test, self.FEATURES_LIST)

        return train, target, test


data_loader_csv = CSVDataPrepare()
data_loader_json = JsonDataPrepare()
data_loader = data_loader_json
print('added')

added


In [3]:
df_train_features, df_train_targets, df_test_features = data_loader.read_data_frame();

Original data frame (CSV): 
(39675, 245)


In [4]:
# Data check
df_train_features.head()


Unnamed: 0_level_0,game_time,game_mode,lobby_type,objectives_len,chat_len,r1_hero_id,r1_kills,r1_deaths,r1_assists,r1_denies,r1_gold,r1_lh,r1_xp,r1_health,r1_max_health,r1_max_mana,r1_level,r1_x,r1_y,r1_stuns,r1_creeps_stacked,r1_camps_stacked,r1_rune_pickups,r1_firstblood_claimed,r1_teamfight_participation,r1_towers_killed,r1_roshans_killed,r1_obs_placed,r1_sen_placed,r2_hero_id,r2_kills,r2_deaths,r2_assists,r2_denies,r2_gold,r2_lh,r2_xp,r2_health,r2_max_health,r2_max_mana,r2_level,r2_x,r2_y,r2_stuns,r2_creeps_stacked,r2_camps_stacked,r2_rune_pickups,r2_firstblood_claimed,r2_teamfight_participation,r2_towers_killed,r2_roshans_killed,r2_obs_placed,r2_sen_placed,r3_hero_id,r3_kills,r3_deaths,r3_assists,r3_denies,r3_gold,r3_lh,r3_xp,r3_health,r3_max_health,r3_max_mana,r3_level,r3_x,r3_y,r3_stuns,r3_creeps_stacked,r3_camps_stacked,r3_rune_pickups,r3_firstblood_claimed,r3_teamfight_participation,r3_towers_killed,r3_roshans_killed,r3_obs_placed,r3_sen_placed,r4_hero_id,r4_kills,r4_deaths,r4_assists,r4_denies,r4_gold,r4_lh,r4_xp,r4_health,r4_max_health,r4_max_mana,r4_level,r4_x,r4_y,r4_stuns,r4_creeps_stacked,r4_camps_stacked,r4_rune_pickups,r4_firstblood_claimed,r4_teamfight_participation,r4_towers_killed,r4_roshans_killed,r4_obs_placed,r4_sen_placed,r5_hero_id,r5_kills,r5_deaths,r5_assists,r5_denies,r5_gold,r5_lh,r5_xp,r5_health,r5_max_health,r5_max_mana,r5_level,r5_x,r5_y,r5_stuns,r5_creeps_stacked,r5_camps_stacked,r5_rune_pickups,r5_firstblood_claimed,r5_teamfight_participation,r5_towers_killed,r5_roshans_killed,r5_obs_placed,r5_sen_placed,d1_hero_id,d1_kills,d1_deaths,d1_assists,d1_denies,d1_gold,d1_lh,d1_xp,d1_health,d1_max_health,d1_max_mana,d1_level,d1_x,d1_y,d1_stuns,d1_creeps_stacked,d1_camps_stacked,d1_rune_pickups,d1_firstblood_claimed,d1_teamfight_participation,d1_towers_killed,d1_roshans_killed,d1_obs_placed,d1_sen_placed,d2_hero_id,d2_kills,d2_deaths,d2_assists,d2_denies,d2_gold,d2_lh,d2_xp,d2_health,d2_max_health,d2_max_mana,d2_level,d2_x,d2_y,d2_stuns,d2_creeps_stacked,d2_camps_stacked,d2_rune_pickups,d2_firstblood_claimed,d2_teamfight_participation,d2_towers_killed,d2_roshans_killed,d2_obs_placed,d2_sen_placed,d3_hero_id,d3_kills,d3_deaths,d3_assists,d3_denies,d3_gold,d3_lh,d3_xp,d3_health,d3_max_health,d3_max_mana,d3_level,d3_x,d3_y,d3_stuns,d3_creeps_stacked,d3_camps_stacked,d3_rune_pickups,d3_firstblood_claimed,d3_teamfight_participation,d3_towers_killed,d3_roshans_killed,d3_obs_placed,d3_sen_placed,d4_hero_id,d4_kills,d4_deaths,d4_assists,d4_denies,d4_gold,d4_lh,d4_xp,d4_health,d4_max_health,d4_max_mana,d4_level,d4_x,d4_y,d4_stuns,d4_creeps_stacked,d4_camps_stacked,d4_rune_pickups,d4_firstblood_claimed,d4_teamfight_participation,d4_towers_killed,d4_roshans_killed,d4_obs_placed,d4_sen_placed,d5_hero_id,d5_kills,d5_deaths,d5_assists,d5_denies,d5_gold,d5_lh,d5_xp,d5_health,d5_max_health,d5_max_mana,d5_level,d5_x,d5_y,d5_stuns,d5_creeps_stacked,d5_camps_stacked,d5_rune_pickups,d5_firstblood_claimed,d5_teamfight_participation,d5_towers_killed,d5_roshans_killed,d5_obs_placed,d5_sen_placed
match_id_hash,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1
a400b8f29dece5f4d266f49f1ae2e98a,155,22,7,1,11,11,0,0,0,0,543,7,533,358,600,350.93784,2,116,122,0.0,0,0,1,0,0.0,0,0,0,0,78,0,0,0,3,399,4,478,636,720,254.93774,2,124,126,0.0,0,0,0,0,0.0,0,0,0,0,14,0,1,0,0,304,0,130,700,700,242.93773,1,70,156,0.0,0,0,1,0,0.0,0,0,0,0,59,0,0,0,1,389,4,506,399,700,326.9378,2,170,86,0.0,0,0,0,0,0.0,0,0,0,0,77,0,0,0,0,402,10,344,422,800,314.9378,2,120,100,0.0,0,0,0,0,0.0,0,0,0,0,12,0,0,1,13,982,12,780,650,720,386.93787,3,82,170,0.0,0,0,1,0,1.0,0,0,0,0,21,0,0,0,6,788,9,706,640,640,422.9379,3,174,90,0.0,0,0,2,0,0.0,0,0,0,0,60,0,0,0,1,531,0,307,720,720,242.93773,2,180,84,0.299948,0,0,2,0,0.0,0,0,0,0,84,1,0,0,0,796,0,421,760,760,326.9378,2,90,150,0.0,0,0,2,1,1.0,0,0,1,0,34,0,0,0,0,851,11,870,593,680,566.93805,3,128,128,0.0,0,0,0,0,0.0,0,0,0,0
b9c57c450ce74a2af79c9ce96fac144d,658,4,0,3,10,15,7,2,0,7,5257,52,3937,1160,1160,566.93805,8,76,78,0.0,0,0,0,0,0.4375,0,0,0,0,96,3,1,2,3,3394,19,3897,1352,1380,386.93787,8,78,166,8.397949,0,0,4,0,0.3125,0,0,0,0,27,1,1,4,2,2212,4,2561,710,860,530.938,6,156,146,11.964951,2,1,4,0,0.3125,0,0,3,1,63,4,0,3,12,4206,38,4459,420,880,482.93796,9,154,148,0.0,0,0,3,0,0.4375,0,0,1,2,89,1,0,5,4,3103,14,2712,856,900,446.93793,6,150,148,21.697395,0,0,2,0,0.375,1,0,0,0,58,1,2,0,4,2823,24,3281,700,700,686.9382,7,88,170,3.165901,1,1,3,0,0.25,0,0,1,0,14,1,6,0,1,2466,17,2360,758,1040,326.9378,6,156,98,0.06665,0,0,1,1,0.25,0,0,4,2,1,1,3,1,7,3624,29,3418,485,800,350.93784,7,124,144,0.299955,2,1,4,0,0.5,0,0,0,0,56,0,3,2,3,2808,18,2730,567,1160,410.9379,6,124,142,0.0,0,0,6,0,0.5,0,0,0,0,92,0,2,0,1,1423,8,1136,800,800,446.93793,4,180,176,0.0,0,0,0,0,0.0,0,0,0,0
6db558535151ea18ca70a6892197db41,21,23,0,0,0,101,0,0,0,0,176,0,0,680,680,506.938,1,118,118,0.0,0,0,0,0,0.0,0,0,0,0,51,0,0,0,0,176,0,0,720,720,278.93777,1,156,104,0.0,0,0,0,0,0.0,0,0,0,0,44,0,0,0,0,176,0,0,568,600,254.93774,1,78,144,0.0,0,0,1,0,0.0,0,0,0,0,49,0,0,0,0,176,0,0,580,580,254.93774,1,150,78,0.0,0,0,1,0,0.0,0,0,0,0,53,0,0,0,0,176,0,0,580,580,374.93787,1,78,142,0.0,0,0,1,0,0.0,0,0,0,0,18,0,0,0,0,96,0,0,660,660,266.93774,1,180,178,0.0,0,0,0,0,0.0,0,0,0,0,67,0,0,0,0,96,0,0,586,620,278.93777,1,100,174,0.0,0,0,0,0,0.0,0,0,0,0,47,0,0,0,0,96,0,0,660,660,290.93777,1,178,112,0.0,0,0,1,0,0.0,0,0,0,0,40,0,0,0,0,96,0,0,600,600,302.93777,1,176,110,0.0,0,0,0,0,0.0,0,0,0,0,17,0,0,0,0,96,0,0,640,640,446.93793,1,162,162,0.0,0,0,0,0,0.0,0,0,0,0
46a0ddce8f7ed2a8d9bd5edcbb925682,576,22,7,1,4,14,1,0,3,1,1613,0,1471,900,900,290.93777,4,170,96,2.366089,0,0,5,0,0.571429,0,0,0,0,99,1,0,1,2,2816,30,3602,878,1100,494.93796,8,82,154,0.0,0,0,1,0,0.285714,0,0,0,0,101,3,1,1,9,4017,44,4811,980,980,902.93835,9,126,128,0.0,0,0,2,1,0.571429,0,0,2,0,26,1,1,2,1,1558,2,1228,640,640,422.9379,4,120,138,7.098264,0,0,5,0,0.428571,0,0,2,0,41,0,0,1,30,3344,55,3551,1079,1100,362.93784,7,176,94,1.932884,0,0,0,0,0.142857,0,0,0,0,18,0,0,0,0,2712,69,2503,825,1160,338.93784,6,94,158,0.0,3,1,4,0,0.0,0,0,0,0,98,1,3,0,5,2217,23,3310,735,880,506.938,7,126,142,0.0,0,0,1,0,0.5,0,0,1,0,8,0,1,1,6,3035,44,2508,817,860,350.93784,6,78,160,0.0,0,0,1,0,0.5,0,0,0,0,69,0,2,0,0,2004,16,1644,1160,1160,386.93787,4,176,100,4.998863,0,0,2,0,0.0,0,0,0,0,86,0,1,0,1,1333,2,1878,630,740,518.938,5,82,160,8.664527,3,1,3,0,0.0,0,0,2,0
b1b35ff97723d9b7ade1c9c3cf48f770,453,22,7,1,3,42,0,1,1,0,1404,9,1351,1000,1000,338.93784,4,80,164,9.930903,0,0,4,0,0.5,0,0,0,0,69,1,0,0,0,1840,14,1693,868,1000,350.93784,5,78,166,1.832892,0,0,0,1,0.5,0,0,0,0,27,0,1,0,0,1204,10,3210,578,860,792.93823,7,120,122,3.499146,0,0,0,0,0.0,0,0,0,0,104,0,0,2,0,1724,21,1964,777,980,434.93793,5,138,94,0.0,0,0,1,0,1.0,0,0,0,0,65,1,2,0,0,1907,8,1544,281,820,446.93793,4,174,100,0.0,0,0,6,0,0.5,0,0,0,0,23,1,0,0,0,1422,10,1933,709,940,362.93784,5,84,170,11.03072,0,0,1,0,0.25,0,0,0,0,22,1,0,0,1,1457,12,1759,712,820,482.93796,5,174,106,2.199399,0,0,1,0,0.25,0,0,0,0,35,0,0,1,2,2402,35,3544,349,720,434.93793,7,128,126,0.0,0,0,2,0,0.25,0,0,0,0,72,2,1,0,0,1697,12,1651,680,680,374.93787,4,176,108,13.596678,0,0,2,0,0.5,0,0,0,0,1,0,1,1,8,2199,32,1919,692,740,302.93777,5,104,162,0.0,2,1,2,0,0.25,0,0,0,0


In [5]:
# --== PREPARE DATA ==--
X_train, y_train, X_test = data_loader.prepare_data(df_train_features.copy(), df_train_targets.copy(), df_test_features.copy())

print(X_train.shape)
X_train.describe()

prepare_data.. Start
prepare_data.. Adding team features
prepare_data.. Replace heroes id
prepare_data.. Drop extra columns: ['game_time', 'game_mode', 'lobby_type', 'objectives_len', 'chat_len']


Prepared data frame: 
       std_kills_ratio  r_mean_kills  d_mean_kills  mean_kills_ratio  \
count     39675.000000  39675.000000  39675.000000      39675.000000   
mean          0.061604      0.202468      0.191507          0.033854   
std           0.055005      0.170373      0.164446          0.033886   
min           0.000000      0.000000      0.000000          0.000000   
25%           0.028271      0.051948      0.048780          0.016317   
50%           0.051111      0.168831      0.158537          0.028189   
75%           0.081119      0.311688      0.292683          0.042161   
max           1.000000      1.000000      1.000000          1.000000   

       std_deaths_ratio  r_mean_deaths  d_mean_deaths  mean_deaths_ratio  \
count      39675.000000   39675.000000   39675.000000  

Unnamed: 0,std_kills_ratio,r_mean_kills,d_mean_kills,mean_kills_ratio,std_deaths_ratio,r_mean_deaths,d_mean_deaths,mean_deaths_ratio,std_assists_ratio,r_mean_assists,d_mean_assists,mean_assists_ratio,std_denies_ratio,r_mean_denies,d_mean_denies,mean_denies_ratio,std_gold_ratio,r_mean_gold,d_mean_gold,mean_gold_ratio,std_lh_ratio,r_mean_lh,d_mean_lh,mean_lh_ratio,std_xp_ratio,r_mean_xp,d_mean_xp,mean_xp_ratio,std_health_ratio,r_mean_health,d_mean_health,mean_health_ratio,std_max_health_ratio,r_mean_max_health,d_mean_max_health,mean_max_health_ratio,std_max_mana_ratio,r_mean_max_mana,d_mean_max_mana,mean_max_mana_ratio,std_level_ratio,r_mean_level,d_mean_level,mean_level_ratio,std_stuns_ratio,r_mean_stuns,d_mean_stuns,mean_stuns_ratio,std_creeps_stacked_ratio,r_mean_creeps_stacked,d_mean_creeps_stacked,mean_creeps_stacked_ratio,std_camps_stacked_ratio,r_mean_camps_stacked,d_mean_camps_stacked,mean_camps_stacked_ratio,std_rune_pickups_ratio,r_mean_rune_pickups,d_mean_rune_pickups,mean_rune_pickups_ratio,std_firstblood_claimed_ratio,r_mean_firstblood_claimed,d_mean_firstblood_claimed,mean_firstblood_claimed_ratio,std_teamfight_participation_ratio,r_mean_teamfight_participation,d_mean_teamfight_participation,mean_teamfight_participation_ratio,std_towers_killed_ratio,r_mean_towers_killed,d_mean_towers_killed,mean_towers_killed_ratio,std_roshans_killed_ratio,r_mean_roshans_killed,d_mean_roshans_killed,mean_roshans_killed_ratio,std_obs_placed_ratio,r_mean_obs_placed,d_mean_obs_placed,mean_obs_placed_ratio,std_sen_placed_ratio,r_mean_sen_placed,d_mean_sen_placed,mean_sen_placed_ratio,std_distance_ratio,r_mean_distance,d_mean_distance,mean_distance_ratio,r_hero_1,r_hero_10,r_hero_100,r_hero_101,r_hero_102,r_hero_103,r_hero_104,r_hero_105,r_hero_106,r_hero_107,r_hero_108,r_hero_109,r_hero_11,r_hero_110,r_hero_111,r_hero_112,r_hero_113,r_hero_114,r_hero_119,r_hero_12,r_hero_120,r_hero_13,r_hero_14,r_hero_15,r_hero_16,r_hero_17,r_hero_18,r_hero_19,r_hero_2,r_hero_20,r_hero_21,r_hero_22,r_hero_23,r_hero_25,r_hero_26,r_hero_27,r_hero_28,r_hero_29,r_hero_3,r_hero_30,r_hero_31,r_hero_32,r_hero_33,r_hero_34,r_hero_35,r_hero_36,r_hero_37,r_hero_38,r_hero_39,r_hero_4,r_hero_40,r_hero_41,r_hero_42,r_hero_43,r_hero_44,r_hero_45,r_hero_46,r_hero_47,r_hero_48,r_hero_49,r_hero_5,r_hero_50,r_hero_51,r_hero_52,r_hero_53,r_hero_54,r_hero_55,r_hero_56,r_hero_57,r_hero_58,r_hero_59,r_hero_6,r_hero_60,r_hero_61,r_hero_62,r_hero_63,r_hero_64,r_hero_65,r_hero_66,r_hero_67,r_hero_68,r_hero_69,r_hero_7,r_hero_70,r_hero_71,r_hero_72,r_hero_73,r_hero_74,r_hero_75,r_hero_76,r_hero_77,r_hero_78,r_hero_79,r_hero_8,r_hero_80,r_hero_81,r_hero_82,r_hero_83,r_hero_84,r_hero_85,r_hero_86,r_hero_87,r_hero_88,r_hero_89,r_hero_9,r_hero_90,r_hero_91,r_hero_92,r_hero_93,r_hero_94,r_hero_95,r_hero_96,r_hero_97,r_hero_98,r_hero_99,d_hero_1,d_hero_10,d_hero_100,d_hero_101,d_hero_102,d_hero_103,d_hero_104,d_hero_105,d_hero_106,d_hero_107,d_hero_108,d_hero_109,d_hero_11,d_hero_110,d_hero_111,d_hero_112,d_hero_113,d_hero_114,d_hero_119,d_hero_12,d_hero_120,d_hero_13,d_hero_14,d_hero_15,d_hero_16,d_hero_17,d_hero_18,d_hero_19,d_hero_2,d_hero_20,d_hero_21,d_hero_22,d_hero_23,d_hero_25,d_hero_26,d_hero_27,d_hero_28,d_hero_29,d_hero_3,d_hero_30,d_hero_31,d_hero_32,d_hero_33,d_hero_34,d_hero_35,d_hero_36,d_hero_37,d_hero_38,d_hero_39,d_hero_4,d_hero_40,d_hero_41,d_hero_42,d_hero_43,d_hero_44,d_hero_45,d_hero_46,d_hero_47,d_hero_48,d_hero_49,d_hero_5,d_hero_50,d_hero_51,d_hero_52,d_hero_53,d_hero_54,d_hero_55,d_hero_56,d_hero_57,d_hero_58,d_hero_59,d_hero_6,d_hero_60,d_hero_61,d_hero_62,d_hero_63,d_hero_64,d_hero_65,d_hero_66,d_hero_67,d_hero_68,d_hero_69,d_hero_7,d_hero_70,d_hero_71,d_hero_72,d_hero_73,d_hero_74,d_hero_75,d_hero_76,d_hero_77,d_hero_78,d_hero_79,d_hero_8,d_hero_80,d_hero_81,d_hero_82,d_hero_83,d_hero_84,d_hero_85,d_hero_86,d_hero_87,d_hero_88,d_hero_89,d_hero_9,d_hero_90,d_hero_91,d_hero_92,d_hero_93,d_hero_94,d_hero_95,d_hero_96,d_hero_97,d_hero_98,d_hero_99
count,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0
mean,0.061604,0.202468,0.191507,0.033854,0.073844,0.190137,0.205605,0.036635,0.069143,0.143377,0.135277,0.028159,0.031943,0.242268,0.209353,0.018335,0.018475,0.168558,0.166036,0.012738,0.051708,0.135105,0.132774,0.07986,0.01061,0.280882,0.293306,0.082808,0.002525,0.226354,0.227467,0.001141,0.062983,0.218447,0.214365,0.329352,0.061282,0.239185,0.256154,0.267203,0.083583,0.428357,0.42878,0.277379,6.292326e-05,0.106498,0.10077,0.428482,0.010864,0.028702,0.036881,0.011152,0.035677,0.062744,0.047832,0.028889,0.051274,0.145044,0.182215,0.063235,0.0,0.464726,0.454518,0.0,0.055651,0.526488,0.419326,0.183333,0.066324,0.140277,0.129416,0.054707,0.005018,0.029177,0.026954,0.005189,0.056678,0.202335,0.200568,0.049728,0.024736,0.046427,0.051526,0.015981,0.001062,0.47526,0.566538,0.419004,0.02216,0.021637,0.023851,0.023487,0.01263,0.008376,0.026916,0.011698,0.015602,0.010252,0.021242,0.016351,0.050929,0.01359,0.008645,0.008945,0.006253,0.027273,0.026244,0.024335,0.024305,0.010538,0.064363,0.01067,0.025757,0.020858,0.0188,0.020479,0.022805,0.017022,0.03114,0.02889,0.028622,0.019248,0.027326,0.030017,0.015222,0.01307,0.014217,0.030794,0.016125,0.042946,0.011316,0.022228,0.035613,0.022174,0.009694,0.007082,0.017212,0.017492,0.023122,0.034667,0.033963,0.011094,0.034408,0.009785,0.014842,0.022445,0.022657,0.024102,0.034252,0.013837,0.016338,0.010211,0.01837,0.02794,0.00506,0.02377,0.017287,0.009513,0.022965,0.015765,0.009409,0.008555,0.02476,0.016269,0.01927,0.00541,0.003465,0.015627,0.019019,0.017578,0.021817,0.012103,0.022484,0.01765,0.010611,0.038613,0.015747,0.024582,0.011479,0.008373,0.014222,0.043154,0.005306,0.018173,0.005849,0.008899,0.021214,0.009696,0.03164,0.024732,0.015703,0.006855,0.022654,0.009728,0.01298,0.009405,0.021968,0.013538,0.023321,0.011017,0.014165,0.014287,0.030581,0.021685,0.02274,0.023589,0.024531,0.012834,0.008359,0.027715,0.012682,0.016745,0.010362,0.021549,0.014848,0.04968,0.013368,0.008502,0.008467,0.006309,0.027034,0.025834,0.02501,0.023497,0.010907,0.063715,0.01074,0.025646,0.020872,0.019789,0.019467,0.023079,0.017264,0.030642,0.028408,0.0287,0.018411,0.026338,0.029797,0.016037,0.013366,0.013643,0.030612,0.016076,0.044552,0.010381,0.02287,0.035604,0.023178,0.008948,0.00726,0.01641,0.017538,0.023169,0.034438,0.034109,0.01112,0.034994,0.010334,0.015151,0.02216,0.023298,0.024125,0.032998,0.013708,0.016702,0.009295,0.01955,0.028932,0.004731,0.023052,0.017957,0.009445,0.022582,0.015183,0.010262,0.00861,0.024889,0.017607,0.019361,0.005235,0.003957,0.014977,0.018932,0.017781,0.022719,0.011986,0.02146,0.018432,0.011044,0.039401,0.015946,0.023915,0.011408,0.008418,0.014632,0.042018,0.00524,0.017845,0.00571,0.009918,0.020804,0.009856,0.030886,0.024985,0.015323,0.006861,0.022363,0.008227,0.011995,0.010025,0.022373,0.013472,0.022881,0.011272,0.013751,0.014716,0.03097
std,0.055005,0.170373,0.164446,0.033886,0.058744,0.161333,0.17095,0.035126,0.063916,0.140974,0.135114,0.033304,0.032193,0.151012,0.131188,0.018869,0.012932,0.139372,0.139129,0.01058,0.026759,0.116595,0.116284,0.029934,0.013773,0.238614,0.251207,0.023237,0.005386,0.110375,0.111774,0.005139,0.045237,0.14302,0.146852,0.080216,0.050116,0.156798,0.171347,0.095246,0.053048,0.269518,0.271793,0.06216,0.006630529,0.108324,0.101018,0.003836,0.031525,0.050994,0.062913,0.034621,0.069989,0.09207,0.068374,0.063454,0.039807,0.102051,0.129522,0.047567,0.0,0.49876,0.497933,0.0,0.046115,0.191535,0.155003,0.08295,0.10668,0.175758,0.173326,0.104712,0.037951,0.084055,0.082375,0.040527,0.053786,0.16284,0.162527,0.052002,0.045645,0.068518,0.077547,0.03189,0.00538,0.149022,0.144581,0.075733,0.097575,0.097078,0.100032,0.099944,0.079327,0.067234,0.104896,0.077164,0.08595,0.072978,0.09698,0.087319,0.122657,0.082049,0.068163,0.069149,0.059259,0.104671,0.103728,0.100796,0.101093,0.073725,0.124833,0.073943,0.10286,0.095777,0.091988,0.095164,0.099241,0.088713,0.10963,0.106777,0.10648,0.092952,0.105055,0.108305,0.085256,0.080726,0.083024,0.108815,0.087216,0.118512,0.076105,0.097891,0.113017,0.098069,0.071271,0.062542,0.089005,0.089806,0.099283,0.112779,0.111938,0.075006,0.112162,0.071641,0.084224,0.098032,0.098318,0.100725,0.112606,0.082464,0.087783,0.072656,0.091553,0.105466,0.054342,0.100072,0.089894,0.070944,0.098729,0.086073,0.070278,0.067848,0.101262,0.087523,0.093162,0.055776,0.045738,0.08617,0.092858,0.090292,0.097338,0.078001,0.098389,0.089675,0.073907,0.115793,0.086501,0.101085,0.076035,0.067393,0.083025,0.118971,0.055297,0.090887,0.057755,0.068897,0.096593,0.071385,0.109767,0.1017,0.086411,0.061589,0.098444,0.071625,0.080187,0.070174,0.097501,0.081344,0.099509,0.075449,0.083334,0.083559,0.109388,0.097013,0.098647,0.099574,0.101278,0.080006,0.066993,0.105819,0.079781,0.088305,0.073225,0.097244,0.084267,0.121934,0.081583,0.067635,0.067529,0.059516,0.104425,0.103068,0.101611,0.100125,0.074957,0.124832,0.074001,0.102844,0.095637,0.093822,0.093469,0.099442,0.089184,0.109134,0.106161,0.106502,0.091288,0.103912,0.107855,0.08712,0.081442,0.081774,0.10888,0.087217,0.119339,0.073374,0.09875,0.112969,0.09965,0.069156,0.063219,0.08752,0.089917,0.099293,0.112531,0.112096,0.075092,0.112727,0.073285,0.084955,0.097469,0.099226,0.100565,0.111464,0.082015,0.088588,0.069865,0.093446,0.106602,0.052867,0.099015,0.091056,0.070696,0.098066,0.084751,0.072967,0.067884,0.101513,0.090319,0.093124,0.055018,0.048602,0.084494,0.092891,0.090472,0.098752,0.077671,0.096508,0.091218,0.075082,0.116473,0.086931,0.100101,0.075832,0.067358,0.084086,0.118252,0.054984,0.090295,0.057165,0.071947,0.095998,0.071815,0.108932,0.101912,0.085648,0.061636,0.097865,0.066775,0.077632,0.072331,0.09826,0.081105,0.09856,0.07594,0.0824,0.084768,0.109802
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.028271,0.051948,0.04878,0.016317,0.038313,0.05814,0.063291,0.01859,0.030221,0.030488,0.028571,0.012018,0.01357,0.124031,0.112583,0.008617,0.011585,0.0555,0.054341,0.010821,0.036431,0.043239,0.041391,0.063318,0.006321,0.084893,0.087303,0.072808,0.001375,0.145696,0.145315,0.000816,0.034793,0.100163,0.093871,0.278859,0.029435,0.108163,0.113121,0.202613,0.053294,0.2,0.2,0.238284,5.684225e-07,0.025756,0.025723,0.428481,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026895,0.067568,0.07874,0.035088,0.0,0.0,0.0,0.0,0.028653,0.479167,0.381818,0.160858,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030842,0.064516,0.064516,0.027273,0.0,0.0,0.0,0.0,0.000403,0.395682,0.506185,0.374135,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.051111,0.168831,0.158537,0.028189,0.064522,0.151163,0.177215,0.03125,0.056177,0.103659,0.091429,0.021823,0.023447,0.224806,0.198675,0.014054,0.01613,0.135761,0.131311,0.012228,0.047711,0.105553,0.101407,0.076923,0.008813,0.21189,0.219266,0.081804,0.002048,0.19807,0.198943,0.000997,0.051979,0.190256,0.183324,0.323737,0.047712,0.212275,0.225145,0.258013,0.076923,0.408333,0.4,0.272727,1.086856e-06,0.072702,0.069777,0.428481,0.0,0.007194,0.014493,0.0,0.0,0.047619,0.028571,0.0,0.042461,0.128378,0.15748,0.05303,0.0,0.0,0.0,0.0,0.048173,0.559524,0.446154,0.194765,0.0,0.083333,0.083333,0.0,0.0,0.0,0.0,0.0,0.051271,0.16129,0.16129,0.045455,0.009396,0.023529,0.026667,0.00496,0.00069,0.491669,0.56639,0.429129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.081119,0.311688,0.292683,0.042161,0.096148,0.290698,0.316456,0.045455,0.091887,0.219512,0.205714,0.034912,0.03958,0.341085,0.291391,0.022343,0.022295,0.249131,0.244779,0.013919,0.061639,0.199237,0.196192,0.09304,0.01221,0.424668,0.444823,0.091826,0.003011,0.285272,0.286401,0.001212,0.077347,0.311377,0.309774,0.373033,0.077457,0.339883,0.36545,0.320203,0.102317,0.633333,0.633333,0.311349,1.912534e-06,0.152105,0.142417,0.428481,0.009419,0.035971,0.043478,0.008333,0.048312,0.095238,0.057143,0.029412,0.062779,0.202703,0.259843,0.075758,0.0,1.0,1.0,0.0,0.072394,0.642857,0.51476,0.225338,0.116927,0.25,0.25,0.083333,0.0,0.0,0.0,0.0,0.067947,0.322581,0.322581,0.055556,0.030035,0.058824,0.066667,0.017857,0.001129,0.556592,0.643482,0.471249,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.540892,0.550592,0.536322,0.544614,0.607894,0.638147,0.528174,0.619678,0.590042,0.626336,0.542498,0.568598,0.425554,0.616241,0.635839,0.622383,0.643416,0.521004,0.533083,0.528604,0.540197,0.612889,0.373553,0.610099,0.532799,0.548366,0.566014,0.5665,0.554631,0.581328,0.508702,0.518716,0.525167,0.557447,0.526177,0.508433,0.59504,0.608771,0.588175,0.515265,0.58695,0.460133,0.609426,0.551542,0.479207,0.551653,0.618467,0.655722,0.568526,0.572427,0.546254,0.485535,0.501559,0.615569,0.49352,0.620268,0.580024,0.545782,0.548156,0.54119,0.506169,0.603729,0.567213,0.614659,0.566675,0.510448,0.681459,0.532195,0.591428,0.621519,0.534528,0.573603,0.621585,0.628246,0.527911,0.575903,0.578754,0.654971,0.687251,0.581738,0.563677,0.574555,0.555476,0.615517,0.54033,0.563476,0.625949,0.47682,0.578019,0.520022,0.597631,0.635757,0.589554,0.459818,0.66763,0.562827,0.657987,0.627137,0.559176,0.630296,0.516806,0.532645,0.577293,0.640727,0.544868,0.626579,0.597138,0.626296,0.552296,0.600762,0.545705,0.614941,0.60091,0.605884,0.511773,0.531943,0.54363,0.542487,0.538699,0.620374,0.62669,0.517154,0.606583,0.568378,0.618728,0.546345,0.58527,0.42537,0.605266,0.624808,0.630442,0.650358,0.516914,0.524809,0.537567,0.537077,0.617691,0.371612,0.60697,0.543705,0.539312,0.556252,0.558761,0.539142,0.576546,0.520986,0.515917,0.510542,0.556558,0.532071,0.50632,0.582614,0.605966,0.586983,0.502961,0.587718,0.447296,0.618384,0.527068,0.475486,0.556668,0.633125,0.638786,0.575601,0.568852,0.54561,0.489814,0.487557,0.603077,0.479459,0.620646,0.571719,0.535114,0.534729,0.529393,0.498468,0.593943,0.5712,0.620021,0.562987,0.503492,0.655924,0.546918,0.558095,0.629457,0.547652,0.584977,0.61654,0.637084,0.537377,0.569998,0.566917,0.65972,0.701529,0.57859,0.580397,0.565831,0.558674,0.60189,0.547126,0.556565,0.600744,0.462563,0.577956,0.529238,0.608856,0.64305,0.592691,0.459826,0.663176,0.564796,0.65332,0.631517,0.551443,0.62935,0.519087,0.536996,0.602642,0.655585,0.540801,0.649739,0.611847,0.610983,0.535897,0.593172,0.533548,0.607675,0.602292,0.596786,0.520087


In [6]:
X_train.describe()
# X_train = X_train.drop(['r1_kills'], axis=1)
# X_train.shape

Unnamed: 0,std_kills_ratio,r_mean_kills,d_mean_kills,mean_kills_ratio,std_deaths_ratio,r_mean_deaths,d_mean_deaths,mean_deaths_ratio,std_assists_ratio,r_mean_assists,d_mean_assists,mean_assists_ratio,std_denies_ratio,r_mean_denies,d_mean_denies,mean_denies_ratio,std_gold_ratio,r_mean_gold,d_mean_gold,mean_gold_ratio,std_lh_ratio,r_mean_lh,d_mean_lh,mean_lh_ratio,std_xp_ratio,r_mean_xp,d_mean_xp,mean_xp_ratio,std_health_ratio,r_mean_health,d_mean_health,mean_health_ratio,std_max_health_ratio,r_mean_max_health,d_mean_max_health,mean_max_health_ratio,std_max_mana_ratio,r_mean_max_mana,d_mean_max_mana,mean_max_mana_ratio,std_level_ratio,r_mean_level,d_mean_level,mean_level_ratio,std_stuns_ratio,r_mean_stuns,d_mean_stuns,mean_stuns_ratio,std_creeps_stacked_ratio,r_mean_creeps_stacked,d_mean_creeps_stacked,mean_creeps_stacked_ratio,std_camps_stacked_ratio,r_mean_camps_stacked,d_mean_camps_stacked,mean_camps_stacked_ratio,std_rune_pickups_ratio,r_mean_rune_pickups,d_mean_rune_pickups,mean_rune_pickups_ratio,std_firstblood_claimed_ratio,r_mean_firstblood_claimed,d_mean_firstblood_claimed,mean_firstblood_claimed_ratio,std_teamfight_participation_ratio,r_mean_teamfight_participation,d_mean_teamfight_participation,mean_teamfight_participation_ratio,std_towers_killed_ratio,r_mean_towers_killed,d_mean_towers_killed,mean_towers_killed_ratio,std_roshans_killed_ratio,r_mean_roshans_killed,d_mean_roshans_killed,mean_roshans_killed_ratio,std_obs_placed_ratio,r_mean_obs_placed,d_mean_obs_placed,mean_obs_placed_ratio,std_sen_placed_ratio,r_mean_sen_placed,d_mean_sen_placed,mean_sen_placed_ratio,std_distance_ratio,r_mean_distance,d_mean_distance,mean_distance_ratio,r_hero_1,r_hero_10,r_hero_100,r_hero_101,r_hero_102,r_hero_103,r_hero_104,r_hero_105,r_hero_106,r_hero_107,r_hero_108,r_hero_109,r_hero_11,r_hero_110,r_hero_111,r_hero_112,r_hero_113,r_hero_114,r_hero_119,r_hero_12,r_hero_120,r_hero_13,r_hero_14,r_hero_15,r_hero_16,r_hero_17,r_hero_18,r_hero_19,r_hero_2,r_hero_20,r_hero_21,r_hero_22,r_hero_23,r_hero_25,r_hero_26,r_hero_27,r_hero_28,r_hero_29,r_hero_3,r_hero_30,r_hero_31,r_hero_32,r_hero_33,r_hero_34,r_hero_35,r_hero_36,r_hero_37,r_hero_38,r_hero_39,r_hero_4,r_hero_40,r_hero_41,r_hero_42,r_hero_43,r_hero_44,r_hero_45,r_hero_46,r_hero_47,r_hero_48,r_hero_49,r_hero_5,r_hero_50,r_hero_51,r_hero_52,r_hero_53,r_hero_54,r_hero_55,r_hero_56,r_hero_57,r_hero_58,r_hero_59,r_hero_6,r_hero_60,r_hero_61,r_hero_62,r_hero_63,r_hero_64,r_hero_65,r_hero_66,r_hero_67,r_hero_68,r_hero_69,r_hero_7,r_hero_70,r_hero_71,r_hero_72,r_hero_73,r_hero_74,r_hero_75,r_hero_76,r_hero_77,r_hero_78,r_hero_79,r_hero_8,r_hero_80,r_hero_81,r_hero_82,r_hero_83,r_hero_84,r_hero_85,r_hero_86,r_hero_87,r_hero_88,r_hero_89,r_hero_9,r_hero_90,r_hero_91,r_hero_92,r_hero_93,r_hero_94,r_hero_95,r_hero_96,r_hero_97,r_hero_98,r_hero_99,d_hero_1,d_hero_10,d_hero_100,d_hero_101,d_hero_102,d_hero_103,d_hero_104,d_hero_105,d_hero_106,d_hero_107,d_hero_108,d_hero_109,d_hero_11,d_hero_110,d_hero_111,d_hero_112,d_hero_113,d_hero_114,d_hero_119,d_hero_12,d_hero_120,d_hero_13,d_hero_14,d_hero_15,d_hero_16,d_hero_17,d_hero_18,d_hero_19,d_hero_2,d_hero_20,d_hero_21,d_hero_22,d_hero_23,d_hero_25,d_hero_26,d_hero_27,d_hero_28,d_hero_29,d_hero_3,d_hero_30,d_hero_31,d_hero_32,d_hero_33,d_hero_34,d_hero_35,d_hero_36,d_hero_37,d_hero_38,d_hero_39,d_hero_4,d_hero_40,d_hero_41,d_hero_42,d_hero_43,d_hero_44,d_hero_45,d_hero_46,d_hero_47,d_hero_48,d_hero_49,d_hero_5,d_hero_50,d_hero_51,d_hero_52,d_hero_53,d_hero_54,d_hero_55,d_hero_56,d_hero_57,d_hero_58,d_hero_59,d_hero_6,d_hero_60,d_hero_61,d_hero_62,d_hero_63,d_hero_64,d_hero_65,d_hero_66,d_hero_67,d_hero_68,d_hero_69,d_hero_7,d_hero_70,d_hero_71,d_hero_72,d_hero_73,d_hero_74,d_hero_75,d_hero_76,d_hero_77,d_hero_78,d_hero_79,d_hero_8,d_hero_80,d_hero_81,d_hero_82,d_hero_83,d_hero_84,d_hero_85,d_hero_86,d_hero_87,d_hero_88,d_hero_89,d_hero_9,d_hero_90,d_hero_91,d_hero_92,d_hero_93,d_hero_94,d_hero_95,d_hero_96,d_hero_97,d_hero_98,d_hero_99
count,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0,39675.0
mean,0.061604,0.202468,0.191507,0.033854,0.073844,0.190137,0.205605,0.036635,0.069143,0.143377,0.135277,0.028159,0.031943,0.242268,0.209353,0.018335,0.018475,0.168558,0.166036,0.012738,0.051708,0.135105,0.132774,0.07986,0.01061,0.280882,0.293306,0.082808,0.002525,0.226354,0.227467,0.001141,0.062983,0.218447,0.214365,0.329352,0.061282,0.239185,0.256154,0.267203,0.083583,0.428357,0.42878,0.277379,6.292326e-05,0.106498,0.10077,0.428482,0.010864,0.028702,0.036881,0.011152,0.035677,0.062744,0.047832,0.028889,0.051274,0.145044,0.182215,0.063235,0.0,0.464726,0.454518,0.0,0.055651,0.526488,0.419326,0.183333,0.066324,0.140277,0.129416,0.054707,0.005018,0.029177,0.026954,0.005189,0.056678,0.202335,0.200568,0.049728,0.024736,0.046427,0.051526,0.015981,0.001062,0.47526,0.566538,0.419004,0.02216,0.021637,0.023851,0.023487,0.01263,0.008376,0.026916,0.011698,0.015602,0.010252,0.021242,0.016351,0.050929,0.01359,0.008645,0.008945,0.006253,0.027273,0.026244,0.024335,0.024305,0.010538,0.064363,0.01067,0.025757,0.020858,0.0188,0.020479,0.022805,0.017022,0.03114,0.02889,0.028622,0.019248,0.027326,0.030017,0.015222,0.01307,0.014217,0.030794,0.016125,0.042946,0.011316,0.022228,0.035613,0.022174,0.009694,0.007082,0.017212,0.017492,0.023122,0.034667,0.033963,0.011094,0.034408,0.009785,0.014842,0.022445,0.022657,0.024102,0.034252,0.013837,0.016338,0.010211,0.01837,0.02794,0.00506,0.02377,0.017287,0.009513,0.022965,0.015765,0.009409,0.008555,0.02476,0.016269,0.01927,0.00541,0.003465,0.015627,0.019019,0.017578,0.021817,0.012103,0.022484,0.01765,0.010611,0.038613,0.015747,0.024582,0.011479,0.008373,0.014222,0.043154,0.005306,0.018173,0.005849,0.008899,0.021214,0.009696,0.03164,0.024732,0.015703,0.006855,0.022654,0.009728,0.01298,0.009405,0.021968,0.013538,0.023321,0.011017,0.014165,0.014287,0.030581,0.021685,0.02274,0.023589,0.024531,0.012834,0.008359,0.027715,0.012682,0.016745,0.010362,0.021549,0.014848,0.04968,0.013368,0.008502,0.008467,0.006309,0.027034,0.025834,0.02501,0.023497,0.010907,0.063715,0.01074,0.025646,0.020872,0.019789,0.019467,0.023079,0.017264,0.030642,0.028408,0.0287,0.018411,0.026338,0.029797,0.016037,0.013366,0.013643,0.030612,0.016076,0.044552,0.010381,0.02287,0.035604,0.023178,0.008948,0.00726,0.01641,0.017538,0.023169,0.034438,0.034109,0.01112,0.034994,0.010334,0.015151,0.02216,0.023298,0.024125,0.032998,0.013708,0.016702,0.009295,0.01955,0.028932,0.004731,0.023052,0.017957,0.009445,0.022582,0.015183,0.010262,0.00861,0.024889,0.017607,0.019361,0.005235,0.003957,0.014977,0.018932,0.017781,0.022719,0.011986,0.02146,0.018432,0.011044,0.039401,0.015946,0.023915,0.011408,0.008418,0.014632,0.042018,0.00524,0.017845,0.00571,0.009918,0.020804,0.009856,0.030886,0.024985,0.015323,0.006861,0.022363,0.008227,0.011995,0.010025,0.022373,0.013472,0.022881,0.011272,0.013751,0.014716,0.03097
std,0.055005,0.170373,0.164446,0.033886,0.058744,0.161333,0.17095,0.035126,0.063916,0.140974,0.135114,0.033304,0.032193,0.151012,0.131188,0.018869,0.012932,0.139372,0.139129,0.01058,0.026759,0.116595,0.116284,0.029934,0.013773,0.238614,0.251207,0.023237,0.005386,0.110375,0.111774,0.005139,0.045237,0.14302,0.146852,0.080216,0.050116,0.156798,0.171347,0.095246,0.053048,0.269518,0.271793,0.06216,0.006630529,0.108324,0.101018,0.003836,0.031525,0.050994,0.062913,0.034621,0.069989,0.09207,0.068374,0.063454,0.039807,0.102051,0.129522,0.047567,0.0,0.49876,0.497933,0.0,0.046115,0.191535,0.155003,0.08295,0.10668,0.175758,0.173326,0.104712,0.037951,0.084055,0.082375,0.040527,0.053786,0.16284,0.162527,0.052002,0.045645,0.068518,0.077547,0.03189,0.00538,0.149022,0.144581,0.075733,0.097575,0.097078,0.100032,0.099944,0.079327,0.067234,0.104896,0.077164,0.08595,0.072978,0.09698,0.087319,0.122657,0.082049,0.068163,0.069149,0.059259,0.104671,0.103728,0.100796,0.101093,0.073725,0.124833,0.073943,0.10286,0.095777,0.091988,0.095164,0.099241,0.088713,0.10963,0.106777,0.10648,0.092952,0.105055,0.108305,0.085256,0.080726,0.083024,0.108815,0.087216,0.118512,0.076105,0.097891,0.113017,0.098069,0.071271,0.062542,0.089005,0.089806,0.099283,0.112779,0.111938,0.075006,0.112162,0.071641,0.084224,0.098032,0.098318,0.100725,0.112606,0.082464,0.087783,0.072656,0.091553,0.105466,0.054342,0.100072,0.089894,0.070944,0.098729,0.086073,0.070278,0.067848,0.101262,0.087523,0.093162,0.055776,0.045738,0.08617,0.092858,0.090292,0.097338,0.078001,0.098389,0.089675,0.073907,0.115793,0.086501,0.101085,0.076035,0.067393,0.083025,0.118971,0.055297,0.090887,0.057755,0.068897,0.096593,0.071385,0.109767,0.1017,0.086411,0.061589,0.098444,0.071625,0.080187,0.070174,0.097501,0.081344,0.099509,0.075449,0.083334,0.083559,0.109388,0.097013,0.098647,0.099574,0.101278,0.080006,0.066993,0.105819,0.079781,0.088305,0.073225,0.097244,0.084267,0.121934,0.081583,0.067635,0.067529,0.059516,0.104425,0.103068,0.101611,0.100125,0.074957,0.124832,0.074001,0.102844,0.095637,0.093822,0.093469,0.099442,0.089184,0.109134,0.106161,0.106502,0.091288,0.103912,0.107855,0.08712,0.081442,0.081774,0.10888,0.087217,0.119339,0.073374,0.09875,0.112969,0.09965,0.069156,0.063219,0.08752,0.089917,0.099293,0.112531,0.112096,0.075092,0.112727,0.073285,0.084955,0.097469,0.099226,0.100565,0.111464,0.082015,0.088588,0.069865,0.093446,0.106602,0.052867,0.099015,0.091056,0.070696,0.098066,0.084751,0.072967,0.067884,0.101513,0.090319,0.093124,0.055018,0.048602,0.084494,0.092891,0.090472,0.098752,0.077671,0.096508,0.091218,0.075082,0.116473,0.086931,0.100101,0.075832,0.067358,0.084086,0.118252,0.054984,0.090295,0.057165,0.071947,0.095998,0.071815,0.108932,0.101912,0.085648,0.061636,0.097865,0.066775,0.077632,0.072331,0.09826,0.081105,0.09856,0.07594,0.0824,0.084768,0.109802
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.028271,0.051948,0.04878,0.016317,0.038313,0.05814,0.063291,0.01859,0.030221,0.030488,0.028571,0.012018,0.01357,0.124031,0.112583,0.008617,0.011585,0.0555,0.054341,0.010821,0.036431,0.043239,0.041391,0.063318,0.006321,0.084893,0.087303,0.072808,0.001375,0.145696,0.145315,0.000816,0.034793,0.100163,0.093871,0.278859,0.029435,0.108163,0.113121,0.202613,0.053294,0.2,0.2,0.238284,5.684225e-07,0.025756,0.025723,0.428481,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026895,0.067568,0.07874,0.035088,0.0,0.0,0.0,0.0,0.028653,0.479167,0.381818,0.160858,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030842,0.064516,0.064516,0.027273,0.0,0.0,0.0,0.0,0.000403,0.395682,0.506185,0.374135,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.051111,0.168831,0.158537,0.028189,0.064522,0.151163,0.177215,0.03125,0.056177,0.103659,0.091429,0.021823,0.023447,0.224806,0.198675,0.014054,0.01613,0.135761,0.131311,0.012228,0.047711,0.105553,0.101407,0.076923,0.008813,0.21189,0.219266,0.081804,0.002048,0.19807,0.198943,0.000997,0.051979,0.190256,0.183324,0.323737,0.047712,0.212275,0.225145,0.258013,0.076923,0.408333,0.4,0.272727,1.086856e-06,0.072702,0.069777,0.428481,0.0,0.007194,0.014493,0.0,0.0,0.047619,0.028571,0.0,0.042461,0.128378,0.15748,0.05303,0.0,0.0,0.0,0.0,0.048173,0.559524,0.446154,0.194765,0.0,0.083333,0.083333,0.0,0.0,0.0,0.0,0.0,0.051271,0.16129,0.16129,0.045455,0.009396,0.023529,0.026667,0.00496,0.00069,0.491669,0.56639,0.429129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.081119,0.311688,0.292683,0.042161,0.096148,0.290698,0.316456,0.045455,0.091887,0.219512,0.205714,0.034912,0.03958,0.341085,0.291391,0.022343,0.022295,0.249131,0.244779,0.013919,0.061639,0.199237,0.196192,0.09304,0.01221,0.424668,0.444823,0.091826,0.003011,0.285272,0.286401,0.001212,0.077347,0.311377,0.309774,0.373033,0.077457,0.339883,0.36545,0.320203,0.102317,0.633333,0.633333,0.311349,1.912534e-06,0.152105,0.142417,0.428481,0.009419,0.035971,0.043478,0.008333,0.048312,0.095238,0.057143,0.029412,0.062779,0.202703,0.259843,0.075758,0.0,1.0,1.0,0.0,0.072394,0.642857,0.51476,0.225338,0.116927,0.25,0.25,0.083333,0.0,0.0,0.0,0.0,0.067947,0.322581,0.322581,0.055556,0.030035,0.058824,0.066667,0.017857,0.001129,0.556592,0.643482,0.471249,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.540892,0.550592,0.536322,0.544614,0.607894,0.638147,0.528174,0.619678,0.590042,0.626336,0.542498,0.568598,0.425554,0.616241,0.635839,0.622383,0.643416,0.521004,0.533083,0.528604,0.540197,0.612889,0.373553,0.610099,0.532799,0.548366,0.566014,0.5665,0.554631,0.581328,0.508702,0.518716,0.525167,0.557447,0.526177,0.508433,0.59504,0.608771,0.588175,0.515265,0.58695,0.460133,0.609426,0.551542,0.479207,0.551653,0.618467,0.655722,0.568526,0.572427,0.546254,0.485535,0.501559,0.615569,0.49352,0.620268,0.580024,0.545782,0.548156,0.54119,0.506169,0.603729,0.567213,0.614659,0.566675,0.510448,0.681459,0.532195,0.591428,0.621519,0.534528,0.573603,0.621585,0.628246,0.527911,0.575903,0.578754,0.654971,0.687251,0.581738,0.563677,0.574555,0.555476,0.615517,0.54033,0.563476,0.625949,0.47682,0.578019,0.520022,0.597631,0.635757,0.589554,0.459818,0.66763,0.562827,0.657987,0.627137,0.559176,0.630296,0.516806,0.532645,0.577293,0.640727,0.544868,0.626579,0.597138,0.626296,0.552296,0.600762,0.545705,0.614941,0.60091,0.605884,0.511773,0.531943,0.54363,0.542487,0.538699,0.620374,0.62669,0.517154,0.606583,0.568378,0.618728,0.546345,0.58527,0.42537,0.605266,0.624808,0.630442,0.650358,0.516914,0.524809,0.537567,0.537077,0.617691,0.371612,0.60697,0.543705,0.539312,0.556252,0.558761,0.539142,0.576546,0.520986,0.515917,0.510542,0.556558,0.532071,0.50632,0.582614,0.605966,0.586983,0.502961,0.587718,0.447296,0.618384,0.527068,0.475486,0.556668,0.633125,0.638786,0.575601,0.568852,0.54561,0.489814,0.487557,0.603077,0.479459,0.620646,0.571719,0.535114,0.534729,0.529393,0.498468,0.593943,0.5712,0.620021,0.562987,0.503492,0.655924,0.546918,0.558095,0.629457,0.547652,0.584977,0.61654,0.637084,0.537377,0.569998,0.566917,0.65972,0.701529,0.57859,0.580397,0.565831,0.558674,0.60189,0.547126,0.556565,0.600744,0.462563,0.577956,0.529238,0.608856,0.64305,0.592691,0.459826,0.663176,0.564796,0.65332,0.631517,0.551443,0.62935,0.519087,0.536996,0.602642,0.655585,0.540801,0.649739,0.611847,0.610983,0.535897,0.593172,0.533548,0.607675,0.602292,0.596786,0.520087


### First model

Training function

In [7]:
def train_model(X, X_test, y, params, folds, model_type='lgb', plot_feature_importance=False, averaging='usual', model=None):
    oof = np.zeros(len(X))
    prediction = np.zeros(len(X_test))
    scores = []
    feature_importance = pd.DataFrame()
    for fold_n, (train_index, valid_index) in enumerate(folds.split(X, y)):
        print('Fold', fold_n, 'started at', time.ctime())
        X_train, X_valid = X.loc[train_index], X.loc[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]
        
        if model_type == 'lgb':
            train_data = lgb.Dataset(X_train, label=y_train)
            valid_data = lgb.Dataset(X_valid, label=y_valid)
            
            model = lgb.train(params,
                    train_data,
                    num_boost_round=20000,
                    valid_sets = [train_data, valid_data],
                    verbose_eval=1000,
                    early_stopping_rounds = 200)
            
            y_pred_valid = model.predict(X_valid)
            y_pred = model.predict(X_test, num_iteration=model.best_iteration)
            
        if model_type == 'xgb':
            train_data = xgb.DMatrix(data=X_train, label=y_train, feature_names=X_train.columns)
            valid_data = xgb.DMatrix(data=X_valid, label=y_valid, feature_names=X_train.columns)

            watchlist = [(train_data, 'train'), (valid_data, 'valid_data')]
            model = xgb.train(dtrain=train_data, num_boost_round=20000, evals=watchlist, early_stopping_rounds=200, verbose_eval=500, params=params)
            y_pred_valid = model.predict(xgb.DMatrix(X_valid, feature_names=X_train.columns), ntree_limit=model.best_ntree_limit)
            y_pred = model.predict(xgb.DMatrix(X_test, feature_names=X_train.columns), ntree_limit=model.best_ntree_limit)
        
        if model_type == 'sklearn':
            model = model
            model.fit(X_train, y_train)
            y_pred_valid = model.predict_proba(X_valid).reshape(-1,)
            score = roc_auc_score(y_valid, y_pred_valid)
            # print(f'Fold {fold_n}. AUC: {score:.4f}.')
            # print('')
            
            y_pred = model.predict_proba(X_test)[:, 1]
            
        if model_type == 'glm':
            model = sm.GLM(y_train, X_train, family=sm.families.Binomial())
            model_results = model.fit()
            model_results.predict(X_test)
            y_pred_valid = model_results.predict(X_valid).reshape(-1,)
            score = roc_auc_score(y_valid, y_pred_valid)
            
            y_pred = model_results.predict(X_test)
            
        if model_type == 'cat':
            model = CatBoostClassifier(iterations=20000, learning_rate=0.05, loss_function='Logloss',  eval_metric='AUC', **params)
            model.fit(X_train, y_train, eval_set=(X_valid, y_valid), cat_features=[], use_best_model=True, verbose=False)

            y_pred_valid = model.predict_proba(X_valid)[:, 1]
            y_pred = model.predict_proba(X_test)[:, 1]
            
        oof[valid_index] = y_pred_valid.reshape(-1,)
        scores.append(roc_auc_score(y_valid, y_pred_valid))

        if averaging == 'usual':
            prediction += y_pred
        elif averaging == 'rank':
            prediction += pd.Series(y_pred).rank().values  
        
        if model_type == 'lgb':
            # feature importance
            fold_importance = pd.DataFrame()
            fold_importance["feature"] = X.columns
            fold_importance["importance"] = model.feature_importance()
            fold_importance["fold"] = fold_n + 1
            feature_importance = pd.concat([feature_importance, fold_importance], axis=0)

    prediction /= n_fold
    
    print('CV mean score: {0:.4f}, std: {1:.4f}.'.format(np.mean(scores), np.std(scores)))
    
    if model_type == 'lgb':
        feature_importance["importance"] /= n_fold
        if plot_feature_importance:
            cols = feature_importance[["feature", "importance"]].groupby("feature").mean().sort_values(
                by="importance", ascending=False)[:50].index

            best_features = feature_importance.loc[feature_importance.feature.isin(cols)]

            plt.figure(figsize=(16, 12));
            sns.barplot(x="importance", y="feature", data=best_features.sort_values(by="importance", ascending=False));
            plt.title('LGB Features (avg over folds)');
        
            return oof, prediction, feature_importance
        return oof, prediction, scores
    
    else:
        return oof, prediction, scores
    
    
def lgb_model_tunning(X, y, params):
    print('lgb_model_tunning... Start')

    # Create parameters to search
    gridParams = {
        'learning_rate': [0.005, 0.01],
        'n_estimators': [40],
        'num_leaves': [12, 16, 32, 64],
        'boosting_type': ['gbdt'],
        'objective': ['binary'],
        'random_state': [SEED],  # Updated from 'seed'
        'colsample_bytree': [0.65, 0.66],
        'subsample': [0.7, 0.75],
        'reg_alpha': [1, 1.2],
        'reg_lambda': [1, 1.2, 1.4],
    }

    # Create classifier to use. Note that parameters have to be input manually
    # not as a dict!
    mdl = lgb.LGBMClassifier(boosting_type='gbdt',
                             objective='binary',
                             n_jobs=4,  # Updated from 'nthread'
                             silent=True,
                             max_depth=params['max_depth'],
                             # max_bin=params['max_bin'],
                             # subsample_for_bin=params['subsample_for_bin'],
                             # subsample=params['subsample'],
                             # subsample_freq=params['subsample_freq'],
                             # min_split_gain=params['min_split_gain'],
                             # min_child_weight=params['min_child_weight'],
                             # min_child_samples=params['min_child_samples'],
                             # scale_pos_weight=params['scale_pos_weight']
                             )

    # To view the default model params:
    print(mdl.get_params().keys())

    # Create the grid
    grid = GridSearchCV(mdl, gridParams,
                        verbose=0,
                        cv=4,
                        n_jobs=4)
    # Run the grid
    grid.fit(X, y)

    # Print the best parameters found
    print('===== Best params =====')
    print(grid.best_params_)
    print(grid.best_score_)

    # Using parameters already set above, replace in the best from the grid search
    params['colsample_bytree'] = grid.best_params_['colsample_bytree']
    params['learning_rate'] = grid.best_params_['learning_rate']
    # params['max_bin'] = grid.best_params_['max_bin']
    params['num_leaves'] = grid.best_params_['num_leaves']
    params['reg_alpha'] = grid.best_params_['reg_alpha']
    params['reg_lambda'] = grid.best_params_['reg_lambda']
    params['subsample'] = grid.best_params_['subsample']
    # params['subsample_for_bin'] = grid.best_params_['subsample_for_bin']

    print('Fitting with params: ')
    print(params)
    return params

n_fold = 5
folds = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=42)

print('added')

added


In [11]:
params = {'boost': 'gbdt',
          'feature_fraction': 0.05,
          'learning_rate': 0.01,
          'max_depth': -1,  
          'metric':'auc',
          'min_data_in_leaf': 50,
          'num_leaves': 32,
          'num_threads': -1,
          'verbosity': 1,
          'objective': 'binary'
         }

tunning = True
if tunning:
    params = lgb_model_tunning(X_train, y_train, params)

oof_lgb, prediction_lgb, scores = train_model(X_train, X_test, y_train, params=params, folds=folds, model_type='lgb', plot_feature_importance=True)

lgb_model_tunning... Start
dict_keys(['boosting_type', 'class_weight', 'colsample_bytree', 'importance_type', 'learning_rate', 'max_depth', 'min_child_samples', 'min_child_weight', 'min_split_gain', 'n_estimators', 'n_jobs', 'num_leaves', 'objective', 'random_state', 'reg_alpha', 'reg_lambda', 'silent', 'subsample', 'subsample_for_bin', 'subsample_freq'])


KeyboardInterrupt: 

In [None]:
sub = pd.read_csv('../input/sample_submission.csv')
sub['radiant_win_prob'] = prediction_lgb
sub.to_csv('submission.csv', index=False)
sub.head()