In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

### Loading DATA

In [7]:
# data_train = pd.read_csv('../data/train_features.csv', index_col='match_id_hash')

# data_test = pd.read_csv('../data/test_features.csv', index_col='match_id_hash')

# feature_afk = pd.read_csv('afk_feature.csv')

# train_targets = pd.read_csv('../data/train_targets.csv', index_col='match_id_hash')

data_train_engineered = pd.read_csv('data_train_engineered.csv', index_col='match_id_hash')
list(data_train_engineered.columns)

['game_time',
 'game_mode',
 'lobby_type',
 'objectives_len',
 'chat_len',
 'afk',
 'r_kills',
 'r_deaths',
 'r_assists',
 'r_denies',
 'r_gold',
 'r_lh',
 'r_xp',
 'r_health',
 'r_max_health',
 'r_max_mana',
 'r_level',
 'r_stuns',
 'r_creeps_stacked',
 'r_camps_stacked',
 'r_rune_pickups',
 'r_firstblood_claimed',
 'r_teamfight_participation',
 'r_towers_killed',
 'r_roshans_killed',
 'r_obs_placed',
 'r_sen_placed',
 'd_kills',
 'd_deaths',
 'd_assists',
 'd_denies',
 'd_gold',
 'd_lh',
 'd_xp',
 'd_health',
 'd_max_health',
 'd_max_mana',
 'd_level',
 'd_stuns',
 'd_creeps_stacked',
 'd_camps_stacked',
 'd_rune_pickups',
 'd_firstblood_claimed',
 'd_teamfight_participation',
 'd_towers_killed',
 'd_roshans_killed',
 'd_obs_placed',
 'd_sen_placed']

In [None]:
# Берём только те игры, которые уже идут больше 10 минут
X = data_train_engineered[(data_train_engineered['game_time'] >= 600) & (data_train_engineered['game_time'] <= 3000)]
y = data_train_engineered[(data_train_engineered['game_time'] >= 600) & (data_train_engineered['game_time'] <= 3000)]['radiant_win']

X = X.drop(columns=['r_health', 'r_max_health', 'r_max_mana', 'r_stuns', 'r_creeps_stacked', 'r_camps_stacked', 'r_rune_pickups', 
                'r_firstblood_claimed', 'r_obs_placed', 'r_sen_placed',
                'd_health', 'd_max_health', 'd_max_mana', 'd_stuns', 'd_creeps_stacked', 'd_camps_stacked', 'd_rune_pickups', 
                'd_firstblood_claimed', 'd_obs_placed', 'd_sen_placed'])


X_test = pd.read_csv('data_test_engineered.csv', index_col='match_id_hash')
X_test = X_test.drop(columns=['r_health', 'r_max_health', 'r_max_mana', 'r_stuns', 'r_creeps_stacked', 'r_camps_stacked', 'r_rune_pickups', 
                'r_firstblood_claimed', 'r_obs_placed', 'r_sen_placed',
                'd_health', 'd_max_health', 'd_max_mana', 'd_stuns', 'd_creeps_stacked', 'd_camps_stacked', 'd_rune_pickups', 
                'd_firstblood_claimed', 'd_obs_placed', 'd_sen_placed'])

### Training the best model found

In [4]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.linear_model import LogisticRegression

def search_grid_logistreg_params(X_train, y_train, X_test, parameters : "{'C': [C1, C2, ...], 'tol': [tolerance1, ...]}"):
    """ Returns an array of probabilities """
    best_scores = []
    best_params = []
    # rfc = RandomForestClassifier(n_estimators = 50, n_jobs=-1, random_state=22, oob_score=True, class_weight='balanced')
    logistic_reg = LogisticRegression(solver='lbfgs', n_jobs=-1, random_state=22)
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=22)
    # parameters = {'max_features': [5], 'min_samples_leaf': [10], 'max_depth': [37]}
    # parameters = {'C': [C], 'tol': [tolerance]}
    gsc = GridSearchCV(estimator=logistic_reg, cv=skf, param_grid=parameters, n_jobs=-1, scoring='roc_auc')
    gsc.fit(X_train, y_train)
    print(gsc.best_score_)
    print(gsc.best_params_)
    best_params.append(gsc.best_params_)
    best_scores.append(gsc.best_score_)
    
    return gsc.predict_proba(X_test.values)

In [16]:
%%time
# for i in range(3):
#     i += 1
params = {'C': [1], 'tol': [0.25, 0.1, 0.5], 'max_iter': [1000, 2350, 3000]}
result = search_grid_logistreg_params(X, y, X_test, parameters=params)

0.8553785960398015
{'C': 1, 'max_iter': 1000, 'tol': 0.25}
0.8553785960398015
{'C': 1, 'max_iter': 1000, 'tol': 0.25}
0.8553785960398015
{'C': 1, 'max_iter': 1000, 'tol': 0.25}
Wall time: 24.1 s


In [10]:
result

array([[4.82854405e-01, 5.17145595e-01],
       [9.99827287e-01, 1.72712814e-04],
       [9.99996907e-01, 3.09264903e-06],
       ...,
       [9.82823878e-02, 9.01717612e-01],
       [3.55161336e-03, 9.96448387e-01],
       [2.59623434e-11, 1.00000000e+00]])

### Submission

In [11]:
import datetime

df_submission = pd.DataFrame({'radiant_win_prob': result[:, 1]}, index=X_test.index)

submission_filename = 'submission_{}.csv'.format(
    datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

df_submission.to_csv(submission_filename)

print('Submission saved to {}'.format(submission_filename))

Submission saved to submission_2019-09-05_23-00-46.csv


### Features

In [None]:
# afk_features
data_train = data_train.join(feature_afk.set_index('match_id_hash'))
print(data_train.shape)

data_train['afk'] = data_train['afk'].fillna('0')

data_train.info()
data_train.head(3)
# [print(n) for n in list(train_targets.isna().sum()) if n != 0]  # Проверка на пропущенные данные


 ### Team stats 

In [None]:
# data_train = pd.read_csv('../data/test_features.csv')
import re

for col in list(data_train.columns):
    r_regexp = re.compile(r'r[0-9]_(.*)').findall(col)
    d_regexp = re.compile(r'd[0-9]_(.*)').findall(col)

    if len(r_regexp) > 0:
        data_train['r_' + r_regexp[0]] = 0
    if len(d_regexp) > 0:
        data_train['d_' + d_regexp[0]] = 0  

for col in list(data_train.columns):
    r_regexp = re.compile(r'r[0-9]_(.*)').findall(col)
    d_regexp = re.compile(r'd[0-9]_(.*)').findall(col)

    if len(r_regexp) > 0:
        data_train['r_' + r_regexp[0]] = data_train['r_' + r_regexp[0]] + data_train[re.compile(r'r[0-9]_.*').findall(col)[0]]
        data_train.drop(columns=[re.compile(r'r[0-9]_.*').findall(col)[0]], inplace=True)
        
    if len(d_regexp) > 0:
        data_train['d_' + d_regexp[0]] = data_train['d_' + d_regexp[0]] + data_train[re.compile(r'd[0-9]_.*').findall(col)[0]]
        data_train.drop(columns=[re.compile(r'd[0-9]_.*').findall(col)[0]], inplace=True)

data_train.drop(columns=['r_hero_id', 'd_hero_id', 'r_x', 'r_y', 'd_x', 'd_y'], inplace=True)

In [None]:
# Проверяем, есть ли NA
[print(n) for n in list(data_train.isna().sum()) if n != 0]

In [None]:
data_train.to_csv('data_test_engineered.csv')


In [None]:
len(list(data_train.columns))

## Features Engineering from raw data

In [None]:
import json
matches = []
with open('../data/train_matches.jsonl') as fin:
    # read the 18-th line
    for i in range(500):
        line = fin.readline()
        matches.append(json.loads(line))
    # read JSON into a Python object 
    

In [None]:
matches[0]['players'][i]['hero_inventory']

In [None]:
afk_players = pd.DataFrame()
for match in matches:
    for i, game in enumerate(match):
        if len(match['players'][i]['actions']) < 3:
            afk_players = afk_players.append(pd.DataFrame(data={'match_id_hash': match['match_id_hash'], 'afk': 1}, index=[i]))
        # else:
        #     afk_players = afk_players.append(pd.DataFrame(data={'afk_true': 0}, index=[match['match_id_hash']]))
            # print(f"player_slot {match['players'][i]['player_slot']} in match {match['match_id_hash']} was AFK")
afk_players

In [None]:
afk_players = afk_players.groupby('match_id_hash').sum()
afk_players

In [None]:
%time
import os

try:
    import ujson as json
except ModuleNotFoundError:
    import json
    print ('Please install ujson to read JSON oblects faster')
    
try:
    from tqdm import tqdm_notebook
except ModuleNotFoundError:
    tqdm_notebook = lambda x: x
    print ('Please install tqdm to track progress with Python loops')

def read_matches(matches_file):
    
    MATCHES_COUNT = {
        'test_matches.jsonl': 10000,
        'train_matches.jsonl': 39675,
    }
    _, filename = os.path.split(matches_file)
    total_matches = MATCHES_COUNT.get(filename)
    
    with open(matches_file) as fin:
        for line in tqdm_notebook(fin, total=total_matches):
            yield json.loads(line)
            
afk_players = pd.DataFrame()   
# processing each game
for match in read_matches('../data/train_matches.jsonl'):
    
   # processing each player
    for i, player in enumerate(match['players']):
        if len(match['players'][i]['actions']) < 5:
            afk_players = afk_players.append(pd.DataFrame(data={'match_id_hash': match['match_id_hash'], 'afk': 1}, index=[i]))

In [None]:
afk_players = afk_players.groupby('match_id_hash').sum().reset_index()
print(afk_players.info())
afk_players.head()

In [None]:
afk_players.set_index('match_id_hash').to_csv('afk_feature.csv')

## Teaching models

In [26]:
train_targets = pd.read_csv('../data/train_targets.csv', index_col='match_id_hash')
data_train_engineered = pd.read_csv('data_train_engineered.csv', index_col='match_id_hash')

# Берём только те игры, которые уже идут больше 10 минут
X = data_train_engineered[data_train_engineered['game_time'] >= 600]
y = train_targets[train_targets['game_time'] >= 600]['radiant_win']
X = X.drop(columns=['game_mode', 'lobby_type', 'objectives_len', 'chat_len'])

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=22)

In [28]:
# scaler = StandardScaler()
# 
# X_train_norm = scaler.fit_transform(X_train)
y_train = y_train.replace({False: 0, True: 1})

In [29]:
X_train

Unnamed: 0_level_0,game_time,afk,r_kills,r_deaths,r_assists,r_denies,r_gold,r_lh,r_xp,r_health,...,d_stuns,d_creeps_stacked,d_camps_stacked,d_rune_pickups,d_firstblood_claimed,d_teamfight_participation,d_towers_killed,d_roshans_killed,d_obs_placed,d_sen_placed
match_id_hash,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7790c750fae2f11d611f099120c30213,1835,0.0,33,23,59,48,56587,490,61014,6495,...,79.516773,9,2,40,0,2.521739,1,0,10,5
fe4364b28448c357d8be41c1867f4201,1940,0.0,27,22,37,19,61952,472,71862,9573,...,41.329959,3,2,29,0,3.454545,4,0,13,0
1a78c253e1f52d6fb4b5730171dafdc5,1847,0.0,27,31,40,47,58640,587,69087,1400,...,81.889206,0,0,49,1,2.741935,6,0,13,4
7078754061efd6fe6298749eec31f771,2308,0.0,30,27,61,36,84853,840,91458,5374,...,171.128818,4,3,40,0,3.000000,4,1,17,27
a142bc0a8f970aefebbcbb2a0aa0b553,1124,0.0,21,17,29,29,32098,254,35378,5777,...,83.279341,3,2,25,0,2.235294,1,0,8,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
a8354f5f5e6286a445416a3122bd304f,878,0.0,7,7,8,31,20387,219,21697,2639,...,15.829595,0,0,20,0,2.571429,1,0,5,0
65e67ee292fa5ac3d72b26d2d5e00b20,1464,0.0,16,42,32,53,44123,403,46366,2890,...,85.212512,3,1,29,1,2.500000,5,0,9,4
020f371c667dbd7182845120857b7326,2540,0.0,30,47,49,58,84744,852,110953,7029,...,160.083404,0,0,42,1,2.638298,4,1,17,11
552a01d3a8783be1f3cb87e9315b0a9c,2977,0.0,36,38,59,37,119069,1102,113302,9071,...,152.534522,2,1,62,0,2.710526,3,0,18,9


In [30]:
%%time
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(random_state=22, n_estimators=500)
rfc.fit(X=X_train, y=y_train)
y_pred = rfc.predict(X_test)
roc_auc_score(y_true=y_test, y_score=y_pred)


Wall time: 29.6 s


0.7538953742703396

In [31]:
%%time
from sklearn.neighbors import KNeighborsClassifier
knc = KNeighborsClassifier()
knc.fit(X_train, y_train)
# knc.predict_proba(X_test)
roc_auc_score(y_test, knc.predict(X_test))

Wall time: 1.64 s


0.7338275130716275

In [32]:
%%time
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, y_train)
roc_auc_score(y_test, qda.predict(X_test))

Wall time: 671 ms


0.7401674917814942

In [35]:
%%time
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(random_state=22, solver='lbfgs', max_iter=2000, C=1, tol=0.25)
lr.fit(X_train, y_train)
roc_auc_score(y_test, lr.predict(X_test))

Wall time: 4.28 s


0.7659721459562083

# Logistic Regression
params:
{class_weight='balanced', random_state=22, solver='lbfgs', max_iter=1500, n_jobs=-1, {'C': 0.75, 'tol': 0.25}}

In [34]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(class_weight='balanced', random_state=22, solver='lbfgs', max_iter=1500, n_jobs=-1, C=1, tol=0.25)
lr.fit(X_train, y_train)
roc_auc_score(y_test, lr.predict(X_test))

0.7675908946029456

In [None]:
# pd.DataFrame(rfc.feature_importances_, X_train.columns.values, columns = ['Coef']).sort_values(by='Coef', ascending=False)
# X_train

In [None]:
data_train_engineered[['game_time']]

### Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.linear_model import LogisticRegression

def search_grid_logistreg_params(X_train, y_train, X_test, parameters : "{'C': [C1, C2, ...], 'tol': [tolerance1, ...]}"):
    """ Returns an array of probabilities """
    best_scores = []
    best_params = []
    # rfc = RandomForestClassifier(n_estimators = 50, n_jobs=-1, random_state=22, oob_score=True, class_weight='balanced')
    logistic_reg = LogisticRegression(class_weight='balanced', solver='lbfgs', max_iter=1500, n_jobs=-1, warm_start=True)
    skf = StratifiedKFold(n_splits=5, shuffle=True)
    # parameters = {'max_features': [5], 'min_samples_leaf': [10], 'max_depth': [37]}
    # parameters = {'C': [C], 'tol': [tolerance]}
    gsc = GridSearchCV(estimator=logistic_reg, cv=skf, param_grid=parameters, n_jobs=-1, scoring='roc_auc')
    gsc.fit(X_train, y_train)
    print(gsc.best_score_)
    print(gsc.best_params_)
    best_params.append(gsc.best_params_)
    best_scores.append(gsc.best_score_)
    
    return gsc.predict_proba(X_test)        #, logistic_reg.coef_


In [None]:
# X = pd.read_csv('data_train_engineered.csv', index_col='match_id_hash')
# y = pd.read_csv('../data/train_targets.csv', index_col='match_id_hash')
X_test = pd.read_csv('data_test_engineered.csv', index_col='match_id_hash')

In [4]:
y = y['radiant_win'].map({True: 1, False: 0})
y

match_id_hash
a400b8f29dece5f4d266f49f1ae2e98a    0
b9c57c450ce74a2af79c9ce96fac144d    1
6db558535151ea18ca70a6892197db41    1
46a0ddce8f7ed2a8d9bd5edcbb925682    1
b1b35ff97723d9b7ade1c9c3cf48f770    0
                                   ..
defd0caeed6ea83d7d5fbdec013fe7d1    0
bc7a87ed5f9c2bca55f9f7a93da0b0c5    1
e2ca68ac1a6847f4a37f6c9c8ee8695b    0
47ad6454ede66c1c78fdaa9391dfc556    1
9928dfde50efcbdb2055da23dcdbc101    1
Name: radiant_win, Length: 39675, dtype: int64

0.8110833787000072
{'C': 0.75, 'tol': 0.25}


ValueError: X has 48 features per sample; expecting 245

In [9]:
X = pd.read_csv('../data/train_features.csv', index_col='match_id_hash')
y = pd.read_csv('../data/train_targets.csv', index_col='match_id_hash')['radiant_win'].map({True: 1, False: 0})

In [10]:
%%time
logistic_reg = LogisticRegression(class_weight='balanced', solver='lbfgs', max_iter=1500, n_jobs=-1, warm_start=True, intercept_scaling=1000)
skf = StratifiedKFold(n_splits=5, shuffle=True)
parameters = {'C': [0.75], 'tol': [0.25]}
gsc = GridSearchCV(estimator=logistic_reg, cv=skf, param_grid=parameters, n_jobs=-1, scoring='roc_auc')
gsc.fit(X, y)


Wall time: 3min 6s


GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=True),
             error_score='raise-deprecating',
             estimator=LogisticRegression(C=1.0, class_weight='balanced',
                                          dual=False, fit_intercept=True,
                                          intercept_scaling=1000, l1_ratio=None,
                                          max_iter=1500, multi_class='warn',
                                          n_jobs=-1, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=True),
             iid='warn', n_jobs=-1, param_grid={'C': [0.75], 'tol': [0.25]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='roc_auc', verbose=0)

In [14]:
X_test = pd.read_csv('../data/test_features.csv', index_col='match_id_hash')
y_test_pred = gsc.predict_proba(X_test.values)
y_test_pred

array([[0.46631374, 0.53368626],
       [0.04987757, 0.95012243],
       [0.0494491 , 0.9505509 ],
       ...,
       [0.5176153 , 0.4823847 ],
       [0.35476761, 0.64523239],
       [0.924952  , 0.075048  ]])

In [None]:
y_test_pred[:, 1]

In [None]:
df_submission = pd.DataFrame({'radiant_win_prob': y_test_pred[:, 1]}, index=X_test.index)
df_submission.astype(object).min()

In [None]:
df_submission.to_csv('submission.csv')


In [None]:
import datetime
submission_filename = 'submission_{}.csv'.format(
    datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
df_submission.to_csv(submission_filename)
print('Submission saved to {}'.format(submission_filename))

In [15]:
gsc.best_score_

0.8115903840526241

In [11]:
list(X.columns)

['game_time',
 'game_mode',
 'lobby_type',
 'objectives_len',
 'chat_len',
 'afk',
 'r_kills',
 'r_deaths',
 'r_assists',
 'r_denies',
 'r_gold',
 'r_lh',
 'r_xp',
 'r_health',
 'r_max_health',
 'r_max_mana',
 'r_level',
 'r_stuns',
 'r_creeps_stacked',
 'r_camps_stacked',
 'r_rune_pickups',
 'r_firstblood_claimed',
 'r_teamfight_participation',
 'r_towers_killed',
 'r_roshans_killed',
 'r_obs_placed',
 'r_sen_placed',
 'd_kills',
 'd_deaths',
 'd_assists',
 'd_denies',
 'd_gold',
 'd_lh',
 'd_xp',
 'd_health',
 'd_max_health',
 'd_max_mana',
 'd_level',
 'd_stuns',
 'd_creeps_stacked',
 'd_camps_stacked',
 'd_rune_pickups',
 'd_firstblood_claimed',
 'd_teamfight_participation',
 'd_towers_killed',
 'd_roshans_killed',
 'd_obs_placed',
 'd_sen_placed']

In [12]:
X.drop(columns=['r_health', 'r_max_health', 'r_max_mana', 'r_stuns', 'r_creeps_stacked', 'r_camps_stacked', 'r_rune_pickups', 
                'r_firstblood_claimed', 'r_obs_placed', 'r_sen_placed',
                'd_health', 'd_max_health', 'd_max_mana', 'd_stuns', 'd_creeps_stacked', 'd_camps_stacked', 'd_rune_pickups', 
                'd_firstblood_claimed', 'd_obs_placed', 'd_sen_placed'])

Unnamed: 0_level_0,game_time,game_mode,lobby_type,objectives_len,chat_len,afk,r_kills,r_deaths,r_assists,r_denies,...,d_deaths,d_assists,d_denies,d_gold,d_lh,d_xp,d_level,d_teamfight_participation,d_towers_killed,d_roshans_killed
match_id_hash,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
b9c57c450ce74a2af79c9ce96fac144d,658,4,0,3,10,0.0,16,4,14,28,...,16,3,16,13144,96,12925,30,1.500000,0,0
c96d629dc0c39f0c616d1949938a6ba6,1016,22,0,1,0,0.0,9,12,12,33,...,10,13,38,20819,165,27341,49,2.000000,0,0
df50f738c454d430a33b2bf90cb9ef1a,636,22,7,1,5,0.0,2,7,6,17,...,3,8,65,15938,177,16068,33,2.142857,0,0
7469e9440ea3d19b211a484647c7126e,1054,22,0,8,5,0.0,14,11,23,30,...,14,25,49,27107,218,25212,46,3.272727,2,0
0aa1c6d7630433a93fe42da5ac810368,1797,22,7,9,2,0.0,21,35,31,28,...,22,42,32,55254,510,65908,85,2.200000,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6066cc7417b43c749d551e123d00f0c8,1002,4,0,4,0,0.0,6,18,11,7,...,7,27,10,26154,239,26848,49,2.500000,0,0
defd0caeed6ea83d7d5fbdec013fe7d1,1783,22,0,8,23,0.0,19,32,32,52,...,21,55,36,51638,438,65289,83,2.718750,3,0
e2ca68ac1a6847f4a37f6c9c8ee8695b,643,22,7,1,23,0.0,4,20,3,26,...,4,11,42,17506,139,17487,37,1.450000,0,0
47ad6454ede66c1c78fdaa9391dfc556,2405,22,7,12,4,0.0,42,40,53,37,...,42,68,51,81561,849,107064,111,2.675000,2,0


In [12]:
X



Unnamed: 0_level_0,game_time,game_mode,lobby_type,objectives_len,chat_len,afk,r_kills,r_deaths,r_assists,r_denies,...,d_deaths,d_assists,d_denies,d_gold,d_lh,d_xp,d_level,d_teamfight_participation,d_towers_killed,d_roshans_killed
match_id_hash,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
b9c57c450ce74a2af79c9ce96fac144d,658,4,0,3,10,0.0,16,4,14,28,...,16,3,16,13144,96,12925,30,1.500000,0,0
c96d629dc0c39f0c616d1949938a6ba6,1016,22,0,1,0,0.0,9,12,12,33,...,10,13,38,20819,165,27341,49,2.000000,0,0
df50f738c454d430a33b2bf90cb9ef1a,636,22,7,1,5,0.0,2,7,6,17,...,3,8,65,15938,177,16068,33,2.142857,0,0
7469e9440ea3d19b211a484647c7126e,1054,22,0,8,5,0.0,14,11,23,30,...,14,25,49,27107,218,25212,46,3.272727,2,0
0aa1c6d7630433a93fe42da5ac810368,1797,22,7,9,2,0.0,21,35,31,28,...,22,42,32,55254,510,65908,85,2.200000,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6066cc7417b43c749d551e123d00f0c8,1002,4,0,4,0,0.0,6,18,11,7,...,7,27,10,26154,239,26848,49,2.500000,0,0
defd0caeed6ea83d7d5fbdec013fe7d1,1783,22,0,8,23,0.0,19,32,32,52,...,21,55,36,51638,438,65289,83,2.718750,3,0
e2ca68ac1a6847f4a37f6c9c8ee8695b,643,22,7,1,23,0.0,4,20,3,26,...,4,11,42,17506,139,17487,37,1.450000,0,0
47ad6454ede66c1c78fdaa9391dfc556,2405,22,7,12,4,0.0,42,40,53,37,...,42,68,51,81561,849,107064,111,2.675000,2,0
