# Predict match outcomes by first 5 minutes

## Content

1. Intro
2. Setup
3. Prepare data
4. Train model & choose parameters
5. Test model

## Intro

This second part is dedicated to building an ML model that predicts Dota 2 match outcomes.

___

## Setup

In [1]:
import numpy as np
import pandas as pd

In [None]:
import torch

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Raise the pandas display limits so wide and long frames are shown with fewer
# truncated rows/columns.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

___

## Prepare data

In [4]:
# Load the match table; the path is relative to the notebook's working directory.
# Later cells read the `match_id`, `duration`, `first_blood_time` and
# `radiant_win` columns from this frame.
df_match = pd.read_csv('../archive/match.csv')

In [5]:
# Outlier bounds for match duration: anything outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] falls outside lowerRange/upperRange.
Q1, Q3 = np.quantile(df_match['duration'], [0.25, 0.75])
IQR = Q3 - Q1
fence_width = 1.5 * IQR
lowerRange = Q1 - fence_width
upperRange = Q3 + fence_width

In [6]:
# Keep only the matches whose duration lies between the two IQR bounds
# (Series.between includes both endpoints by default).
duration_in_range = df_match['duration'].between(lowerRange, upperRange)
dfIqr = df_match[duration_in_range]

In [7]:
# From the outlier-filtered matches keep those with duration above 900
# (15 minutes if `duration` is in seconds — TODO confirm) and only the match id,
# first-blood time and radiant_win columns.
analysis_columns = ['match_id', 'first_blood_time', 'radiant_win']
df_match_for_analysis = dfIqr.loc[dfIqr['duration'] > 900, analysis_columns]

In [8]:
# Load the per-match time-series table; the path is relative to the notebook's
# working directory. Later cells read `match_id`, `times` and the
# `gold_t_*` / `lh_t_*` / `xp_t_*` columns from it.
df_player_time = pd.read_csv('../archive/player_time.csv')

In [9]:
# Keep only the samples whose `times` value is strictly below 301.
# The explicit .copy() makes this an independent frame: later cells add columns
# to it, and doing that on a bare filtered slice is chained assignment
# (SettingWithCopyWarning), which only went unnoticed because warnings are silenced.
df_player_time_5_min = df_player_time[df_player_time['times'] < 301].copy()

In [10]:
# Build the six team-level totals (gold, lh, xp for each side) by adding up the
# five per-slot columns of each team: column suffixes 0-4 feed the `_r` totals
# and 128-132 feed the `_d` totals.
# skipna=False keeps the semantics of chained `+`: a missing value in any slot
# column makes that row's total missing too.
# .assign() returns a new frame instead of writing columns into a filtered
# slice, so the cell is idempotent and does not depend on warnings being silenced.
team_slots = {'r': range(0, 5), 'd': range(128, 133)}
team_totals = {
    f'{stat}_t_{team}': df_player_time_5_min[
        [f'{stat}_t_{slot}' for slot in slots]
    ].sum(axis=1, skipna=False)
    for team, slots in team_slots.items()
    for stat in ('gold', 'lh', 'xp')
}
df_player_time_5_min = df_player_time_5_min.assign(**team_totals)

In [11]:
# Narrow the frame to the join key, the sample time and the six team totals.
team_total_columns = ['gold_t_r', 'lh_t_r', 'xp_t_r', 'gold_t_d', 'lh_t_d', 'xp_t_d']
df_player_time_5_min = df_player_time_5_min[['match_id', 'times'] + team_total_columns]

In [12]:
# Collapse to one row per match by summing every remaining column over that
# match's rows; `match_id` becomes the index.
# NOTE(review): `times` is summed as well, which does not look like a
# meaningful feature — confirm it is dropped before modelling.
df_player_time_5_min_sum = df_player_time_5_min.groupby(by='match_id').sum()

In [13]:
# Attach the per-match totals to the selected matches. The join is an inner
# join on match_id, so a match missing from either side is dropped.
df_match_5_min = pd.merge(
    df_match_for_analysis,
    df_player_time_5_min_sum,
    how='inner',
    on='match_id',
)

In [14]:
# Load the objectives event log (path relative to the notebook's working
# directory) and keep only events whose `time` is strictly below 301.
df_objectives = pd.read_csv('../archive/objectives.csv')
# The explicit .copy() makes the filtered frame independent of df_objectives:
# later cells add flag columns to it, and doing that on a bare filtered slice
# is chained assignment (SettingWithCopyWarning).
df_objectives_5_min = df_objectives[df_objectives['time'] < 301].copy()

In [15]:
def team_r(row, message):
    """Flag an objective row as credited to the low-numbered (`player1` < 5) side.

    Args:
        row: Mapping-like record exposing 'subtype' and 'player1'.
        message: Event subtype to match against row['subtype'].

    Returns:
        None when the row's subtype differs from `message`; otherwise 1 if
        row['player1'] is below 5 and 0 if it is not.
    """
    if row['subtype'] != message:
        return None
    # NOTE(review): every value below 5 counts, including negative ones —
    # confirm `player1` never carries a sentinel such as -1.
    return 1 if row['player1'] < 5 else 0

In [16]:
def team_d(row, message):
    """Flag an objective row as credited to the high-numbered (`player1` > 4) side.

    Args:
        row: Mapping-like record exposing 'subtype' and 'player1'.
        message: Event subtype to match against row['subtype'].

    Returns:
        None when the row's subtype differs from `message`; otherwise 1 if
        row['player1'] is above 4 and 0 if it is not.
    """
    if row['subtype'] != message:
        return None
    return 1 if row['player1'] > 4 else 0

In [17]:
# For each objective type, add one flag column per team. team_r / team_d return
# 1 or 0 on rows whose subtype matches the message and None on every other row;
# those missing values are filled in a later cell.
objective_messages = {
    'roshan': 'CHAT_MESSAGE_ROSHAN_KILL',
    'firstblood': 'CHAT_MESSAGE_FIRSTBLOOD',
    'tower': 'CHAT_MESSAGE_TOWER_KILL',
}
team_flaggers = {'radiant': team_r, 'dire': team_d}
# Iteration order yields roshan_radiant, roshan_dire, firstblood_radiant,
# firstblood_dire, tower_radiant, tower_dire.
objective_flags = {
    f'{objective}_{team}': df_objectives_5_min.apply(flagger, axis=1, args=(message,))
    for objective, message in objective_messages.items()
    for team, flagger in team_flaggers.items()
}
# .assign() returns a new frame rather than writing columns into a filtered
# slice, so the cell is idempotent and does not depend on warnings being silenced.
df_objectives_5_min = df_objectives_5_min.assign(**objective_flags)

In [18]:
# Narrow the frame to the join key and the six objective flag columns.
objective_flag_columns = [
    'roshan_radiant', 'roshan_dire',
    'firstblood_radiant', 'firstblood_dire',
    'tower_radiant', 'tower_dire',
]
df_objectives_5_min = df_objectives_5_min[['match_id'] + objective_flag_columns]

In [19]:
# Replace the missing flags (rows whose subtype did not match a given objective)
# with 0. Rebinding the name instead of using inplace=True avoids mutating a
# frame derived from a filtered slice and keeps the cell safe to re-run.
df_objectives_5_min = df_objectives_5_min.fillna(0)

In [20]:
# Collapse to one row per match: summing the 0/1 flags gives a per-match count
# for each objective/team column; `match_id` becomes the index.
df_objectives_5_min_sum = df_objectives_5_min.groupby(by='match_id').sum()

In [21]:
# Combine the match-level features with the per-match objective counts.
# NOTE(review): this is an inner join, so matches with no objective rows before
# time 301 are dropped rather than kept with zero counts — confirm that is
# intended (a left join followed by fillna(0) would keep them).
df = pd.merge(
    df_match_5_min,
    df_objectives_5_min_sum,
    how='inner',
    on='match_id',
)

In [22]:
# Preview the first five rows of the assembled feature table.
df.head(n=5)

Unnamed: 0,match_id,first_blood_time,radiant_win,times,gold_t_r,lh_t_r,xp_t_r,gold_t_d,lh_t_d,xp_t_d,roshan_radiant,roshan_dire,firstblood_radiant,firstblood_dire,tower_radiant,tower_dire
0,0,1,True,900,17971,114,18471,20637,200,18134,0.0,0.0,1.0,0.0,0.0,0.0
1,1,221,False,900,15743,163,15001,19164,169,17249,0.0,0.0,1.0,0.0,0.0,0.0
2,2,190,False,900,15690,147,17671,13179,100,16446,0.0,0.0,0.0,1.0,0.0,0.0
3,3,40,False,900,14252,88,15816,15398,121,16217,0.0,0.0,0.0,1.0,0.0,0.0
4,4,58,True,900,19958,187,16293,15805,123,14025,0.0,0.0,1.0,0.0,0.0,0.0


___

## Train model