![](http://yarnph.files.wordpress.com/2016/01/f88adce4-846c-4a90-a724-08306c9b692b-jpg.png?w=1920&h=768&crop=1)

# Importing Libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
pd.options.mode.chained_assignment = None  # default='warn'
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.linear_model import LogisticRegression

# Loading training data

In [None]:
# Read the two training tables and the hero attribute table from the
# ../input/dota2game/train directory.
train1, train9, hero_data = map(
    pd.read_csv,
    (
        '../input/dota2game/train/train1.csv',
        '../input/dota2game/train/train9.csv',
        '../input/dota2game/train/hero_data.csv',
    ),
)

In [None]:
# Read the two test tables from the ../input/dota2game/test directory.
test1, test9 = map(
    pd.read_csv,
    (
        '../input/dota2game/test/test1.csv',
        '../input/dota2game/test/test9.csv',
    ),
)

In [None]:
# One-hot encode the two categorical hero attributes, then remove one
# indicator per attribute ('agi' and 'Melee') so the remaining dummy
# columns are not redundant with each other.
encoded_heroes = pd.get_dummies(hero_data, columns=['primary_attr', 'attack_type'])
hero_data = encoded_heroes.drop(columns=['primary_attr_agi', 'attack_type_Melee'])

hero_data.head()

# Removing columns that have constant values

In [None]:
# Summary statistics for the numeric hero columns; comparing min, mean and
# max makes columns that never vary easy to spot.
summary_rows = ['min','mean', 'max']
check = hero_data.describe()
check.loc[summary_rows]

In [None]:
# Remove the three base stats listed here, then recompute the summary to
# confirm which numeric columns remain.
dropped_base_stats = ['base_health','base_mana','base_mana_regen']
hero_data = hero_data.drop(columns=dropped_base_stats)
check = hero_data.describe()
check.columns

In [None]:
# Show each hero id next to its raw roles string.
hero_data.loc[:, ['hero_id', 'roles']]

# Splitting roles so that each role becomes its own indicator column

In [None]:
# Build `datafull`: one float 0/1 indicator column per hero role, one row per
# hero, derived from the colon-separated `roles` strings in hero_data.
#
# Changes from the previous cell-by-cell implementation:
#   * no chained assignment (`frame[col][row] = 1`), which does not write
#     through to the frame when pandas copy-on-write is active;
#   * the missing-value placeholder produced by `expand=True` is removed by
#     testing for nulls, instead of dropping "column 0" and assuming the
#     placeholder happened to be discovered first;
#   * the indicators are computed in one vectorised call.
roles = hero_data.roles.str.split(":", expand=True)

# Column order is kept identical to the previous implementation (values of
# the last split column first, first-appearance order within each column),
# because later cells pass whole frames to the models and therefore depend
# on the position of these columns.
split_columns_last_first = [roles[col] for col in reversed(roles.columns)]
discovered_values = pd.concat(split_columns_last_first, ignore_index=True).unique()
role_names = [value for value in discovered_values if pd.notna(value)]

datafull = (
    hero_data.roles.str.get_dummies(sep=":")
    .reindex(columns=role_names, fill_value=0)
    .astype(float)  # the previous implementation produced float 0.0 / 1.0 columns
)

# Updating heroes table with details on each role

In [None]:
# Replace the raw roles string with the per-role indicator columns held in
# `datafull` (joined side by side on the row index).
heroes_without_roles = hero_data.drop(columns='roles')
hero_data = pd.concat([heroes_without_roles, datafull], axis=1)

# Train 1 and Test 1 analysis

![](http://www.pcgamesn.com/wp-content/uploads/2019/10/best-dota-2-heroes-group1-900x506.jpg)

# Update Train1 with details on the hero

In [None]:
# Display train1 as loaded, before any hero attribute columns are attached.
train1

In [None]:
# Pre-create a zero-filled float column on train1 for every hero_data column
# except the join key 'hero_id'; the following cell fills in the real values.
array_list = hero_data.columns
for column_name in array_list:
    if column_name == 'hero_id':
        continue
    train1[column_name] = np.zeros(train1.shape[0])
    

In [None]:
# Copy every hero attribute onto the matching train1 rows.
#
# This replaces a per-row loop that filtered hero_data once per training row
# and wrote each value individually with `.iat`. A single indexed lookup
# yields the same values without the Python-level double loop and without
# the in-place drop on a filtered slice.
hero_columns = [column for column in hero_data.columns if column != 'hero_id']

# First occurrence per hero_id, matching the previous `iloc[0, ...]` lookup.
hero_lookup = (
    hero_data.drop_duplicates(subset='hero_id')
    .set_index('hero_id')[hero_columns]
)

# The loop used to fail with an IndexError on an unknown hero; keep failing,
# but say which ids are the problem.
unknown_ids = train1.loc[~train1['hero_id'].isin(hero_lookup.index), 'hero_id'].unique()
if len(unknown_ids):
    raise ValueError(f"hero_id values not found in hero_data: {unknown_ids.tolist()}")

# float64 matches the zero-filled columns the values were previously written into.
hero_values = hero_lookup.reindex(train1['hero_id']).to_numpy(dtype='float64')
for position, column in enumerate(hero_columns):
    train1[column] = hero_values[:, position]

In [None]:
# First rows of train1 after the hero attribute columns have been filled in.
train1.head()

# Removing unnecessary columns

In [None]:
# Discard the 'id' column from train1.
train1 = train1.drop(columns=['id'])

In [None]:
# List the columns currently present in train1.
train1.columns

In [None]:
# Annotated correlation heatmap across all train1 columns.
fig, ax = plt.subplots(figsize=(20, 10))
sns.heatmap(train1.corr(), cmap="viridis", annot=True, linewidths=0.1, ax=ax)

# Training model to predict wins

In [None]:
# Feature matrix (user, hero and hero-attribute columns) and the num_wins
# target for train1, split into train/hold-out parts with a fixed seed.
feature_columns = [
    'user_id', 'hero_id', 'num_games',
    'base_health_regen', 'base_armor', 'base_magic_resistance',
    'base_attack_min', 'base_attack_max',
    'base_strength', 'base_agility', 'base_intelligence',
    'strength_gain', 'agility_gain', 'intelligence_gain',
    'attack_range', 'projectile_speed', 'attack_rate',
    'move_speed', 'turn_rate',
    'primary_attr_int', 'primary_attr_str', 'attack_type_Ranged',
    'Disabler', 'Nuker', 'Pusher', 'Escape', 'Initiator',
    'Jungler', 'Durable', 'Support', 'Carry',
]
X = train1[feature_columns]
Y = train1[['num_wins']]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

In [None]:
# Fit a default XGBRegressor on the num_wins training split and predict the
# hold-out rows.
model = XGBRegressor()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Actual vs. predicted values, side by side.
actual_wins = Y_test['num_wins']
ActVPred = pd.DataFrame({'Actual': actual_wins, 'Predicted': Y_pred})
print(ActVPred)

# Hold-out error metrics for the XGBoost regressor.
wins_mae = metrics.mean_absolute_error(actual_wins, Y_pred)
wins_mse = metrics.mean_squared_error(actual_wins, Y_pred)
print('Mean Absolute Error:', wins_mae)
print('Mean Squared Error:', wins_mse)
print('Root Mean Squared Error:', np.sqrt(wins_mse))

# Training model to predict kda_ratio

In [None]:
# Same train1 feature matrix as for num_wins, now paired with the kda_ratio
# target and split with the same fixed seed.
feature_columns = [
    'user_id', 'hero_id', 'num_games',
    'base_health_regen', 'base_armor', 'base_magic_resistance',
    'base_attack_min', 'base_attack_max',
    'base_strength', 'base_agility', 'base_intelligence',
    'strength_gain', 'agility_gain', 'intelligence_gain',
    'attack_range', 'projectile_speed', 'attack_rate',
    'move_speed', 'turn_rate',
    'primary_attr_int', 'primary_attr_str', 'attack_type_Ranged',
    'Disabler', 'Nuker', 'Pusher', 'Escape', 'Initiator',
    'Jungler', 'Durable', 'Support', 'Carry',
]
X = train1[feature_columns]
Y = train1[['kda_ratio']]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

In [None]:
# Fit a default XGBRegressor on the kda_ratio training split and predict the
# hold-out rows.
model1 = XGBRegressor()
model1.fit(X_train, Y_train)
Y_pred = model1.predict(X_test)

# Actual vs. predicted values, side by side.
actual_kda = Y_test['kda_ratio']
ActVPred = pd.DataFrame({'Actual': actual_kda, 'Predicted': Y_pred})
print(ActVPred)

# Hold-out error metrics for the XGBoost regressor.
kda_mae = metrics.mean_absolute_error(actual_kda, Y_pred)
kda_mse = metrics.mean_squared_error(actual_kda, Y_pred)
print('Mean Absolute Error:', kda_mae)
print('Mean Squared Error:', kda_mse)
print('Root Mean Squared Error:', np.sqrt(kda_mse))

# Predicting test1

In [None]:
# Discard the 'id' column from test1.
test1 = test1.drop(columns=['id'])

In [None]:
# Attach every hero attribute (all hero_data columns except the join key
# 'hero_id') to the matching test1 rows.
#
# This replaces a zero-initialisation pass followed by a per-row loop that
# filtered hero_data once per row and wrote each value individually with
# `.iat`. A single indexed lookup yields the same columns, in the same
# order, without the Python-level double loop.
hero_columns = [column for column in hero_data.columns if column != 'hero_id']

# First occurrence per hero_id, matching the previous `iloc[0, ...]` lookup.
hero_lookup = (
    hero_data.drop_duplicates(subset='hero_id')
    .set_index('hero_id')[hero_columns]
)

# The loop used to fail with an IndexError on an unknown hero; keep failing,
# but say which ids are the problem.
unknown_ids = test1.loc[~test1['hero_id'].isin(hero_lookup.index), 'hero_id'].unique()
if len(unknown_ids):
    raise ValueError(f"hero_id values not found in hero_data: {unknown_ids.tolist()}")

# float64 matches the zero-filled columns the values were previously written into.
hero_values = hero_lookup.reindex(test1['hero_id']).to_numpy(dtype='float64')
for position, column in enumerate(hero_columns):
    test1[column] = hero_values[:, position]

In [None]:
# Transposed preview of the first test1 rows: one displayed row per column,
# which is easier to read now that the frame is wide.
test1.head().T

# Test1 predictions on number of wins and KDA ratios

In [None]:
# Predict num_wins and kda_ratio for test1 with the two train1 models.
#
# The features are selected by name, in the order of the training matrix X,
# rather than relying on test1 happening to have exactly the training
# columns in the same positions.
test1_features = test1[list(X.columns)]

# Wins are whole numbers: round to the nearest integer. A bare astype('int32')
# truncates toward zero and biases every prediction downward.
Y_num_wins = np.rint(model.predict(test1_features)).astype('int32')

# kda_ratio is a continuous quantity, so the prediction is kept as a float
# (previously it was truncated to an integer, discarding the fraction).
Y_kda_ratio = model1.predict(test1_features)

# Train 9 and Test 9 analysis

![](http://estnn.com/wp-content/uploads/2019/04/Dota2-Heroes-1-1024x576.jpg)

In [None]:
# First rows of train9 as loaded, before hero attribute columns are attached.
train9.head()

In [None]:
# Attach every hero attribute (all hero_data columns except the join key
# 'hero_id') to the matching train9 rows.
#
# This replaces a zero-initialisation pass followed by a per-row loop that
# filtered hero_data once per row and wrote each value individually with
# `.iat`. A single indexed lookup yields the same columns, in the same
# order, without the Python-level double loop.
hero_columns = [column for column in hero_data.columns if column != 'hero_id']

# First occurrence per hero_id, matching the previous `iloc[0, ...]` lookup.
hero_lookup = (
    hero_data.drop_duplicates(subset='hero_id')
    .set_index('hero_id')[hero_columns]
)

# The loop used to fail with an IndexError on an unknown hero; keep failing,
# but say which ids are the problem.
unknown_ids = train9.loc[~train9['hero_id'].isin(hero_lookup.index), 'hero_id'].unique()
if len(unknown_ids):
    raise ValueError(f"hero_id values not found in hero_data: {unknown_ids.tolist()}")

# float64 matches the zero-filled columns the values were previously written into.
hero_values = hero_lookup.reindex(train9['hero_id']).to_numpy(dtype='float64')
for position, column in enumerate(hero_columns):
    train9[column] = hero_values[:, position]

# Removing unnecessary columns

In [None]:
# Discard the 'id' column from train9.
train9 = train9.drop(columns=['id'])

In [None]:
# List the columns currently present in train9.
train9.columns

In [None]:
# Annotated correlation heatmap across all train9 columns.
fig, ax = plt.subplots(figsize=(20, 10))
sns.heatmap(train9.corr(), cmap="viridis", annot=True, linewidths=0.1, ax=ax)

# Training model to predict wins

In [None]:
# Feature matrix (user, hero and hero-attribute columns) and the num_wins
# target for train9, split into train/hold-out parts with a fixed seed.
feature_columns = [
    'user_id', 'hero_id', 'num_games',
    'base_health_regen', 'base_armor', 'base_magic_resistance',
    'base_attack_min', 'base_attack_max',
    'base_strength', 'base_agility', 'base_intelligence',
    'strength_gain', 'agility_gain', 'intelligence_gain',
    'attack_range', 'projectile_speed', 'attack_rate',
    'move_speed', 'turn_rate',
    'primary_attr_int', 'primary_attr_str', 'attack_type_Ranged',
    'Disabler', 'Nuker', 'Pusher', 'Escape', 'Initiator',
    'Jungler', 'Durable', 'Support', 'Carry',
]
X = train9[feature_columns]
Y = train9[['num_wins']]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

In [None]:
# Fit a default XGBRegressor on the num_wins training split and predict the
# hold-out rows.
model = XGBRegressor()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Actual vs. predicted values, side by side.
actual_wins = Y_test['num_wins']
ActVPred = pd.DataFrame({'Actual': actual_wins, 'Predicted': Y_pred})
print(ActVPred)

# Hold-out error metrics for the XGBoost regressor.
wins_mae = metrics.mean_absolute_error(actual_wins, Y_pred)
wins_mse = metrics.mean_squared_error(actual_wins, Y_pred)
print('Mean Absolute Error:', wins_mae)
print('Mean Squared Error:', wins_mse)
print('Root Mean Squared Error:', np.sqrt(wins_mse))

# Training to predict KDA ratios

In [None]:
# Same train9 feature matrix as for num_wins, now paired with the kda_ratio
# target and split with the same fixed seed.
feature_columns = [
    'user_id', 'hero_id', 'num_games',
    'base_health_regen', 'base_armor', 'base_magic_resistance',
    'base_attack_min', 'base_attack_max',
    'base_strength', 'base_agility', 'base_intelligence',
    'strength_gain', 'agility_gain', 'intelligence_gain',
    'attack_range', 'projectile_speed', 'attack_rate',
    'move_speed', 'turn_rate',
    'primary_attr_int', 'primary_attr_str', 'attack_type_Ranged',
    'Disabler', 'Nuker', 'Pusher', 'Escape', 'Initiator',
    'Jungler', 'Durable', 'Support', 'Carry',
]
X = train9[feature_columns]
Y = train9[['kda_ratio']]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

In [None]:
# Fit a default XGBRegressor on the kda_ratio training split and predict the
# hold-out rows.
model1 = XGBRegressor()
model1.fit(X_train, Y_train)
Y_pred = model1.predict(X_test)

# Actual vs. predicted values, side by side.
actual_kda = Y_test['kda_ratio']
ActVPred = pd.DataFrame({'Actual': actual_kda, 'Predicted': Y_pred})
print(ActVPred)

# Hold-out error metrics for the XGBoost regressor.
kda_mae = metrics.mean_absolute_error(actual_kda, Y_pred)
kda_mse = metrics.mean_squared_error(actual_kda, Y_pred)
print('Mean Absolute Error:', kda_mae)
print('Mean Squared Error:', kda_mse)
print('Root Mean Squared Error:', np.sqrt(kda_mse))

# Predicting Test 9

In [None]:
# Set the ground-truth targets aside, then remove them (and the row id) from
# test9 so that only model inputs remain.
Y_actual_wins = test9[['num_wins']]
Y_actual_kdaratio = test9[['kda_ratio']]
test9 = test9.drop(columns=['num_wins', 'kda_ratio', 'id'])

# Attach every hero attribute (all hero_data columns except the join key
# 'hero_id') to the matching test9 rows.
#
# This replaces a zero-initialisation pass followed by a per-row loop that
# filtered hero_data once per row and wrote each value individually with
# `.iat`. A single indexed lookup yields the same columns, in the same
# order, without the Python-level double loop.
hero_columns = [column for column in hero_data.columns if column != 'hero_id']

# First occurrence per hero_id, matching the previous `iloc[0, ...]` lookup.
hero_lookup = (
    hero_data.drop_duplicates(subset='hero_id')
    .set_index('hero_id')[hero_columns]
)

# The loop used to fail with an IndexError on an unknown hero; keep failing,
# but say which ids are the problem.
unknown_ids = test9.loc[~test9['hero_id'].isin(hero_lookup.index), 'hero_id'].unique()
if len(unknown_ids):
    raise ValueError(f"hero_id values not found in hero_data: {unknown_ids.tolist()}")

# float64 matches the zero-filled columns the values were previously written into.
hero_values = hero_lookup.reindex(test9['hero_id']).to_numpy(dtype='float64')
for position, column in enumerate(hero_columns):
    test9[column] = hero_values[:, position]


# Test9 predictions on number of wins and KDA ratios

In [None]:
# Predict num_wins and kda_ratio for test9 with the two train9 models.
#
# The features are selected by name, in the order of the training matrix X,
# rather than relying on test9 happening to have exactly the training
# columns in the same positions.
test9_features = test9[list(X.columns)]

Y_num_wins = model.predict(test9_features)

Y_kda_ratio = model1.predict(test9_features)

In [None]:
# --- num_wins: compare the test9 ground truth with the model predictions ---
print("------------------------------Predicting number of Wins------------------------------")
wins_actual = Y_actual_wins['num_wins']
ActVPred = pd.DataFrame({'Actual': wins_actual, 'Predicted': Y_num_wins})
print(ActVPred)

# Error metrics for the num_wins predictions (XGBoost regressor).
wins_mae = metrics.mean_absolute_error(wins_actual, Y_num_wins)
wins_mse = metrics.mean_squared_error(wins_actual, Y_num_wins)
print('Mean Absolute Error:', wins_mae)
print('Mean Squared Error:', wins_mse)
print('Root Mean Squared Error:', np.sqrt(wins_mse))

# --- kda_ratio: same comparison for the second model ---
print("------------------------------Predicting KDA Ratios------------------------------")
kda_actual = Y_actual_kdaratio['kda_ratio']
ActVPred = pd.DataFrame({'Actual': kda_actual, 'Predicted': Y_kda_ratio})
print(ActVPred)

# Error metrics for the kda_ratio predictions (XGBoost regressor).
kda_mae = metrics.mean_absolute_error(kda_actual, Y_kda_ratio)
kda_mse = metrics.mean_squared_error(kda_actual, Y_kda_ratio)
print('Mean Absolute Error:', kda_mae)
print('Mean Squared Error:', kda_mse)
print('Root Mean Squared Error:', np.sqrt(kda_mse))

