In [2]:
from neural_network import NeuralNetwork
from data import Instance, DataSet
from miscellaneous import initialize_data, plot_graph, plot_points
from activation_function import *
import matplotlib.pyplot as plt
import pandas as pd
import copy
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix
import seaborn as sns
import warnings
import itertools
import time
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [3]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [42]:
start_time = time.time()

# Carregando dados
print('Carregando dados ...', end = '')
participants = pd.read_csv('../dataset/participants.csv')
champs = pd.read_csv('../dataset/champs.csv')
mtime = pd.read_csv('../dataset/matches_MODIFIED.csv')
stats1 = pd.read_csv('../dataset/stats1_MODIFIED.csv')
stats2 = pd.read_csv('../dataset/stats2_MODIFIED.csv')
stats = pd.concat([stats1, stats2])
print( 28*'.'+' OK')

# Removendo atributos desnecessários
print('Removendo atributos desnecessários[1] ...', end = '')
participants.drop(columns = ['ss1', 'ss2', 'role', 'position', 'player'], inplace = True)
print( 7*'.'+' OK')

# Removendo atributos desnecessários
print('Removendo atributos desnecessários[2] ...', end = '')
stats.drop(columns = ['trinket', 'assists', 'firstblood'], inplace = True)
print( 7*'.'+' OK')

participants.set_index('id', inplace = True)
stats.set_index('id', inplace = True)
ioData = pd.DataFrame.copy(participants)
ioData = ioData.join(pd.DataFrame.copy(stats))

# Reconstruindo linhas apropriadamente
print('Reconstruindo linhas apropriadamente ...', end = '')
final_frame = pd.DataFrame()
p = 0
for i in range(2):
    for j in range(5):
        p = ioData[i*5+j::10].rename( columns = {'championid': 'championid_'+str(i)+str(j), 'matchid': 'matchid_'+str(i)+str(j),
                                                      'win': 'win_'+str(i)+str(j), 'kills': 'kills_'+str(i)+str(j), 'deaths': 'deaths_'+str(i)+str(j)} )
        if(final_frame.empty):
            final_frame = pd.DataFrame.copy(p).reset_index(drop = True)
        else:
            final_frame = final_frame.join(pd.DataFrame.copy(p).reset_index(drop = True))
ioData = final_frame
print( 8*'.'+' OK')


# Reconstruindo linhas apropriadamente
print('Reconstruindo linhas apropriadamente ...', end = '')


ioData['matchid'] = ioData['matchid_00']
ioData['win0'] = ioData['win_00']
ioData['win1'] = ioData['win_10']
ioData['kills0'] = ioData['kills_00'] + ioData['kills_01'] + ioData['kills_02'] + ioData['kills_03'] + ioData['kills_04']
ioData['kills1'] = ioData['kills_10'] + ioData['kills_11'] + ioData['kills_12'] + ioData['kills_13'] + ioData['kills_14']
ioData['deaths0'] = ioData['deaths_00'] + ioData['deaths_01'] + ioData['deaths_02'] + ioData['deaths_03'] + ioData['deaths_04']
ioData['deaths1'] = ioData['deaths_10'] + ioData['deaths_11'] + ioData['deaths_12'] + ioData['deaths_13'] + ioData['deaths_14']
ioData['kd_ratio0'] = ioData['kills0']/ioData['deaths0']
ioData['kd_ratio1'] = ioData['kills1']/ioData['deaths1']

for i in range(2):
    for j in range(5):
        n = str(i)+str(j)
        ioData.drop(columns = ['win_'+n, 'kills_'+n, 'deaths_'+n, 'matchid_'+n], inplace = True)
    ioData.drop(columns = ['deaths'+str(i), 'kills'+str(i)], inplace = True)

ioData = ioData.join(mtime)
ioData.drop(columns = ['id', 'matchid'], inplace = True)

print( 8*'.'+' OK')

min_max_s = MinMaxScaler(feature_range = (0.0, 1.0))
for col in ['duration']:
    ioData[col] = min_max_s.fit_transform(ioData[col].values.reshape(-1, 1))

kd0 = pd.Series.copy(ioData['kd_ratio0'])
kd0[ kd0 > 1.0 ] = 1.0
kd0[ kd0 < 1.0 ] = 0.0
kd1 = 1.0 - kd0
win0 = ioData['win0']
win1 = ioData['win1']

durationTerm = 1.0 - ioData['duration']
killTerm = 0.5 + 0.5 * (ioData['kd_ratio0'] * kd1 + ioData['kd_ratio1'] * kd0) * (win0 * kd0 + win1 * kd1) - 0.5 * (ioData['kd_ratio0'] * kd1 + ioData['kd_ratio1'] * kd0) * (win0 * kd1 + win1 * kd0)
    
ioData['dificult'] = durationTerm * 0.4 + killTerm * 0.6
ioData['dificult0'] = 0.5 + 0.5*( ioData['dificult'] * ioData['win1'] - ioData['dificult'] * ioData['win0']  )
ioData['dificult1'] = 0.5 + 0.5*( ioData['dificult'] * ioData['win0'] - ioData['dificult'] * ioData['win1']  )
ioData.drop(columns = ['dificult', 'win0', 'win1' ], inplace = True)


champs = champs.sort_values(by = ['name'])
champs_name = champs['name'].tolist()
champs = champs.sort_values(by = ['id'])
champs.set_index('id', inplace = True)


for i in range(2):
    for j in range(5):
        col = 'championid_'+str(i)+str(j)
        ioData[col] = ioData[col].replace(champs['name'])
        
elapsed_time = time.time() - start_time
print('Tempo total ' + 33*'.' + ' ' + time.strftime("%M:%S", time.gmtime(elapsed_time)))
                                                       
#champ_stats.head()
#champs.head()
#display(participants)
#display(frame)
display(ioData)

#champs['new_id'] = champs.index
#display(champs)

Carregando dados ............................... OK
Removendo atributos desnecessários[1] .......... OK
Removendo atributos desnecessários[2] .......... OK
Reconstruindo linhas apropriadamente ........... OK
Reconstruindo linhas apropriadamente ........... OK
Tempo total ................................. 00:18


Unnamed: 0,championid_00,championid_01,championid_02,championid_03,championid_04,championid_10,championid_11,championid_12,championid_13,championid_14,kd_ratio0,kd_ratio1,duration,dificult0,dificult1
0,Warwick,Nami,Draven,Fiora,Viktor,Skarner,Galio,Ahri,Jinx,VelKoz,0.512821,1.950000,0.358050,0.855313,0.144687
1,Ziggs,Cassiopeia,Yasuo,Twisted Fate,Evelynn,Caitlyn,Wukong,Syndra,Karma,Warwick,1.031250,0.969697,0.313060,0.641933,0.358067
2,Draven,Janna,Lux,Amumu,Vayne,Ahri,Vi,Caitlyn,Nami,Twitch,0.625000,1.600000,0.269111,0.889928,0.110072
3,LeBlanc,Jinx,Warwick,Poppy,Zed,Tristana,Gragas,Master Yi,Rakan,Lissandra,0.923077,1.083333,0.367423,0.914977,0.085023
4,Orianna,Urgot,Wukong,Caitlyn,Zac,Lucian,Warwick,Karma,Darius,Morgana,1.085714,0.846154,0.390960,0.644885,0.355115
5,Miss Fortune,Ahri,Warwick,Thresh,Garen,Nami,Tryndamere,Lee Sin,Ziggs,Yasuo,1.833333,0.545455,0.292231,0.126628,0.873372
6,Master Yi,Karthus,Yasuo,Lucian,Taric,Kassadin,Nasus,Leona,Twitch,Warwick,0.622222,1.607143,0.291398,0.885054,0.114946
7,Master Yi,Thresh,Jinx,Lucian,Wukong,Darius,Ezreal,VelKoz,Caitlyn,Warwick,0.580645,1.722222,0.307019,0.875693,0.124307
8,Lux,Ashe,Zilean,Jarvan IV,Warwick,Thresh,LeBlanc,Teemo,Lee Sin,Caitlyn,0.820513,1.218750,0.350969,0.902883,0.097117
9,Illaoi,Sejuani,KogMaw,Janna,Akali,Lissandra,Lucian,Blitzcrank,Renekton,Warwick,1.600000,0.625000,0.281400,0.112530,0.887470


In [51]:
new_io = pd.DataFrame.copy(ioData)
new_columns = pd.DataFrame()

# One-hot encoding para role e position
print('One-hot encoding para role e position ...', end = '')
for i in range(2):
    mul = 1.0 if i == 0 else -1.0
    for j in range(5):
        if( i == 0 and j == 0):
            new_columns = pd.DataFrame.copy(pd.get_dummies( new_io['championid_'+str(i)+str(j)], prefix = ''))
        else:
            new_columns = new_columns + mul*pd.DataFrame.copy(pd.get_dummies( new_io['championid_'+str(i)+str(j)], prefix = ''))
        new_io.drop(columns = ['championid_'+str(i)+str(j)], inplace = True)

new_io = pd.concat([new_io, new_columns], axis=1)

params = [ '_'+name for name in champs_name ]
new_io = new_io[ [k for k in params if k in new_io.columns.tolist()] + new_io.columns.tolist()[0:5] ]
new_io.drop(columns = ['kd_ratio0', 'kd_ratio1', 'duration'], inplace = True)

display(new_io)

One-hot encoding para role e position ...

Unnamed: 0,_Aatrox,_Ahri,_Akali,_Alistar,_Amumu,_Anivia,_Annie,_Ashe,_Aurelion Sol,_Azir,...,_Xin Zhao,_Yasuo,_Yorick,_Zac,_Zed,_Ziggs,_Zilean,_Zyra,dificult0,dificult1
0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.855313,0.144687
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.641933,0.358067
2,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.889928,0.110072
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.914977,0.085023
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.644885,0.355115
5,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-1.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.126628,0.873372
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.885054,0.114946
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.875693,0.124307
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.902883,0.097117
9,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.112530,0.887470


In [40]:
inp = pd.DataFrame.copy(ioData)

min_max_s = MinMaxScaler(feature_range = (0.0, 1.0))

for col in inp:
    inp[col] = min_max_s.fit_transform(inp[col].values.reshape(-1, 1))
    
display(inp)

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [None]:
# Reconstruindo linhas apropriadamente
print('Reconstruindo linhas apropriadamente ...', end = '')
final_frame = pd.DataFrame()
p = 0
for i in range(2):
    for j in range(5):
        p = participants[i*5+j::10].rename( columns = {'championid': 'championid_'+str(i)+str(j), 'matchid': 'matchid'} )
        if i + j != 0:
            p.drop(columns = ['matchid'], inplace = True)
        if(final_frame.empty):
            final_frame = pd.DataFrame.copy(p).reset_index(drop = True)
        else:
            final_frame = final_frame.join(pd.DataFrame.copy(p).reset_index(drop = True))
participants = final_frame
print( 8*'.'+' OK')
        
elapsed_time = time.time() - start_time
print('Tempo total ' + 33*'.' + ' ' + time.strftime("%M:%S", time.gmtime(elapsed_time)))

In [None]:
# Reconstruindo linhas apropriadamente
print('Reconstruindo linhas apropriadamente ...', end = '')
final_frame = pd.DataFrame()
p = 0
for i in range(2):
    for j in range(5):
        p = participants[i*5+j::10].rename( columns = {'championid': 'championid_'+str(i)+str(j), 'role': 'role_'+str(i)+str(j), 'position': 'position_'+str(i)+str(j)} )
        if(final_frame.empty):
            final_frame = pd.DataFrame.copy(p).reset_index(drop = True)
        else:
            final_frame = final_frame.join(pd.DataFrame.copy(p).reset_index(drop = True))
participants = final_frame
print( 8*'.'+' OK')
    
# Substituindo IDs por nomes dos champions
print('Substituindo IDs por nomes dos champions ...', end = '')
for i in range(2):
    for j in range(5):
        s = champs.set_index('id')['name']
        participants['championid_'+str(i)+str(j)] = participants['championid_'+str(i)+str(j)].replace(s)
        print('\rSubstituindo IDs por nomes dos champions ....... ' + str(i*5+j+1) + '/10', end = '')
print('\rSubstituindo IDs por nomes dos champions ...', end = '')
print( 4*'.'+' OK       ')
        
# Juntando tabelas
print('Juntando tabelas ...', end = '')
params = ['hp', 'hp5', 'mp', 'mp5', 'ad', 'ar', 'as', 'mr', 'ms', 'range']
s = champ_stats.set_index('name')
for i in range(2):
    for j in range(5):
        for param in params:
            participants[param+'_'+str(i)+str(j)] = participants['championid_'+str(i)+str(j)]
            participants[param+'_'+str(i)+str(j)] = participants[param+'_'+str(i)+str(j)].replace(s[param])
            print('\rJuntando tabelas ...' + 28*'.' + ' ' + str(i*5+j+1) + '/10 - ' + param + '    ', end = '')
print('\rJuntando tabelas ...', end = '')
print( 28*'.'+' OK               ')

# Removendo o nome dos champios
print('Removendo o nome dos champios ...', end = '')
for i in range(2):
    for j in range(5):
        participants.drop(columns = ['championid_'+str(i)+str(j)], inplace = True)
print( 15*'.'+' OK      ')

# One-hot encoding para role e position
print('One-hot encoding para role e position ...', end = '')
for i in range(2):
    for j in range(5):
        participants = pd.concat([participants, pd.get_dummies( participants['role_'+str(i)+str(j)], prefix = 'role_'+str(i)+str(j))],axis=1)
        participants = pd.concat([participants, pd.get_dummies( participants['position_'+str(i)+str(j)], prefix = 'position_'+str(i)+str(j))],axis=1)
        participants.drop(['role_'+str(i)+str(j), 'position_'+str(i)+str(j), 'role_'+str(i)+str(j)+'_NONE'], axis = 1, inplace = True)
        if 'role_' + str(i) + str(j) + '_DUO' not in participants.columns.values:
            participants['role_' + str(i) + str(j) + '_DUO'] = 0
print( 7*'.'+' OK        ')
        
        
# Reordenando atributos
print('Reordenando atributos ...', end = '')
params = ['role', 'position'] + params
atribute_list_final = []
for i in range(2):
    for j in range(5):
        for param in params:
            if(param == 'role'):
                for k in ['SOLO', 'DUO', 'DUO_CARRY', 'DUO_SUPPORT']:
                    atribute_list_final.append(param+'_'+str(i)+str(j)+'_'+k)
            elif(param == 'position'):
                for k in ['BOT', 'MID', 'TOP', 'JUNGLE']:
                    atribute_list_final.append(param+'_'+str(i)+str(j)+'_'+k)
            else:
                atribute_list_final.append(param+'_'+str(i)+str(j))

participants = participants[atribute_list_final]
print( 23*'.'+' OK      ')
