# Importações

In [1]:
import numpy as np
import pandas as pd
from collections import defaultdict

# Let wide DataFrames render up to 100 columns in cell outputs.
pd.options.display.max_columns = 100

In [2]:
# Seasons whose data files are read by the loading cells.
anos = list(range(2014, 2019))
# Upper bound of the per-round file loop used for the 2018 scouts.
numeroDeRodadas = 38
# How many of a team's most recent matches feed the "last games" indices.
profundidadeDoIndice = 5
# Position abbreviations that make up the attack and defense sectors.
posicoesAtaque = ["ata", "mei"]
posicoesDefesa = ["lat", "zag", "gol"]
# Name of a scouts score column; no code visible in this notebook reads it.
pontuacaoUtilizada = "atletas_media_num"

# Leitura dos arquivos

### Arquivo de times

In [3]:
# Team lookup table; the columns used below are 'nome.cbf', 'nome.cartola'
# and 'cod.older'.
times = pd.read_csv('../data/times_ids.csv')

# CBF team name -> Cartola team name.
dict_times_nomeCartola = {
    nomeCbf: nomeCartola
    for nomeCbf, nomeCartola in zip(times['nome.cbf'], times['nome.cartola'])
}
# Extra spelling added by hand on top of the names present in the CSV.
dict_times_nomeCartola['Ceará - CE'] = 'Ceará'

# 'cod.older' club code (as float64) -> Cartola team name.
dict_cod_times_nomeCartola = {
    codigo: nomeCartola
    for codigo, nomeCartola in zip(times['cod.older'].astype(np.float64), times['nome.cartola'])
}

### Arquivo de posições

In [4]:
# Position lookup table; the columns used below are 'Cod' and 'abbr'.
posicoes = pd.read_csv('../data/posicoes_ids.csv')

# Position code (as float64) -> position abbreviation.
dict_posicoes = {
    codigo: abreviacao
    for codigo, abreviacao in zip(posicoes['Cod'].astype(np.float64), posicoes['abbr'])
}

### Tratando arquivos de partidas

In [5]:
def _carregaPartidasDoAno(ano):
    """Read one season's match file and keep only matches with a usable score.

    The 'score' text is split on " x ". Rows that do not split into exactly
    two parts, or whose home/away part is empty, are discarded; a missing
    score is treated the same way instead of raising. The returned frame
    gains the string columns 'home_score' and 'away_score' plus an 'Ano'
    column holding ``ano``.
    """
    nomeDoArquivoPartidas = '../data/' + str(ano) + '/' + str(ano) + '_partidas.csv'
    partidasAno = pd.read_csv(nomeDoArquivoPartidas)

    placar = partidasAno['score'].str.split(' x ')
    # NaN scores yield NaN lengths, which compare False here.
    placarValido = placar.str.len() == 2
    partidasAno['home_score'] = placar.str[0].where(placarValido, '')
    partidasAno['away_score'] = placar.str[1].where(placarValido, '')
    partidasAno['Ano'] = ano

    temPlacar = (partidasAno['home_score'] != '') & (partidasAno['away_score'] != '')
    return partidasAno[temPlacar]


# Concatenate all seasons at once: DataFrame.append was removed in pandas 2.0
# and growing a frame inside a loop copies it on every iteration.
partidas = pd.concat([_carregaPartidasDoAno(ano) for ano in anos], sort=False)

partidas['home_score'] = partidas['home_score'].astype(np.float64)
partidas['away_score'] = partidas['away_score'].astype(np.float64)
# Team names are translated to their Cartola spelling; names missing from the
# lookup become NaN. (np.str was removed in NumPy 1.24; the builtin str is
# what it aliased.)
partidas['home_team'] = partidas['home_team'].astype(str).map(dict_times_nomeCartola)
partidas['away_team'] = partidas['away_team'].astype(str).map(dict_times_nomeCartola)
partidas['Ano'] = partidas['Ano'].astype(np.int32)
# Gives the combined frame a fresh 0..n-1 index; the per-file row labels are
# kept in a new 'index' column.
partidas = partidas.reset_index()

In [6]:
partidas.shape

(1899, 13)

In [7]:
partidas.head()

Unnamed: 0.1,index,Unnamed: 0,game,round,date,home_team,score,away_team,arena,X,home_score,away_score,Ano
0,0,1.0,1,1,20/04/2014 - 18:30,Flamengo,0 x 0,Goiás,Mané Garrincha - Brasilia - DF,,0.0,0.0,2014
1,1,2.0,2,1,19/04/2014 - 18:30,Fluminense,3 x 0,Figueirense,Maracanã - Rio de Janeiro - RJ,,3.0,0.0,2014
2,2,3.0,3,1,20/04/2014 - 16:00,São Paulo,3 x 0,Botafogo,Morumbi - Sao Paulo - SP,,3.0,0.0,2014
3,3,4.0,4,1,20/04/2014 - 18:30,Santos,1 x 1,Sport,Vila Belmiro - Santos - SP,,1.0,1.0,2014
4,4,5.0,5,1,20/04/2014 - 16:00,Atlético-PR,1 x 0,Grêmio,Orlando Scarpelli - Florianopolis - SC,,1.0,0.0,2014


### Tratando arquivos de scouts

In [8]:
def _leScoutsDoAno(ano):
    """Return the scout tables of one season, each tagged with an 'Ano' column.

    2018 is read from one file per round ('rodada-<n>.csv', rounds 1 to
    ``numeroDeRodadas``). Every other season is read from a single
    '<ano>_scouts_raw.csv'. For seasons other than 2017 the raw file is also
    normalized: the player position comes from '<ano>_jogadores.csv', rows
    without a club are dropped, and the 'atletas.*' columns are filled from
    'ClubeID', 'Pontos', 'PontosMedia', 'Preco' and 'PrecoVariacao'.
    """
    inicioDosAquivos = '../data/' + str(ano)

    if ano == 2018:
        tabelas = []
        for numeroDaRodada in range(1, numeroDeRodadas + 1):
            scout = pd.read_csv(inicioDosAquivos + '/rodada-' + str(numeroDaRodada) + '.csv')
            scout['Ano'] = ano
            tabelas.append(scout)
        return tabelas

    scout = pd.read_csv(inicioDosAquivos + '/' + str(ano) + '_scouts_raw.csv')
    scout['Ano'] = ano
    if ano != 2017:
        jogadores = pd.read_csv(inicioDosAquivos + '/' + str(ano) + '_jogadores.csv')
        jogadores['PosicaoID'] = jogadores['PosicaoID'].map(dict_posicoes)
        dict_idJogadores_posicoes = dict(zip(jogadores['ID'].astype(np.float64), jogadores['PosicaoID']))
        scout['atletas.posicao_id'] = scout['AtletaID'].map(dict_idJogadores_posicoes)

        # Keep rows that have a club. The explicit copy lets the assignments
        # below write to an independent frame instead of a filtered view.
        scout = scout[scout['ClubeID'].notna()].copy()
        scout['atletas.clube.id.full.name'] = scout['ClubeID'].map(dict_cod_times_nomeCartola)
        scout['atletas.pontos_num'] = scout['Pontos'].astype(np.float64)
        scout['atletas.media_num'] = scout['PontosMedia'].astype(np.float64)
        scout['atletas.preco_num'] = scout['Preco'].astype(np.float64)
        scout['atletas.variacao_num'] = scout['PrecoVariacao'].astype(np.float64)
    return [scout]


# Collect every table first and concatenate once: DataFrame.append was removed
# in pandas 2.0 and repeated appends copy the accumulated frame each time.
tabelasDeScouts = []
for ano in anos:
    tabelasDeScouts.extend(_leScoutsDoAno(ano))
scouts = pd.concat(tabelasDeScouts, sort=False)

# Dots in column names are replaced by underscores.
scouts.columns = scouts.columns.map(lambda x: x.replace('.', '_') if isinstance(x, str) else x)
# Keep rows with a known club name and a round id different from 0.
scouts = scouts[scouts['atletas_clube_id_full_name'].notna() & (scouts['atletas_rodada_id'] != 0)].copy()

In [9]:
scouts.shape

(139728, 52)

In [10]:
scouts.head()

Unnamed: 0.1,AtletaID,atletas_rodada_id,ClubeID,Participou,Posicao,Jogos,Pontos,PontosMedia,Preco,PrecoVariacao,Partida,Mando,Titular,Substituido,TempoJogado,Nota,FS,PE,A,FT,FD,FF,G,I,PP,RB,FC,GC,CA,CV,SG,DD,DP,GS,Ano,atletas_posicao_id,atletas_clube_id_full_name,atletas_pontos_num,atletas_media_num,atletas_preco_num,atletas_variacao_num,Unnamed: 0,athletes_atletas_scout,atletas_apelido,atletas_atleta_id,atletas_clube_id,atletas_foto,atletas_jogos_num,atletas_nome,Rodada,atletas_status_id,atletas_slug
1,36443.0,1.0,285.0,1.0,1.0,1.0,5.0,5.0,10.6,1.6,179879.0,1.0,1.0,0.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2014,gol,Internacional,5.0,5.0,10.6,1.6,,,,,,,,,,,
2,36443.0,2.0,285.0,1.0,1.0,2.0,-3.0,1.0,8.27,-2.33,179882.0,0.0,1.0,0.0,1.0,5.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2014,gol,Internacional,-3.0,1.0,8.27,-2.33,,,,,,,,,,,
3,36443.0,3.0,285.0,1.0,1.0,3.0,-2.6,-0.2,6.81,-1.46,179904.0,1.0,1.0,0.0,1.0,6.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2014,gol,Internacional,-2.6,-0.2,6.81,-1.46,,,,,,,,,,,
4,36443.0,4.0,285.0,1.0,1.0,4.0,4.0,0.85,7.96,1.15,179922.0,1.0,1.0,0.0,1.0,6.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,2014,gol,Internacional,4.0,0.85,7.96,1.15,,,,,,,,,,,
5,36443.0,5.0,285.0,1.0,1.0,5.0,5.0,1.68,8.61,0.65,179943.0,0.0,1.0,0.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2014,gol,Internacional,5.0,1.68,8.61,0.65,,,,,,,,,,,


## Inserções dos dados analisados

### Verifica participação de jogador

In [11]:
def setaParticipacaoDeJogador(jogador):
    """Tell whether a scouts row counts as the player having taken part.

    Returns False only when both 'atletas_pontos_num' and
    'atletas_variacao_num' equal 0; any other combination returns True.
    """
    return not (jogador['atletas_pontos_num'] == 0 and jogador['atletas_variacao_num'] == 0)
# Same rule as setaParticipacaoDeJogador, evaluated for the whole table at
# once instead of one Python call per row: a row is discarded only when both
# the points and the price variation are exactly 0.
semParticipacao = (scouts['atletas_pontos_num'] == 0) & (scouts['atletas_variacao_num'] == 0)
# assign() returns a new frame, so the column is not written into a filtered view.
scouts = scouts.assign(participou=~semParticipacao)
scouts = scouts[scouts['participou']]

### Valores do time mandante

In [12]:
def HomeTeamValues():
    """Add the home team's per-match scout averages to the global ``partidas``.

    For every match, the rows of the global ``scouts`` with the same year
    ('Ano'), round ('atletas_rodada_id' vs. 'round') and club name
    ('atletas_clube_id_full_name' vs. 'home_team') are averaged:

    * 'AttackStrengthHomeTeam_jogo' / 'DefenseStrengthHomeTeam_jogo' /
      'StrengthHomeTeam_jogo': mean 'atletas_pontos_num' of the attack
      positions, the defense positions and all players.
    * 'AttackValueHomeTeam' / 'DefenseValueHomeTeam' / 'ValueHomeTeam':
      the same three means of 'atletas_preco_num'.

    Matches without matching scouts get NaN. This includes matches whose
    'home_team' is NaN, which previously kept an empty-string placeholder.
    The averages are computed with one group-by per column instead of one
    scouts query per match. Returns None; ``partidas`` is modified in place.
    """
    chavesDosScouts = ['Ano', 'atletas_rodada_id', 'atletas_clube_id_full_name']
    # One lookup key per match, in row order. Integer and float round ids
    # hash and compare equal, so 1 matches 1.0.
    chavesDasPartidas = list(zip(partidas['Ano'], partidas['round'], partidas['home_team']))

    # (output column, positions of the sector or None for the whole team, metric),
    # listed in the order the columns are appended to ``partidas``.
    colunas = [
        ('AttackStrengthHomeTeam_jogo', posicoesAtaque, 'atletas_pontos_num'),
        ('DefenseStrengthHomeTeam_jogo', posicoesDefesa, 'atletas_pontos_num'),
        ('StrengthHomeTeam_jogo', None, 'atletas_pontos_num'),
        ('AttackValueHomeTeam', posicoesAtaque, 'atletas_preco_num'),
        ('DefenseValueHomeTeam', posicoesDefesa, 'atletas_preco_num'),
        ('ValueHomeTeam', None, 'atletas_preco_num'),
    ]
    for coluna, posicoesDoSetor, metrica in colunas:
        if posicoesDoSetor is None:
            jogadores = scouts
        else:
            jogadores = scouts[scouts['atletas_posicao_id'].isin(posicoesDoSetor)]
        # (year, round, club) -> mean of the metric; rows with a NaN key are
        # left out by groupby, so they can never match a game.
        mediaPorTimeERodada = jogadores.groupby(chavesDosScouts)[metrica].mean().to_dict()
        partidas[coluna] = [mediaPorTimeERodada.get(chave, np.nan) for chave in chavesDasPartidas]
HomeTeamValues()    

### Valores do time visitante

In [13]:
def AwayTeamValues():
    """Add the away team's per-match scout averages to the global ``partidas``.

    For every match, the rows of the global ``scouts`` with the same year
    ('Ano'), round ('atletas_rodada_id' vs. 'round') and club name
    ('atletas_clube_id_full_name' vs. 'away_team') are averaged:

    * 'AttackStrengthAwayTeam_jogo' / 'DefenseStrengthAwayTeam_jogo' /
      'StrengthAwayTeam_jogo': mean 'atletas_pontos_num' of the attack
      positions, the defense positions and all players.
    * 'AttackValueAwayTeam' / 'DefenseValueAwayTeam' / 'ValueAwayTeam':
      the same three means of 'atletas_preco_num'.

    Matches without matching scouts get NaN. This includes matches whose
    'away_team' is NaN, which previously kept an empty-string placeholder.
    The averages are computed with one group-by per column instead of one
    scouts query per match. Returns None; ``partidas`` is modified in place.
    """
    chavesDosScouts = ['Ano', 'atletas_rodada_id', 'atletas_clube_id_full_name']
    # One lookup key per match, in row order. Integer and float round ids
    # hash and compare equal, so 1 matches 1.0.
    chavesDasPartidas = list(zip(partidas['Ano'], partidas['round'], partidas['away_team']))

    # (output column, positions of the sector or None for the whole team, metric),
    # listed in the order the columns are appended to ``partidas``.
    colunas = [
        ('AttackStrengthAwayTeam_jogo', posicoesAtaque, 'atletas_pontos_num'),
        ('DefenseStrengthAwayTeam_jogo', posicoesDefesa, 'atletas_pontos_num'),
        ('StrengthAwayTeam_jogo', None, 'atletas_pontos_num'),
        ('AttackValueAwayTeam', posicoesAtaque, 'atletas_preco_num'),
        ('DefenseValueAwayTeam', posicoesDefesa, 'atletas_preco_num'),
        ('ValueAwayTeam', None, 'atletas_preco_num'),
    ]
    for coluna, posicoesDoSetor, metrica in colunas:
        if posicoesDoSetor is None:
            jogadores = scouts
        else:
            jogadores = scouts[scouts['atletas_posicao_id'].isin(posicoesDoSetor)]
        # (year, round, club) -> mean of the metric; rows with a NaN key are
        # left out by groupby, so they can never match a game.
        mediaPorTimeERodada = jogadores.groupby(chavesDosScouts)[metrica].mean().to_dict()
        partidas[coluna] = [mediaPorTimeERodada.get(chave, np.nan) for chave in chavesDasPartidas]
AwayTeamValues()    

### Seta resultado da partida

In [14]:
def setaResultadoDoJogo(partida):
    """Encode a match outcome from the home side's point of view.

    Returns 1 when 'home_score' is greater than 'away_score', 0 when the two
    are equal, and -1 in every other case.
    """
    golsMandante = partida['home_score']
    golsVisitante = partida['away_score']
    if golsMandante > golsVisitante:
        return 1
    if golsMandante == golsVisitante:
        return 0
    return -1
# Label every match with its outcome (1, 0 or -1) as returned by setaResultadoDoJogo.
partidas["resultado"] = partidas.apply(setaResultadoDoJogo, axis=1)

### Média de gols no torneio e Índices nos últimos jogos do torneio

In [15]:
def CalculaMediaDeDoisVetores(vetor1,vetor2):
    """Combine the means of two vectors, weighting each by its ``size``.

    Returns ``(mean1 * size1 + mean2 * size2) / (size1 + size2)``, where an
    empty vector contributes 0, or NaN when both vectors are empty.
    """
    tamanhoTotal = vetor1.size + vetor2.size
    if not tamanhoTotal > 0:
        return np.nan
    # NOTE(review): the weight is ``size``, which also counts NaN entries,
    # while ``mean`` skips them by default in pandas - confirm this is intended.
    parcela1 = vetor1.mean(axis = 0) * vetor1.size if vetor1.size != 0 else 0
    parcela2 = vetor2.mean(axis = 0) * vetor2.size if vetor2.size != 0 else 0
    return (parcela1 + parcela2) / tamanhoTotal
    
def CalculaMediasDeGolsUltimosJogos(ultimaPartidasComoMandante,ultimaPartidasComoVisitante):    
    """Average goals scored and conceded by a team over the given matches.

    Goals scored pair 'home_score' of the home matches with 'away_score' of
    the away matches; goals conceded use the opposite columns. Both averages
    come from CalculaMediaDeDoisVetores.

    Returns a tuple ``(goals scored, goals conceded)``.
    """
    golsMarcados = (ultimaPartidasComoMandante['home_score'], ultimaPartidasComoVisitante['away_score'])
    golsSofridos = (ultimaPartidasComoMandante['away_score'], ultimaPartidasComoVisitante['home_score'])
    return CalculaMediaDeDoisVetores(*golsMarcados), CalculaMediaDeDoisVetores(*golsSofridos)

def CalculaForcasUltimosJogos(ultimaPartidasComoMandante,ultimaPartidasComoVisitante):    
    """Average a team's per-match strengths over the given matches.

    For each of the attack, defense and overall strengths, the '...HomeTeam_jogo'
    column of the home matches is combined with the '...AwayTeam_jogo' column
    of the away matches through CalculaMediaDeDoisVetores.

    Returns a tuple ``(attack, defense, overall)``.
    """
    prefixos = ('AttackStrength', 'DefenseStrength', 'Strength')
    mediaForcaAtaque, mediaForcaDefesa, mediaForcaGeral = [
        CalculaMediaDeDoisVetores(
            ultimaPartidasComoMandante[prefixo + 'HomeTeam_jogo'],
            ultimaPartidasComoVisitante[prefixo + 'AwayTeam_jogo'],
        )
        for prefixo in prefixos
    ]
    return mediaForcaAtaque, mediaForcaDefesa, mediaForcaGeral

def CalculaIndiceDeResultadosUltimosJogos(ultimaPartidasComoMandante,ultimaPartidasComoVisitante):
    """Fractions of the given matches that the team won, lost and drew.

    'resultado' is read from the home side's point of view: 1 is a win in the
    home matches and a loss in the away matches, -1 is the opposite, 0 is a
    draw on either side.

    Returns a tuple ``(win rate, loss rate, draw rate)``, or three NaN when
    no match is given. The matches are counted directly rather than merged
    with DataFrame.append, which was removed in pandas 2.0.
    """
    totalDePartidas = ultimaPartidasComoMandante.shape[0] + ultimaPartidasComoVisitante.shape[0]
    if totalDePartidas == 0:
        return np.nan, np.nan, np.nan

    resultadosEmCasa = ultimaPartidasComoMandante['resultado']
    resultadosFora = ultimaPartidasComoVisitante['resultado']
    partidasVencidas = int((resultadosEmCasa == 1).sum() + (resultadosFora == -1).sum())
    partidasPerdidas = int((resultadosEmCasa == -1).sum() + (resultadosFora == 1).sum())
    partidasEmpatadas = int((resultadosEmCasa == 0).sum() + (resultadosFora == 0).sum())

    indiceVencidas = partidasVencidas / totalDePartidas
    indicePerdidas = partidasPerdidas / totalDePartidas
    indiceEmpatadas = partidasEmpatadas / totalDePartidas
    return indiceVencidas, indicePerdidas, indiceEmpatadas
    

def AverageGoalsAndIndiceResults():
    """Add each side's recent-form features to the global ``partidas``.

    For every match and for both its home and its away team, three windows
    over that team's matches of the same year ('Ano') are evaluated:

    * strengths ('AttackStrength<Side>Team', 'DefenseStrength<Side>Team',
      'Strength<Side>Team'): the last ``profundidadeDoIndice`` rows with
      ``round <=`` the match's round, via CalculaForcasUltimosJogos;
    * goal averages ('AverageGoalsInFavor<Side>Team',
      'AverageGoalsAgainst<Side>Team'): every row with ``round <`` the
      match's round, via CalculaMediasDeGolsUltimosJogos;
    * result rates ('<Side>WinLastFiveIndice', '<Side>DrawLastFiveIndice',
      '<Side>DefeatLastFiveIndice'): the last ``profundidadeDoIndice`` rows
      with ``round <`` the match's round, via
      CalculaIndiceDeResultadosUltimosJogos.

    ``<Side>`` is 'Home' or 'Away'. The columns start as NaN, so a side whose
    team is NaN stays NaN instead of keeping an empty-string placeholder.
    Each distinct team name is processed once, and values are written by row
    label. Returns None; ``partidas`` is modified in place.

    NOTE(review): the strength window uses ``round <=`` and therefore includes
    the match being described - confirm this is intended for a feature that
    sits next to 'resultado'.
    NOTE(review): ``tail`` takes the last rows in the current row order of
    ``partidas``; this is only "most recent" if rows are in chronological
    order - confirm against the match files.
    """
    # Created up front, in this order, so the column layout stays the same.
    novasColunas = [
        'AverageGoalsInFavorHomeTeam', 'AverageGoalsAgainstHomeTeam',
        'AverageGoalsInFavorAwayTeam', 'AverageGoalsAgainstAwayTeam',
        'HomeWinLastFiveIndice', 'HomeDrawLastFiveIndice', 'HomeDefeatLastFiveIndice',
        'AwayWinLastFiveIndice', 'AwayDrawLastFiveIndice', 'AwayDefeatLastFiveIndice',
        'AttackStrengthHomeTeam', 'DefenseStrengthHomeTeam', 'StrengthHomeTeam',
        'AttackStrengthAwayTeam', 'DefenseStrengthAwayTeam', 'StrengthAwayTeam',
    ]
    for coluna in novasColunas:
        partidas[coluna] = np.nan

    def separaPorMando(jogos, nomeDoTime):
        """Split matches into the ones the team played at home and away."""
        return jogos[jogos['home_team'] == nomeDoTime], jogos[jogos['away_team'] == nomeDoTime]

    def calculaIndices(partidasDoTime, nomeDoTime, partida, lado):
        """Return {column name: value} for one team on one side of one match."""
        doAno = partidasDoTime[partidasDoTime['Ano'] == partida['Ano']]
        ateARodada = doAno[doAno['round'] <= partida['round']].tail(profundidadeDoIndice)
        antesDaRodada = doAno[doAno['round'] < partida['round']]

        mediaForcaAtaque, mediaForcaDefesa, mediaForcaGeral = CalculaForcasUltimosJogos(
            *separaPorMando(ateARodada, nomeDoTime))
        mediaDeGolsAFavor, mediaDeGolsContra = CalculaMediasDeGolsUltimosJogos(
            *separaPorMando(antesDaRodada, nomeDoTime))
        indiceVencidas, indicePerdidas, indiceEmpatadas = CalculaIndiceDeResultadosUltimosJogos(
            *separaPorMando(antesDaRodada.tail(profundidadeDoIndice), nomeDoTime))

        return {
            'AttackStrength' + lado + 'Team': mediaForcaAtaque,
            'DefenseStrength' + lado + 'Team': mediaForcaDefesa,
            'Strength' + lado + 'Team': mediaForcaGeral,
            'AverageGoalsInFavor' + lado + 'Team': mediaDeGolsAFavor,
            'AverageGoalsAgainst' + lado + 'Team': mediaDeGolsContra,
            lado + 'WinLastFiveIndice': indiceVencidas,
            lado + 'DrawLastFiveIndice': indiceEmpatadas,
            lado + 'DefeatLastFiveIndice': indicePerdidas,
        }

    # Several CBF spellings can map to the same Cartola name; dict.fromkeys
    # keeps the first occurrence of each name so no team is processed twice.
    for nomeDoTime in dict.fromkeys(dict_times_nomeCartola.values()):
        partidasDoTime = partidas[(partidas['home_team'] == nomeDoTime) | (partidas['away_team'] == nomeDoTime)]
        for lado, colunaDoTime in (('Home', 'home_team'), ('Away', 'away_team')):
            partidasDesseLado = partidasDoTime[partidasDoTime[colunaDoTime] == nomeDoTime]
            for indice, partida in partidasDesseLado.iterrows():
                indices = calculaIndices(partidasDoTime, nomeDoTime, partida, lado)
                for coluna, valor in indices.items():
                    partidas.at[indice, coluna] = valor

AverageGoalsAndIndiceResults()

### Remove colunas e linhas desnecessárias

In [16]:
# Identification columns, raw scores and the per-match "_jogo" strengths are
# removed; the remaining columns are the features and 'resultado'.
colunasAuxiliares = [
    'index', 'game', 'round', 'date', 'home_team', 'score', 'away_team', 'arena',
    'home_score', 'away_score', 'Ano',
    'AttackStrengthHomeTeam_jogo', 'DefenseStrengthHomeTeam_jogo', 'StrengthHomeTeam_jogo',
    'AttackStrengthAwayTeam_jogo', 'DefenseStrengthAwayTeam_jogo', 'StrengthAwayTeam_jogo',
]
partidas = partidas.drop(columns=colunasAuxiliares)
# Drop only a column named exactly 'X' (any letter case). A substring match
# would silently remove every column that merely contains the letter "x".
partidas = partidas.drop(columns=[coluna for coluna in partidas.columns if str(coluna).upper() == 'X'])
partidas = partidas.loc[:, ~partidas.columns.str.contains('^Unnamed')]
# Matches with any missing feature are discarded.
partidas = partidas.dropna()

## Exportar arquivo final 

In [17]:
partidas.head(40)

Unnamed: 0,AttackValueHomeTeam,DefenseValueHomeTeam,ValueHomeTeam,AttackValueAwayTeam,DefenseValueAwayTeam,ValueAwayTeam,resultado,AverageGoalsInFavorHomeTeam,AverageGoalsAgainstHomeTeam,AverageGoalsInFavorAwayTeam,AverageGoalsAgainstAwayTeam,HomeWinLastFiveIndice,HomeDrawLastFiveIndice,HomeDefeatLastFiveIndice,AwayWinLastFiveIndice,AwayDrawLastFiveIndice,AwayDefeatLastFiveIndice,AttackStrengthHomeTeam,DefenseStrengthHomeTeam,StrengthHomeTeam,AttackStrengthAwayTeam,DefenseStrengthAwayTeam,StrengthAwayTeam
10,9.16857,7.37286,8.09333,10.7333,9.474,10.622,0,0.0,3.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,2.72679,0.404762,1.57067,3.64444,3.64,3.709
11,7.965,5.81833,6.83867,5.96286,7.965,6.75786,1,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.92361,0.798333,2.06933,2.14206,3.72167,2.63007
12,8.91889,18.036,12.342,6.47429,11.3417,8.67214,1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.31111,9.14,4.83633,1.25952,3.1,1.69071
13,6.82889,10.434,8.072,13.9057,15.65,14.4546,-1,2.0,1.0,3.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.63889,2.31,1.90867,3.96984,6.17,4.86749
14,6.1125,12.0917,8.644,9.43333,10.134,9.82733,0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.82083,6.48571,4.08124,1.61667,3.55167,2.273
15,10.8725,9.42833,10.3847,11.9075,16.0583,13.9853,0,2.0,1.0,3.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,2.54167,1.6,2.229,3.19375,5.86667,4.41833
16,7.35625,6.33333,6.918,8.66,10.31,9.20533,0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,4.3,1.81667,3.29833,2.19444,4.52667,3.24367
17,4.30875,4.97333,4.838,9.29667,11.476,9.97,1,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.64444,1.465,1.56367,1.34236,3.51,2.23493
18,4.113,3.9075,3.91933,9.75375,5.63833,7.93733,-1,0.0,3.0,1.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,0.836111,-0.18,0.513,4.07014,3.18167,3.84367
19,5.61667,13.25,8.23667,3.92375,5.27833,4.508,1,0.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,1.0,1.16667,7.18,3.37467,0.78125,1.80833,1.24367


In [18]:
partidas.tail(40)

Unnamed: 0,AttackValueHomeTeam,DefenseValueHomeTeam,ValueHomeTeam,AttackValueAwayTeam,DefenseValueAwayTeam,ValueAwayTeam,resultado,AverageGoalsInFavorHomeTeam,AverageGoalsAgainstHomeTeam,AverageGoalsInFavorAwayTeam,AverageGoalsAgainstAwayTeam,HomeWinLastFiveIndice,HomeDrawLastFiveIndice,HomeDefeatLastFiveIndice,AwayWinLastFiveIndice,AwayDrawLastFiveIndice,AwayDefeatLastFiveIndice,AttackStrengthHomeTeam,DefenseStrengthHomeTeam,StrengthHomeTeam,AttackStrengthAwayTeam,DefenseStrengthAwayTeam,StrengthAwayTeam
1859,6.51556,4.064,5.64867,7.21333,5.826,6.978,-1,0.970588,1.61765,1.38235,0.970588,0.0,0.8,0.2,0.4,0.4,0.2,2.86233,1.70914,2.45559,3.36,2.36867,3.0196
1860,5.38556,8.438,6.754,5.21667,7.714,6.32333,1,0.970588,0.970588,1.14706,1.35294,0.2,0.4,0.4,0.2,0.4,0.4,2.20222,2.902,2.48658,2.57556,2.44457,2.67773
1861,8.705,7.73333,8.422,6.39,5.17,6.34571,1,1.47059,1.11765,1.08824,1.14706,0.2,0.2,0.6,0.6,0.2,0.2,2.46861,3.50867,2.96492,3.53417,2.47105,2.98752
1862,2.61875,3.81,3.37,8.91778,7.436,8.80133,0,0.441176,1.55882,1.61765,0.676471,0.0,0.4,0.6,0.6,0.4,0.0,1.8275,1.88533,1.84427,3.055,3.05533,3.1388
1863,6.4825,8.32333,7.33533,6.883,10.7725,8.18333,1,0.823529,1.20588,1.14706,0.970588,0.0,0.4,0.6,0.2,0.2,0.6,1.49214,1.01143,1.31194,1.91222,2.117,2.05227
1864,7.02889,9.42,7.99667,8.04444,11.754,9.536,1,0.941176,1.29412,1.38235,0.735294,0.4,0.0,0.6,0.4,0.6,0.0,2.38333,4.396,3.17533,2.1455,3.59133,2.71666
1865,5.46556,4.934,5.512,9.30778,11.15,10.36,-1,0.941176,1.55882,1.61765,0.735294,0.6,0.4,0.0,0.6,0.4,0.0,2.94628,5.261,3.76293,3.02861,4.85133,3.74827
1866,7.56375,9.32333,8.558,5.84333,8.016,6.894,1,1.32353,0.911765,0.911765,0.911765,0.2,0.8,0.0,0.8,0.0,0.2,2.89833,4.36048,3.50364,3.0125,1.59,2.62987
1867,7.56,10.91,9.23667,4.82222,6.32,5.59667,1,1.32353,0.735294,0.911765,1.38235,0.4,0.4,0.2,0.4,0.0,0.6,3.65222,4.57933,4.03787,1.83889,3.26333,2.45189
1868,5.87556,7.876,6.90333,4.81778,7.298,5.888,0,0.911765,1.23529,0.852941,1.08824,0.2,0.2,0.6,0.2,0.2,0.6,0.90623,3.86867,1.9708,2.58914,2.98076,2.7084


In [19]:
partidas.shape

(1436, 23)

In [20]:

# Write the cleaned match features to CSV with a header row and without the
# index column. to_csv returns None when given a path, so export_csv is None.
export_csv = partidas.to_csv('../data/dados_limpos.csv', header=True, index=False)