In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

In [2]:
df_escalacao = pd.read_csv("../Data/Camp_Brasileiro_2023_escalacao.csv")
df_team_stats = pd.read_csv("../Data_Lake/Camp_Brasileiro/2023/Camp_Brasileiro_2023_team_stats_final.csv")
df_players_stats = pd.read_csv("../Data_Lake/Camp_Brasileiro/2023/Camp_Brasileiro_2023_players_stats_final.csv")
df_Games = pd.read_csv("../Data_Lake/Camp_Brasileiro/2023/Camp_Brasileiro_2023_Games_final.csv")

df_stats = df_players_stats

In [None]:
df_team_stats

# Lista de games e das medias

In [3]:
lista_games_2024 = df_stats['fixture_id'].unique().tolist()
List_AVG = ['rating','minutes','offsides','shots_total','shots_on','goals_total','goals_conceded','assists','saves','passes_total','passes_key','passes_accuracy','tackles_total','tackles_blocks','tackles_interceptions','duels_total','duels_won','dribbles_attempts','dribbles_success','dribbles_past','fouls_drawn','fouls_committed','cards_yellow','cards_red','penalty_won','penalty_committed','penalty_scored','penalty_missed','penalty_saved']

# Media das estatísticas dos players que estão escalados para jogar a partida de n° fixture_id nos últimos 5 jogos 

In [4]:
def calcular_media_stats(df, fixture_x):
    # Lista de jogadores que participaram do fixture_x
    jogadores_em_x = df[df['fixture_id'] == fixture_x]['player_id'].unique()
    
    # Filtra os jogos anteriores dos jogadores de x
    jogos_anteriores = df[(df['player_id'].isin(jogadores_em_x)) & (df['fixture_id'] < fixture_x)]
    
    # Ordena os jogos por jogador e fixture_id (cronologicamente)
    jogos_ordenados = jogos_anteriores.sort_values(['player_id', 'fixture_id'])
    
    # Seleciona os três últimos jogos de cada jogador
    ultimos_tres = jogos_ordenados.groupby('player_id').tail(3)           #Quantos jogos vou pegar 
    
    # Calcula a média das estatísticas
    media_estatisticas = ultimos_tres.groupby(['player_id','team_id'])[List_AVG].mean().reset_index()
    
    # Garante todos os jogadores de x, mesmo sem jogos anteriores
    resultado_final = pd.DataFrame({'player_id': jogadores_em_x})
    resultado_final = resultado_final.merge(media_estatisticas, on='player_id', how='left')
    
    return resultado_final

In [5]:
# Lista para armazenar os DataFrames de cada fixture
lista_dfs = []

# Iterar sobre cada fixture_id na lista_games_2024
for fixture_x in lista_games_2024:
    # Calcular a média dos últimos três jogos para o fixture atual
    df_medias = calcular_media_stats(df_stats, fixture_x)
    
    # Adicionar a coluna 'fixture_id' ao DataFrame resultante (para identificar o jogo)
    df_medias['fixture_id'] = fixture_x
    
    # Adicionar à lista
    lista_dfs.append(df_medias)

# Concatenar todos os DataFrames da lista em um único df_features
df_features = pd.concat(lista_dfs, ignore_index=True)

#Pegando estatísticas só de players que jogaram em média mais de 45 minutos
df_features = df_features[df_features['minutes'] >= 45]


- Para cada jogador e jogo, ela calcula a média de suas estatísticas nos últimos 5 jogos anteriores
- As estatísticas são armazenas em df_features que é uma tabela (DataFrame) que resume o desempenho passado de jogadores para cada jogo (fixture) 

In [6]:
df_features

Unnamed: 0,player_id,team_id,rating,minutes,offsides,shots_total,shots_on,goals_total,goals_conceded,assists,saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved,fixture_id
457,10080,124.0,7.500000,90.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,2.0,53.000000,0.000000,42.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,1005659
458,10267,124.0,7.700000,90.000000,0.000000,0.000000,0.000000,0.000000,0.0,1.000000,0.0,43.000000,1.000000,36.000000,1.000000,1.0,0.000000,8.000000,4.000000,2.000000,1.000000,1.000000,1.000000,1.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,1005659
459,10306,124.0,7.300000,79.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,41.000000,0.000000,35.000000,1.000000,2.0,0.000000,6.000000,5.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,1005659
461,363323,124.0,6.700000,45.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,20.000000,0.000000,16.000000,1.000000,1.0,0.000000,6.000000,4.000000,1.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,1005659
462,265784,124.0,7.300000,90.000000,0.000000,1.000000,0.000000,0.000000,0.0,0.000000,0.0,48.000000,1.000000,39.000000,2.000000,0.0,2.000000,13.000000,10.000000,2.000000,2.000000,1.000000,6.000000,1.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,1005659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17431,10130,134.0,6.800000,95.666667,0.000000,0.666667,0.000000,0.000000,0.0,0.333333,0.0,35.000000,0.666667,29.333333,1.000000,0.0,2.000000,8.333333,4.000000,1.333333,0.666667,0.333333,1.333333,1.666667,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,1006028
17433,6031,134.0,6.800000,85.000000,0.000000,0.333333,0.000000,0.000000,0.0,0.000000,0.0,30.666667,0.666667,21.666667,0.666667,0.0,2.000000,6.666667,4.000000,2.000000,1.333333,0.333333,1.333333,0.666667,0.333333,0.0,0.0,0.000000,0.0,0.0,0.0,1006028
17434,9962,134.0,7.666667,82.333333,0.666667,1.666667,0.666667,0.333333,0.0,0.333333,0.0,31.666667,3.000000,27.000000,0.333333,0.0,0.000000,7.666667,3.000000,3.000000,1.333333,0.666667,1.333333,1.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,1006028
17436,67204,134.0,6.733333,76.666667,0.000000,0.333333,0.000000,0.000000,0.0,0.000000,0.0,26.666667,1.333333,23.000000,1.000000,0.0,0.000000,6.333333,2.666667,1.000000,0.333333,0.333333,1.333333,2.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,1006028


# Criando df com resultado dos jogos e informação de mando de campo

In [8]:
df_Games = df_Games.rename(columns={'id_partida': 'fixture_id'})
df_jogos = df_Games[['fixture_id', 'Home_Team','Away_Team','home.id','away.id']]
df_form = df_escalacao[['fixture_id','player_id','team']]
df_home_away = pd.merge(df_form,df_jogos, on='fixture_id',how='inner')

Criando Book de variáveis

In [9]:
#Vai dizer se está jogando em casa ou fora de casa
df_home_away['condicao'] = df_home_away['team'] == df_home_away['Home_Team']
df_var = pd.merge(df_features,df_home_away, on = ['fixture_id','player_id'])

#Media das estatisticas dos jogadores nos ultimos 5 jogos 
df_var = df_var.groupby(['fixture_id','team'])[List_AVG].mean().reset_index()

Adicionando coluna sobre o mando de campo

In [10]:
# Inicializando uma lista vazia para armazenar os valores de 'home' (True ou False)
home_values = []

# Iterando sobre cada linha do DataFrame df_var
for index, row_var in df_var.iterrows():
    fixture_id_var = row_var['fixture_id']
    team_var = row_var['team']

    # Encontra a linha correspondente em df_Games com o mesmo fixture_id
    # Usando .loc para selecionar a linha onde 'fixture_id' é igual a fixture_id_var
    # Usando .iloc[0] para pegar a primeira (e única esperada) linha como uma Series
    games_row = df_Games.loc[df_Games['fixture_id'] == fixture_id_var]

    # Verificando se encontramos uma linha correspondente em df_Games
    if not games_row.empty:
        # Obtém o 'Home_Team' do df_Games para este fixture_id
        home_team_games = games_row['Home_Team'].iloc[0] # .iloc[0] para pegar o valor da Series

        # Compara o 'team' de df_var com o 'Home_Team' de df_Games
        is_home = (team_var == home_team_games)
        home_values.append(is_home) # Adiciona True ou False à lista
    else:
        home_values.append(False) # Ou poderia ser home_values.append(None) para NaN. Não pensei nisso ainda. Acho que não vou precisar 

# Adiciona a lista de valores 'home' como uma nova coluna no df_var
df_var['home'] = home_values

#Removendo linhas sem valores
df_var.dropna(inplace=True)

In [11]:
df_var

Unnamed: 0,fixture_id,team,rating,minutes,offsides,shots_total,shots_on,goals_total,goals_conceded,assists,saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved,home
0,1005659,Atletico Paranaense,7.091667,78.666667,0.083333,0.416667,0.250000,0.166667,0.000000,0.166667,0.166667,28.833333,0.833333,22.333333,1.000000,0.500000,0.750000,9.750000,4.500000,0.833333,0.500000,1.166667,1.166667,1.416667,0.166667,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,False
1,1005659,Fluminense,7.176923,73.538462,0.230769,0.846154,0.615385,0.230769,0.000000,0.230769,0.153846,31.923077,0.615385,26.461538,1.076923,0.461538,0.384615,7.692308,4.692308,1.153846,0.615385,0.615385,1.769231,0.538462,0.153846,0.000000,0.076923,0.000000,0.000000,0.076923,0.000000,True
2,1005660,Palmeiras,6.972727,83.181818,0.000000,0.727273,0.363636,0.181818,0.090909,0.181818,0.272727,37.818182,0.727273,31.636364,1.363636,0.090909,0.454545,9.818182,4.727273,1.545455,1.272727,0.818182,0.909091,1.727273,0.090909,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,False
3,1005660,Vasco DA Gama,7.154545,84.363636,0.181818,0.363636,0.272727,0.181818,0.090909,0.181818,0.818182,17.090909,0.545455,11.454545,2.090909,0.727273,1.000000,9.636364,4.818182,1.545455,0.454545,1.272727,1.272727,0.727273,0.454545,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,True
4,1005661,America Mineiro,6.670000,78.100000,0.200000,0.500000,0.200000,0.000000,0.300000,0.000000,0.500000,24.100000,0.800000,18.300000,1.500000,0.300000,1.500000,8.100000,3.300000,1.100000,0.600000,0.500000,0.700000,2.100000,0.300000,0.000000,0.000000,0.100000,0.000000,0.000000,0.100000,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
733,1006026,Coritiba,6.283333,70.125000,0.000000,0.375000,0.125000,0.000000,0.083333,0.000000,0.541667,25.083333,0.500000,19.750000,1.083333,0.333333,0.708333,8.416667,4.166667,1.833333,0.750000,0.958333,1.250000,0.833333,0.166667,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,True
734,1006027,Atletico-MG,6.856667,78.300000,0.133333,0.600000,0.366667,0.200000,0.033333,0.133333,0.433333,30.866667,0.600000,25.200000,1.200000,0.433333,0.733333,7.500000,3.333333,1.200000,0.600000,0.800000,0.733333,1.033333,0.200000,0.033333,0.000000,0.033333,0.000000,0.000000,0.000000,False
735,1006027,Bahia,7.000000,83.636364,0.121212,0.757576,0.363636,0.181818,0.151515,0.121212,0.212121,28.878788,1.090909,23.090909,1.484848,0.242424,1.030303,7.212121,3.606061,0.939394,0.272727,0.696970,0.757576,0.848485,0.151515,0.000000,0.030303,0.000000,0.121212,0.000000,0.000000,True
736,1006028,Atletico Paranaense,7.033333,80.208333,0.125000,0.791667,0.375000,0.125000,0.041667,0.083333,0.500000,28.208333,0.916667,22.833333,0.916667,0.041667,0.791667,6.916667,3.625000,1.416667,0.750000,0.416667,1.166667,0.875000,0.125000,0.000000,0.000000,0.041667,0.000000,0.000000,0.041667,False


# Separando estatísticas em time da casa e visitante

In [12]:
# Passo 1: Pivotar o DataFrame
df_pivot = df_var.set_index(['fixture_id', 'home']).unstack('home')

# Passo 2: Renomear colunas
df_pivot.columns = [
    f"{col}_{'home' if home else 'away'}" 
    for col, home in df_pivot.columns
]
df_pivot

Unnamed: 0_level_0,team_away,team_home,rating_away,rating_home,minutes_away,minutes_home,offsides_away,offsides_home,shots_total_away,shots_total_home,shots_on_away,shots_on_home,goals_total_away,goals_total_home,goals_conceded_away,goals_conceded_home,assists_away,assists_home,saves_away,saves_home,passes_total_away,passes_total_home,passes_key_away,passes_key_home,passes_accuracy_away,passes_accuracy_home,tackles_total_away,tackles_total_home,tackles_blocks_away,tackles_blocks_home,tackles_interceptions_away,tackles_interceptions_home,duels_total_away,duels_total_home,duels_won_away,duels_won_home,dribbles_attempts_away,dribbles_attempts_home,dribbles_success_away,dribbles_success_home,dribbles_past_away,dribbles_past_home,fouls_drawn_away,fouls_drawn_home,fouls_committed_away,fouls_committed_home,cards_yellow_away,cards_yellow_home,cards_red_away,cards_red_home,penalty_won_away,penalty_won_home,penalty_committed_away,penalty_committed_home,penalty_scored_away,penalty_scored_home,penalty_missed_away,penalty_missed_home,penalty_saved_away,penalty_saved_home
fixture_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1
1005659,Atletico Paranaense,Fluminense,7.091667,7.176923,78.666667,73.538462,0.083333,0.230769,0.416667,0.846154,0.250000,0.615385,0.166667,0.230769,0.000000,0.000000,0.166667,0.230769,0.166667,0.153846,28.833333,31.923077,0.833333,0.615385,22.333333,26.461538,1.000000,1.076923,0.500000,0.461538,0.750000,0.384615,9.750000,7.692308,4.500000,4.692308,0.833333,1.153846,0.500000,0.615385,1.166667,0.615385,1.166667,1.769231,1.416667,0.538462,0.166667,0.153846,0.000000,0.0,0.0,0.076923,0.000000,0.000000,0.000000,0.000000,0.0,0.076923,0.000000,0.0
1005660,Palmeiras,Vasco DA Gama,6.972727,7.154545,83.181818,84.363636,0.000000,0.181818,0.727273,0.363636,0.363636,0.272727,0.181818,0.181818,0.090909,0.090909,0.181818,0.181818,0.272727,0.818182,37.818182,17.090909,0.727273,0.545455,31.636364,11.454545,1.363636,2.090909,0.090909,0.727273,0.454545,1.000000,9.818182,9.636364,4.727273,4.818182,1.545455,1.545455,1.272727,0.454545,0.818182,1.272727,0.909091,1.272727,1.727273,0.727273,0.090909,0.454545,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0
1005661,America Mineiro,Sao Paulo,6.670000,6.981818,78.100000,80.636364,0.200000,0.000000,0.500000,1.181818,0.200000,0.636364,0.000000,0.090909,0.300000,0.181818,0.000000,0.090909,0.500000,0.090909,24.100000,51.181818,0.800000,0.909091,18.300000,45.727273,1.500000,1.363636,0.300000,0.272727,1.500000,0.545455,8.100000,8.454545,3.300000,4.909091,1.100000,1.363636,0.600000,1.090909,0.500000,1.000000,0.700000,1.545455,2.100000,1.090909,0.300000,0.181818,0.000000,0.0,0.0,0.000000,0.100000,0.000000,0.000000,0.000000,0.0,0.000000,0.100000,0.0
1005662,Atletico-MG,Santos,7.009091,6.836364,82.272727,78.909091,0.181818,0.090909,1.545455,0.818182,0.909091,0.181818,0.090909,0.000000,0.181818,0.090909,0.090909,0.000000,0.090909,0.454545,47.181818,32.545455,1.363636,0.727273,41.454545,26.181818,1.090909,1.636364,0.272727,0.272727,0.636364,0.909091,9.000000,8.727273,4.454545,3.909091,1.636364,1.727273,1.181818,0.818182,0.636364,0.909091,0.818182,0.545455,1.272727,1.272727,0.545455,0.181818,0.000000,0.0,0.0,0.000000,0.000000,0.090909,0.000000,0.000000,0.0,0.000000,0.000000,0.0
1005663,Corinthians,Goias,6.800000,6.827273,82.727273,79.636364,0.363636,0.090909,1.000000,0.727273,0.636364,0.181818,0.181818,0.000000,0.090909,0.181818,0.090909,0.000000,0.090909,0.090909,30.181818,40.000000,0.272727,1.090909,23.636364,33.090909,1.272727,1.090909,0.181818,0.363636,1.000000,0.909091,7.272727,10.727273,3.818182,6.000000,1.181818,1.363636,0.454545,1.181818,0.636364,0.454545,1.000000,1.636364,0.909091,1.363636,0.272727,0.181818,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1006024,Palmeiras,Cruzeiro,7.046667,6.946667,80.600000,75.566667,0.033333,0.233333,1.066667,0.966667,0.466667,0.366667,0.133333,0.033333,0.066667,0.033333,0.100000,0.033333,0.266667,0.233333,31.333333,29.800000,1.233333,0.866667,25.700000,24.400000,1.333333,1.133333,0.233333,0.133333,0.533333,0.433333,7.466667,7.066667,3.866667,3.733333,1.266667,0.900000,0.633333,0.366667,0.733333,0.700000,1.266667,1.333333,0.933333,0.933333,0.100000,0.166667,0.066667,0.0,0.0,0.033333,0.000000,0.000000,0.000000,0.000000,0.0,0.033333,0.000000,0.0
1006025,Botafogo,Internacional,7.022222,6.860606,78.777778,80.818182,0.185185,0.060606,1.111111,0.636364,0.407407,0.212121,0.111111,0.090909,0.074074,0.030303,0.037037,0.060606,0.296296,0.212121,33.555556,37.333333,1.148148,0.757576,27.962963,31.242424,1.000000,0.939394,0.222222,0.272727,0.629630,0.545455,7.444444,6.878788,3.851852,2.818182,1.000000,0.939394,0.370370,0.484848,0.296296,0.727273,1.296296,0.848485,0.962963,1.030303,0.111111,0.212121,0.037037,0.0,0.0,0.030303,0.000000,0.000000,0.037037,0.030303,0.0,0.000000,0.000000,0.0
1006026,Corinthians,Coritiba,5.594444,6.283333,66.000000,70.125000,0.055556,0.000000,0.611111,0.375000,0.166667,0.125000,0.166667,0.000000,0.000000,0.083333,0.000000,0.000000,0.000000,0.541667,26.888889,25.083333,0.555556,0.500000,23.111111,19.750000,0.833333,1.083333,0.222222,0.333333,0.444444,0.708333,5.888889,8.416667,3.000000,4.166667,1.500000,1.833333,0.666667,0.750000,0.277778,0.958333,0.944444,1.250000,0.388889,0.833333,0.222222,0.166667,0.000000,0.0,0.0,0.000000,0.111111,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0
1006027,Atletico-MG,Bahia,6.856667,7.000000,78.300000,83.636364,0.133333,0.121212,0.600000,0.757576,0.366667,0.363636,0.200000,0.181818,0.033333,0.151515,0.133333,0.121212,0.433333,0.212121,30.866667,28.878788,0.600000,1.090909,25.200000,23.090909,1.200000,1.484848,0.433333,0.242424,0.733333,1.030303,7.500000,7.212121,3.333333,3.606061,1.200000,0.939394,0.600000,0.272727,0.800000,0.696970,0.733333,0.757576,1.033333,0.848485,0.200000,0.151515,0.033333,0.0,0.0,0.030303,0.033333,0.000000,0.000000,0.121212,0.0,0.000000,0.000000,0.0


# Construção da variável resposta

Adicionando a coluna de gols total do jogo

In [13]:
df_Games['Total_Gols'] = df_Games['Gols_Home'] + df_Games['Gols_Away']
df_tot_gols = df_Games[['fixture_id','Total_Gols']]
df_pivot = pd.merge(df_pivot,df_tot_gols, on='fixture_id')

In [14]:
df_pivot['Total_Gols'] = df_pivot['Total_Gols'] > 2.5
df_pivot

Unnamed: 0,fixture_id,team_away,team_home,rating_away,rating_home,minutes_away,minutes_home,offsides_away,offsides_home,shots_total_away,shots_total_home,shots_on_away,shots_on_home,goals_total_away,goals_total_home,goals_conceded_away,goals_conceded_home,assists_away,assists_home,saves_away,saves_home,passes_total_away,passes_total_home,passes_key_away,passes_key_home,passes_accuracy_away,passes_accuracy_home,tackles_total_away,tackles_total_home,tackles_blocks_away,tackles_blocks_home,tackles_interceptions_away,tackles_interceptions_home,duels_total_away,duels_total_home,duels_won_away,duels_won_home,dribbles_attempts_away,dribbles_attempts_home,dribbles_success_away,dribbles_success_home,dribbles_past_away,dribbles_past_home,fouls_drawn_away,fouls_drawn_home,fouls_committed_away,fouls_committed_home,cards_yellow_away,cards_yellow_home,cards_red_away,cards_red_home,penalty_won_away,penalty_won_home,penalty_committed_away,penalty_committed_home,penalty_scored_away,penalty_scored_home,penalty_missed_away,penalty_missed_home,penalty_saved_away,penalty_saved_home,Total_Gols
0,1005659,Atletico Paranaense,Fluminense,7.091667,7.176923,78.666667,73.538462,0.083333,0.230769,0.416667,0.846154,0.250000,0.615385,0.166667,0.230769,0.000000,0.000000,0.166667,0.230769,0.166667,0.153846,28.833333,31.923077,0.833333,0.615385,22.333333,26.461538,1.000000,1.076923,0.500000,0.461538,0.750000,0.384615,9.750000,7.692308,4.500000,4.692308,0.833333,1.153846,0.500000,0.615385,1.166667,0.615385,1.166667,1.769231,1.416667,0.538462,0.166667,0.153846,0.000000,0.0,0.0,0.076923,0.000000,0.000000,0.000000,0.000000,0.0,0.076923,0.000000,0.0,False
1,1005660,Palmeiras,Vasco DA Gama,6.972727,7.154545,83.181818,84.363636,0.000000,0.181818,0.727273,0.363636,0.363636,0.272727,0.181818,0.181818,0.090909,0.090909,0.181818,0.181818,0.272727,0.818182,37.818182,17.090909,0.727273,0.545455,31.636364,11.454545,1.363636,2.090909,0.090909,0.727273,0.454545,1.000000,9.818182,9.636364,4.727273,4.818182,1.545455,1.545455,1.272727,0.454545,0.818182,1.272727,0.909091,1.272727,1.727273,0.727273,0.090909,0.454545,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,True
2,1005661,America Mineiro,Sao Paulo,6.670000,6.981818,78.100000,80.636364,0.200000,0.000000,0.500000,1.181818,0.200000,0.636364,0.000000,0.090909,0.300000,0.181818,0.000000,0.090909,0.500000,0.090909,24.100000,51.181818,0.800000,0.909091,18.300000,45.727273,1.500000,1.363636,0.300000,0.272727,1.500000,0.545455,8.100000,8.454545,3.300000,4.909091,1.100000,1.363636,0.600000,1.090909,0.500000,1.000000,0.700000,1.545455,2.100000,1.090909,0.300000,0.181818,0.000000,0.0,0.0,0.000000,0.100000,0.000000,0.000000,0.000000,0.0,0.000000,0.100000,0.0,True
3,1005662,Atletico-MG,Santos,7.009091,6.836364,82.272727,78.909091,0.181818,0.090909,1.545455,0.818182,0.909091,0.181818,0.090909,0.000000,0.181818,0.090909,0.090909,0.000000,0.090909,0.454545,47.181818,32.545455,1.363636,0.727273,41.454545,26.181818,1.090909,1.636364,0.272727,0.272727,0.636364,0.909091,9.000000,8.727273,4.454545,3.909091,1.636364,1.727273,1.181818,0.818182,0.636364,0.909091,0.818182,0.545455,1.272727,1.272727,0.545455,0.181818,0.000000,0.0,0.0,0.000000,0.000000,0.090909,0.000000,0.000000,0.0,0.000000,0.000000,0.0,False
4,1005663,Corinthians,Goias,6.800000,6.827273,82.727273,79.636364,0.363636,0.090909,1.000000,0.727273,0.636364,0.181818,0.181818,0.000000,0.090909,0.181818,0.090909,0.000000,0.090909,0.090909,30.181818,40.000000,0.272727,1.090909,23.636364,33.090909,1.272727,1.090909,0.181818,0.363636,1.000000,0.909091,7.272727,10.727273,3.818182,6.000000,1.181818,1.363636,0.454545,1.181818,0.636364,0.454545,1.000000,1.636364,0.909091,1.363636,0.272727,0.181818,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
365,1006024,Palmeiras,Cruzeiro,7.046667,6.946667,80.600000,75.566667,0.033333,0.233333,1.066667,0.966667,0.466667,0.366667,0.133333,0.033333,0.066667,0.033333,0.100000,0.033333,0.266667,0.233333,31.333333,29.800000,1.233333,0.866667,25.700000,24.400000,1.333333,1.133333,0.233333,0.133333,0.533333,0.433333,7.466667,7.066667,3.866667,3.733333,1.266667,0.900000,0.633333,0.366667,0.733333,0.700000,1.266667,1.333333,0.933333,0.933333,0.100000,0.166667,0.066667,0.0,0.0,0.033333,0.000000,0.000000,0.000000,0.000000,0.0,0.033333,0.000000,0.0,False
366,1006025,Botafogo,Internacional,7.022222,6.860606,78.777778,80.818182,0.185185,0.060606,1.111111,0.636364,0.407407,0.212121,0.111111,0.090909,0.074074,0.030303,0.037037,0.060606,0.296296,0.212121,33.555556,37.333333,1.148148,0.757576,27.962963,31.242424,1.000000,0.939394,0.222222,0.272727,0.629630,0.545455,7.444444,6.878788,3.851852,2.818182,1.000000,0.939394,0.370370,0.484848,0.296296,0.727273,1.296296,0.848485,0.962963,1.030303,0.111111,0.212121,0.037037,0.0,0.0,0.030303,0.000000,0.000000,0.037037,0.030303,0.0,0.000000,0.000000,0.0,True
367,1006026,Corinthians,Coritiba,5.594444,6.283333,66.000000,70.125000,0.055556,0.000000,0.611111,0.375000,0.166667,0.125000,0.166667,0.000000,0.000000,0.083333,0.000000,0.000000,0.000000,0.541667,26.888889,25.083333,0.555556,0.500000,23.111111,19.750000,0.833333,1.083333,0.222222,0.333333,0.444444,0.708333,5.888889,8.416667,3.000000,4.166667,1.500000,1.833333,0.666667,0.750000,0.277778,0.958333,0.944444,1.250000,0.388889,0.833333,0.222222,0.166667,0.000000,0.0,0.0,0.000000,0.111111,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,False
368,1006027,Atletico-MG,Bahia,6.856667,7.000000,78.300000,83.636364,0.133333,0.121212,0.600000,0.757576,0.366667,0.363636,0.200000,0.181818,0.033333,0.151515,0.133333,0.121212,0.433333,0.212121,30.866667,28.878788,0.600000,1.090909,25.200000,23.090909,1.200000,1.484848,0.433333,0.242424,0.733333,1.030303,7.500000,7.212121,3.333333,3.606061,1.200000,0.939394,0.600000,0.272727,0.800000,0.696970,0.733333,0.757576,1.033333,0.848485,0.200000,0.151515,0.033333,0.0,0.0,0.030303,0.033333,0.000000,0.000000,0.121212,0.0,0.000000,0.000000,0.0,True


In [15]:
df_pivot['Total_Gols'] = df_pivot['Total_Gols'].astype(int)

In [16]:
df = df_pivot

In [None]:
# 1. Definindo as features e o target
features = df.columns[3:-2].tolist()  # Seleciona as colunas de features
target = 'Total_Gols'  # Define a coluna target

X = df[features]  # Features
y = df[target]    # Target

# 2. Dividir os dados em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. Definir o modelo de Random Forest para classificação
model = RandomForestClassifier(random_state=42)

# 4. Definir os parâmetros para o GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],  # Número de árvores na floresta
    'max_depth': [None, 10, 20, 30],  # Profundidade máxima das árvores
    'min_samples_split': [2, 5, 10],  # Número mínimo de amostras para dividir um nó
    'min_samples_leaf': [1, 2, 4],    # Número mínimo de amostras em uma folha
    'max_features': ['auto', 'sqrt']  # Número de features consideradas para divisão
}

# 5. Configurar o GridSearchCV
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',  # Métrica de avaliação (acurácia)
    cv=5,  # Número de folds na validação cruzada
    n_jobs=-1,  # Usar todos os núcleos do processador
    verbose=2  # Mostrar logs durante o treinamento
)

# 6. Treinar o modelo com GridSearchCV
grid_search.fit(X_train, y_train)

# 7. Melhores parâmetros encontrados
print("Melhores parâmetros encontrados:")
print(grid_search.best_params_)

# 8. Melhor modelo
best_model = grid_search.best_estimator_

# 9. Fazer previsões com o melhor modelo
y_pred = best_model.predict(X_test)

#---
y_train_pred = best_model.predict(X_train)
y_train_prob = best_model.predict_proba(X_train)
y_test_pred = best_model.predict(X_test)
y_test_prob = best_model.predict_proba(X_test)
print(f'Acurácia treino: {metrics.accuracy_score(y_train, y_train_pred):.4f}')
print(f'Acurácia teste: {metrics.accuracy_score(y_test, y_test_pred):.4f}')
print(f'AUC treino: {metrics.roc_auc_score(y_train, y_train_prob[:,1]):.4f}')
print(f'AUC teste: {metrics.roc_auc_score(y_test, y_test_prob[:,1]):.4f}')


Fitting 5 folds for each of 216 candidates, totalling 1080 fits


540 fits failed out of a total of 1080.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
443 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\paulo\OneDrive\Área de Trabalho\projetos\Footballab\.venv\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\paulo\OneDrive\Área de Trabalho\projetos\Footballab\.venv\lib\site-packages\sklearn\base.py", line 1382, in wrapper
    estimator._validate_params()
  File "c:\Users\paulo\OneDrive\Área de Trabalho\projetos\Footballab\.venv\lib\site-packages\sklearn\base.py", line 436, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\paulo\OneD

Melhores parâmetros encontrados:
{'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 200}
Acurácia treino: 0.9966
Acurácia teste: 0.5676
AUC treino: 1.0000
AUC teste: 0.5938
