In [5]:
import pandas as pd
pd.set_option('display.max_columns', None)
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [None]:
df_escalacao = pd.read_csv("../Data/Camp_Brasileiro_2024_escalacao.csv")
df_team_stats = pd.read_csv("../Data_Lake/Camp_Brasileiro/2024/Camp_Brasileiro_2024_team_stats_final.csv")
df_players_stats = pd.read_csv("../Data_Lake/Camp_Brasileiro/2024/Camp_Brasileiro_2024_players_stats_final.csv")
df_Games = pd.read_csv("../Data_Lake/Camp_Brasileiro/2024/Camp_Brasileiro_2024_Games_final.csv")

df_stats = df_players_stats

# Lista de games e das medias

In [None]:
lista_games_2024 = df_stats['fixture_id'].unique().tolist()
List_AVG = ['rating','minutes','offsides','shots_total','shots_on','goals_total','goals_conceded','assists','saves','passes_total','passes_key','passes_accuracy','tackles_total','tackles_blocks','tackles_interceptions','duels_total','duels_won','dribbles_attempts','dribbles_success','dribbles_past','fouls_drawn','fouls_committed','cards_yellow','cards_red','penalty_won','penalty_committed','penalty_scored','penalty_missed','penalty_saved']

# Media das estatísticas dos players que estão escalados para jogar a partida de n° fixture_id nos últimos 5 jogos 

In [8]:
def calcular_media_stats(df, fixture_x):
    # Lista de jogadores que participaram do fixture_x
    jogadores_em_x = df[df['fixture_id'] == fixture_x]['player_id'].unique()
    
    # Filtra os jogos anteriores dos jogadores de x
    jogos_anteriores = df[(df['player_id'].isin(jogadores_em_x)) & (df['fixture_id'] < fixture_x)]
    
    # Ordena os jogos por jogador e fixture_id (cronologicamente)
    jogos_ordenados = jogos_anteriores.sort_values(['player_id', 'fixture_id'])
    
    # Seleciona os três últimos jogos de cada jogador
    ultimos_tres = jogos_ordenados.groupby('player_id').tail(3)           #Quantos jogos vou pegar 
    
    # Calcula a média das estatísticas
    media_estatisticas = ultimos_tres.groupby(['player_id','team_id'])[List_AVG].mean().reset_index()
    
    # Garante todos os jogadores de x, mesmo sem jogos anteriores
    resultado_final = pd.DataFrame({'player_id': jogadores_em_x})
    resultado_final = resultado_final.merge(media_estatisticas, on='player_id', how='left')
    
    return resultado_final

In [None]:
# Lista para armazenar os DataFrames de cada fixture
lista_dfs = []

# Iterar sobre cada fixture_id na lista_games_2024
for fixture_x in lista_games_2024:
    # Calcular a média dos últimos três jogos para o fixture atual
    df_medias = calcular_media_stats(df_stats, fixture_x)
    
    # Adicionar a coluna 'fixture_id' ao DataFrame resultante (para identificar o jogo)
    df_medias['fixture_id'] = fixture_x
    
    # Adicionar à lista
    lista_dfs.append(df_medias)

# Concatenar todos os DataFrames da lista em um único df_features
df_features = pd.concat(lista_dfs, ignore_index=True)

#Pegando estatísticas só de players que jogaram em média mais de 45 minutos
df_features = df_features[df_features['minutes'] >= 45]


- Para cada jogador e jogo, ela calcula a média de suas estatísticas nos últimos 5 jogos anteriores
- As estatísticas são armazenas em df_features que é uma tabela (DataFrame) que resume o desempenho passado de jogadores para cada jogo (fixture) 

In [None]:
df_features

Unnamed: 0,player_id,team_id,rating,minutes,offsides,shots_total,shots_on,goals_total,goals_conceded,assists,saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved,fixture_id
459,10298,118.0,7.300000,90.000000,0.0,0.000000,0.000000,0.000000,2.0,0.000000,7.0,32.000000,0.000000,22.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1180367
460,30,118.0,6.900000,90.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,52.000000,2.000000,43.000000,2.000000,2.000000,0.000000,12.000000,5.000000,1.000000,0.000000,1.000000,2.000000,2.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1180367
462,10377,118.0,6.300000,90.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,44.000000,0.000000,40.000000,1.000000,0.000000,0.000000,8.000000,3.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1180367
463,197383,118.0,6.300000,90.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,47.000000,1.000000,40.000000,0.000000,0.000000,4.000000,11.000000,2.000000,3.000000,0.000000,1.000000,1.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1180367
464,180236,118.0,6.700000,64.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,45.000000,0.000000,42.000000,0.000000,0.000000,0.000000,12.000000,4.000000,0.000000,0.000000,2.000000,4.000000,1.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1180367
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17301,9890,133.0,6.733333,46.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,26.333333,0.000000,23.333333,1.333333,0.000000,1.333333,5.000000,2.000000,0.666667,0.333333,0.666667,0.000000,1.333333,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,1180733
17302,375565,133.0,7.133333,94.666667,0.0,1.333333,1.000000,0.000000,0.0,0.000000,0.0,50.000000,0.666667,44.333333,2.000000,0.000000,1.333333,7.666667,4.333333,0.666667,0.333333,0.666667,1.333333,1.666667,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,1180733
17303,147,133.0,7.433333,64.666667,0.0,1.333333,0.666667,0.333333,0.0,0.333333,0.0,38.000000,0.333333,33.666667,0.666667,0.000000,0.000000,7.333333,3.666667,2.000000,1.333333,0.333333,1.333333,1.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1180733
17304,12797,133.0,7.333333,49.000000,0.0,2.000000,1.333333,0.333333,0.0,0.333333,0.0,12.333333,0.333333,11.666667,0.333333,0.000000,0.000000,4.333333,2.666667,0.333333,0.333333,0.333333,1.000000,0.333333,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1180733


# Criando df com resultado dos jogos e informação de mando de campo

In [11]:
df_Games = df_Games.rename(columns={'id_partida': 'fixture_id'})
df_jogos = df_Games[['fixture_id', 'Home_Team','Away_Team','home.id','away.id']]
df_form = df_escalacao[['fixture_id','player_id','team']]
df_home_away = pd.merge(df_form,df_jogos, on='fixture_id',how='inner')

Criando Book de variáveis

In [12]:
#Vai dizer se está jogando em casa ou fora de casa
df_home_away['condicao'] = df_home_away['team'] == df_home_away['Home_Team']
df_var = pd.merge(df_features,df_home_away, on = ['fixture_id','player_id'])

#Media das estatisticas dos jogadores nos ultimos 5 jogos 
df_var = df_var.groupby(['fixture_id','team'])[List_AVG].mean().reset_index()

Adicionando coluna sobre o mando de campo

In [None]:
# Inicializando uma lista vazia para armazenar os valores de 'home' (True ou False)
home_values = []

# Iterando sobre cada linha do DataFrame df_var
for index, row_var in df_var.iterrows():
    fixture_id_var = row_var['fixture_id']
    team_var = row_var['team']

    # Encontra a linha correspondente em df_Games com o mesmo fixture_id
    # Usando .loc para selecionar a linha onde 'fixture_id' é igual a fixture_id_var
    # Usando .iloc[0] para pegar a primeira (e única esperada) linha como uma Series
    games_row = df_Games.loc[df_Games['fixture_id'] == fixture_id_var]

    # Verificando se encontramos uma linha correspondente em df_Games
    if not games_row.empty:
        # Obtém o 'Home_Team' do df_Games para este fixture_id
        home_team_games = games_row['Home_Team'].iloc[0] # .iloc[0] para pegar o valor da Series

        # Compara o 'team' de df_var com o 'Home_Team' de df_Games
        is_home = (team_var == home_team_games)
        home_values.append(is_home) # Adiciona True ou False à lista
    else:
        home_values.append(False) # Ou poderia ser home_values.append(None) para NaN. Não pensei nisso ainda. Acho que não vou precisar 

# Adiciona a lista de valores 'home' como uma nova coluna no df_var
df_var['home'] = home_values

#Removendo linhas sem valores
df_var.dropna(inplace=True)

In [14]:
df_var

Unnamed: 0,fixture_id,team,rating,minutes,offsides,shots_total,shots_on,goals_total,goals_conceded,assists,saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved,home
0,1180365,Atletico Paranaense,7.610000,85.200000,0.200000,1.100000,0.600000,0.200000,0.000000,0.300000,0.000000,49.000000,1.500000,42.700000,1.200000,0.000000,1.000000,6.000000,3.700000,0.900000,0.600000,0.100000,0.800000,1.100000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,False
1,1180365,Gremio,6.754545,77.363636,0.090909,0.363636,0.090909,0.090909,0.000000,0.000000,0.090909,41.909091,0.636364,36.818182,0.909091,0.454545,0.636364,8.181818,3.727273,0.818182,0.454545,1.090909,1.363636,0.636364,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,True
2,1180366,Atletico-MG,6.910000,78.000000,0.200000,0.300000,0.100000,0.000000,0.000000,0.000000,0.200000,31.400000,0.400000,24.700000,1.000000,0.300000,0.700000,8.000000,4.200000,1.500000,0.700000,0.900000,1.200000,1.000000,0.300000,0.0,0.000000,0.000000,0.0,0.0,0.0,True
3,1180366,Criciuma,6.788889,86.888889,0.111111,0.333333,0.222222,0.000000,0.111111,0.111111,0.333333,39.666667,0.444444,32.222222,1.000000,0.444444,1.222222,8.444444,3.888889,1.444444,0.777778,1.777778,1.222222,0.777778,0.222222,0.0,0.000000,0.000000,0.0,0.0,0.0,False
4,1180367,Bahia,6.650000,78.666667,0.083333,0.416667,0.250000,0.000000,0.166667,0.083333,0.583333,37.250000,0.416667,31.083333,0.833333,0.166667,0.750000,9.166667,3.500000,1.416667,0.666667,0.583333,1.333333,0.916667,0.083333,0.0,0.000000,0.000000,0.0,0.0,0.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
733,1180732,Internacional,6.829630,74.000000,0.074074,0.703704,0.370370,0.074074,0.185185,0.148148,0.259259,30.555556,0.481481,25.074074,1.444444,0.074074,0.555556,7.148148,3.407407,1.148148,0.481481,0.518519,0.814815,0.962963,0.222222,0.0,0.037037,0.000000,0.0,0.0,0.0,False
734,1180733,Cuiaba,6.946667,78.066667,0.066667,0.666667,0.333333,0.133333,0.000000,0.000000,0.000000,24.733333,0.400000,20.533333,1.866667,0.466667,0.666667,7.600000,3.800000,1.066667,0.466667,0.733333,0.733333,0.666667,0.066667,0.0,0.000000,0.000000,0.0,0.0,0.0,True
735,1180733,Vasco DA Gama,6.755556,75.592593,0.000000,0.740741,0.333333,0.148148,0.185185,0.148148,0.407407,30.592593,0.333333,26.555556,0.925926,0.296296,0.666667,5.111111,2.555556,0.444444,0.296296,0.259259,0.592593,0.962963,0.185185,0.0,0.000000,0.000000,0.0,0.0,0.0,False
736,1180734,Cruzeiro,7.070370,85.074074,0.074074,1.074074,0.296296,0.148148,0.148148,0.037037,0.481481,41.592593,0.925926,36.851852,0.888889,0.259259,0.259259,6.962963,2.851852,1.185185,0.333333,0.629630,0.814815,1.222222,0.148148,0.0,0.000000,0.000000,0.0,0.0,0.0,False


# Separando estatísticas em time da casa e visitante

In [15]:
# Passo 1: Pivotar o DataFrame
df_pivot = df_var.set_index(['fixture_id', 'home']).unstack('home')

# Passo 2: Renomear colunas
df_pivot.columns = [
    f"{col}_{'home' if home else 'away'}" 
    for col, home in df_pivot.columns
]
df_pivot

Unnamed: 0_level_0,team_away,team_home,rating_away,rating_home,minutes_away,minutes_home,offsides_away,offsides_home,shots_total_away,shots_total_home,shots_on_away,shots_on_home,goals_total_away,goals_total_home,goals_conceded_away,goals_conceded_home,assists_away,assists_home,saves_away,saves_home,passes_total_away,passes_total_home,passes_key_away,passes_key_home,passes_accuracy_away,passes_accuracy_home,tackles_total_away,tackles_total_home,tackles_blocks_away,tackles_blocks_home,tackles_interceptions_away,tackles_interceptions_home,duels_total_away,duels_total_home,duels_won_away,duels_won_home,dribbles_attempts_away,dribbles_attempts_home,dribbles_success_away,dribbles_success_home,dribbles_past_away,dribbles_past_home,fouls_drawn_away,fouls_drawn_home,fouls_committed_away,fouls_committed_home,cards_yellow_away,cards_yellow_home,cards_red_away,cards_red_home,penalty_won_away,penalty_won_home,penalty_committed_away,penalty_committed_home,penalty_scored_away,penalty_scored_home,penalty_missed_away,penalty_missed_home,penalty_saved_away,penalty_saved_home
fixture_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1
1180365,Atletico Paranaense,Gremio,7.610000,6.754545,85.200000,77.363636,0.200000,0.090909,1.100000,0.363636,0.600000,0.090909,0.200000,0.090909,0.000000,0.000000,0.300000,0.000000,0.000000,0.090909,49.000000,41.909091,1.500000,0.636364,42.700000,36.818182,1.200000,0.909091,0.000000,0.454545,1.000000,0.636364,6.000000,8.181818,3.700000,3.727273,0.900000,0.818182,0.600000,0.454545,0.100000,1.090909,0.800000,1.363636,1.100000,0.636364,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0
1180366,Criciuma,Atletico-MG,6.788889,6.910000,86.888889,78.000000,0.111111,0.200000,0.333333,0.300000,0.222222,0.100000,0.000000,0.000000,0.111111,0.000000,0.111111,0.000000,0.333333,0.200000,39.666667,31.400000,0.444444,0.400000,32.222222,24.700000,1.000000,1.000000,0.444444,0.300000,1.222222,0.700000,8.444444,8.000000,3.888889,4.200000,1.444444,1.500000,0.777778,0.700000,1.777778,0.900000,1.222222,1.200000,0.777778,1.000000,0.222222,0.300000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0
1180367,Fluminense,Bahia,7.333333,6.650000,85.222222,78.666667,0.000000,0.083333,1.777778,0.416667,1.111111,0.250000,0.222222,0.000000,0.222222,0.166667,0.222222,0.083333,0.666667,0.583333,44.777778,37.250000,1.666667,0.416667,39.333333,31.083333,0.333333,0.833333,0.000000,0.166667,0.444444,0.750000,8.666667,9.166667,4.111111,3.500000,2.444444,1.416667,0.888889,0.666667,0.444444,0.583333,1.666667,1.333333,0.555556,0.916667,0.222222,0.083333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0
1180368,Sao Paulo,Flamengo,7.045455,7.036364,80.090909,80.181818,0.181818,0.090909,0.636364,0.818182,0.272727,0.181818,0.000000,0.181818,0.000000,0.090909,0.000000,0.000000,0.000000,0.272727,56.727273,36.818182,0.636364,0.818182,50.363636,31.636364,1.181818,0.545455,0.090909,0.181818,0.818182,0.454545,8.818182,6.727273,5.454545,3.181818,1.636364,0.909091,1.090909,0.545455,0.636364,0.727273,1.090909,1.000000,0.818182,1.000000,0.000000,0.272727,0.000000,0.000000,0.000000,0.090909,0.000000,0.090909,0.0,0.090909,0.000000,0.000000,0.0,0.0
1180369,Atletico Goianiense,Botafogo,6.760000,6.609091,71.400000,75.909091,0.200000,0.000000,0.700000,0.454545,0.400000,0.181818,0.100000,0.090909,0.200000,0.000000,0.000000,0.090909,0.000000,0.181818,16.400000,22.818182,0.700000,0.454545,11.600000,17.909091,0.500000,0.909091,0.200000,0.454545,0.200000,0.636364,6.800000,6.909091,3.500000,3.000000,1.100000,1.090909,0.700000,0.727273,0.600000,0.636364,1.200000,0.636364,0.800000,1.000000,0.300000,0.000000,0.000000,0.000000,0.100000,0.000000,0.000000,0.000000,0.0,0.000000,0.100000,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1180730,Fluminense,Palmeiras,7.033333,6.918182,73.454545,71.363636,0.121212,0.000000,0.878788,1.030303,0.333333,0.515152,0.060606,0.090909,0.030303,0.121212,0.060606,0.060606,0.272727,0.242424,40.757576,30.939394,1.121212,1.121212,36.333333,25.393939,0.878788,1.303030,0.060606,0.242424,0.424242,0.757576,6.818182,7.212121,3.151515,3.939394,1.454545,1.333333,0.636364,0.757576,0.545455,0.787879,0.696970,0.878788,0.575758,0.696970,0.090909,0.090909,0.030303,0.030303,0.030303,0.000000,0.000000,0.000000,0.0,0.000000,0.060606,0.000000,0.0,0.0
1180731,Criciuma,RB Bragantino,7.000000,6.375758,88.740741,73.181818,0.185185,0.060606,0.814815,0.606061,0.333333,0.212121,0.074074,0.121212,0.259259,0.181818,0.074074,0.121212,0.518519,0.303030,31.037037,24.212121,0.814815,0.575758,25.222222,18.545455,1.555556,1.060606,0.370370,0.545455,0.851852,0.454545,7.962963,7.454545,4.074074,3.878788,1.148148,1.000000,0.740741,0.454545,1.037037,0.636364,0.407407,1.181818,1.037037,0.848485,0.111111,0.212121,0.000000,0.000000,0.000000,0.000000,0.037037,0.030303,0.0,0.000000,0.000000,0.000000,0.0,0.0
1180732,Internacional,Fortaleza EC,6.829630,6.606667,74.000000,74.733333,0.074074,0.000000,0.703704,0.233333,0.370370,0.066667,0.074074,0.000000,0.185185,0.166667,0.148148,0.033333,0.259259,0.500000,30.555556,25.766667,0.481481,0.400000,25.074074,21.566667,1.444444,1.100000,0.074074,0.400000,0.555556,0.633333,7.148148,6.266667,3.407407,3.033333,1.148148,0.600000,0.481481,0.333333,0.518519,0.633333,0.814815,0.866667,0.962963,0.933333,0.222222,0.166667,0.000000,0.000000,0.037037,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.033333,0.0,0.0
1180733,Vasco DA Gama,Cuiaba,6.755556,6.946667,75.592593,78.066667,0.000000,0.066667,0.740741,0.666667,0.333333,0.333333,0.148148,0.133333,0.185185,0.000000,0.148148,0.000000,0.407407,0.000000,30.592593,24.733333,0.333333,0.400000,26.555556,20.533333,0.925926,1.866667,0.296296,0.466667,0.666667,0.666667,5.111111,7.600000,2.555556,3.800000,0.444444,1.066667,0.296296,0.466667,0.259259,0.733333,0.592593,0.733333,0.962963,0.666667,0.185185,0.066667,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0


# Construção da variável resposta

Adicionando a coluna de gols total do jogo

In [16]:
df_Games['Total_Gols'] = df_Games['Gols_Home'] + df_Games['Gols_Away']
df_tot_gols = df_Games[['fixture_id','Total_Gols']]
df_pivot = pd.merge(df_pivot,df_tot_gols, on='fixture_id')

In [17]:
df_pivot['Total_Gols'] = df_pivot['Total_Gols'] > 2.5
df_pivot

Unnamed: 0,fixture_id,team_away,team_home,rating_away,rating_home,minutes_away,minutes_home,offsides_away,offsides_home,shots_total_away,shots_total_home,shots_on_away,shots_on_home,goals_total_away,goals_total_home,goals_conceded_away,goals_conceded_home,assists_away,assists_home,saves_away,saves_home,passes_total_away,passes_total_home,passes_key_away,passes_key_home,passes_accuracy_away,passes_accuracy_home,tackles_total_away,tackles_total_home,tackles_blocks_away,tackles_blocks_home,tackles_interceptions_away,tackles_interceptions_home,duels_total_away,duels_total_home,duels_won_away,duels_won_home,dribbles_attempts_away,dribbles_attempts_home,dribbles_success_away,dribbles_success_home,dribbles_past_away,dribbles_past_home,fouls_drawn_away,fouls_drawn_home,fouls_committed_away,fouls_committed_home,cards_yellow_away,cards_yellow_home,cards_red_away,cards_red_home,penalty_won_away,penalty_won_home,penalty_committed_away,penalty_committed_home,penalty_scored_away,penalty_scored_home,penalty_missed_away,penalty_missed_home,penalty_saved_away,penalty_saved_home,Total_Gols
0,1180365,Atletico Paranaense,Gremio,7.610000,6.754545,85.200000,77.363636,0.200000,0.090909,1.100000,0.363636,0.600000,0.090909,0.200000,0.090909,0.000000,0.000000,0.300000,0.000000,0.000000,0.090909,49.000000,41.909091,1.500000,0.636364,42.700000,36.818182,1.200000,0.909091,0.000000,0.454545,1.000000,0.636364,6.000000,8.181818,3.700000,3.727273,0.900000,0.818182,0.600000,0.454545,0.100000,1.090909,0.800000,1.363636,1.100000,0.636364,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,False
1,1180366,Criciuma,Atletico-MG,6.788889,6.910000,86.888889,78.000000,0.111111,0.200000,0.333333,0.300000,0.222222,0.100000,0.000000,0.000000,0.111111,0.000000,0.111111,0.000000,0.333333,0.200000,39.666667,31.400000,0.444444,0.400000,32.222222,24.700000,1.000000,1.000000,0.444444,0.300000,1.222222,0.700000,8.444444,8.000000,3.888889,4.200000,1.444444,1.500000,0.777778,0.700000,1.777778,0.900000,1.222222,1.200000,0.777778,1.000000,0.222222,0.300000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,False
2,1180367,Fluminense,Bahia,7.333333,6.650000,85.222222,78.666667,0.000000,0.083333,1.777778,0.416667,1.111111,0.250000,0.222222,0.000000,0.222222,0.166667,0.222222,0.083333,0.666667,0.583333,44.777778,37.250000,1.666667,0.416667,39.333333,31.083333,0.333333,0.833333,0.000000,0.166667,0.444444,0.750000,8.666667,9.166667,4.111111,3.500000,2.444444,1.416667,0.888889,0.666667,0.444444,0.583333,1.666667,1.333333,0.555556,0.916667,0.222222,0.083333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,True
3,1180368,Sao Paulo,Flamengo,7.045455,7.036364,80.090909,80.181818,0.181818,0.090909,0.636364,0.818182,0.272727,0.181818,0.000000,0.181818,0.000000,0.090909,0.000000,0.000000,0.000000,0.272727,56.727273,36.818182,0.636364,0.818182,50.363636,31.636364,1.181818,0.545455,0.090909,0.181818,0.818182,0.454545,8.818182,6.727273,5.454545,3.181818,1.636364,0.909091,1.090909,0.545455,0.636364,0.727273,1.090909,1.000000,0.818182,1.000000,0.000000,0.272727,0.000000,0.000000,0.000000,0.090909,0.000000,0.090909,0.0,0.090909,0.000000,0.000000,0.0,0.0,True
4,1180369,Atletico Goianiense,Botafogo,6.760000,6.609091,71.400000,75.909091,0.200000,0.000000,0.700000,0.454545,0.400000,0.181818,0.100000,0.090909,0.200000,0.000000,0.000000,0.090909,0.000000,0.181818,16.400000,22.818182,0.700000,0.454545,11.600000,17.909091,0.500000,0.909091,0.200000,0.454545,0.200000,0.636364,6.800000,6.909091,3.500000,3.000000,1.100000,1.090909,0.700000,0.727273,0.600000,0.636364,1.200000,0.636364,0.800000,1.000000,0.300000,0.000000,0.000000,0.000000,0.100000,0.000000,0.000000,0.000000,0.0,0.000000,0.100000,0.000000,0.0,0.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
364,1180730,Fluminense,Palmeiras,7.033333,6.918182,73.454545,71.363636,0.121212,0.000000,0.878788,1.030303,0.333333,0.515152,0.060606,0.090909,0.030303,0.121212,0.060606,0.060606,0.272727,0.242424,40.757576,30.939394,1.121212,1.121212,36.333333,25.393939,0.878788,1.303030,0.060606,0.242424,0.424242,0.757576,6.818182,7.212121,3.151515,3.939394,1.454545,1.333333,0.636364,0.757576,0.545455,0.787879,0.696970,0.878788,0.575758,0.696970,0.090909,0.090909,0.030303,0.030303,0.030303,0.000000,0.000000,0.000000,0.0,0.000000,0.060606,0.000000,0.0,0.0,False
365,1180731,Criciuma,RB Bragantino,7.000000,6.375758,88.740741,73.181818,0.185185,0.060606,0.814815,0.606061,0.333333,0.212121,0.074074,0.121212,0.259259,0.181818,0.074074,0.121212,0.518519,0.303030,31.037037,24.212121,0.814815,0.575758,25.222222,18.545455,1.555556,1.060606,0.370370,0.545455,0.851852,0.454545,7.962963,7.454545,4.074074,3.878788,1.148148,1.000000,0.740741,0.454545,1.037037,0.636364,0.407407,1.181818,1.037037,0.848485,0.111111,0.212121,0.000000,0.000000,0.000000,0.000000,0.037037,0.030303,0.0,0.000000,0.000000,0.000000,0.0,0.0,True
366,1180732,Internacional,Fortaleza EC,6.829630,6.606667,74.000000,74.733333,0.074074,0.000000,0.703704,0.233333,0.370370,0.066667,0.074074,0.000000,0.185185,0.166667,0.148148,0.033333,0.259259,0.500000,30.555556,25.766667,0.481481,0.400000,25.074074,21.566667,1.444444,1.100000,0.074074,0.400000,0.555556,0.633333,7.148148,6.266667,3.407407,3.033333,1.148148,0.600000,0.481481,0.333333,0.518519,0.633333,0.814815,0.866667,0.962963,0.933333,0.222222,0.166667,0.000000,0.000000,0.037037,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.033333,0.0,0.0,True
367,1180733,Vasco DA Gama,Cuiaba,6.755556,6.946667,75.592593,78.066667,0.000000,0.066667,0.740741,0.666667,0.333333,0.333333,0.148148,0.133333,0.185185,0.000000,0.148148,0.000000,0.407407,0.000000,30.592593,24.733333,0.333333,0.400000,26.555556,20.533333,0.925926,1.866667,0.296296,0.466667,0.666667,0.666667,5.111111,7.600000,2.555556,3.800000,0.444444,1.066667,0.296296,0.466667,0.259259,0.733333,0.592593,0.733333,0.962963,0.666667,0.185185,0.066667,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,True


In [18]:
df_pivot['Total_Gols'] = df_pivot['Total_Gols'].astype(int)

In [19]:
df = df_pivot

In [None]:
# 1. Definindo as features e o target
features = df.columns[3:-2].tolist()  # Seleciona as colunas de features
target = 'Total_Gols'  # Define a coluna target

X = df[features]  # Features
y = df[target]    # Target

# 2. Dividir os dados em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. Definir o modelo de Random Forest para classificação
model = RandomForestClassifier(random_state=42)

# 4. Definir os parâmetros para o GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],  # Número de árvores na floresta
    'max_depth': [None, 10, 20, 30],  # Profundidade máxima das árvores
    'min_samples_split': [2, 5, 10],  # Número mínimo de amostras para dividir um nó
    'min_samples_leaf': [1, 2, 4],    # Número mínimo de amostras em uma folha
    'max_features': ['auto', 'sqrt']  # Número de features consideradas para divisão
}

# 5. Configurar o GridSearchCV
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',  # Métrica de avaliação (acurácia)
    cv=5,  # Número de folds na validação cruzada
    n_jobs=-1,  # Usar todos os núcleos do processador
    verbose=2  # Mostrar logs durante o treinamento
)

# 6. Treinar o modelo com GridSearchCV
grid_search.fit(X_train, y_train)

# 7. Melhores parâmetros encontrados
print("Melhores parâmetros encontrados:")
print(grid_search.best_params_)

# 8. Melhor modelo
best_model = grid_search.best_estimator_

# 9. Fazer previsões com o melhor modelo
y_pred = best_model.predict(X_test)

# 10. Avaliar o modelo
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)


Fitting 5 folds for each of 216 candidates, totalling 1080 fits
Melhores parâmetros encontrados:
{'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}


540 fits failed out of a total of 1080.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
263 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\paulo\OneDrive\Área de Trabalho\projetos\Footballab\.venv\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\paulo\OneDrive\Área de Trabalho\projetos\Footballab\.venv\lib\site-packages\sklearn\base.py", line 1382, in wrapper
    estimator._validate_params()
  File "c:\Users\paulo\OneDrive\Área de Trabalho\projetos\Footballab\.venv\lib\site-packages\sklearn\base.py", line 436, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\paulo\OneD

# Testando das métricas do primeiro modelo

In [21]:
print(f"Acurácia: {accuracy}")
print(f"Precisão: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")
print("Matriz de Confusão:")
print(conf_matrix)

Acurácia: 0.527027027027027
Precisão: 0.5263157894736842
Recall: 0.2777777777777778
F1-Score: 0.36363636363636365
Matriz de Confusão:
[[29  9]
 [26 10]]


In [22]:
y_test.mean()

np.float64(0.4864864864864865)