In [17]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from io_helper import IOHelper

pd.set_option('future.no_silent_downcasting', True)

## Definindo as colunas que serão usadas para treinar o modelo
team_map_results_columns_to_add  = ["Id", "Kills", "Deaths", "PlusMinus", "Adr", "Kast", "Rating"]
team_map_results_columns_to_drop = ["Id", "Kills", "Deaths", "Adr", "Kast"]

map_results = [f"map{map_num}Team{team_num}Side{side}Player{player_num}{attribute}"
                for map_num in range(1, 6) 
                for team_num in range(1, 3) 
                for player_num in range(1, 6)
                for attribute in team_map_results_columns_to_add
                for side in ["Both", "CounterTerrorist", "Terrorist"]]

## Definindo as colunas que serão retiradas para a análise
drop_columns = [f"map{map_num}Team{team_num}Side{side}Player{player_num}{attribute}"
                for map_num in range(1, 6) 
                for team_num in range(1, 3) 
                for player_num in range(1, 6)
                for attribute in team_map_results_columns_to_drop
                for side in ["Both", "CounterTerrorist", "Terrorist"]]

### Listando as colunas que serão usadas das partidas, como: Jogadores, Mortes, Assistências etc.
matches_train_data_columns = ['eventId', 'matchId', 'mapBestOf'] + map_results

matches_results_train_data = pd.read_csv('../../data/raw/matches_results.csv')
matches_train_data = pd.read_csv('../../data/raw/matches.csv')

matches_train_data = matches_train_data[matches_train_data_columns]

matches_results_train_data_columns = ['eventId', 'matchId', 'TeamOneScore', 'TeamTwoScore', 'teamOneWon', 'teamTwoWon']
matches_results_train_data = matches_results_train_data[[col for col in matches_results_train_data.columns if any(s in col for s in matches_results_train_data_columns)]]

## Combinando os dados gerais das partidas com os detalhes das partidas
full_matches_train_data = pd.merge(matches_results_train_data, matches_train_data, on='matchId', how='inner')

## Data Wrangling
full_matches_train_data.fillna(0, inplace=True)
full_matches_train_data.replace("Not Available", 0, inplace=True)
full_matches_train_data.drop(['eventId_y', 'eventId_x', 'matchId'], axis=1, inplace=True)

full_matches_train_data.drop(drop_columns, axis=1, inplace=True)
full_matches_train_data.drop(['mapBestOf'], axis=1, inplace=True)

# Features (colunas que serão utilizadas para fazer a previsão)
X = full_matches_train_data.drop(['teamOneWon', 'teamTwoWon'], axis=1)
X = X.apply(pd.to_numeric, errors='coerce')

# Target (coluna que será prevista)
y = full_matches_train_data['teamOneWon']
y = y.apply(pd.to_numeric, errors='coerce')

print(f"Número de amostras em X?{X.shape} e y:{y.shape}")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

IO_helper = IOHelper(filepath='../../data/processed')
IO_helper.write(X_train)
display(X_train.head())

# Treinamento do modelo Random Forest
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Fazer previsões com o conjunto de teste
y_pred = model.predict(X_test)

# Calcular a acurácia
accuracy = accuracy_score(y_test, y_pred)
print(f'Acurácia do Modelo de Random Forest: {accuracy:.2f}')
print(f"Score do modelo de Random Forest: {model.score(X_test, y_test)}")

Número de amostras em X?(696, 300) e y:(696,)
Erro ao escrever o DataFrame: [Errno 21] Is a directory: '../../data/processed'


  matches_train_data = pd.read_csv('../../data/raw/matches.csv')


Unnamed: 0,map1Team1SideBothPlayer1PlusMinus,map1Team1SideCounterTerroristPlayer1PlusMinus,map1Team1SideTerroristPlayer1PlusMinus,map1Team1SideBothPlayer1Rating,map1Team1SideCounterTerroristPlayer1Rating,map1Team1SideTerroristPlayer1Rating,map1Team1SideBothPlayer2PlusMinus,map1Team1SideCounterTerroristPlayer2PlusMinus,map1Team1SideTerroristPlayer2PlusMinus,map1Team1SideBothPlayer2Rating,...,map5Team2SideTerroristPlayer4PlusMinus,map5Team2SideBothPlayer4Rating,map5Team2SideCounterTerroristPlayer4Rating,map5Team2SideTerroristPlayer4Rating,map5Team2SideBothPlayer5PlusMinus,map5Team2SideCounterTerroristPlayer5PlusMinus,map5Team2SideTerroristPlayer5PlusMinus,map5Team2SideBothPlayer5Rating,map5Team2SideCounterTerroristPlayer5Rating,map5Team2SideTerroristPlayer5Rating
330,15,10,10,1.63,2.08,1.96,10,9,1,1.45,...,0,0,0,0,0,0,0,0,0,0
510,1,6,-5,1.18,1.44,0.82,2,6,-4,1.05,...,0,0,0,0,0,0,0,0,0,0
629,4,-1,5,1.07,0.93,1.47,-7,-1,3,0.86,...,0,0,0,0,0,0,0,0,0,0
607,20,15,5,2.02,2.72,1.55,4,4,0,1.28,...,0,0,0,0,0,0,0,0,0,0
31,11,10,6,2.02,2.17,2.19,12,11,1,1.81,...,0,0,0,0,0,0,0,0,0,0


Acurácia do Modelo de Random Forest: 0.83
Score do modelo de Random Forest: 0.8304597701149425
