In [None]:
pip install pandas matplotlib seaborn scikit-learn xgboost

In [None]:
import pandas as pd

from io_helper import IOHelper

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

## Merge Data
# Per-player statistic columns are named
# map<M>Team<T>Side<S>Player<P><Attribute>; enumerate every combination for
# maps 1-5, teams 1-2, players 1-5, the seven attributes and the three sides.
player_attributes = ["Id", "Kills", "Deaths", "PlusMinus", "Adr", "Kast", "Rating"]
side_names = ["Both", "CounterTerrorist", "Terrorist"]
map_results = [
    f"map{m}Team{t}Side{s}Player{p}{a}"
    for m in range(1, 6)
    for t in range(1, 3)
    for p in range(1, 6)
    for a in player_attributes
    for s in side_names
]

# From the raw matches file keep only the identifier columns plus the
# per-player statistics enumerated above.
matches_train_data_columns = ['eventId', 'matchId', 'mapBestOf', *map_results]
matches_train_data = pd.read_csv('../../data/raw/matches.csv')[matches_train_data_columns]

# From the raw results file keep every column whose name contains one of the
# listed names as a substring (so prefixed/suffixed variants are kept too).
matches_results_train_data_columns = ['eventId', 'matchId', 'TeamOneScore', 'TeamTwoScore', 'teamOneWon', 'teamTwoWon']
matches_results_train_data = pd.read_csv('../../data/raw/matches_results.csv')
wanted_result_columns = [
    name
    for name in matches_results_train_data.columns
    if any(fragment in name for fragment in matches_results_train_data_columns)
]
matches_results_train_data = matches_results_train_data[wanted_result_columns]

# Inner join on matchId: only matches present in both files survive. Any other
# column shared by both frames (e.g. eventId) comes out twice, with pandas'
# default _x (left) / _y (right) suffixes.
full_matches_train_data = matches_results_train_data.merge(
    matches_train_data,
    on='matchId',
    how='inner',
)

## Data Wrangling
# Missing cells and the literal "Not Available" placeholder both become 0.
full_matches_train_data.fillna(value=0, inplace=True)
full_matches_train_data.replace(to_replace="Not Available", value=0, inplace=True)

# Collapse the eventId_x / eventId_y pair into a single 'eventId' column:
# keep the _x copy under the plain name and discard the _y copy.
full_matches_train_data.rename(columns={'eventId_x': 'eventId'}, inplace=True)
full_matches_train_data.drop(columns=['eventId_y'], inplace=True)

# Names of every per-player Kast column (maps 1-5, teams 1-2, players 1-5,
# all three sides).
all_kast_columns = [
    f"map{m}Team{t}Side{s}Player{p}Kast"
    for m in range(1, 6)
    for t in range(1, 3)
    for p in range(1, 6)
    for s in ("Both", "CounterTerrorist", "Terrorist")
]

# Remove all Kast columns from the working frame.
full_matches_train_data.drop(columns=all_kast_columns, inplace=True)

# Dump the first five rows for a quick visual check of the cleaned data.
head = full_matches_train_data.head(5)
IOHelper('head.csv').write(head)

# Features
# Every outcome column (final scores and both win flags) directly encodes the
# label. Dropping only 'teamOneWon' would leave 'teamTwoWon' and the scores in
# the feature matrix, letting the model read the answer and making the
# reported accuracy meaningless. Columns are matched by substring, the same
# way the result columns are selected when matches_results.csv is loaded.
outcome_markers = ('TeamOneScore', 'TeamTwoScore', 'teamOneWon', 'teamTwoWon')
outcome_columns = [
    column
    for column in full_matches_train_data.columns
    if any(marker in column for marker in outcome_markers)
]
# NOTE(review): identifier columns (eventId, matchId, player Ids) are still
# used as numeric features — confirm that is intended.
X = full_matches_train_data.drop(columns=outcome_columns)
IOHelper('x.csv').write(X)

# Target
y = full_matches_train_data['teamOneWon']
IOHelper('y.csv').write(y)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression(solver='lbfgs', max_iter=1000)

# Train the model
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Acurácia do Modelo: {accuracy:.2f}')