In [1]:
import math

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from scipy import stats
from sklearn import metrics, datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ParameterGrid, train_test_split
%matplotlib inline

# Seed passed as random_state to the train/validation split and to the
# XGBoost classifiers below, so reruns give the same results.
random_seed = 42

In [2]:
# Load the dataset from CSV, then separate the target column (teamA_win)
# from the remaining predictor columns.
data = pd.read_csv("../data/featured/data.csv", sep=',')

ydata = data['teamA_win']
Xdata = data.drop(columns='teamA_win')

In [3]:
# Hold out 40% of the rows for validation; the fixed seed keeps the split
# identical across reruns.
Xtrain, Xval, ytrain, yval = train_test_split(
    Xdata,
    ydata,
    test_size=0.4,
    random_state=random_seed,
)

In [4]:
# Baseline: an XGBoost classifier with library-default tree hyperparameters,
# fitted on the training split (fit() returns the estimator, so it is chained).
model = xgb.XGBClassifier(
    random_state=random_seed,
    eval_metric='logloss',        # evaluation metric; can be swapped for another
    objective='binary:logistic',  # binary classification objective
).fit(Xtrain, ytrain)

# Predict labels for the validation split and report plain accuracy.
y_pred = model.predict(Xval)
print("Accuracy:", accuracy_score(yval, y_pred))

Accuracy: 0.6911544227886057


In [5]:
# Exhaustive grid search over tree depth, learning rate and number of
# estimators. Every candidate is fitted on the training split and scored by
# accuracy on the validation split; the best one is kept in best_model.
# Assumes Xtrain, ytrain, Xval, yval were prepared by the earlier cells.
# NOTE(review): the winner is selected on the same validation split that is
# used to report its accuracy, so the reported figure is optimistic; a
# separate test split or cross-validation would give a fairer estimate.

param_grid = {
    'max_depth': [1,2,3,5,10,20],
    'learning_rate': [0.3,0.4,0.5,0.6],
    'n_estimators': [2,5,10,20,50,100,200]
}

# Start below any attainable accuracy so the first candidate is always
# recorded; starting at 0 with a strict '>' would leave best_model as None
# if every candidate scored exactly 0.
best_score = -math.inf
best_params = None
best_model = None

for params in ParameterGrid(param_grid):
    model = xgb.XGBClassifier(
        eval_metric='logloss',
        **params,
        random_state=random_seed
    )
    model.fit(Xtrain, ytrain)
    preds = model.predict(Xval)
    score = accuracy_score(yval, preds)

    print(f"Params: {params}, Accuracy: {score:.4f}")

    # Strict comparison: on ties the earliest combination in grid order wins.
    if score > best_score:
        best_score = score
        best_params = params
        best_model = model

print(f"\nNejlepší parametry: {best_params} s přesností {best_score:.4f}")

Params: {'learning_rate': 0.3, 'max_depth': 1, 'n_estimators': 2}, Accuracy: 0.6942
Params: {'learning_rate': 0.3, 'max_depth': 1, 'n_estimators': 5}, Accuracy: 0.6927
Params: {'learning_rate': 0.3, 'max_depth': 1, 'n_estimators': 10}, Accuracy: 0.6897
Params: {'learning_rate': 0.3, 'max_depth': 1, 'n_estimators': 20}, Accuracy: 0.6867
Params: {'learning_rate': 0.3, 'max_depth': 1, 'n_estimators': 50}, Accuracy: 0.6897
Params: {'learning_rate': 0.3, 'max_depth': 1, 'n_estimators': 100}, Accuracy: 0.6837
Params: {'learning_rate': 0.3, 'max_depth': 1, 'n_estimators': 200}, Accuracy: 0.6897
Params: {'learning_rate': 0.3, 'max_depth': 2, 'n_estimators': 2}, Accuracy: 0.6882
Params: {'learning_rate': 0.3, 'max_depth': 2, 'n_estimators': 5}, Accuracy: 0.6792
Params: {'learning_rate': 0.3, 'max_depth': 2, 'n_estimators': 10}, Accuracy: 0.6777
Params: {'learning_rate': 0.3, 'max_depth': 2, 'n_estimators': 20}, Accuracy: 0.6762
Params: {'learning_rate': 0.3, 'max_depth': 2, 'n_estimators': 50},

In [14]:
# Fit one XGBoost classifier with hand-picked hyperparameters and report its
# accuracy on the validation split.
# NOTE(review): max_depth=21 and n_estimators=43 are not among the values in
# the grid searched earlier in this notebook — presumably they come from a
# separate search; confirm and record where they came from.
model = xgb.XGBClassifier(
    eval_metric='logloss',
    max_depth=21,
    learning_rate=0.3,
    n_estimators=43,
    random_state=random_seed,  # seeded like the other classifiers in this notebook
).fit(Xtrain, ytrain)

preds = model.predict(Xval)
score = accuracy_score(yval, preds)

print(f"Accuracy: {score:.4f}")

Accuracy: 0.7368
