In [2]:
import math
import pandas as pd
import numpy as np
from scipy import stats

import seaborn as sns
from sklearn import metrics, datasets
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline

# Seed passed as random_state to the train/validation split and to the seeded XGBoost models.
random_seed = 30

In [3]:
# Read the semicolon-delimited feature table, then separate the target column
# ('teamA_win') from the remaining predictor columns.
data = pd.read_csv("../data/featured/data.csv", sep=';')

ydata = data['teamA_win']
Xdata = data.drop(columns='teamA_win')

In [4]:
# Hold out 40% of the rows for validation; the fixed seed makes the split repeatable.
Xtrain, Xval, ytrain, yval = train_test_split(
    Xdata,
    ydata,
    test_size=0.4,
    random_state=random_seed,
)

In [5]:
# Baseline: XGBoost classifier with library-default hyper-parameters.
# `fit` returns the estimator itself, so construction and training are chained.
model = xgb.XGBClassifier(
    objective='binary:logistic',  # binary classification objective
    eval_metric='logloss',        # evaluation metric; can be swapped for another
    random_state=random_seed,
).fit(Xtrain, ytrain)

# Predict class labels for the validation split.
y_pred = model.predict(Xval)

# Report validation accuracy.
print("Accuracy:", accuracy_score(y_true=yval, y_pred=y_pred))

Accuracy: 0.604982206405694


In [8]:
import xgboost as xgb
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import accuracy_score

# Exhaustive grid search over a small XGBoost hyper-parameter space.
# Expects Xtrain, ytrain, Xval, yval to be already prepared.
# NOTE(review): candidates are ranked by their accuracy on Xval, the same split
# whose accuracy is printed, so the winning score is an optimistic estimate.

param_grid = dict(
    max_depth=range(1, 5),
    learning_rate=[0.35, 0.4, 0.45],
    n_estimators=range(10, 30),
)

# Running record of the best candidate seen so far.
best_score, best_params, best_model = 0, None, None

for params in ParameterGrid(param_grid):
    # Train one candidate on the training split.
    model = xgb.XGBClassifier(
        eval_metric='logloss',
        random_state=random_seed,
        **params,
    ).fit(Xtrain, ytrain)

    # Score it on the validation split.
    preds = model.predict(Xval)
    score = accuracy_score(yval, preds)

    print(f"Params: {params}, Accuracy: {score:.4f}")

    # Strict comparison: on a tie the earliest candidate is kept.
    if score > best_score:
        best_score, best_params, best_model = score, params, model

print(f"\nNejlepší parametry: {best_params} s přesností {best_score:.4f}")

Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_estimators': 10}, Accuracy: 0.6263
Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_estimators': 11}, Accuracy: 0.6441
Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_estimators': 12}, Accuracy: 0.6335
Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_estimators': 13}, Accuracy: 0.6335
Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_estimators': 14}, Accuracy: 0.6441
Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_estimators': 15}, Accuracy: 0.6406
Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_estimators': 16}, Accuracy: 0.6335
Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_estimators': 17}, Accuracy: 0.6406
Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_estimators': 18}, Accuracy: 0.6335
Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_estimators': 19}, Accuracy: 0.6548
Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_estimators': 20}, Accuracy: 0.6477
Params: {'learning_rate': 0.35, 'max_depth': 1, 'n_est

In [11]:
# Refit one explicitly chosen configuration and score it on the validation split.
# NOTE(review): n_estimators=52 lies outside the 10-29 range swept by the grid
# search earlier in this notebook - confirm the value is intentional.
model = xgb.XGBClassifier(
    eval_metric='logloss',
    max_depth=1,
    learning_rate=0.35,
    n_estimators=52,
    random_state=random_seed,  # seeded like every other model in this notebook
).fit(Xtrain, ytrain)

# Predict class labels for the validation split and compute accuracy.
preds = model.predict(Xval)
score = accuracy_score(yval, preds)

print(f"Accuracy: {score:.4f}")

Accuracy: 0.6489
