In [22]:
import math
import pandas as pd
import numpy as np
from scipy import stats

import seaborn as sns
from sklearn import metrics, datasets
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline

# Seed handed to `random_state` arguments in this notebook (the train/validation
# split uses it) so that re-running the cells reproduces the same split.
random_seed = 30

In [23]:
# Read the semicolon-delimited feature table from disk.
data = pd.read_csv("../data/featured/data.csv", sep=';')

# `teamA_win` is the prediction target; every other column is kept as a feature.
ydata = data['teamA_win']
Xdata = data.drop(columns='teamA_win')

In [24]:
# Hold out 40% of the rows for validation; the fixed seed keeps the split repeatable.
Xtrain, Xval, ytrain, yval = train_test_split(
    Xdata,
    ydata,
    test_size=0.4,
    random_state=random_seed,
)

In [25]:
# Baseline XGBoost classifier: only the objective and the evaluation metric are
# set explicitly, everything else is left at the library defaults.
baseline_settings = {
    'objective': 'binary:logistic',  # binary classification
    'eval_metric': 'logloss',        # the metric can be swapped for another one
}
model = xgb.XGBClassifier(**baseline_settings)

# Training
model.fit(Xtrain, ytrain)

# Prediction on Xval
y_pred = model.predict(Xval)

# Evaluation: share of Xval rows whose predicted label matches yval
baseline_accuracy = accuracy_score(yval, y_pred)
print("Accuracy:", baseline_accuracy)

Accuracy: 0.779788838612368


In [18]:
import xgboost as xgb
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import accuracy_score

# Relies on Xtrain, ytrain, Xval and yval already being defined by earlier cells.

# Candidate values per hyperparameter; ParameterGrid expands them into every
# combination (5 depths x 2 learning rates x 20 tree counts).
param_grid = dict(
    max_depth=range(13, 18),
    learning_rate=[0.25, 0.3],
    n_estimators=range(90, 110),
)

# Running best: highest accuracy seen so far, with the parameters and the
# fitted model that produced it.
best_score, best_params, best_model = 0, None, None

for params in ParameterGrid(param_grid):
    # Fit one candidate on the training split and score it on (Xval, yval).
    model = xgb.XGBClassifier(eval_metric='logloss', **params).fit(Xtrain, ytrain)
    preds = model.predict(Xval)
    score = accuracy_score(yval, preds)

    print(f"Params: {params}, Accuracy: {score:.4f}")

    # Strict comparison: on ties the earliest combination is kept.
    if score > best_score:
        best_score, best_params, best_model = score, params, model

# NOTE(review): the winner is chosen on the same (Xval, yval) that produced
# best_score, so the reported accuracy is likely optimistic -- confirm it on
# data that was not used for selection.
# NOTE(review): after the loop `model` and `preds` refer to the LAST candidate,
# not the best one; use `best_model` for the selected estimator.
print(f"\nNejlepší parametry: {best_params} s přesností {best_score:.4f}")

Params: {'learning_rate': 0.25, 'max_depth': 13, 'n_estimators': 90}, Accuracy: 0.7692
Params: {'learning_rate': 0.25, 'max_depth': 13, 'n_estimators': 91}, Accuracy: 0.7692
Params: {'learning_rate': 0.25, 'max_depth': 13, 'n_estimators': 92}, Accuracy: 0.7692
Params: {'learning_rate': 0.25, 'max_depth': 13, 'n_estimators': 93}, Accuracy: 0.7677
Params: {'learning_rate': 0.25, 'max_depth': 13, 'n_estimators': 94}, Accuracy: 0.7677
Params: {'learning_rate': 0.25, 'max_depth': 13, 'n_estimators': 95}, Accuracy: 0.7677
Params: {'learning_rate': 0.25, 'max_depth': 13, 'n_estimators': 96}, Accuracy: 0.7692
Params: {'learning_rate': 0.25, 'max_depth': 13, 'n_estimators': 97}, Accuracy: 0.7692
Params: {'learning_rate': 0.25, 'max_depth': 13, 'n_estimators': 98}, Accuracy: 0.7692
Params: {'learning_rate': 0.25, 'max_depth': 13, 'n_estimators': 99}, Accuracy: 0.7677
Params: {'learning_rate': 0.25, 'max_depth': 13, 'n_estimators': 100}, Accuracy: 0.7677
Params: {'learning_rate': 0.25, 'max_depth

In [26]:
# Single XGBoost fit with fixed hyperparameters, scored on (Xval, yval).
model = xgb.XGBClassifier(
    eval_metric='logloss',
    max_depth=3,
    learning_rate=0.2,
    n_estimators=200,
)
model.fit(Xtrain, ytrain)

# Accuracy of the fitted model on the validation rows.
preds = model.predict(Xval)
score = accuracy_score(yval, preds)

print(f"Accuracy: {score:.4f}")

Accuracy: 0.7903
