In [1]:
import warnings

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
from xgboost import XGBClassifier

In [2]:
# Load the prepared features (X) and target (y) from their pickle files.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
features_path = 'prepared_X.pkl'
target_path = 'prepared_y.pkl'

X = pd.read_pickle(features_path)
y = pd.read_pickle(target_path)
print("The prepared data has been uploaded successfully.")

The prepared data has been uploaded successfully.


In [3]:
# Resolve the positional index of every categorical column in X
categorical_features = ['Surface']
categorical_indices = list(map(X.columns.get_loc, categorical_features))

# --- Configure the preprocessor ---
# The categorical columns are one-hot encoded (values unseen at fit time are
# ignored rather than raising); all remaining columns pass through unchanged.
one_hot_encoder = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    transformers=[('cat', one_hot_encoder, categorical_indices)],
    remainder='passthrough',
)

# Fit the preprocessor on X and encode X in a single step
X_encoded = preprocessor.fit_transform(X)

In [4]:
# Final XGBoost classifier, fitted on the encoded data from the preprocessor.
# The `use_label_encoder` argument was dropped: the installed XGBoost ignores
# it and only reports 'Parameters: { "use_label_encoder" } are not used.'
model_xgb = XGBClassifier(
    n_estimators=72,
    learning_rate=0.009,
    random_state=42,  # fixed seed for reproducible training
    eval_metric='logloss',
)
print("We are training the final model...")
model_xgb.fit(X_encoded, y)
print("The model is ready to work")

We are training the final model...
The model is ready to work


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [5]:
# Match inputs, grouped per player: (rank, points, odds)
rank1, pts1, odd1 = 7, 3520, 1.51    # player 1: Ben Shelton
rank2, pts2, odd2 = 16, 2590, 2.60   # player 2: Karen Khachanov

# Derived features: player 1 minus player 2
rank_diff = rank1 - rank2
pts_diff = pts1 - pts2

# One-row feature mapping; each value is a single-element list so that
# pandas builds exactly one row. Key order defines the column order.
prediction_data = {
    'Rank_1': [rank1],
    'Rank_2': [rank2],
    'Pts_1': [pts1],
    'Pts_2': [pts2],
    'Rank_Difference': [rank_diff],
    'Pts_Difference': [pts_diff],
    'Surface': ['Hard'],
    'Odd_1': [odd1],
    'Odd_2': [odd2],
}

# Build the single-row DataFrame and render it as a rich table
X_predict = pd.DataFrame.from_dict(prediction_data)
display(X_predict)

Unnamed: 0,Rank_1,Rank_2,Pts_1,Pts_2,Rank_Difference,Pts_Difference,Surface,Odd_1,Odd_2
0,7,16,3520,2590,-9,930,Hard,1.51,2.6


In [6]:
# 1. Guard against categorical values the encoder never saw during fit.
#    OneHotEncoder(handle_unknown='ignore') encodes such values as all zeros
#    without any signal, so a typo (e.g. 'hard' instead of 'Hard') would
#    otherwise go unnoticed and still produce a prediction.
fitted_encoder = preprocessor.named_transformers_['cat']
for feature, known_values in zip(categorical_features, fitted_encoder.categories_):
    unseen_values = set(X_predict[feature]) - set(known_values)
    if unseen_values:
        warnings.warn(
            f"Column '{feature}' contains values not seen during training: "
            f"{sorted(map(str, unseen_values))}; they will be encoded as all zeros."
        )

# 2. Reuse the already-fitted preprocessor (transform only — never refit here)
X_predict_encoded = preprocessor.transform(X_predict)

# 3. Predict the class and the per-class probabilities
prediction_result = model_xgb.predict(X_predict_encoded)
prediction_proba = model_xgb.predict_proba(X_predict_encoded)

# 4. Report the outcome; label 1 is reported as a win for player 1
print("--- CONCLUSION OF XGBoost ---")
if prediction_result[0] == 1:
    print("Prediction: Player 1 WINS (Ben Shelton)")
else:
    print("Prediction: Player 2 WINS (Karen Khachanov)")

# Probability column 1 is printed for player 1, column 0 for player 2
print("\nEstimation of the model's confidence (Probability):")
print(f"  - Shelton's win (1): {prediction_proba[0][1]:.2%}")
print(f"  - Khachanov's win (2): {prediction_proba[0][0]:.2%}")

--- CONCLUSION OF XGBoost ---
Prediction: Player 1 WINS (Ben Shelton)

Estimation of the model's confidence (Probability):
  - Shelton's win (1): 57.82%
  - Khachanov's win (2): 42.18%
