In [1]:
import pandas as pd
from itertools import combinations
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import f1_score

import warnings

warnings.filterwarnings("ignore")

In [2]:
X = pd.read_csv('../../data/imputed/df.csv')
y = pd.read_csv('../../data/imputed/y.csv')

In [3]:
home_player_ratings = [col for index, col in enumerate(X.columns) if col.startswith('player_rating_home_player_')]
away_player_ratings = [col for index, col in enumerate(X.columns) if col.startswith('player_rating_away_player_')]
team_performance_metrics = [col for index, col in enumerate(X.columns) if col.startswith('ewm_') or col.startswith('points_') or col.startswith('home_weighted_wins') or col.startswith('away_weighted_wins')]
team_avg_variance_ratings = [col for index, col in enumerate(X.columns) if col in ['average_rating_home', 'average_rating_away', 'variance_rating_home', 'variance_rating_away', 'std_dev_rating_home', 'std_dev_rating_away']]
rating_range_avg_diff = [col for index, col in enumerate(X.columns) if col in ['rating_range_home', 'rating_range_away', 'average_rating_diff']]
attack_defense_strength = [col for index, col in enumerate(X.columns) if col in ['diff_avg_rating_attack', 'diff_avg_rating_defence']]
home_away_team_strength = [col for index, col in enumerate(X.columns) if 'home' in col or 'away' in col]


In [4]:
interaction_constraints = [
    home_player_ratings,
    away_player_ratings,
    team_performance_metrics,
    # team_avg_variance_ratings,
    # top_players,
    # rating_range_avg_diff,
    # attack_defense_strength,
    home_away_team_strength
]

In [5]:
X_train_full, X_val_full, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Dictionary to store F1 score for each combination
f1_scores = {}

# Iterate over all combinations of interaction constraints
for i in range(1, len(interaction_constraints) + 1):
    for combo in combinations(interaction_constraints, i):
        # Initialize the XGBClassifier with the current combination of interaction constraints
        xgb_model = XGBClassifier(random_state=42, enable_categorical=True, interaction_constraints=list(combo))

        # Train the model
        xgb_model.fit(X_train_full, y_train)

        # Predictions and evaluation
        y_pred = xgb_model.predict(X_val_full)
        f1 = f1_score(y_val, y_pred, average='weighted')
        
        f1_scores[f1] = combo

# Find the best combination
best_f1_score = max(f1_scores, key=f1_scores.get)
best_combo = f1_scores[best_f1_score]

# Print the best combination and its F1 score
print(f'Best interaction constraints: {best_combo}')
print(f'Best F1 Score: {best_f1_score}')
        

Best interaction constraints: (['player_rating_home_player_1', 'player_rating_home_player_2', 'player_rating_home_player_3', 'player_rating_home_player_4', 'player_rating_home_player_5', 'player_rating_home_player_6', 'player_rating_home_player_7', 'player_rating_home_player_8', 'player_rating_home_player_9', 'player_rating_home_player_10', 'player_rating_home_player_11'], ['player_rating_away_player_1', 'player_rating_away_player_2', 'player_rating_away_player_3', 'player_rating_away_player_4', 'player_rating_away_player_5', 'player_rating_away_player_6', 'player_rating_away_player_7', 'player_rating_away_player_8', 'player_rating_away_player_9', 'player_rating_away_player_10', 'player_rating_away_player_11'], ['ewm_home_team_goals', 'ewm_away_team_goals', 'ewm_home_team_goals_conceded', 'ewm_away_team_goals_conceded', 'points_home', 'points_away', 'home_weighted_wins', 'away_weighted_wins', 'ewm_shoton_home', 'ewm_shoton_away', 'ewm_possession_home', 'ewm_possession_away'], ['player_