# In [1]:
import numpy as np
import pandas as pd
from skopt import gp_minimize
from skopt.space import Real
from skopt.utils import use_named_args
from skopt.plots import plot_convergence, plot_objective
import matplotlib.pyplot as plt
import teamModel

# Shared model instance; the objective function refits it for every candidate.
model = teamModel.TeamModel()

# Search space for the optimizer: epsilon is sampled on a log scale,
# season_penalty on a linear scale.
space = [
    Real(1e-4, 1e-2, prior='log-uniform', name='epsilon'),
    Real(0.1, 1.0, prior='uniform', name='season_penalty'),
]

# Define the objective function to minimize
@use_named_args(space)
def objective(epsilon, season_penalty):
    """Return the validation MAE of the model for one hyper-parameter pair.

    Refits the module-level ``model`` on ``matches`` with the given
    hyper-parameters, predicts every match in ``val_df`` and averages the
    absolute home-goal and away-goal errors into a single score.

    Args:
        epsilon: Forwarded to ``model.fit_models`` as ``epsilon``.
        season_penalty: Forwarded to ``model.fit_models`` as
            ``season_penalty``.

    Returns:
        Mean absolute error over all home and away goal predictions.

    Raises:
        ValueError: If ``val_df`` has no rows, since the mean would
            otherwise be a division by zero.
    """
    if val_df.empty:
        raise ValueError("val_df is empty; cannot compute a validation MAE")

    model.fit_models(matches, epsilon=epsilon, season_penalty=season_penalty)

    # Running totals instead of per-match lists; home and away errors are
    # summed separately and in row order.
    home_error_total = 0
    away_error_total = 0

    # Iterate the four needed columns directly rather than building a
    # Series per row with iterrows().
    fixtures = zip(
        val_df['home_team'],
        val_df['away_team'],
        val_df['home_goals'],
        val_df['away_goals'],
    )
    for home, away, actual_home_goals, actual_away_goals in fixtures:
        # Predict with the parameters the model was just fitted with
        prediction = model.predict_match(home, away)
        home_error_total += abs(actual_home_goals - prediction['home_goals'])
        away_error_total += abs(actual_away_goals - prediction['away_goals'])

    # Each match contributes two predictions (home and away goals)
    overall_mae = (home_error_total + away_error_total) / (2 * len(val_df))

    # Print current iteration for monitoring progress. epsilon ranges over
    # 1e-4..1e-2, so it needs more than four decimals to be distinguishable.
    print(f"Epsilon: {epsilon:.6f}, Season Penalty: {season_penalty:.4f}, MAE: {overall_mae:.4f}")

    return overall_mae

# Number of optimization iterations to perform
n_calls = 25

# Run Bayesian optimization
result = gp_minimize(
    objective,
    space,
    n_calls=n_calls,
    random_state=42,
    verbose=True,
    n_initial_points=5  # Number of initial random points before Bayesian optimization kicks in
)

# Extract the best parameters and score (result.x follows the order of `space`)
best_epsilon, best_season_penalty = result.x
best_mae = result.fun

print("\n" + "="*50)
print("Optimization Results:")
# epsilon ranges over 1e-4..1e-2, so four decimals would hide most of its value
print(f"Best epsilon: {best_epsilon:.6f}")
print(f"Best season_penalty: {best_season_penalty:.4f}")
print(f"Best MAE: {best_mae:.4f}")

# Create a dataframe with all evaluated parameters
results_df = pd.DataFrame({
    'iteration': range(len(result.func_vals)),
    'epsilon': [x[0] for x in result.x_iters],
    'season_penalty': [x[1] for x in result.x_iters],
    'mae': result.func_vals
})

# Sort by MAE to see the best parameter combinations
sorted_results = results_df.sort_values('mae')
print("\nTop 5 parameter combinations:")
print(sorted_results.head(5))

# Save results to CSV
results_df.to_csv('bayesian_optimization_results.csv', index=False)

# Plot optimization convergence on its own figure and save that figure
# explicitly, so the file always contains the convergence curve.
convergence_fig, convergence_ax = plt.subplots(figsize=(8, 5))
plot_convergence(result, ax=convergence_ax)
convergence_ax.set_title('Convergence Plot')
convergence_fig.tight_layout()
convergence_fig.savefig('optimization_plots.png')

# Plot partial dependence of the objective on each parameter.
# plot_objective always creates its own figure (a grid of axes), so it cannot
# share a subplot with the convergence plot; grab the figure it made instead.
plot_objective(result, dimensions=['epsilon', 'season_penalty'])
objective_fig = plt.gcf()
objective_fig.suptitle('Parameter Space')
objective_fig.savefig('parameter_space.png')

plt.show()

# Optional: Create a contour plot for better visualization
from skopt.plots import plot_objective_2D

plt.figure(figsize=(10, 8))
# plot_objective_2D takes the two dimension identifiers positionally;
# it has no `dimensions` keyword.
plot_objective_2D(result, 'epsilon', 'season_penalty')
plt.tight_layout()
plt.savefig('parameter_contour.png')
plt.show()

# Apply the best parameters to your model
model.epsilon = best_epsilon
model.season_penalty = best_season_penalty

# The objective refits `model` on every call, so at this point it holds the fit
# from the last evaluated candidate, not the best one. Refit with the best
# values. NOTE(review): assumes fit_models is what applies these parameters,
# as in the objective function — confirm against TeamModel.
model.fit_models(matches, epsilon=best_epsilon, season_penalty=best_season_penalty)

print("\nModel updated with the best parameters!")

# Cell output: ModuleNotFoundError: No module named 'teamModel'