# Bookie Evaluator - Backtesting & ML Training

This notebook provides tools for backtesting strategies and training machine learning models to improve prediction accuracy for football match outcomes.

## Overview

- **Data Loading**: Import historical match data from your bookie_evaluator database
- **Model Training**: Train and evaluate multiple ML models
- **Hyperparameter Tuning**: Optimize model parameters
- **Backtesting**: Run simulations on historical data
- **Visualization**: Analyze performance metrics and feature importance

Let's begin by setting up our environment and importing the necessary libraries.

In [2]:
import os
import sys
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings

# Silence all library warnings so they do not clutter the notebook output
warnings.filterwarnings('ignore')

# Plot styling: matplotlib's ggplot style first, then seaborn's whitegrid theme
plt.style.use('ggplot')
sns.set(style="whitegrid")

# Ensure the notebook's working directory is importable so the project
# packages (models, ml, utils) can be resolved
module_path = os.path.abspath('.')
if module_path not in sys.path:
    sys.path.append(module_path)

# Import project modules
from models.odds_analyzer import OddsAnalyzer
from ml.model import FootballMatchPredictor
from utils.file_utils import load_all_match_analyses

# Shared objects used by every later cell.
# NOTE(review): data_dir resolves to "<parent of the working directory>/data",
# whereas the sys.path entry and model_dir are relative to the working
# directory itself - confirm this is really where the match data lives.
data_dir = os.path.join(os.path.dirname(os.path.abspath('.')), 'data')
analyzer = OddsAnalyzer(data_dir=data_dir)
ml_predictor = FootballMatchPredictor(data_dir=data_dir, model_dir='ml/models')

print("Environment set up successfully!")

Environment set up successfully!


## Data Loading and Preparation

First, we'll load the historical match data from our database. This includes both the match analyses and the recorded results.

In [3]:
def load_training_data():
    """Load match analyses and attach the recorded result to each one.

    Reads every match analysis found under the notebook-level ``data_dir``
    and the recorded outcomes held in ``analyzer.outcome_performance``, then
    keeps only the analyses whose ``match_id`` also has a recorded outcome.

    Note:
        Each kept analysis dict is modified in place: its ``actual_result``
        key is set to the recorded result.

    Returns:
        tuple: ``(all_matches, match_df)`` where ``all_matches`` is the list
        of analysis dicts that have a recorded outcome and ``match_df`` is a
        ``pandas.DataFrame`` built from that list.
    """
    print("Loading historical match data...")

    # Load all match analyses
    raw_matches = load_all_match_analyses(data_dir)
    print(f"Found {len(raw_matches)} match analyses")

    # Get completed matches from outcome performance data
    outcome_matches = analyzer.outcome_performance.get('matches', [])
    print(f"Found {len(outcome_matches)} recorded match results")

    # Index the outcomes by match_id once instead of re-scanning the whole
    # outcome list for every analysis. setdefault keeps the FIRST outcome
    # seen for an id, which is what a front-to-back linear search would find.
    outcomes_by_id = {}
    for outcome in outcome_matches:
        outcomes_by_id.setdefault(outcome.get('match_id'), outcome)

    # Keep only the analyses that have a recorded outcome
    all_matches = []
    for match in raw_matches:
        outcome_match = outcomes_by_id.get(match.get('match_id', ''))
        if outcome_match:
            # Add the actual result to the match analysis
            match['actual_result'] = outcome_match.get('actual_result')
            all_matches.append(match)

    print(f"Combined {len(all_matches)} matches with both analysis and results")

    # Convert to a pandas DataFrame for easier manipulation
    match_df = pd.DataFrame(all_matches)

    return all_matches, match_df

# Load the data
all_matches, match_df = load_training_data()

if all_matches:
    # Show how the recorded outcomes are distributed: counts, then a pie chart
    if 'actual_result' in match_df.columns:
        result_counts = match_df['actual_result'].value_counts()
        print("\nOutcome distribution:")
        print(result_counts)

        plt.figure(figsize=(8, 6))
        plt.pie(
            result_counts,
            labels=result_counts.index,
            autopct='%1.1f%%',
            startangle=90,
        )
        plt.axis('equal')  # equal aspect ratio keeps the pie circular
        plt.title('Distribution of Match Outcomes')
        plt.show()
else:
    print("No historical match data with results found. Please record some match results first.")

Loading historical match data...
Found 0 match analyses
Found 0 recorded match results
Combined 0 matches with both analysis and results
No historical match data with results found. Please record some match results first.


## Exploratory Data Analysis

Let's explore the historical data to understand the patterns and relationships between bookmaker odds and match outcomes.

In [None]:
def extract_bookmaker_dataframe(matches):
    """Flatten nested bookmaker data into one row per (match, bookmaker).

    Args:
        matches: Iterable of match dicts. Each may carry a ``bookmakers``
            list of dicts; a missing or ``None`` list contributes no rows.

    Returns:
        pandas.DataFrame: One row per bookmaker entry, with the match-level
        fields repeated on every row. Missing text fields default to ``''``,
        missing numeric fields to ``0`` and a missing ``actual_result`` to
        ``None``. The column set is fixed, so the frame has the same columns
        even when there are no rows.
    """
    # Declared explicitly so an empty result still exposes every column;
    # otherwise downstream lookups such as df['bookmaker'] raise KeyError.
    columns = [
        'match_id', 'home_team', 'away_team', 'competition', 'match_date',
        'actual_result', 'bookmaker', 'home_odds', 'draw_odds', 'away_odds',
        'implied_home_prob', 'implied_draw_prob', 'implied_away_prob', 'margin',
    ]
    rows = []

    for match in matches:
        # Match-level fields are identical for every bookmaker of the match
        match_fields = {
            'match_id': match.get('match_id', ''),
            'home_team': match.get('home_team', ''),
            'away_team': match.get('away_team', ''),
            'competition': match.get('competition', ''),
            'match_date': match.get('match_date', ''),
            'actual_result': match.get('actual_result', None),
        }

        # "or []" also covers an explicit bookmakers=None entry
        for bookie in match.get('bookmakers') or []:
            rows.append({
                **match_fields,
                'bookmaker': bookie.get('name', ''),
                'home_odds': bookie.get('home_odds', 0),
                'draw_odds': bookie.get('draw_odds', 0),
                'away_odds': bookie.get('away_odds', 0),
                'implied_home_prob': bookie.get('implied_home_prob', 0),
                'implied_draw_prob': bookie.get('implied_draw_prob', 0),
                'implied_away_prob': bookie.get('implied_away_prob', 0),
                'margin': bookie.get('margin', 0),
            })

    return pd.DataFrame(rows, columns=columns)

# Extract flat DataFrame for analysis if we have data
if all_matches:
    bookie_df = extract_bookmaker_dataframe(all_matches)

    # Display basic information
    print(f"Bookmaker data shape: {bookie_df.shape}")

    if bookie_df.empty:
        # Without rows there is nothing to summarise, and the column
        # lookups below may not be available.
        print("No bookmaker rows found in the loaded matches.")
    else:
        print(f"Unique bookmakers: {bookie_df['bookmaker'].nunique()}")
        print(f"Unique matches: {bookie_df['match_id'].nunique()}")

        # Jupyter only auto-renders an expression that is the last top-level
        # statement of a cell; inside an `if` it must be displayed explicitly.
        display(bookie_df.head())

In [None]:
if 'bookie_df' in locals() and not bookie_df.empty:
    # One box plot per implied-probability column, grouped by actual result
    probability_panels = [
        ('implied_home_prob', "Home Win Probability by Actual Result"),
        ('implied_draw_prob', "Draw Probability by Actual Result"),
        ('implied_away_prob', "Away Win Probability by Actual Result"),
    ]

    plt.figure(figsize=(15, 10))
    for panel_number, (prob_column, panel_title) in enumerate(probability_panels, start=1):
        plt.subplot(1, len(probability_panels), panel_number)
        sns.boxplot(x="actual_result", y=prob_column, data=bookie_df)
        plt.title(panel_title)
        plt.ylim(0, 1)

    plt.tight_layout()
    plt.show()

    # Check average probabilities by actual result
    avg_probs = bookie_df.groupby('actual_result')[
        ['implied_home_prob', 'implied_draw_prob', 'implied_away_prob']
    ].mean().reset_index()

    # A bare expression inside an `if` block is not rendered by Jupyter,
    # so the table has to be displayed explicitly.
    display(avg_probs)

## Machine Learning Model Training

Now let's train different machine learning models and evaluate their performance.

In [None]:
def train_and_evaluate_models(matches, test_size=0.2, random_state=42):
    """Train the predictor's models and summarise their performance.

    Args:
        matches: Historical match dicts handed to
            ``ml_predictor.prepare_training_data``.
        test_size: Forwarded unchanged to ``ml_predictor.train_models``.
        random_state: Forwarded unchanged to ``ml_predictor.train_models``.

    Returns:
        ``(X, y, results, results_df)`` on success, where ``results`` is the
        raw mapping returned by ``train_models`` and ``results_df`` has one
        row per model sorted by test accuracy (accuracy columns are in
        percent, the Brier score is left as reported).
        ``None`` when no features could be extracted or when training
        reported an error - callers must check before unpacking.
    """
    print("Preparing training data...")
    X, y = ml_predictor.prepare_training_data(matches)

    if X.empty or y.empty:
        print("Failed to extract features from historical matches.")
        return None

    print(f"Training data prepared: {X.shape[0]} samples with {X.shape[1]} features")

    print("\nTraining machine learning models...")
    results = ml_predictor.train_models(X, y, test_size=test_size, random_state=random_state)

    if 'error' in results:
        print(f"Error training models: {results['error']}")
        return None

    print("Model training complete!")

    # One summary row per model; the accuracy figures are scaled to percent
    model_results = [
        {
            'Model': model_name.replace('_', ' ').title(),
            'Test Accuracy': metrics.get('accuracy', 0) * 100,
            'Cross-Val Accuracy': metrics.get('cross_val_mean', 0) * 100,
            'Cross-Val Std': metrics.get('cross_val_std', 0) * 100,
            'Brier Score': metrics.get('brier_score', 0),
        }
        for model_name, metrics in results.items()
    ]

    # Create DataFrame and sort by accuracy
    results_df = pd.DataFrame(model_results).sort_values('Test Accuracy', ascending=False)

    # Generate plot of model performance
    plt.figure(figsize=(12, 6))
    ax = sns.barplot(x='Model', y='Test Accuracy', data=results_df, color='skyblue')

    # Overlay the cross-validation mean +/- std on each bar. Bars are drawn in
    # row order at x = 0, 1, 2, ..., so the x position must be the row's
    # position after sorting, not its (pre-sort) index label.
    for bar_position, (_, row) in enumerate(results_df.iterrows()):
        ax.errorbar(bar_position, row['Cross-Val Accuracy'], yerr=row['Cross-Val Std'],
                    fmt='o', color='black', capsize=5)

    plt.title('Model Performance Comparison')
    plt.ylabel('Accuracy (%)')
    plt.ylim(0, 100)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    return X, y, results, results_df

# Train models if we have data
if all_matches:
    # train_and_evaluate_models returns None on failure, so check the result
    # before unpacking; unpacking None directly raises TypeError.
    training_output = train_and_evaluate_models(all_matches)

    if training_output is not None:
        X, y, model_results, results_df = training_output

        display(results_df.style.format({
            'Test Accuracy': '{:.2f}%',
            'Cross-Val Accuracy': '{:.2f}%',
            'Cross-Val Std': '{:.2f}%',
            'Brier Score': '{:.4f}'
        }))

## Feature Importance Analysis

Let's examine which features are most important for predicting match outcomes.

In [None]:
def plot_feature_importance(model_name='ensemble', top_n=15):
    """Draw the feature-importance chart for one model, reporting failures.

    Delegates to ``ml_predictor.plot_feature_importance``. Any exception
    raised along the way is printed instead of propagated.

    Returns:
        bool: ``True`` when plotting completed, ``False`` when an exception
        was raised.
    """
    try:
        ml_predictor.plot_feature_importance(model_name=model_name, top_n=top_n)
    except Exception as exc:
        print(f"Error plotting feature importance: {exc!s}")
        return False
    else:
        return True

if 'X' in locals() and not X.empty:
    # Draw an importance chart for each model of interest
    models_to_check = ['xgboost', 'random_forest', 'ensemble']

    for model in models_to_check:
        print(f"\nFeature importance for {model}:")
        plot_feature_importance(top_n=15, model_name=model)

    # SHAP analysis is best-effort: a failure is reported, not raised
    try:
        print("\nGenerating SHAP analysis for model interpretability...")
        ml_predictor.save_shap_analysis(X, model_name='xgboost')
        print("SHAP analysis saved to the model directory.")
    except Exception as shap_error:
        print(f"Could not generate SHAP analysis: {shap_error!s}")

## Hyperparameter Tuning

Now let's tune hyperparameters to improve model performance. This may take some time to complete.

In [None]:
def tune_hyperparameters(X, y, model_type='xgboost', n_trials=50, test_size=0.2, random_state=42):
    """Tune one model type via ``ml_predictor.hypertune_model`` and report it.

    Prints the best parameters and scores, renders the classification report
    as a table and plots the optimization history when those entries are
    present in the tuning result.

    Returns:
        The mapping returned by ``hypertune_model``, or ``None`` when that
        mapping contains an ``'error'`` key.
    """
    print(f"Starting hyperparameter tuning for {model_type} model with {n_trials} trials...")

    tuning = ml_predictor.hypertune_model(
        X,
        y,
        model_type=model_type,
        n_trials=n_trials,
        test_size=test_size,
        random_state=random_state,
    )

    if 'error' in tuning:
        print(f"Error during hyperparameter tuning: {tuning['error']}")
        return None

    print("Hyperparameter tuning complete!")

    # Headline numbers; accuracy-style scores are shown as percentages
    test_accuracy_pct = tuning.get('test_accuracy', 0) * 100
    best_cv_pct = tuning.get('best_cv_score', 0) * 100
    brier_score = tuning.get('brier_score', 0)

    print(f"Best parameters: {tuning.get('best_params', {})}")
    print(f"Test accuracy: {test_accuracy_pct:.2f}%")
    print(f"Best CV score: {best_cv_pct:.2f}%")
    print(f"Brier score: {brier_score:.4f}")

    # Classification report as a table (classes/averages become rows)
    class_report = tuning.get('classification_report', {})
    if class_report:
        report_table = pd.DataFrame(class_report).transpose()
        report_formats = {
            'precision': '{:.2f}',
            'recall': '{:.2f}',
            'f1-score': '{:.2f}',
            'support': '{:.0f}'
        }
        display(report_table.style.format(report_formats))

    # Objective value per trial
    trial_history = tuning.get('optimization_history', [])
    if trial_history:
        trials_df = pd.DataFrame(trial_history)
        plt.figure(figsize=(10, 6))
        plt.plot(trials_df['number'], trials_df['value'], 'o-')
        plt.title(f'{model_type.title()} Hyperparameter Optimization Progress')
        plt.xlabel('Trial Number')
        plt.ylabel('Accuracy')
        plt.grid(True)
        plt.show()

    return tuning

# Hyperparameter tuning is time-consuming, so it is switched off by default.
# Set RUN_HYPERPARAMETER_TUNING = True to run it. (An explicit flag is used
# instead of hiding the code in a string literal, which Jupyter would echo
# as the cell's output.)
RUN_HYPERPARAMETER_TUNING = False

if RUN_HYPERPARAMETER_TUNING and 'X' in locals() and not X.empty:
    # Choose a model type to tune
    model_type = 'xgboost'  # Options: 'xgboost', 'lightgbm', 'random_forest', 'logistic_regression'
    n_trials = 50  # Increase for better results, but will take longer

    tuning_results = tune_hyperparameters(X, y, model_type=model_type, n_trials=n_trials)

## Backtesting System

Now let's create a backtesting framework to evaluate betting strategies based on our models.

In [None]:
def _best_odds_for_outcome(match, outcome):
    """Return the highest odds any bookmaker in *match* offers for *outcome*.

    Returns 0 when *outcome* is not 'home', 'draw' or 'away', or when the
    match lists no bookmakers.
    """
    if outcome not in ('home', 'draw', 'away'):
        return 0
    odds_key = f"{outcome}_odds"
    return max((bookie.get(odds_key, 0) for bookie in match.get('bookmakers', [])), default=0)


def backtest_betting_strategy(matches, initial_balance=1000, stake_pct=0.02, model_name='ensemble',
                             bookmaker_weight=0.5, min_threshold=0.6, min_odds=1.5):
    """Simulate a proportional-stake betting strategy on historical matches.

    For every match with a recorded result, the combined prediction from
    ``ml_predictor.combine_predictions`` is fetched. A bet of
    ``stake_pct`` of the current balance is placed on the predicted outcome
    when the prediction's confidence is at least ``min_threshold`` and the
    best bookmaker odds for that outcome are at least ``min_odds``. A
    summary is printed and the balance curve and bet diagnostics are plotted.

    Args:
        matches: Historical match dicts. Processed in ``match_date`` order
            when every match has that key, otherwise in the given order.
        initial_balance: Starting bankroll.
        stake_pct: Fraction of the current balance staked per bet.
        model_name: Only echoed in the printed header.
            NOTE(review): it is never passed to ``combine_predictions``, so
            it does not currently select a model - confirm intended API.
        bookmaker_weight: Forwarded to ``combine_predictions``.
        min_threshold: Minimum confidence required to bet.
        min_odds: Minimum best odds required to bet.

    Returns:
        pandas.DataFrame with one row per placed bet, or ``None`` when
        ``matches`` is empty or no bet was placed.
    """
    if not matches:
        print("No matches available for backtesting")
        return None

    print(f"Starting backtest with {len(matches)} historical matches...")
    print(f"Initial balance: ${initial_balance:.2f}, Stake: {stake_pct*100:.1f}% of balance per bet")
    print(f"Model: {model_name}, Bookmaker weight: {bookmaker_weight*100:.1f}%")
    print(f"Min confidence threshold: {min_threshold*100:.1f}%, Min odds: {min_odds:.2f}")

    balance = initial_balance
    total_bets = 0
    winning_bets = 0
    history = [balance]  # balance after each evaluated match, starting point first
    bet_outcomes = []

    # Sort matches by date if available
    if all('match_date' in match for match in matches):
        matches = sorted(matches, key=lambda m: m.get('match_date', ''))

    for match in tqdm(matches, desc="Backtesting"):
        # Skip matches without results
        actual_result = match.get('actual_result')
        if not actual_result:
            continue

        prediction = ml_predictor.combine_predictions(match, bookmaker_weight=bookmaker_weight)
        if 'error' in prediction:
            continue

        predicted_outcome = prediction.get('predicted_outcome')
        confidence = prediction.get('confidence', 0)

        # Use the best odds available for the predicted outcome
        best_odds = _best_odds_for_outcome(match, predicted_outcome)

        if confidence >= min_threshold and best_odds >= min_odds:
            stake = balance * stake_pct
            won = (predicted_outcome == actual_result)

            if won:
                # Decimal odds include the stake, so profit is stake * (odds - 1)
                balance += stake * (best_odds - 1)
                winning_bets += 1
            else:
                balance -= stake

            bet_outcomes.append({
                'match_id': match.get('match_id', ''),
                'home_team': match.get('home_team', ''),
                'away_team': match.get('away_team', ''),
                'match_date': match.get('match_date', ''),
                'predicted': predicted_outcome,
                'actual': actual_result,
                'confidence': confidence,
                'odds': best_odds,
                'stake': stake,
                'won': won,
                'balance': balance
            })
            total_bets += 1

        # Recorded for every evaluated match, whether or not a bet was placed
        history.append(balance)

    # Calculate results
    final_balance = balance
    roi = (final_balance - initial_balance) / initial_balance * 100
    win_rate = winning_bets / total_bets * 100 if total_bets > 0 else 0

    print("\nBacktest Results:")
    print(f"Total bets: {total_bets}")
    print(f"Winning bets: {winning_bets} ({win_rate:.2f}%)")
    print(f"Final balance: ${final_balance:.2f}")
    print(f"Profit/Loss: ${final_balance - initial_balance:.2f}")
    print(f"ROI: {roi:.2f}%")

    # Plot balance history against the starting balance
    plt.figure(figsize=(12, 6))
    plt.plot(history)
    plt.axhline(y=initial_balance, color='r', linestyle='--')
    plt.title('Balance History')
    plt.xlabel('Event')
    plt.ylabel('Balance ($)')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    if not bet_outcomes:
        return None

    bet_df = pd.DataFrame(bet_outcomes)

    # Plot bet outcomes by confidence level
    plt.figure(figsize=(10, 6))
    sns.boxplot(x="won", y="confidence", data=bet_df)
    plt.title("Bet Outcomes by Confidence Level")
    plt.xlabel("Bet Won")
    plt.ylabel("Confidence")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # Plot bet outcomes by odds
    plt.figure(figsize=(10, 6))
    sns.boxplot(x="won", y="odds", data=bet_df)
    plt.title("Bet Outcomes by Odds")
    plt.xlabel("Bet Won")
    plt.ylabel("Odds")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # The full bet log is returned; callers decide how much of it to show
    return bet_df

# Run backtest if we have data
if all_matches:
    # Default strategy: stake 2% of the balance, weight ML and bookmakers
    # equally, and bet only at >= 60% confidence with odds >= 1.5
    bet_results = backtest_betting_strategy(
        all_matches,
        model_name='ensemble',
        initial_balance=1000,
        stake_pct=0.02,
        bookmaker_weight=0.5,
        min_threshold=0.6,
        min_odds=1.5,
    )

    # Show the first ten placed bets, if any bet was placed
    if bet_results is not None:
        display(bet_results.head(10))

## Parameter Optimization

Let's experiment with different parameters to find the optimal betting strategy.

In [None]:
def _simulate_strategy(ordered_matches, stake_pct, bookmaker_weight, min_threshold, min_odds,
                       initial_balance=1000):
    """Run one backtest pass; return (final_balance, total_bets, winning_bets)."""
    balance = initial_balance
    total_bets = 0
    winning_bets = 0

    for match in ordered_matches:
        prediction = ml_predictor.combine_predictions(match, bookmaker_weight=bookmaker_weight)
        if 'error' in prediction:
            continue

        predicted_outcome = prediction.get('predicted_outcome')
        confidence = prediction.get('confidence', 0)

        # Best odds on offer for the predicted outcome (0 if none apply)
        if predicted_outcome in ('home', 'draw', 'away'):
            odds_key = f"{predicted_outcome}_odds"
            best_odds = max(
                (bookie.get(odds_key, 0) for bookie in match.get('bookmakers', [])),
                default=0,
            )
        else:
            best_odds = 0

        if not (confidence >= min_threshold and best_odds >= min_odds):
            continue

        stake = balance * stake_pct
        if predicted_outcome == match.get('actual_result'):
            balance += stake * (best_odds - 1)
            winning_bets += 1
        else:
            balance -= stake
        total_bets += 1

    return balance, total_bets, winning_bets


def optimize_strategy_parameters(matches, param_grid):
    """Grid-search betting-strategy parameters by re-running the backtest.

    Every combination of the values in ``param_grid`` is simulated from a
    starting balance of 1000, staking ``stake_pct`` of the current balance
    whenever the combined prediction's confidence and the best available
    odds reach ``min_threshold`` and ``min_odds``. The five best strategies
    are displayed and ROI is box-plotted against each grid parameter.

    Args:
        matches: Historical match dicts; entries without a truthy
            ``actual_result`` are ignored. Processed in ``match_date`` order
            when every match has that key.
        param_grid: Mapping of parameter name to the list of values to try.
            ``stake_pct``, ``bookmaker_weight``, ``min_threshold`` and
            ``min_odds`` are read, defaulting to 0.02, 0.5, 0.6 and 1.5
            when absent.
            NOTE(review): a ``model_name`` entry is carried into the result
            rows but never passed to ``combine_predictions``, so rows that
            differ only in ``model_name`` are identical - confirm intended API.

    Returns:
        ``None`` when ``matches`` is empty; otherwise a DataFrame with one
        row per combination sorted by ``roi`` descending (an empty DataFrame
        when the grid yields no combinations).
    """
    import itertools  # only needed here; keeps the cell self-contained

    if not matches:
        print("No matches available for optimization")
        return None

    # Cartesian product of the grid, first parameter varying slowest
    param_names = list(param_grid.keys())
    param_combinations = [
        dict(zip(param_names, values))
        for values in itertools.product(*(param_grid[name] for name in param_names))
    ]
    print(f"Testing {len(param_combinations)} parameter combinations...")

    # Ordering and filtering do not depend on the parameters, so do them once
    if all('match_date' in match for match in matches):
        ordered_matches = sorted(matches, key=lambda m: m.get('match_date', ''))
    else:
        ordered_matches = matches
    settled_matches = [m for m in ordered_matches if m.get('actual_result')]

    initial_balance = 1000
    results = []

    for params in tqdm(param_combinations, desc="Optimizing"):
        final_balance, total_bets, winning_bets = _simulate_strategy(
            settled_matches,
            stake_pct=params.get('stake_pct', 0.02),
            bookmaker_weight=params.get('bookmaker_weight', 0.5),
            min_threshold=params.get('min_threshold', 0.6),
            min_odds=params.get('min_odds', 1.5),
            initial_balance=initial_balance,
        )

        results.append({
            **params,
            'total_bets': total_bets,
            'winning_bets': winning_bets,
            'win_rate': winning_bets / total_bets * 100 if total_bets > 0 else 0,
            'final_balance': final_balance,
            'profit': final_balance - initial_balance,
            'roi': (final_balance - initial_balance) / initial_balance * 100
        })

    if not results:
        # A grid with an empty value list produces no combinations; there is
        # no 'roi' column to sort or plot in that case.
        print("No parameter combinations to evaluate")
        return pd.DataFrame()

    # Convert to DataFrame, best ROI first
    results_df = pd.DataFrame(results).sort_values('roi', ascending=False)

    print("\nTop 5 Strategies:")
    display(results_df.head(5).style.format({
        'stake_pct': '{:.2%}',
        'bookmaker_weight': '{:.2f}',
        'min_threshold': '{:.2f}',
        'min_odds': '{:.2f}',
        'win_rate': '{:.2f}%',
        'final_balance': '${:.2f}',
        'profit': '${:.2f}',
        'roi': '{:.2f}%'
    }))

    # Visualize how ROI varies with each grid parameter
    plt.figure(figsize=(12, 8))
    for plot_number, param in enumerate(param_names, start=1):
        plt.subplot(len(param_names), 1, plot_number)
        sns.boxplot(x=param, y='roi', data=results_df)
        plt.title(f'ROI by {param}')
        plt.ylabel('ROI (%)')
        plt.grid(True)

    plt.tight_layout()
    plt.show()

    return results_df

# Parameter optimization takes significant time, so it is switched off by
# default. Set RUN_PARAMETER_OPTIMIZATION = True to run it. (An explicit flag
# is used instead of hiding the code in a string literal, which Jupyter would
# echo as the cell's output.)
RUN_PARAMETER_OPTIMIZATION = False

if RUN_PARAMETER_OPTIMIZATION and all_matches:
    # Define parameter grid to search
    param_grid = {
        'stake_pct': [0.01, 0.02, 0.05],
        'bookmaker_weight': [0.3, 0.5, 0.7],
        'min_threshold': [0.55, 0.6, 0.65, 0.7],
        'min_odds': [1.3, 1.5, 1.7, 2.0],
        'model_name': ['ensemble', 'xgboost']
    }

    # Run optimization
    optimization_results = optimize_strategy_parameters(all_matches, param_grid)

## Evaluating Model Performance on Specific Match Types

Let's analyze how our models perform on different types of matches (e.g., by competition, by favorite status).

In [None]:
def analyze_model_performance_by_category(matches, model_name='ensemble', bookmaker_weight=0.5, category='competition'):
    """Break prediction accuracy down by the value of one match field.

    Each match with a recorded result is scored with
    ``ml_predictor.combine_predictions`` and grouped by ``match[category]``
    (``'Unknown'`` when the key is absent). A per-category table is
    displayed, and categories with at least five matches are charted.

    Returns:
        ``(results_df, category_accuracy)`` - the per-match predictions and
        the per-category summary - or ``None`` when ``matches`` is empty or
        no prediction could be collected.
    """
    if not matches:
        print("No matches available for analysis")
        return None

    # One record per match that has a result and a usable prediction
    records = []
    for match in tqdm(matches, desc="Analyzing matches"):
        if not match.get('actual_result'):
            continue

        group_value = match.get(category, 'Unknown')

        prediction = ml_predictor.combine_predictions(match, bookmaker_weight=bookmaker_weight)
        if 'error' in prediction:
            continue

        predicted = prediction.get('predicted_outcome')
        actual = match.get('actual_result')

        records.append({
            'match_id': match.get('match_id', ''),
            'home_team': match.get('home_team', ''),
            'away_team': match.get('away_team', ''),
            'predicted': predicted,
            'actual': actual,
            'correct': (predicted == actual),
            'category': group_value,
            'confidence': prediction.get('confidence', 0)
        })

    if not records:
        print("No prediction results collected")
        return None

    results_df = pd.DataFrame(records)

    # Accuracy, sample size and mean confidence per category
    category_accuracy = results_df.groupby('category').agg({
        'correct': ['mean', 'count'],
        'confidence': 'mean'
    })
    category_accuracy.columns = ['accuracy', 'count', 'avg_confidence']
    category_accuracy = category_accuracy.reset_index()
    category_accuracy = category_accuracy.sort_values('count', ascending=False)

    print(f"\nModel performance by {category}:")
    percent_formats = {
        'accuracy': '{:.2%}',
        'avg_confidence': '{:.2%}'
    }
    display(category_accuracy.style.format(percent_formats))

    plt.figure(figsize=(12, 6))

    # Chart only categories with at least 5 matches, most accurate first
    has_enough_matches = category_accuracy['count'] >= 5
    plot_data = category_accuracy[has_enough_matches].copy()
    plot_data = plot_data.sort_values('accuracy', ascending=False)

    bars = plt.bar(plot_data['category'], plot_data['accuracy'] * 100)

    # Sample size written near the base of each bar
    for bar, match_count in zip(bars, plot_data['count']):
        plt.text(bar.get_x() + bar.get_width()/2, 5,
                 f"n={int(match_count)}",
                 ha='center', va='bottom', color='white', fontweight='bold')

    plt.title(f'Prediction Accuracy by {category.title()}')
    plt.xlabel(category.title())
    plt.ylabel('Accuracy (%)')
    plt.xticks(rotation=45, ha='right')
    plt.ylim(0, 100)
    plt.grid(axis='y')
    plt.tight_layout()
    plt.show()

    return results_df, category_accuracy

# Run analysis if we have data
if all_matches:
    # analyze_model_performance_by_category returns None when no prediction
    # could be collected, so each result is checked before it is unpacked
    # (unpacking None directly raises TypeError).

    # Analyze by competition
    competition_output = analyze_model_performance_by_category(
        all_matches,
        model_name='ensemble',
        bookmaker_weight=0.5,
        category='competition'
    )
    if competition_output is not None:
        competition_results, competition_accuracy = competition_output

    # Derive two custom categories from the bookmakers' implied probabilities:
    # which outcome is the favorite, and how strong that favorite is.
    for match in all_matches:
        bookmakers = match.get('bookmakers', [])
        if not bookmakers:
            continue

        # Average implied probability per outcome across all bookmakers
        avg_prob_by_outcome = {
            outcome: sum(b.get(f'implied_{outcome}_prob', 0) for b in bookmakers) / len(bookmakers)
            for outcome in ('home', 'draw', 'away')
        }

        # max() returns the first maximum, so ties resolve home > draw > away
        favorite_type = max(avg_prob_by_outcome, key=avg_prob_by_outcome.get)
        max_prob = avg_prob_by_outcome[favorite_type]

        if max_prob >= 0.6:
            strength = 'strong'
        elif max_prob >= 0.4:
            strength = 'medium'
        else:
            strength = 'weak'

        match['favorite'] = favorite_type
        match['favorite_strength'] = f"{favorite_type}_{strength}"

    # Analyze by favorite status
    favorite_output = analyze_model_performance_by_category(
        all_matches,
        model_name='ensemble',
        bookmaker_weight=0.5,
        category='favorite'
    )
    if favorite_output is not None:
        favorite_results, favorite_accuracy = favorite_output

    # Analyze by favorite strength
    favorite_strength_output = analyze_model_performance_by_category(
        all_matches,
        model_name='ensemble',
        bookmaker_weight=0.5,
        category='favorite_strength'
    )
    if favorite_strength_output is not None:
        favorite_strength_results, favorite_strength_accuracy = favorite_strength_output

## Custom Analysis Questions

Let's answer some specific analysis questions to gain deeper insights.

In [None]:
# Let's look at how accuracy changes with different probability thresholds
if 'competition_results' in locals() and not competition_results.empty:
    # Create bins of confidence levels
    bins = [0, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 1.0]
    labels = ['<55%', '55-60%', '60-65%', '65-70%', '70-75%', '75-80%', '80-85%', '85-90%', '90-100%']

    # include_lowest=True keeps a confidence of exactly 0 in the '<55%' bin
    # instead of silently dropping it as NaN
    competition_results['confidence_bin'] = pd.cut(
        competition_results['confidence'], bins=bins, labels=labels, include_lowest=True
    )

    # observed=False is stated explicitly so that empty bins stay in the
    # table regardless of the pandas version's default
    confidence_accuracy = competition_results.groupby('confidence_bin', observed=False).agg({
        'correct': ['mean', 'count'],
        'confidence': 'mean'
    })

    # Flatten multi-index columns
    confidence_accuracy.columns = ['accuracy', 'count', 'avg_confidence']
    confidence_accuracy = confidence_accuracy.reset_index()

    print("\nAccuracy by confidence threshold:")
    display(confidence_accuracy.style.format({
        'accuracy': '{:.2%}',
        'avg_confidence': '{:.2%}'
    }))

    # Plot results
    plt.figure(figsize=(12, 6))
    bars = plt.bar(confidence_accuracy['confidence_bin'].astype(str),
                   confidence_accuracy['accuracy'] * 100)

    # Add count labels just above each bar; an empty bin has NaN accuracy,
    # so its label is anchored near the axis instead of at a NaN position
    for bar, bin_count in zip(bars, confidence_accuracy['count']):
        bar_height = bar.get_height()
        label_y = 1 if np.isnan(bar_height) else bar_height + 1
        plt.text(bar.get_x() + bar.get_width()/2, label_y,
                 f"n={int(bin_count)}",
                 ha='center', va='bottom')

    plt.title('Prediction Accuracy by Confidence Level')
    plt.xlabel('Confidence Range')
    plt.ylabel('Accuracy (%)')
    plt.ylim(0, 100)
    plt.grid(axis='y')
    plt.tight_layout()
    plt.show()

## Export a Trained Model for Production Use

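Finally, let's export the chosen model's name and recommended betting parameters to a JSON file for production use in the main application. Note that this cell writes only this metadata, not the trained model itself.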

In [None]:
def export_model_info(model_name='ensemble', bookmaker_weight=0.5):
    """Write the chosen model name and recommended parameters to JSON.

    The file is written to ``<data_dir>/ml_exports/model_info.json``; the
    ``ml_exports`` directory is created when missing. Only this metadata is
    written - no model object is serialized here.

    Returns:
        dict: The exported information.
    """
    today = datetime.now().strftime('%Y-%m-%d')
    export_info = {
        'model_name': model_name,
        'bookmaker_weight': bookmaker_weight,
        'description': f"ML model trained on {today}",
        # The two recommendations below are fixed values; adjust them to
        # match the findings of your own analysis
        'recommended_threshold': 0.65,
        'recommended_min_odds': 1.5,
        'notes': "Created using backtesting notebook"
    }

    # Make sure the export directory exists, then write the JSON file
    export_dir = os.path.join(data_dir, 'ml_exports')
    os.makedirs(export_dir, exist_ok=True)
    export_path = os.path.join(export_dir, 'model_info.json')

    with open(export_path, 'w') as export_file:
        export_file.write(json.dumps(export_info, indent=2))

    print(f"Model information exported to {export_path}")
    return export_info

# Export model info if we've successfully trained models
if 'X' in locals() and not X.empty:
    # Use parameters from your analysis
    export_info = export_model_info(model_name='ensemble', bookmaker_weight=0.5)

    # A bare expression inside an `if` block is not rendered by Jupyter,
    # so the exported info has to be displayed explicitly.
    display(export_info)

## Conclusion

In this notebook, we've:

1. Loaded and analyzed historical match data
2. Trained multiple machine learning models to predict match outcomes
3. Performed hyperparameter tuning to optimize model performance
4. Backtested various betting strategies on historical data
5. Analyzed model performance across different categories
6. Exported the selected model's configuration (model name and recommended parameters) for production use

You can use this notebook as a starting point for your own analysis and model development. As you collect more match data, you can rerun this notebook to refine your models and strategies.