In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from datetime import datetime
import logging
from pathlib import Path
from typing import Tuple, Dict, Optional
import os


from model_initialization import initialize_and_train_model
from backtest import EPLBacktester
from betting_model import AdvancedBettingNeuralWrapper
from base_algo import BaseAlgorithm
from analysis import analyze_backtest_results

def load_and_prepare_data(
    matches_path: str,
    odds_path: str,
    *,
    data_dir: str = "data/",
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the matches and odds CSV files, validate them, and sort by date.

    Both file names are resolved relative to ``data_dir``. A relative
    ``data_dir`` (including the default) is resolved against the current
    working directory.

    Args:
        matches_path: File name of the matches CSV inside ``data_dir``.
        odds_path: File name of the odds CSV inside ``data_dir``.
        data_dir: Directory containing both files. Defaults to ``"data/"``.

    Returns:
        A ``(matches_df, odds_df)`` tuple. In each frame the ``date`` column
        has been converted with ``pd.to_datetime`` and the rows are sorted by
        ``date`` in ascending order.

    Raises:
        FileNotFoundError: If either file does not exist.
        Exception: If either file cannot be read; the underlying error is
            attached as ``__cause__``.
        ValueError: If a required column is missing, or if the two files
            share no ``match_id`` at all.
    """
    base_dir = Path(data_dir)
    matches_full_path = base_dir / matches_path
    odds_full_path = base_dir / odds_path

    # Fail early with a message that names the missing file.
    if not matches_full_path.exists():
        raise FileNotFoundError(f"Matches file not found: {matches_full_path}")
    if not odds_full_path.exists():
        raise FileNotFoundError(f"Odds file not found: {odds_full_path}")

    # Load data. The broad catch is kept so callers still receive a plain
    # Exception with the same message; "from e" preserves the root cause.
    try:
        matches_df = pd.read_csv(matches_full_path)
        odds_df = pd.read_csv(odds_full_path)
    except Exception as e:
        raise Exception(f"Error loading data files: {str(e)}") from e

    # Validate required columns
    required_match_cols = ['date', 'home_team', 'away_team', 'result', 'match_id']
    required_odds_cols = ['date', 'match_id', 'home_win_odds', 'draw_odds', 'away_win_odds']

    missing_match_cols = [col for col in required_match_cols if col not in matches_df.columns]
    missing_odds_cols = [col for col in required_odds_cols if col not in odds_df.columns]

    if missing_match_cols:
        raise ValueError(f"Missing required columns in matches file: {missing_match_cols}")
    if missing_odds_cols:
        raise ValueError(f"Missing required columns in odds file: {missing_odds_cols}")

    # Convert dates
    matches_df['date'] = pd.to_datetime(matches_df['date'])
    odds_df['date'] = pd.to_datetime(odds_df['date'])

    # Sort chronologically
    matches_df = matches_df.sort_values('date')
    odds_df = odds_df.sort_values('date')

    # Only a complete lack of overlap is rejected; partially overlapping
    # match_ids are allowed through unchanged.
    if not set(matches_df['match_id']).intersection(odds_df['match_id']):
        raise ValueError("No matching match_ids between matches and odds data")

    return matches_df, odds_df



In [4]:
# Read the matches and odds CSVs; load_and_prepare_data returns both frames
# with parsed dates, sorted chronologically.
matches_df, odds_df = load_and_prepare_data(
    matches_path='epl_matches_2024_25.csv',
    odds_path='epl_odds_2024_25.csv',
)

# Build the algorithm object from the matches frame only (odds_df is not
# passed to it here).
algo = BaseAlgorithm(matches_df)