In [6]:
import numpy as np
import pandas as pd
from skopt import gp_minimize
from skopt.space import Real
from skopt.utils import use_named_args
from skopt.plots import plot_convergence, plot_objective
import matplotlib.pyplot as plt
import sys
import os
sys.path.append(os.path.dirname(os.getcwd()))
from models.standard_dc import TeamModel


In [7]:
# Load your shot data
def load_shot_data(paths=None, min_date='2024-02-27'):
    """Load the shot-level CSV exports, stack them and add derived columns.

    Parameters
    ----------
    paths : iterable of path-like, optional
        CSV files to read and concatenate, in order. When omitted, the two
        Premier League exports (2023 and 2024) at the original hard-coded
        locations are used.
    min_date : str or datetime-like, optional
        Only shots whose ``match_date`` is strictly later than this value
        are kept. Defaults to ``'2024-02-27'``.

    Returns
    -------
    pandas.DataFrame
        One row per shot, with ``match_date`` parsed to datetime and two
        added columns: ``season`` (2024 when ``match_date`` is after
        2024-08-01, otherwise 2023) and ``is_goal`` (1 when ``Outcome`` is
        ``'Goal'``, otherwise 0).
    """
    if paths is None:
        # NOTE(review): these are machine-specific absolute paths; pass
        # `paths` explicitly to run the notebook anywhere else.
        paths = [
            r"C:\Users\Owner\dev\team-model\shot_data_prem_2023.csv",
            r"C:\Users\Owner\dev\team-model\shot_data_prem_2024.csv",
        ]

    # ignore_index avoids duplicate row labels coming from the separate files.
    df = pd.concat([pd.read_csv(path) for path in paths], ignore_index=True)

    # Process dates and seasons
    df['match_date'] = pd.to_datetime(df['match_date'])
    df['season'] = np.where(df['match_date'] > pd.Timestamp('2024-08-01'), 2024, 2023)

    # Take an explicit copy of the filtered rows so the column assignment
    # below writes to an independent frame (no SettingWithCopyWarning).
    df = df[df['match_date'] > min_date].copy()

    # Goal indicator, vectorised instead of a per-row lambda.
    df['is_goal'] = (df['Outcome'] == 'Goal').astype('int64')

    return df

# Create match summaries from shot data
def create_match_summaries(shot_data):
    """Collapse shot-level rows into one summary row per match.

    Parameters
    ----------
    shot_data : pandas.DataFrame
        Shot rows containing the columns ``match_url``, ``match_date``,
        ``home_team``, ``away_team``, ``season``, ``Team``, ``is_goal``,
        ``xG`` and ``PSxG``.

    Returns
    -------
    pandas.DataFrame
        One row per match, sorted by the match key columns, with the key
        columns followed by ``home_goals``, ``home_xg``, ``home_psxg``,
        ``away_goals``, ``away_xg`` and ``away_psxg``.

    Notes
    -----
    A match in which one side has no shot rows is kept, with that side's
    totals set to 0. An inner join would silently drop such a match.
    """
    match_keys = ['match_url', 'match_date', 'home_team', 'away_team', 'season']

    home_stats = _side_totals(shot_data, 'home_team', 'home', match_keys)
    away_stats = _side_totals(shot_data, 'away_team', 'away', match_keys)

    stat_cols = [
        col
        for col in list(home_stats.columns) + list(away_stats.columns)
        if col not in match_keys
    ]
    # Remember the goal dtypes: the outer join upcasts to float wherever it
    # has to insert NaN for a side without shots.
    goal_dtypes = {
        'home_goals': home_stats['home_goals'].dtype,
        'away_goals': away_stats['away_goals'].dtype,
    }

    match_stats = pd.merge(home_stats, away_stats, on=match_keys, how='outer')

    # Zero-fill the missing side, restore the goal dtypes, and sort so the row
    # order matches the key-sorted order the grouped frames already have.
    match_stats = (
        match_stats
        .fillna({col: 0 for col in stat_cols})
        .astype(goal_dtypes)
        .sort_values(match_keys)
        .reset_index(drop=True)
    )

    return match_stats


def _side_totals(shot_data, team_col, prefix, match_keys):
    """Sum goals, xG and PSxG per match over shots taken by the `team_col` side."""
    side_shots = shot_data[shot_data['Team'] == shot_data[team_col]]
    totals = side_shots.groupby(match_keys, as_index=False).agg({
        'is_goal': 'sum',  # Total goals
        'xG': 'sum',       # Total xG
        'PSxG': 'sum'      # Total PSxG
    })
    return totals.rename(columns={
        'is_goal': f'{prefix}_goals',
        'xG': f'{prefix}_xg',
        'PSxG': f'{prefix}_psxg'
    })


# Stage 1: read the raw shot-level rows.
shot_data = load_shot_data()
print(f"Loaded {len(shot_data)} shots")

# Stage 2: roll the shots up to one row per match.
match_stats = create_match_summaries(shot_data)
print(f"Created {len(match_stats)} match summaries")

# Stage 3: one dict per match, keyed by column name.
matches = match_stats.to_dict(orient='records')

Loaded 10738 shots
Created 381 match summaries
