In [1]:
import sys
import os

# Add repo root to the Python path so project-local modules can be imported.
# NOTE(review): ".." is resolved against the kernel's working directory, so this
# assumes the notebook is run from the notebooks folder - confirm.
repo_root = os.path.abspath("..")  # parent folder of notebooks
# Guarded so that re-running this cell does not keep appending duplicates.
if repo_root not in sys.path:
    sys.path.append(repo_root)

import pandas as pd
from statsbombpy import sb


# Download Data

In [2]:
# (competition_id, season_id) pairs to download. Declaring them as pairs keeps
# each competition next to its season; the two parallel lists expected by
# get_match_overview are derived from them.
_competition_season_pairs = [
    (72, 107),
    (53, 315),
    (53, 106),
    (43, 106),
    (55, 282),
    (55, 43),
]
competition_ids = [comp for comp, _ in _competition_season_pairs]
season_ids = [season for _, season in _competition_season_pairs]

def get_match_overview(competition_ids, season_ids, requires_360=False):
    """
    Returns a filtered overview of matches for multiple competition/season pairs.

    Each (competition_id, season_id) pair is fetched with ``sb.matches``. A pair
    whose download raises is reported with a printed warning and skipped, so the
    result may cover fewer pairs than requested.

    Parameters:
        competition_ids (int or list): ID(s) of the competition(s)
        season_ids (int or list): ID(s) of the season(s), paired position by
            position with ``competition_ids``
        requires_360 (bool): If True, only return matches with 360 data

    Returns:
        pd.DataFrame: Combined and filtered overview dataframe with an added
            boolean column ``available_360`` and a fresh 0..n-1 index. An empty
            DataFrame without columns is returned when no pair could be fetched.

    Raises:
        ValueError: If competition_ids and season_ids differ in length.
    """
    # Convert single integers to lists to keep the logic unified
    if isinstance(competition_ids, int):
        competition_ids = [competition_ids]
    if isinstance(season_ids, int):
        season_ids = [season_ids]

    if len(competition_ids) != len(season_ids):
        raise ValueError("The number of competition_ids must match the number of season_ids.")

    all_matches = []

    # Fetch each pair independently; one failing pair must not abort the rest.
    for comp_id, seas_id in zip(competition_ids, season_ids):
        try:
            all_matches.append(sb.matches(competition_id=comp_id, season_id=seas_id))
        except Exception as e:
            print(f"Warning: Could not fetch matches for Comp {comp_id}, Season {seas_id}: {e}")

    if not all_matches:
        return pd.DataFrame()

    # Combine all fetched data
    overview_df = pd.concat(all_matches, ignore_index=True)

    # Track 360 availability.
    # `match_status_360` is a status string, not a present/absent flag: values
    # such as "scheduled" or "unscheduled" are non-null but mean the 360 data
    # is NOT available, so a not-null test would over-count. Only the
    # "available" status marks usable 360 data (missing values compare False).
    if 'match_status_360' in overview_df.columns:
        overview_df['available_360'] = overview_df['match_status_360'].eq('available')
    else:
        # Without the status column no match can be assumed to have 360 data.
        overview_df['available_360'] = False

    # --- Apply filters ---
    if requires_360:
        before_count = len(overview_df)
        overview_df = overview_df[overview_df['available_360']]
        print(f"Dropped {before_count - len(overview_df)} matches without 360 data")

    return overview_df.reset_index(drop=True)

In [3]:
# Fetch the overview for every competition/season pair, keeping only matches
# flagged as having 360 data. This performs one network request per pair.
# NOTE(review): the saved output of this cell shows a KeyboardInterrupt, so
# `full_overview_df` may not exist in a saved session - re-run before use.
full_overview_df = get_match_overview(
    competition_ids=competition_ids,
    season_ids=season_ids,
    requires_360=True,
)



KeyboardInterrupt: 

In [None]:
import pandas as pd
from statsbombpy import sb

def download_sb_data(overview_df, download_360=True):
    """
    Download StatsBomb event and 360 data for a list of matches.

    Matches are processed in the order of ``overview_df['match_id']``. If the
    event download for a match fails, the failure is printed and the match is
    skipped entirely (no 360 download is attempted for it). If only the 360
    download fails, the events are kept and the failure is printed.

    Parameters:
        overview_df (pd.DataFrame): Filtered matches overview with 'match_id'.
            An empty overview (including one without any columns) is accepted
            and yields two empty lists.
        download_360 (bool): If True, download SB360 frame data; else only event data

    Returns:
        events_data (list of pd.DataFrame): List of event DataFrames per match
        frames_360_data (list of pd.DataFrame): List of SB360 frame DataFrames
            per match, with the 'visible_area' column dropped when present.
            The two lists are NOT aligned by position: a match whose frames
            could not be downloaded has an entry in ``events_data`` only.

    Raises:
        KeyError: If a non-empty overview_df has no 'match_id' column.
    """
    events_data = []
    frames_360_data = []

    # Nothing to download. This also covers the column-less empty DataFrame,
    # for which the 'match_id' lookup below would raise KeyError.
    if overview_df.empty:
        return events_data, frames_360_data

    for match_id in overview_df['match_id']:
        # --- Download event data ---
        try:
            event_df = sb.events(match_id=match_id)
            events_data.append(event_df)
            print(f"Downloaded events for match {match_id} ({len(event_df)} rows)")
        except Exception as e:
            print(f"Failed to download events for match {match_id}: {e}")
            # Without events the match is skipped, including its 360 frames.
            continue

        # --- Download 360 data if requested ---
        if download_360:
            try:
                frame_df = sb.frames(match_id=match_id)
                if 'visible_area' in frame_df.columns:
                    frame_df = frame_df.drop(columns=['visible_area'])
                frames_360_data.append(frame_df)
                print(f"Downloaded 360 frames for match {match_id} ({len(frame_df)} rows)")
            except Exception as e:
                print(f"No 360 data for match {match_id}: {e}")

    return events_data, frames_360_data


In [None]:
# Download events and 360 frames for every match in the overview. Both results
# are lists holding one DataFrame per successfully downloaded match.
data_events, data_360 = download_sb_data(
    overview_df=full_overview_df,
    download_360=True,
)

In [None]:
# Persist the downloaded per-match frames as one Parquet file per data type.
# pd.concat raises ValueError on an empty list, so each save is skipped with a
# message when nothing was downloaded (e.g. download_360=False leaves data_360
# empty) instead of aborting the cell.

# Store Events Dataframe
if len(data_events) > 0:
    df_events = pd.concat(data_events, ignore_index=True)
    print("starting saving event")
    df_events.to_parquet('events_data.parquet', engine="fastparquet")
    print("done")
else:
    print("no event data to save, skipping events_data.parquet")
df_events = None  # drop the reference to the combined frame

# Store 360 Dataframe
if len(data_360) > 0:
    df_360 = pd.concat(data_360, ignore_index=True)
    print("starting saving 360")
    df_360.to_parquet('360_data.parquet', engine="fastparquet")
    print("done")
else:
    print("no 360 data to save, skipping 360_data.parquet")
df_360 = None  # drop the reference to the combined frame



In [None]:
import pandas as pd
# Reload the saved events from Parquet. Note that this rebinds `data_events`
# from the list of per-match DataFrames returned by download_sb_data to a
# single DataFrame.
data_events = pd.read_parquet("events_data.parquet", engine="fastparquet")


In [None]:
# Reload the saved 360 frames from Parquet. Note that this rebinds `data_360`
# from the list of per-match DataFrames returned by download_sb_data to a
# single DataFrame.
data_360 = pd.read_parquet("360_data.parquet", engine="fastparquet")

In [None]:
# Preview the first rows of the reloaded 360 data.
data_360.head()

In [None]:
# Preview the first rows of the reloaded event data.
data_events.head()

In [None]:
# Fetch the 360 frames of a single match for inspection.
# NOTE(review): presumably this id belongs to one of the downloaded
# competitions - confirm.
SINGLE_MATCH_ID = 3788747
df_single_360 = sb.frames(match_id=SINGLE_MATCH_ID)