In [None]:
# 1. Imports
import os
import sys
sys.path.append('./sportmonks_api')

import pandas as pd
from sportmonks_calls import Fixtures

# 2. API Key
# Read the token from the environment so the secret is never stored in the
# notebook itself (or in its version history).
API_KEY = os.environ.get("SPORTMONKS_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the SPORTMONKS_API_KEY environment variable before running this cell.")

# 3. Load saved seasons (the loop below reads the `season_id` and `league_name` columns)
seasons_df = pd.read_csv("seasons.csv")
print(f"Loaded {len(seasons_df)} seasons.")

# 4. Initialize Fixtures API
fixtures_api = Fixtures(API_KEY)

# 5. Fetch fixtures for each season
all_fixtures = []

# Iterate the two needed columns directly: cheaper than iterrows() and each
# value keeps its own column dtype instead of being coerced per row.
for season_id, league_name in zip(seasons_df['season_id'], seasons_df['league_name']):
    print(f"Fetching fixtures for Season ID {season_id} ({league_name})...")

    endpoint = f"https://api.sportmonks.com/v3/football/fixtures?seasons={season_id}"

    # NOTE(review): assumes make_request() returns a truthy value on success and
    # leaves only this request's rows in `request.results` - confirm in sportmonks_calls.
    success = fixtures_api.request.make_request(endpoint, paginated=True)

    if not success:
        print(f"Failed to fetch fixtures for season {season_id}. Skipping...")
        continue

    fixtures = fixtures_api.request.results
    all_fixtures.extend(fixtures)

# 6. Build dataframe
fixtures_df = pd.DataFrame(all_fixtures)
print(f"Total fixtures fetched: {len(fixtures_df)}")

# 7. (Simple view)
display(fixtures_df.head())

# 8. (Optionally save later)
# fixtures_df.to_csv("fixtures.csv", index=False)


In [None]:
import os

import pandas as pd
from sportmonks_calls import Fixtures

# 1. Load seasons
seasons_df = pd.read_csv("seasons.csv")
print(f"Loaded {len(seasons_df)} seasons.")

# 2. Initialize Fixtures API
# The token comes from the environment so the secret is not stored in the notebook.
API_KEY = os.environ.get("SPORTMONKS_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the SPORTMONKS_API_KEY environment variable before running this cell.")
fixtures_api = Fixtures(API_KEY)

# 3. Prepare fetching
season_ids = list(seasons_df['season_id'])
batch_size = 5  # Can later change to 10 if stable

# 4. Check if partial backup exists
if os.path.exists("fixtures_partial.csv"):
    fixtures_df = pd.read_csv("fixtures_partial.csv")
    fetched_ids = set(fixtures_df['season_id'].unique())
    print(f"Resuming from {len(fetched_ids)} already fetched seasons.")
else:
    fixtures_df = pd.DataFrame()
    fetched_ids = set()

all_fixtures = []  # not used below; kept so this cell still (re)initialises the name

# Batch only the seasons missing from the checkpoint. Batching the full list and
# skipping a batch only when *all* of its seasons were fetched would re-download
# (and duplicate) seasons from a partially completed batch.
pending_ids = [season_id for season_id in season_ids if season_id not in fetched_ids]

# 5. Loop over batches
for i in range(0, len(pending_ids), batch_size):
    batch = pending_ids[i:i+batch_size]

    batch_str = ",".join(map(str, batch))
    print(f"Fetching fixtures for seasons: {batch_str}")

    # Build endpoint manually
    endpoint = f"https://api.sportmonks.com/v3/football/fixtures?seasons={batch_str}"

    success = fixtures_api.request.make_request(endpoint, paginated=True)

    if not success:
        print(f"Failed to fetch fixtures for seasons: {batch_str}")
        continue

    fixtures = fixtures_api.request.results
    print(f"Fetched {len(fixtures)} fixtures.")

    if len(fixtures) == 0:
        continue  # nothing to append or checkpoint for this batch

    # Add batch results
    batch_df = pd.DataFrame(fixtures)

    # The resume logic above needs a per-row `season_id`. Assigning the list of
    # batch ids to the column (one id per season, not per fixture) raises a
    # length-mismatch error, so rely on the value carried by each fixture record.
    # NOTE(review): assumes the API payload includes `season_id` - confirm.
    if 'season_id' not in batch_df.columns:
        if len(batch) == 1:
            batch_df['season_id'] = batch[0]  # unambiguous: single-season batch
        else:
            print(f"Warning: no 'season_id' in results for seasons {batch_str}; "
                  "these rows cannot be recognised when resuming.")

    if fixtures_df.empty:
        fixtures_df = batch_df
    else:
        fixtures_df = pd.concat([fixtures_df, batch_df], ignore_index=True)

    # Save partial progress every batch
    fixtures_df.to_csv("fixtures_partial.csv", index=False)
    print(f"Checkpoint saved after batch {batch_str}!")

# 6. Final save
fixtures_df.to_csv("fixtures.csv", index=False)
print(f"✅ Saved all {len(fixtures_df)} fixtures to fixtures.csv!")
print("You can safely delete fixtures_partial.csv if you want after verifying.")


In [None]:
# 03_fetch_fixtures.ipynb

import os
import requests
import pandas as pd
import time

# 1. API Key
# Read from the environment so the secret is never stored in the notebook.
API_KEY = os.environ.get("SPORTMONKS_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the SPORTMONKS_API_KEY environment variable before running this cell.")

# 2. Base URL
BASE_URL = "https://api.sportmonks.com/v3/football/fixtures"

# 3. Initialize
all_fixtures = []
current_page = 1
save_every_x_pages = 20  # <- Save every 20 pages
output_file = "fixtures_raw.csv"
request_timeout = 30  # seconds; without a timeout a stalled connection hangs the cell forever

# 4. Fetch loop: walk the pages until the API reports there are no more
while True:
    print(f"Fetching page {current_page}...")

    params = {
        "api_token": API_KEY,
        "page": current_page,
    }

    try:
        response = requests.get(BASE_URL, params=params, timeout=request_timeout)
    except requests.exceptions.RequestException as exc:
        # Leave the loop instead of raising so the final save below still
        # persists everything fetched so far.
        print(f"Request failed on page {current_page}: {exc}")
        break

    if response.status_code != 200:
        print(f"Error: {response.status_code} {response.text}")
        break

    data = response.json()

    if 'data' not in data:
        print(f"No more data on page {current_page}. Exiting loop.")
        break

    fixtures = data['data']
    all_fixtures.extend(fixtures)

    # Save every X pages to be safe
    if current_page % save_every_x_pages == 0:
        print(f"Saving partial results after page {current_page}...")
        pd.DataFrame(all_fixtures).to_csv(output_file, index=False)

    # Pagination: tolerate a missing or null `pagination` object or `has_more` flag
    if (data.get('pagination') or {}).get('has_more'):
        current_page += 1
    else:
        print("No more pages left.")
        break

    # Sleep optional (commented for now)
    # time.sleep(0.2)

# 5. Final save
print(f"Saving final results with {len(all_fixtures)} fixtures...")
pd.DataFrame(all_fixtures).to_csv(output_file, index=False)

print("✅ Done fetching all fixtures!")


In [None]:
import os
import requests
import pandas as pd
from tqdm import tqdm
import time

# 1. Set API details
# The token is read from the environment so it is never stored in the notebook.
API_KEY = os.environ.get("SPORTMONKS_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the SPORTMONKS_API_KEY environment variable before running this cell.")
BASE_URL = "https://api.sportmonks.com/v3/football/fixtures"

# 2. Output file
output_file = "fixtures_full.csv"
save_every_x_pages = 25  # save every 25 pages

# 3. Initialize
all_fixtures = []
current_page = 1
max_consecutive_failures = 5  # stop retrying the same page after this many failures in a row
consecutive_failures = 0

# 4. Test connection
print("Testing API connection...")
first_response = requests.get(BASE_URL, params={"api_token": API_KEY, "page": 1}, timeout=15)

if first_response.status_code != 200:
    print(f"Error: {first_response.status_code} {first_response.text}")
    raise Exception("Can't connect to Sportmonks API.")

first_data = first_response.json()

# Set estimated progress for tqdm (the real page count is not known up front)
estimated_total = 1000
print("Connection OK. Starting fetching without fixed total pages...")

# 5. Fetch loop with progress bar
pbar = tqdm(total=estimated_total, desc="Fetching Fixtures", position=0, leave=True)

try:
    while True:
        try:
            params = {
                "api_token": API_KEY,
                "page": current_page,
            }

            response = requests.get(BASE_URL, params=params, timeout=15)

            if response.status_code != 200:
                print(f"Error: {response.status_code} {response.text}")
                break

            data = response.json()
            consecutive_failures = 0  # the request round-trip succeeded

            if 'data' not in data:
                print(f"No 'data' field on page {current_page}. Exiting loop.")
                break

            fixtures = data['data']
            all_fixtures.extend(fixtures)

            if current_page % save_every_x_pages == 0:
                pd.DataFrame(all_fixtures).to_csv(output_file, index=False)
                print(f"[Auto-Save] Saved {len(all_fixtures)} fixtures so far...")

            pbar.update(1)

            if (data.get('pagination') or {}).get('has_more'):
                current_page += 1
                time.sleep(0.5)  # polite sleep to avoid hitting limits
            else:
                print("No more pages.")
                break

        except requests.exceptions.RequestException as e:
            # Retry the same page, but not forever: a persistent outage would
            # otherwise spin this loop indefinitely.
            consecutive_failures += 1
            if consecutive_failures >= max_consecutive_failures:
                print(f"Request failed: {e}. Giving up after {consecutive_failures} consecutive failures.")
                break
            print(f"Request failed: {e}. Retrying in 5 seconds...")
            time.sleep(5)
            continue
finally:
    # Close the bar even if an unexpected error escapes the loop.
    pbar.close()

# Final save
pd.DataFrame(all_fixtures).to_csv(output_file, index=False)
print(f"All done! Total fixtures saved: {len(all_fixtures)} into {output_file}")

In [None]:
import pandas as pd

# Snapshot the in-memory `all_fixtures` records (populated by an earlier cell)
# into a DataFrame and write it out as CSV without the index column.
df_fixtures = pd.DataFrame(data=all_fixtures)
df_fixtures.to_csv(path_or_buf="fixtures_full.csv", index=False)
print("Saved fixtures to fixtures_full.csv")


In [None]:
import json
import time

import pandas as pd
import requests
from tqdm import tqdm

# NOTE: `API_KEY` must already be defined by an earlier cell.

# 1. Read the fixtures file
fixtures_df = pd.read_csv("fixtures_full.csv", low_memory=False)
fixture_ids = fixtures_df["id"].dropna().astype(int).tolist()

# 2. API and output
# The records are written as JSON Lines (nested payloads do not fit a flat CSV),
# so the file name carries a matching extension instead of ".csv".
stats_output_file = "fixture_stats.jsonl"
save_every_x = 500  # checkpoint every 500 fixtures processed
stats_data = []
# NOTE(review): later cells in this notebook request `include=statistics` and read
# `data["statistics"]`; these include names and the "stats" key check below may
# belong to an older API version - confirm against the API documentation.
includes = "stats,localTeam,visitorTeam"

print(f"Starting to fetch statistics for {len(fixture_ids)} fixtures...")

# 3. Progress loop
for idx, fixture_id in enumerate(tqdm(fixture_ids, desc="Fetching Statistics")):
    try:
        url = f"https://api.sportmonks.com/v3/football/fixtures/{fixture_id}"
        params = {
            "api_token": API_KEY,
            "include": includes
        }
        response = requests.get(url, params=params, timeout=10)

        if response.status_code == 200:
            fixture_stats = response.json().get("data")
            # Keep only payloads that actually carry a stats section.
            if fixture_stats and "stats" in fixture_stats:
                fixture_stats["fixture_id"] = fixture_id  # Ensure ID is preserved
                stats_data.append(fixture_stats)
        elif response.status_code != 404:
            # 404s are skipped silently; anything else is reported.
            print(f"[Error] Fixture {fixture_id}: {response.status_code}")

        time.sleep(0.25)  # avoid rate limit

    except Exception as e:
        print(f"[Exception] Fixture {fixture_id}: {e}")
        time.sleep(5)

    # Checkpoint outside the try block so a skipped or failed fixture landing
    # on the boundary cannot cause the periodic save to be missed.
    if (idx + 1) % save_every_x == 0:
        pd.DataFrame(stats_data).to_json(stats_output_file, orient="records", lines=True)
        print(f"[Auto-Save] {len(stats_data)} stats saved to {stats_output_file}...")

# 4. Final save
pd.DataFrame(stats_data).to_json(stats_output_file, orient="records", lines=True)
print(f"✅ Done! Total stats saved: {len(stats_data)} into {stats_output_file}")

In [None]:
import os
import requests
import pandas as pd
from tqdm import tqdm
import time
from datetime import datetime, timedelta

# The token is read from the environment so it is never stored in the notebook.
API_KEY = os.environ.get("SPORTMONKS_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the SPORTMONKS_API_KEY environment variable before running this cell.")
BASE_URL = "https://api.sportmonks.com/v3/football/fixtures/between"
# NOTE(review): later cells in this notebook use `include=statistics`; confirm
# that "stats" is a valid include name for this API version.
INCLUDE = "stats"

# Set date range (adjust as needed) only stats from the last 5-6 seasons
start_date = datetime(2020, 1, 1)
end_date = datetime(2025, 4, 30)
step_days = 7  # fetch one week per request

output_file = "fixture_stats_batched.csv"
all_fixtures = []
# Consecutive windows share their boundary date, so a fixture on that date can
# be returned twice; remember the ids already collected to keep rows unique.
seen_fixture_ids = set()

# Loop through date ranges. A separate cursor is advanced so `start_date`
# keeps its configured value and the cell can be re-run as-is.
print("Fetching fixture stats in weekly batches...")
window_start = start_date
while window_start < end_date:
    batch_end = min(window_start + timedelta(days=step_days), end_date)
    url = f"{BASE_URL}/{window_start.date()}/{batch_end.date()}"

    try:
        page = 1
        window_complete = False
        # Follow the pagination flag (as the other fetch cells in this notebook
        # do) instead of reading only the first page of each window.
        while True:
            response = requests.get(url, params={
                "api_token": API_KEY,
                "include": INCLUDE,
                "page": page,
            }, timeout=30)

            if response.status_code != 200:
                print(f"Error: {response.status_code} for {window_start.date()} - {batch_end.date()}")
                break

            data = response.json()
            for fixture in data.get("data") or []:
                fixture_id = fixture.get("id")
                if fixture_id is not None:
                    if fixture_id in seen_fixture_ids:
                        continue
                    seen_fixture_ids.add(fixture_id)
                all_fixtures.append(fixture)

            if not (data.get("pagination") or {}).get("has_more"):
                window_complete = True
                break

            page += 1
            time.sleep(0.5)  # respect API limits between pages as well

        if window_complete:
            print(f"Fetched: {window_start.date()} to {batch_end.date()} | Total so far: {len(all_fixtures)}")

    except Exception as e:
        print(f"[Error] {window_start.date()} -> {batch_end.date()} : {e}")

    # Respect API limits
    time.sleep(0.5)
    window_start += timedelta(days=step_days)

# Save (json_normalize flattens nested fields into dotted column names)
df = pd.json_normalize(all_fixtures)
df.to_csv(output_file, index=False)
print(f"✅ Saved {len(df)} fixtures with stats to {output_file}")


In [None]:
from datetime import datetime

# Requires `pd` and `fixtures_df` from earlier cells.

# Parse starting date; unparseable values become NaT (errors='coerce')
fixtures_df['starting_at'] = pd.to_datetime(fixtures_df['starting_at'], errors='coerce')

# Filter for fixtures from 2018 and later (NaT rows compare False and drop out)
recent_fixtures_df = fixtures_df[fixtures_df['starting_at'] >= pd.Timestamp("2018-01-01")]

# Save or inspect filtered fixture IDs.
# Drop missing ids first: casting NaN to int raises, and the other cells that
# build id lists in this notebook also call dropna() before astype(int).
recent_fixture_ids = recent_fixtures_df['id'].dropna().astype(int).tolist()
print(f"Filtered to {len(recent_fixture_ids)} fixtures from 2018 onwards.")

In [None]:
# Show every column name of the fixtures frame as a plain Python list.
print(fixtures_df.columns.to_list())


In [None]:
import pandas as pd

# Print the first five raw lines of the CSV to inspect its on-disk structure.
# Each line keeps its own trailing newline, so the output is double-spaced.
with open("fixtures_full.csv", "r") as raw_file:
    lines_left = 5
    while lines_left > 0:
        print(raw_file.readline())
        lines_left -= 1


In [None]:
# Re-read the full fixtures CSV in a single pass (low_memory=False) so column
# dtypes are inferred from the whole file rather than chunk by chunk.
fixtures_df = pd.read_csv(filepath_or_buffer="fixtures_full.csv", low_memory=False)

# List the column names to confirm the load.
print(fixtures_df.columns.to_list())


In [21]:
# Coerce `starting_at` to datetimes in place (bad values become NaT), then keep
# the rows dated 2018-01-01 or later; NaT rows fail the comparison and drop out.
fixtures_df['starting_at'] = pd.to_datetime(arg=fixtures_df['starting_at'], errors='coerce')
recent_fixtures_df = fixtures_df.loc[fixtures_df['starting_at'].ge(pd.Timestamp("2018-01-01"))]

In [None]:
import os
from pathlib import Path

import pandas as pd

# Data directory: overridable through SPORTMONKS_DATA_DIR so the notebook is not
# tied to one machine; the default is the location used originally.
DATA_DIR = Path(os.environ.get("SPORTMONKS_DATA_DIR", "/Users/sebastianvinther/Desktop/Sportsmonks"))

# Load full fixture dataset
fixtures_df = pd.read_csv(DATA_DIR / "fixtures_full.csv", low_memory=False)

# Convert to datetime; invalid dates become NaT and are removed by the
# comparison below, because NaT >= timestamp evaluates to False.
fixtures_df['starting_at'] = pd.to_datetime(fixtures_df['starting_at'], errors='coerce')
filtered_df = fixtures_df[fixtures_df['starting_at'] >= pd.Timestamp("2010-01-01")]

# Keep only valid fixture IDs (rows with a missing id are dropped before the int cast)
valid_fixtures = filtered_df[['id']].dropna().astype(int)
valid_fixtures.to_csv(DATA_DIR / "valid_fixtures_2010_onward.csv", index=False)
print(f"Saved {len(valid_fixtures)} valid fixture IDs to valid_fixtures_2010_onward.csv")


In [None]:
import os
from pathlib import Path

import pandas as pd
import requests
import time

# === CONFIG ===
# The token is read from the environment so it is never stored in the notebook.
API_TOKEN = os.environ.get("SPORTMONKS_API_KEY")
if not API_TOKEN:
    raise RuntimeError("Set the SPORTMONKS_API_KEY environment variable before running this cell.")
# Data directory is overridable via SPORTMONKS_DATA_DIR; default is the original location.
DATA_DIR = Path(os.environ.get("SPORTMONKS_DATA_DIR", '/Users/sebastianvinther/Desktop/Sportsmonks'))
FIXTURE_FILE = str(DATA_DIR / 'valid_fixtures_2010_onward.csv')
OUTPUT_FILE = str(DATA_DIR / 'fixture_statistics.csv')
BATCH_SIZE = 100  # adjust if needed

# === LOAD FIXTURE IDS ===
fixture_df = pd.read_csv(FIXTURE_FILE)
fixture_ids = fixture_df['id'].tolist()

all_stats = []

# === LOOP THROUGH FIXTURES ===
# One request per fixture; each statistic entry becomes one flat output row.
for idx, fixture_id in enumerate(fixture_ids, 1):
    url = f'https://api.sportmonks.com/v3/football/fixtures/{fixture_id}'
    params = {
        'api_token': API_TOKEN,
        'include': 'statistics'
    }

    try:
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            # `or {}` / `or []` also cover keys that are present but null, which
            # .get(key, default) does not.
            payload = response.json().get('data') or {}
            for stat in payload.get('statistics') or []:
                all_stats.append({
                    'fixture_id': fixture_id,
                    'type_id': stat.get('type_id'),
                    'participant_id': stat.get('participant_id'),
                    'value': (stat.get('data') or {}).get('value'),
                    'location': stat.get('location')
                })
        else:
            print(f"[{fixture_id}] Error: {response.status_code}")
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an undecodable JSON body on requests versions where
        # that error is not a RequestException subclass.
        print(f"[{fixture_id}] Request failed: {e}")

    # === SAVE IN BATCHES ===
    # Each checkpoint rewrites the whole file with everything collected so far.
    if idx % BATCH_SIZE == 0:
        pd.DataFrame(all_stats).to_csv(OUTPUT_FILE, index=False)
        print(f"[{idx}] Auto-saved {len(all_stats)} rows...")

    time.sleep(0.3)  # avoid hitting rate limits

# === FINAL SAVE ===
pd.DataFrame(all_stats).to_csv(OUTPUT_FILE, index=False)
print(f"✅ Done! Total stats saved: {len(all_stats)} to {OUTPUT_FILE}")


In [None]:
import os
from pathlib import Path

import pandas as pd
import requests
import time
from tqdm import tqdm

# === CONFIG ===
# The token is read from the environment so it is never stored in the notebook.
API_TOKEN = os.environ.get("SPORTMONKS_API_KEY")
if not API_TOKEN:
    raise RuntimeError("Set the SPORTMONKS_API_KEY environment variable before running this cell.")
# Data directory is overridable via SPORTMONKS_DATA_DIR; default is the original location.
DATA_DIR = Path(os.environ.get("SPORTMONKS_DATA_DIR", '/Users/sebastianvinther/Desktop/Sportsmonks'))
FIXTURE_FILE = str(DATA_DIR / 'valid_fixtures_2010_onward.csv')
OUTPUT_FILE = str(DATA_DIR / 'fixture_statistics.csv')
BATCH_SIZE = 100  # adjust if needed

# === LOAD FIXTURE IDS ===
fixture_df = pd.read_csv(FIXTURE_FILE)
fixture_ids = fixture_df['id'].tolist()

all_stats = []

# === LOOP WITH PROGRESS BAR ===
# NOTE: this variant does not pause between requests.
print(f"Starting download for {len(fixture_ids)} fixtures...")
for idx, fixture_id in enumerate(tqdm(fixture_ids, desc="Fetching stats", unit="fixture"), 1):
    url = f'https://api.sportmonks.com/v3/football/fixtures/{fixture_id}'
    params = {
        'api_token': API_TOKEN,
        'include': 'statistics'
    }

    try:
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            # `or {}` / `or []` also cover keys that are present but null, which
            # .get(key, default) does not.
            payload = response.json().get('data') or {}
            for stat in payload.get('statistics') or []:
                all_stats.append({
                    'fixture_id': fixture_id,
                    'type_id': stat.get('type_id'),
                    'participant_id': stat.get('participant_id'),
                    'value': (stat.get('data') or {}).get('value'),
                    'location': stat.get('location')
                })
        else:
            print(f"[{fixture_id}] Error: {response.status_code}")
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an undecodable JSON body on requests versions where
        # that error is not a RequestException subclass.
        print(f"[{fixture_id}] Request failed: {e}")

    # === SAVE IN BATCHES ===
    # Each checkpoint rewrites the whole file with everything collected so far.
    if idx % BATCH_SIZE == 0:
        pd.DataFrame(all_stats).to_csv(OUTPUT_FILE, index=False)
        print(f"[{idx}] Auto-saved {len(all_stats)} rows...")

# === FINAL SAVE ===
pd.DataFrame(all_stats).to_csv(OUTPUT_FILE, index=False)
print(f"✅ Done! Total stats saved: {len(all_stats)} to {OUTPUT_FILE}")

In [None]:
import os
from pathlib import Path

import pandas as pd
import requests
import time
from tqdm import tqdm

# === CONFIG ===
# The token is read from the environment so it is never stored in the notebook.
API_TOKEN = os.environ.get("SPORTMONKS_API_KEY")
if not API_TOKEN:
    raise RuntimeError("Set the SPORTMONKS_API_KEY environment variable before running this cell.")
# Data directory is overridable via SPORTMONKS_DATA_DIR; default is the original location.
DATA_DIR = Path(os.environ.get("SPORTMONKS_DATA_DIR", '/Users/sebastianvinther/Desktop/Sportsmonks'))
FIXTURE_FILE = str(DATA_DIR / 'valid_fixtures_2010_onward.csv')
OUTPUT_FILE = str(DATA_DIR / 'fixture_statistics.csv')
BATCH_SIZE = 100  # Auto-save every 100 fixtures

# === LOAD FIXTURE IDS ===
fixture_df = pd.read_csv(FIXTURE_FILE)
fixture_ids = fixture_df['id'].tolist()

all_stats = []

# === LOOP WITH PROGRESS BAR ===
# The context manager closes the bar even if the loop is interrupted.
with tqdm(total=len(fixture_ids), desc="Fetching Stats", unit="fixture") as pbar:
    for idx, fixture_id in enumerate(fixture_ids, 1):
        url = f'https://api.sportmonks.com/v3/football/fixtures/{fixture_id}'
        params = {
            'api_token': API_TOKEN,
            'include': 'statistics'
        }

        try:
            response = requests.get(url, params=params, timeout=10)
            if response.status_code == 200:
                # `or {}` / `or []` also cover keys that are present but null.
                payload = response.json().get('data') or {}

                for stat in payload.get('statistics') or []:
                    # Flatten one level: nested dict values become
                    # "<key>_<sub_key>" columns, everything else is copied as-is.
                    flat_stat = {
                        'fixture_id': fixture_id
                    }
                    for key, value in stat.items():
                        if isinstance(value, dict):
                            for sub_key, sub_val in value.items():
                                flat_stat[f"{key}_{sub_key}"] = sub_val
                        else:
                            flat_stat[key] = value
                    all_stats.append(flat_stat)

            else:
                print(f"[{fixture_id}] Error: {response.status_code}")

        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body on requests versions
            # where that error is not a RequestException subclass.
            print(f"[{fixture_id}] Request failed: {e}")

        # Auto-save in batches (rewrites the whole file each time)
        if idx % BATCH_SIZE == 0:
            pd.DataFrame(all_stats).to_csv(OUTPUT_FILE, index=False)
            print(f"[{idx}] Auto-saved {len(all_stats)} rows...")

        # Update progress bar
        pbar.update(1)

# Final save
pd.DataFrame(all_stats).to_csv(OUTPUT_FILE, index=False)
print(f"✅ Done! Total stats saved: {len(all_stats)} to {OUTPUT_FILE}")


In [None]:
import pandas as pd

# Read the downloaded per-fixture statistics.
df = pd.read_csv(filepath_or_buffer="/Users/sebastianvinther/Desktop/Sportsmonks/fixture_statistics.csv")

# Frequency of every stat type_id, most common first.
print("📊 Stat types (type_id counts):")
print(df.loc[:, 'type_id'].value_counts())

# How many distinct type_ids exist overall.
print(f"\n🔢 Total unique stat types: {df['type_id'].nunique()}")

# Same frequency table restricted to the last 10 distinct fixture ids in file
# order (the file's row order, not necessarily the fixtures' dates).
latest_fixture_ids = pd.unique(df['fixture_id'])[-10:]
print("\n🆕 Stat types from most recent 10 fixtures:")
print(df.loc[df['fixture_id'].isin(latest_fixture_ids), 'type_id'].value_counts())


In [None]:
import pandas as pd

# Read the downloaded per-fixture statistics.
df = pd.read_csv(filepath_or_buffer='/Users/sebastianvinther/Desktop/Sportsmonks/fixture_statistics.csv')

# Per-type_id row counts ordered by type_id, plus the number of distinct ids.
all_type_ids = df['type_id'].value_counts().sort_index()
unique_type_ids = df['type_id'].nunique()

# One print call; the newline separator reproduces the three separate prints.
print(
    f"🔢 Total unique type_ids: {unique_type_ids}\n",
    "📊 type_id frequencies:\n",
    all_type_ids,
    sep="\n",
)
