In [2]:
import pandas as pd

# Path to your full fixture list
FIXTURE_FILE = '/Users/sebastianvinther/Desktop/Sportsmonks/fixtures_full.csv'

# Load fixture data
fixtures_df = pd.read_csv(FIXTURE_FILE, low_memory=False)
fixture_ids = fixtures_df['id'].dropna().astype(int).tolist()

print(f"Loaded {len(fixture_ids)} fixture IDs.")

Loaded 155541 fixture IDs.


In [None]:
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm


# === CONFIG ===
API_TOKEN = "oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd"
FIXTURE_FILE = "/Users/sebastianvinther/Desktop/Sportsmonks/fixtures_full.csv"
OUTPUT_FILE = "/Users/sebastianvinther/Desktop/Sportsmonks/fixture_statistics_parallel.csv"
NUM_THREADS = 2  # ⬅️ REDUCED to avoid overload

# === LOAD FIXTURE IDS ===
fixture_ids = pd.read_csv(FIXTURE_FILE)['id'].dropna().astype(int).tolist()

# === SHARED SESSION ===
session = requests.Session()
adapter = requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100)
session.mount("https://", adapter)

# === FUNCTION TO FETCH STATS ===
def fetch_fixture_stats(fixture_id):
    url = f"https://api.sportmonks.com/v3/football/fixtures/{fixture_id}"
    params = {"api_token": API_TOKEN, "include": "statistics"}
    try:
        response = session.get(url, params=params, timeout=10)
        if response.status_code != 200:
            return []
        stats = response.json().get("data", {}).get("statistics", [])
        flat_stats = []
        for stat in stats:
            flat_stat = {"fixture_id": fixture_id}
            for k, v in stat.items():
                if isinstance(v, dict):
                    for sub_k, sub_v in v.items():
                        flat_stat[f"{k}_{sub_k}"] = sub_v
                else:
                    flat_stat[k] = v
            flat_stats.append(flat_stat)
        return flat_stats
    except Exception as e:
        print(f"[{fixture_id}] Error: {e}")
        return []

# === PARALLEL EXECUTION ===
all_stats = []
with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
    futures = {executor.submit(fetch_fixture_stats, fid): fid for fid in fixture_ids}
    for future in tqdm(as_completed(futures), total=len(futures), desc="Fetching stats"):
        result = future.result()
        if result:
            all_stats.extend(result)

# === SAVE TO FILE ===
pd.DataFrame(all_stats).to_csv(OUTPUT_FILE, index=False)
print(f"✅ Done! Saved {len(all_stats)} rows to {OUTPUT_FILE}")

In [3]:
import pandas as pd
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import os

# === CONFIG ===
API_TOKEN = "oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd"
FIXTURE_FILE = "/Users/sebastianvinther/Desktop/Sportsmonks/fixtures_full.csv"
OUTPUT_FILE = "/Users/sebastianvinther/Desktop/Sportsmonks/fixture_statistics_parallel.csv"
NUM_THREADS = 1
SAVE_EVERY = 100  # Auto-save every 100 fixtures

# === LOAD FIXTURE IDS ===
fixture_ids = pd.read_csv(FIXTURE_FILE)['id'].dropna().astype(int).tolist()

# === SHARED SESSION ===
session = requests.Session()
adapter = requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100)
session.mount("https://", adapter)

# === FUNCTION TO FETCH STATS ===
def fetch_fixture_stats(fixture_id):
    url = f"https://api.sportmonks.com/v3/football/fixtures/{fixture_id}"
    params = {"api_token": API_TOKEN, "include": "statistics"}
    try:
        response = session.get(url, params=params, timeout=10)
        if response.status_code != 200:
            return []
        stats = response.json().get("data", {}).get("statistics", [])
        flat_stats = []
        for stat in stats:
            flat_stat = {"fixture_id": fixture_id}
            for k, v in stat.items():
                if isinstance(v, dict):
                    for sub_k, sub_v in v.items():
                        flat_stat[f"{k}_{sub_k}"] = sub_v
                else:
                    flat_stat[k] = v
            flat_stats.append(flat_stat)
        return flat_stats
    except Exception as e:
        print(f"[{fixture_id}] Error: {e}")
        return []

# === PARALLEL EXECUTION WITH PERIODIC SAVING ===
all_stats = []
completed = 0

with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
    futures = {executor.submit(fetch_fixture_stats, fid): fid for fid in fixture_ids}
    for future in tqdm(as_completed(futures), total=len(futures), desc="Fetching stats"):
        result = future.result()
        if result:
            all_stats.extend(result)
        completed += 1

        # Save batch
        if completed % SAVE_EVERY == 0:
            df = pd.DataFrame(all_stats)
            header = not os.path.exists(OUTPUT_FILE)
            df.to_csv(OUTPUT_FILE, mode='a', header=header, index=False)
            print(f"🔄 Autosaved {len(all_stats)} rows after {completed} fixtures.")
            all_stats = []

# Final save
if all_stats:
    df = pd.DataFrame(all_stats)
    header = not os.path.exists(OUTPUT_FILE)
    df.to_csv(OUTPUT_FILE, mode='a', header=header, index=False)
    print(f"✅ Final save: {len(all_stats)} remaining rows.")

print(f"🎯 Done! All fixture stats saved to {OUTPUT_FILE}")


  fixture_ids = pd.read_csv(FIXTURE_FILE)['id'].dropna().astype(int).tolist()
Fetching stats:   0%|          | 49/155541 [00:03<3:30:02, 12.34it/s]


KeyboardInterrupt: 

In [5]:
import pandas as pd
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import os

# === CONFIG ===
API_TOKEN = "oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd"
FIXTURE_FILE = "/Users/sebastianvinther/Desktop/Sportsmonks/fixtures_full.csv"
OUTPUT_FILE = "/Users/sebastianvinther/Desktop/Sportsmonks/fixture_statistics_parallel.csv"
NUM_THREADS = 1
SAVE_EVERY = 100  # Save every N fetched fixtures

# === LOAD FIXTURE IDS ===
all_fixture_ids = pd.read_csv(FIXTURE_FILE)['id'].dropna().astype(int).tolist()

# === LOAD PROGRESS IF EXISTS ===
if os.path.exists(OUTPUT_FILE):
    done_ids = pd.read_csv(OUTPUT_FILE)['fixture_id'].dropna().unique().tolist()
    remaining_ids = [fid for fid in all_fixture_ids if fid not in done_ids]
    print(f"🔁 Resuming from progress: {len(done_ids)} done, {len(remaining_ids)} remaining.")
else:
    done_ids = []
    remaining_ids = all_fixture_ids
    print(f"🆕 Starting fresh with {len(remaining_ids)} fixture IDs.")

# === SHARED SESSION ===
session = requests.Session()
adapter = requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100)
session.mount("https://", adapter)

# === FUNCTION TO FETCH STATS ===
def fetch_fixture_stats(fixture_id):
    url = f"https://api.sportmonks.com/v3/football/fixtures/{fixture_id}"
    params = {"api_token": API_TOKEN, "include": "statistics"}
    try:
        response = session.get(url, params=params, timeout=10)
        if response.status_code != 200:
            return []
        stats = response.json().get("data", {}).get("statistics", [])
        flat_stats = []
        for stat in stats:
            flat_stat = {"fixture_id": fixture_id}
            for k, v in stat.items():
                if isinstance(v, dict):
                    for sub_k, sub_v in v.items():
                        flat_stat[f"{k}_{sub_k}"] = sub_v
                else:
                    flat_stat[k] = v
            flat_stats.append(flat_stat)
        return flat_stats
    except Exception as e:
        print(f"[{fixture_id}] Error: {e}")
        return []

# === PARALLEL EXECUTION WITH PERIODIC SAVING ===
all_stats = []
completed = 0

with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
    futures = {executor.submit(fetch_fixture_stats, fid): fid for fid in remaining_ids}
    for future in tqdm(as_completed(futures), total=len(futures), desc="Fetching stats"):
        result = future.result()
        if result:
            all_stats.extend(result)
        completed += 1

        if completed % SAVE_EVERY == 0:
            if all_stats:
                df = pd.DataFrame(all_stats)
                header = not os.path.exists(OUTPUT_FILE)
                df.to_csv(OUTPUT_FILE, mode='a', header=header, index=False)
                print(f"🔄 Autosaved {len(all_stats)} rows after {completed} new fixtures.")
                all_stats = []

# Final save
if all_stats:
    df = pd.DataFrame(all_stats)
    header = not os.path.exists(OUTPUT_FILE)
    df.to_csv(OUTPUT_FILE, mode='a', header=header, index=False)
    print(f"✅ Final save: {len(all_stats)} rows.")

print(f"🎯 Done! Stats saved to {OUTPUT_FILE}")

  all_fixture_ids = pd.read_csv(FIXTURE_FILE)['id'].dropna().astype(int).tolist()


🔁 Resuming from progress: 9456 done, 146085 remaining.


Fetching stats:  10%|█         | 14967/146085 [21:35<3:09:07, 11.56it/s]


KeyboardInterrupt: 

In [6]:
response = requests.get("https://api.sportmonks.com/v3/football/fixtures/1", params={
    "api_token": API_TOKEN,
    "include": "statistics"
})

# Show usage
print("Limit:", response.headers.get("X-RateLimit-Limit"))
print("Remaining:", response.headers.get("X-RateLimit-Remaining"))
print("Reset:", response.headers.get("X-RateLimit-Reset"))


Limit: None
Remaining: None
Reset: None


In [4]:
import pandas as pd

# Load stats file
stats_df = pd.read_csv("/Users/sebastianvinther/Desktop/Sportsmonks/fixture_statistics_parallel.csv")

# Show basic info
print(f"✅ Loaded {len(stats_df)} rows")
print("🔢 Columns:", stats_df.columns.tolist())

# Count unique type_ids
type_counts = stats_df['type_id'].value_counts().sort_index()

# Show summary of type_id usage
print("\n📊 Stat type_id counts:")
print(type_counts)

# Optional: show sample rows per type_id
print("\n📌 Example stat rows:")
print(stats_df.groupby('type_id').head(1).sort_values('type_id'))


✅ Loaded 76582 rows
🔢 Columns: ['fixture_id', 'id', 'type_id', 'participant_id', 'data_value', 'location']

📊 Stat type_id counts:
type_id
34      18902
45      18640
52      15056
83       7834
84      14048
85        236
1605     1866
Name: count, dtype: int64

📌 Example stat rows:
       fixture_id        id  type_id  participant_id  data_value location
3             486  17278082       34              26         3.0     away
4             486  17278095       45              16        55.0     home
0             468  22501328       52              13         2.0     away
436           849  14959145       83              14         0.0     home
8             486  52391006       84              26         2.0     away
16192        6518  18617910       85              22         0.0     home
2             486  17278081     1605              16        54.0     home


In [2]:
import pandas as pd
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import os

# === CONFIG ===
API_TOKEN = "oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd"
FIXTURE_FILE = "/Users/sebastianvinther/Desktop/Sportsmonks/fixtures_full.csv"
OUTPUT_FILE = "/Users/sebastianvinther/Desktop/Sportsmonks/fixture_statistics_parallel.csv"
NUM_THREADS = 1
SAVE_EVERY = 100

# === LOAD FIXTURE IDS ===
fixture_ids = pd.read_csv(FIXTURE_FILE)['id'].dropna().astype(int).tolist()

# === RESUME LOGIC ===
if os.path.exists(OUTPUT_FILE):
    done_df = pd.read_csv(OUTPUT_FILE, usecols=["fixture_id"])
    done_ids = set(done_df['fixture_id'].unique())
else:
    done_ids = set()

pending_ids = [fid for fid in fixture_ids if fid not in done_ids]
print(f"Resuming from {len(pending_ids)} remaining fixtures (skipped {len(done_ids)})")

# === SHARED SESSION ===
session = requests.Session()
adapter = requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100)
session.mount("https://", adapter)

# === FUNCTION TO FETCH STATS ===
def fetch_fixture_stats(fixture_id):
    url = f"https://api.sportmonks.com/v3/football/fixtures/{fixture_id}"
    params = {"api_token": API_TOKEN, "include": "statistics"}
    try:
        response = session.get(url, params=params, timeout=10)
        if response.status_code != 200:
            return []
        stats = response.json().get("data", {}).get("statistics", [])
        flat_stats = []
        for stat in stats:
            flat_stat = {"fixture_id": fixture_id}
            for k, v in stat.items():
                if isinstance(v, dict):
                    for sub_k, sub_v in v.items():
                        flat_stat[f"{k}_{sub_k}"] = sub_v
                else:
                    flat_stat[k] = v
            flat_stats.append(flat_stat)
        return flat_stats
    except Exception as e:
        print(f"[{fixture_id}] Error: {e}")
        return []

# === PARALLEL EXECUTION WITH RESUME + AUTOSAVE ===
all_stats = []
completed = 0

with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
    futures = {executor.submit(fetch_fixture_stats, fid): fid for fid in pending_ids}
    with tqdm(total=len(pending_ids), desc="Fetching stats") as pbar:
        for future in as_completed(futures):
            result = future.result()
            if result:
                all_stats.extend(result)
            completed += 1
            pbar.update(1)

            if completed % SAVE_EVERY == 0:
                df = pd.DataFrame(all_stats)
                df.to_csv(OUTPUT_FILE, mode='a', header=not os.path.exists(OUTPUT_FILE), index=False)
                print(f"🔄 Autosaved {len(all_stats)} rows after {completed} new fixtures.")
                all_stats = []

# Final save
if all_stats:
    df = pd.DataFrame(all_stats)
    df.to_csv(OUTPUT_FILE, mode='a', header=not os.path.exists(OUTPUT_FILE), index=False)
    print(f"✅ Final save: {len(all_stats)} remaining rows.")

print(f"🎯 Done! All fixture stats saved to {OUTPUT_FILE}")

  fixture_ids = pd.read_csv(FIXTURE_FILE)['id'].dropna().astype(int).tolist()


Resuming from 146085 remaining fixtures (skipped 9456)


Fetching stats:   0%|          | 101/146085 [00:08<3:38:00, 11.16it/s]

🔄 Autosaved 0 rows after 100 new fixtures.


Fetching stats:   0%|          | 201/146085 [00:17<3:21:58, 12.04it/s]

🔄 Autosaved 0 rows after 200 new fixtures.


Fetching stats:   0%|          | 301/146085 [00:25<3:09:00, 12.86it/s]

🔄 Autosaved 0 rows after 300 new fixtures.


Fetching stats:   0%|          | 401/146085 [00:33<3:26:05, 11.78it/s]

🔄 Autosaved 0 rows after 400 new fixtures.


Fetching stats:   0%|          | 501/146085 [00:41<3:21:52, 12.02it/s]

🔄 Autosaved 0 rows after 500 new fixtures.


Fetching stats:   0%|          | 601/146085 [00:50<3:27:51, 11.67it/s]

🔄 Autosaved 0 rows after 600 new fixtures.


Fetching stats:   0%|          | 701/146085 [00:58<3:21:17, 12.04it/s]

🔄 Autosaved 0 rows after 700 new fixtures.


Fetching stats:   1%|          | 801/146085 [01:06<3:25:38, 11.77it/s]

🔄 Autosaved 0 rows after 800 new fixtures.


Fetching stats:   1%|          | 901/146085 [01:15<3:19:36, 12.12it/s]

🔄 Autosaved 0 rows after 900 new fixtures.


Fetching stats:   1%|          | 1001/146085 [01:23<3:22:51, 11.92it/s]

🔄 Autosaved 0 rows after 1000 new fixtures.


Fetching stats:   1%|          | 1101/146085 [01:32<3:27:04, 11.67it/s]

🔄 Autosaved 0 rows after 1100 new fixtures.


Fetching stats:   1%|          | 1201/146085 [01:41<3:24:59, 11.78it/s]

🔄 Autosaved 0 rows after 1200 new fixtures.


Fetching stats:   1%|          | 1301/146085 [01:49<3:21:12, 11.99it/s]

🔄 Autosaved 0 rows after 1300 new fixtures.


Fetching stats:   1%|          | 1401/146085 [01:58<3:32:11, 11.36it/s]

🔄 Autosaved 0 rows after 1400 new fixtures.


Fetching stats:   1%|          | 1501/146085 [02:07<3:39:13, 10.99it/s]

🔄 Autosaved 0 rows after 1500 new fixtures.


Fetching stats:   1%|          | 1601/146085 [02:16<3:50:12, 10.46it/s]

🔄 Autosaved 0 rows after 1600 new fixtures.


Fetching stats:   1%|          | 1701/146085 [02:24<3:27:13, 11.61it/s]

🔄 Autosaved 0 rows after 1700 new fixtures.


Fetching stats:   1%|          | 1801/146085 [02:33<3:27:05, 11.61it/s]

🔄 Autosaved 0 rows after 1800 new fixtures.


Fetching stats:   1%|▏         | 1901/146085 [02:42<3:26:36, 11.63it/s]

🔄 Autosaved 0 rows after 1900 new fixtures.


Fetching stats:   1%|▏         | 2001/146085 [02:50<3:19:35, 12.03it/s]

🔄 Autosaved 0 rows after 2000 new fixtures.


Fetching stats:   1%|▏         | 2101/146085 [02:59<3:21:05, 11.93it/s]

🔄 Autosaved 0 rows after 2100 new fixtures.


Fetching stats:   2%|▏         | 2201/146085 [03:07<3:16:57, 12.18it/s]

🔄 Autosaved 0 rows after 2200 new fixtures.


Fetching stats:   2%|▏         | 2301/146085 [03:15<3:13:13, 12.40it/s]

🔄 Autosaved 0 rows after 2300 new fixtures.


Fetching stats:   2%|▏         | 2401/146085 [03:24<3:15:21, 12.26it/s]

🔄 Autosaved 0 rows after 2400 new fixtures.


Fetching stats:   2%|▏         | 2501/146085 [03:32<3:14:35, 12.30it/s]

🔄 Autosaved 0 rows after 2500 new fixtures.


Fetching stats:   2%|▏         | 2601/146085 [03:40<3:15:44, 12.22it/s]

🔄 Autosaved 0 rows after 2600 new fixtures.


Fetching stats:   2%|▏         | 2701/146085 [03:49<3:15:37, 12.22it/s]

🔄 Autosaved 0 rows after 2700 new fixtures.


Fetching stats:   2%|▏         | 2793/146085 [03:56<3:22:20, 11.80it/s]


KeyboardInterrupt: 

In [None]:
all_stats = []

with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
    futures = {executor.submit(fetch_fixture_stats, fid): fid for fid in fixture_ids}
    for future in tqdm(as_completed(futures), total=len(futures), desc="Fetching stats"):
        result = future.result()
        if result:
            all_stats.extend(result)

# Save once all threads complete
pd.DataFrame(all_stats).to_csv(OUTPUT_FILE, index=False)
print(f"✅ Done! Saved {len(all_stats)} rows to {OUTPUT_FILE}")
