In [1]:
import os 
import json
import glob 
import pandas as pd
from datetime import datetime, timezone, date
import requests
from IPython.display import clear_output
import random 
import time 
import numpy as np

In [104]:
def determine_winner(match_json):
    """
    Parses the match JSON to determine the winner's ID.

    The winner is the competitor with the higher number in the
    'overallScores' string (formatted "<A>-<B>"); the value returned is
    that competitor's 'competitiorId' (key spelling as used by the data).

    Args:
        match_json: dict-like match record exposing 'overallScores' and
            'competitiors'.

    Returns:
        The winning competitor's 'competitiorId', or None if it can't be
        determined: missing or malformed score, fewer than two competitors,
        a tied score (e.g. "0-0"), or an input that is not dict-like.
    """
    try:
        score_str = match_json.get('overallScores')
        if not score_str or '-' not in score_str:
            return None

        # Raises ValueError unless the string splits into exactly two integers.
        player_a_score, player_b_score = map(int, score_str.split('-'))

        competitors = match_json.get('competitiors')
        if not (competitors and isinstance(competitors, list) and len(competitors) >= 2):
            return None

        # A level score has no winner; it must not fall through to player B.
        if player_a_score == player_b_score:
            return None

        winner = competitors[0] if player_a_score > player_b_score else competitors[1]
        return winner.get('competitiorId')
    except (ValueError, IndexError, TypeError, AttributeError):
        # AttributeError covers a non-dict match record or competitor entry.
        return None

def calculate_total_points(scores_str):
    """
    Sums the points in a comma-separated score string.

    Args:
        scores_str: string such as "11,9,11". Whitespace around each entry
            is ignored; entries that are not plain digits (empty strings,
            signed numbers, text) are skipped.

    Returns:
        The integer total (0 when there are no numeric entries), or None
        when scores_str is not a string or an entry cannot be converted.
    """
    if not isinstance(scores_str, str):
        return None
    try:
        return sum(
            int(entry)
            for entry in (part.strip() for part in scores_str.split(','))
            if entry.isdigit()
        )
    except ValueError:
        # str.isdigit() accepts some characters that int() rejects (e.g. '²').
        return None

In [108]:
# Collect the raw match JSON files (searched recursively below Match_details).
match_files_path = "Data/Raw/Match_details/**/*.json"
# glob returns paths in arbitrary order; sort so the selected sample is
# the same on every run.
all_match_files = sorted(glob.glob(match_files_path, recursive=True))

# Development limit on how many files are processed; set to None for a full run.
MAX_MATCH_FILES = 5
all_match_files = all_match_files[:MAX_MATCH_FILES]

total_files = len(all_match_files)
print(f"{total_files} match files found")

# Event exports are matched by a leading digit followed by "_events.csv".
events_dir = "Data/Processed/Events"
pattern = os.path.join(events_dir, "[0-9]*_events.csv")
dated_files = glob.glob(pattern)

# Fail with a clear error instead of continuing with an undefined file name.
if not dated_files:
    raise FileNotFoundError(
        f"No date-stamped event files found matching '{pattern}'."
    )

# max() takes the lexicographically greatest path; that is the newest export
# provided the prefix is a zero-padded date (e.g. YYYYMMDD) — TODO confirm
# all exports follow that naming.
latest_events_file = max(dated_files)
print(f"Found latest events file: {latest_events_file}")

# EventId -> EventName lookup used when building the match records.
events_df = pd.read_csv(latest_events_file)
event_name_map = pd.Series(events_df.EventName.values, index=events_df.EventId).to_dict()



5 match files found
Found latest events file: Data/Processed/Events/20251007_events.csv


In [109]:
# Build one flat record per match file, then export the combined list as
# JSON and CSV. Uses all_match_files, total_files, event_name_map,
# determine_winner and calculate_total_points defined earlier in the notebook.
print(f"--- 🟢 Commencing processing for {total_files} match files 🟢 ---")

all_matches_list = []
skipped_files = []  # files that do not contain two competitors
failed_files = []   # (path, error) pairs, reported once the loop is done

for number, file_path in enumerate(all_match_files):
    # Redraw the status lines in place rather than appending one per file.
    clear_output(wait=True)
    print(f"--- 🟢 Commencing processing for {total_files} match files 🟢 ---")
    print(f"Processing file {number + 1}/{total_files}: {file_path}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            match_data = json.load(f)

        competitors = match_data.get('competitiors')
        if not (competitors and isinstance(competitors, list) and len(competitors) >= 2):
            skipped_files.append(file_path)
            continue

        player_a_score, player_b_score = (None, None)
        overall_scores = match_data.get('overallScores')
        if isinstance(overall_scores, str) and '-' in overall_scores:
            try:
                player_a_score, player_b_score = map(int, overall_scores.split('-'))
            except ValueError:
                # Malformed score: keep the match and leave the games-won
                # columns empty, mirroring determine_winner() returning None.
                pass

        winner_id = determine_winner(match_data)
        # `or [{}]` also covers an explicit null or an empty players list,
        # which the .get() default alone does not.
        player_a_id = (competitors[0].get('players') or [{}])[0].get('playerId')
        player_b_id = (competitors[1].get('players') or [{}])[0].get('playerId')
        player_a_name = competitors[0].get('competitiorName')
        player_b_name = competitors[1].get('competitiorName')

        # determine_winner() returns a 'competitiorId', so match it against
        # the competitors' 'competitiorId' values. Comparing it with the
        # 'playerId' read above labels player B as the winner whenever the
        # two ID fields differ.
        winner_name = None
        if winner_id is not None:
            if winner_id == competitors[0].get('competitiorId'):
                winner_name = player_a_name
            elif winner_id == competitors[1].get('competitiorId'):
                winner_name = player_b_name

        event_id = match_data.get('eventId')

        match_dict = {
            'MatchID': match_data.get('documentCode'),
            'EventID': event_id,
            'EventName': event_name_map.get(int(event_id)) if event_id else None,
            # `or {}` guards against the key being present with a null value.
            'MatchDate': (match_data.get('matchDateTime') or {}).get('startDateLocal'),
            'BestOf': (match_data.get('matchConfig') or {}).get('bestOfXGames'),
            'PlayerA_ID': player_a_id,
            'PlayerA_Name': player_a_name,
            'PlayerA_Country': competitors[0].get('competitiorOrg'),
            'PlayerB_ID': player_b_id,
            'PlayerB_Name': player_b_name,
            'PlayerB_Country': competitors[1].get('competitiorOrg'),
            'PlayerA_GamesWon': player_a_score,
            'PlayerB_GamesWon': player_b_score,
            'PlayerA_TotalPoints': calculate_total_points(competitors[0].get('scores')),
            'PlayerB_TotalPoints': calculate_total_points(competitors[1].get('scores')),
            'WinnerID': winner_id,
            'WinnerName': winner_name,
            'GameScores': match_data.get('gameScores')
        }

        all_matches_list.append(match_dict)

    except Exception as e:
        # Anything printed here would be wiped by the next clear_output(),
        # so remember the failure and report it after the loop.
        failed_files.append((file_path, e))

clear_output(wait=True)
print(f"✅ Finished! Processed {total_files} files.")

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) without two competitors.")
if failed_files:
    print(f"Failed to process {len(failed_files)} file(s):")
    for failed_path, error in failed_files:
        print(f"  -> Error processing {failed_path}: {error}")

if all_matches_list:
    # Output names carry today's date (local time) as YYYYMMDD.
    today_str = datetime.now().strftime('%Y%m%d')
    output_path_json = f"Data/Processed/master_match_list_{today_str}.json"
    output_path_csv = f"Data/Processed/master_match_list_{today_str}.csv"

    with open(output_path_json, 'w', encoding='utf-8') as f:
        json.dump(all_matches_list, f, indent=2)
    print(f"Successfully saved {len(all_matches_list)} matches to '{output_path_json}'")

    master_df = pd.DataFrame(all_matches_list)
    master_df.to_csv(output_path_csv, index=False)
    print(f"Also saved a copy as '{output_path_csv}'")

✅ Finished! Processed 5 files.
Successfully saved 5 matches to 'Data/Processed/master_match_list_20251008.json'
Also saved a copy as 'Data/Processed/master_match_list_20251008.csv'
