In [44]:
import pandas as pd
import requests
import json
import time
import os

In [45]:
# 1. Find all events and event IDs. An event is something like VCT: Masters - Tokyo 2023

# Location of the events JSON file read below.
eventsFilePath = "events/events.json"

# JSON text is UTF-8; naming the encoding keeps the read independent of the
# platform's default codec.
with open(eventsFilePath, "r", encoding="utf-8") as f:
    events_data = json.load(f)

# The event records live under pageProps -> events in the loaded document.
events = events_data["pageProps"]["events"]

print("Completed Event IDs and Names:")
print("====================")

# Keep the id of every event whose short name contains "VCT".
# `or ""` lets an event with a missing or null shortName be skipped instead of
# raising KeyError/TypeError and aborting the cell.
event_ids = [event["id"] for event in events if "VCT" in (event.get("shortName") or "")]

print(event_ids)
print(f"\nTotal completed events found: {len(event_ids)}")

def format_to_csv(df, cols, path):
    """Write tabular data to CSV with the given column labels and a 0..n-1 index.

    Args:
        df: Data passed to ``pd.DataFrame`` to build the table.
        cols: Column labels assigned to the resulting frame.
        path: Destination handed to ``DataFrame.to_csv``.

    The index is replaced by a fresh ``RangeIndex`` before writing, and
    ``to_csv`` is called with its defaults, so that index is written as well.
    """
    table = pd.DataFrame(df)
    table.columns = cols
    row_count = len(table.index)
    table.index = pd.RangeIndex(row_count)
    table.to_csv(path)


Completed Event IDs and Names:
[5574, 5461, 5469, 5465, 5474, 5351, 5232, 5228, 5230, 5233]

Total completed events found: 10


In [46]:
def _nearest_location(samples, kill_time, window_ms):
    """Return the sample closest in time to ``kill_time`` within ``window_ms``, else ``{}``."""
    if kill_time is None:
        # A kill without a timestamp cannot be matched to any position sample.
        return {}
    closest = {}
    closest_gap = None
    for sample in samples:
        gap = abs((sample.get("roundTimeMillis") or 0) - kill_time)
        # Strict "<" keeps the first sample when several are equally close.
        if gap <= window_ms and (closest_gap is None or gap < closest_gap):
            closest, closest_gap = sample, gap
    return closest


def _fetch_side_by_player_round(match_id, request_timeout):
    """Fetch a match's kill/death records and map "<playerId>_<roundNumber>" to ``side``."""
    kd_locations_url = f"https://be-prod.rib.gg/v1/x/players/kill-death-locations-by-match?matchId={match_id}"
    kd_data = requests.get(kd_locations_url, timeout=request_timeout).json()
    player_rounds_df = pd.DataFrame(kd_data)
    print(f"    Got {len(player_rounds_df)} player-round records")

    side_by_player_round = {}
    needed = ["playerId", "roundNumber", "side"]
    if not player_rounds_df.empty and all(col in player_rounds_df.columns for col in needed):
        for _, row in player_rounds_df[needed].dropna().iterrows():
            side_by_player_round[f"{row['playerId']}_{row['roundNumber']}"] = row["side"]
    return side_by_player_round


def _build_kill_rows(match_details, side_by_player_round, map_name, match_id, event_id, location_window_ms):
    """Turn one match-details payload into a list of flat per-kill dictionaries."""
    events = match_details.get("events") or []

    # Economy record per (round, player); a later record for the same pair wins.
    economy_data = {}
    for econ in match_details.get("economies") or []:
        round_id = econ.get("roundId")
        player_id = econ.get("playerId")
        if round_id and player_id:
            economy_data[f"{round_id}_{player_id}"] = econ

    # Index position samples and plant events once per match instead of
    # rescanning the full lists for every kill.
    locations_by_round_player = {}
    for loc in match_details.get("locations") or []:
        key = (loc.get("roundNumber"), loc.get("playerId"))
        locations_by_round_player.setdefault(key, []).append(loc)

    first_plant_by_round = {}
    for event in events:
        if event.get("eventType") == "plant":
            first_plant_by_round.setdefault(event.get("roundId"), event)

    match_kills = []
    for kill_event in events:
        if kill_event.get("eventType") != "kill":
            continue

        round_id = kill_event.get("roundId")
        round_number = kill_event.get("roundNumber")
        round_time = kill_event.get("roundTimeMillis")
        killer_id = kill_event.get("playerId")
        victim_id = kill_event.get("referencePlayerId")

        if not (round_id and killer_id and victim_id):
            continue

        killer_economy = economy_data.get(f"{round_id}_{killer_id}", {})
        victim_economy = economy_data.get(f"{round_id}_{victim_id}", {})

        killer_location = _nearest_location(
            locations_by_round_player.get((round_number, killer_id), []),
            round_time,
            location_window_ms,
        )
        if victim_id == killer_id:
            # Samples are attributed to the killer first, so a self-kill
            # leaves the victim position empty.
            victim_location = {}
        else:
            victim_location = _nearest_location(
                locations_by_round_player.get((round_number, victim_id), []),
                round_time,
                location_window_ms,
            )

        plant = first_plant_by_round.get(round_id)
        plant_time = plant.get("roundTimeMillis") if plant is not None else None
        plant_site = plant.get("bombsite") if plant is not None else None

        # .get: a killer/round pair missing from the side map (for example when
        # the kill-death request failed) yields side=None instead of a KeyError
        # that would discard every kill of the match.
        side = side_by_player_round.get(f"{killer_id}_{round_number}")

        match_kills.append({
            # Match/round info
            "map_name": map_name,
            "match_id": match_id,
            "round_id": round_id,
            "round_number": round_number,
            "time": round_time,
            "plant_time": plant_time,
            "plant_site": plant_site,

            # Kill info
            "kill_id": kill_event.get("killId"),
            "weapon_id": kill_event.get("weaponId"),
            "first_kill": kill_event.get("firstKill", False),
            "traded_by_kill_id": kill_event.get("tradedByKillId"),
            "traded_for_kill_id": kill_event.get("tradedForKillId"),

            # Killer info
            "killer_id": killer_id,
            "killer_agent_id": killer_economy.get("agentId"),
            "killer_weapon_id": killer_economy.get("weaponId"),
            "killer_armor_id": killer_economy.get("armorId"),
            "killer_loadout_value": killer_economy.get("loadoutValue"),
            "killer_x": killer_location.get("locationX"),
            "killer_y": killer_location.get("locationY"),
            "killer_view_radians": killer_location.get("viewRadians"),
            "side": side,

            # Victim info
            "victim_id": victim_id,
            "victim_agent_id": victim_economy.get("agentId"),
            "victim_weapon_id": victim_economy.get("weaponId"),
            "victim_armor_id": victim_economy.get("armorId"),
            "victim_loadout_value": victim_economy.get("loadoutValue"),
            "victim_x": victim_location.get("locationX"),
            "victim_y": victim_location.get("locationY"),
            "victim_view_radians": victim_location.get("viewRadians"),

            "event_id": event_id,
        })

    return match_kills


def fetch_consolidated_match_data(event_ids, request_timeout=30, pause_seconds=0.5, location_window_ms=2000):
    """Fetch and consolidate kill data with economy and location information.

    For every event id the completed series are requested, each match in them
    is fetched, and every "kill" event becomes one flat dictionary enriched
    with the killer's and victim's economy record, the nearest position sample
    in time, the first plant event of the round and the killer's side.

    Args:
        event_ids: Iterable of event ids to query.
        request_timeout: Seconds passed as ``timeout`` to every HTTP request.
        pause_seconds: Delay after each successfully processed match.
        location_window_ms: Maximum distance in ``roundTimeMillis`` between a
            kill and a position sample for the sample to be used.

    Returns:
        A list of dictionaries, one per kill, in processing order. Fields with
        no matching source record are ``None``.

    Failures are reported with ``print`` and do not abort the run: a failed
    series request skips that event, a failed kill-death request leaves
    ``side`` empty for that match, and any other per-match error skips the match.
    """
    all_consolidated_kills = []
    for event_id in event_ids:
        print(f"Processing event ID: {event_id}")

        url = f"https://be-prod.rib.gg/v1/series?completed=true&take=50&eventIds%5B%5D={event_id}"
        try:
            series_data = requests.get(url, timeout=request_timeout).json()
        except Exception as e:
            # Keep what has been collected so far instead of aborting the run.
            print(f"  Error fetching series for event {event_id}: {str(e)}")
            continue

        if series_data.get("data") is None:
            print(f"  No data found for event {event_id}")
            continue

        match_ids = []
        map_names = {}
        for series in series_data.get("data", []):
            for match in series.get("matches") or []:
                match_id = match.get("id")
                if not match_id:
                    continue
                match_ids.append(match_id)
                if match.get("map") is not None:
                    map_names[match_id] = match["map"].get("name", "Unknown")
                else:
                    map_names[match_id] = "Unknown"

        print(f"Found {len(match_ids)} matches for event {event_id}")

        for match_id in match_ids:
            try:
                print(f"  Processing match ID: {match_id} ({map_names[match_id]})")
                try:
                    player_round_side_map = _fetch_side_by_player_round(match_id, request_timeout)
                except Exception as e:
                    # Side data is optional; continue with an empty map.
                    print(f"    Error processing kill-death data: {str(e)}")
                    player_round_side_map = {}

                match_details_url = f"https://be-prod.rib.gg/v1/matches/{match_id}/details"
                match_details = requests.get(match_details_url, timeout=request_timeout).json()

                match_kills = _build_kill_rows(
                    match_details,
                    player_round_side_map,
                    map_names[match_id],
                    match_id,
                    event_id,
                    location_window_ms,
                )

                all_consolidated_kills.extend(match_kills)
                print(f"    Added {len(match_kills)} kill events from match")
                time.sleep(pause_seconds)  # Be nice to the API

            except Exception as e:
                print(f"  Error processing match {match_id}: {str(e)}")
                import traceback
                traceback.print_exc()
    return all_consolidated_kills

In [47]:
if __name__ == "__main__":
    # Driver cell: scrape kill data for `event_ids` (defined by the
    # event-listing cell above) and write one combined CSV plus one CSV per map.
    print(f"Processing {len(event_ids)} events")

    consolidated_kills = fetch_consolidated_match_data(event_ids)
    if consolidated_kills:
        df = pd.DataFrame(consolidated_kills)

        # Create the output folders first so the scrape is not lost to a
        # missing-directory error; this also creates the parent "datasets".
        per_map_dir = "datasets/consolidated_data"
        os.makedirs(per_map_dir, exist_ok=True)

        df.to_csv("datasets/all_kill_data_consolidated.csv", index=False)
        print(f"Saved {len(df)} consolidated kill records")

        # One file per map, named after the lower-cased map name.
        for map_name, map_group in df.groupby("map_name"):
            map_filename = f"{per_map_dir}/{map_name.lower()}_kill_data_consolidated.csv"
            map_group.to_csv(map_filename, index=False)
            print(f"Saved {len(map_group)} kill records for {map_name}")
    else:
        print("No data collected")

Processing 10 events
Processing event ID: 5574
Found 0 matches for event 5574
Processing event ID: 5461
Found 114 matches for event 5461
  Processing match ID: 203422 (Icebox)
    Got 315 player-round records
    Added 159 kill events from match
  Processing match ID: 203423 (Split)
    Got 246 player-round records
    Added 124 kill events from match
  Processing match ID: 203424 (Ascent)
    Got 300 player-round records
    Added 150 kill events from match
  Processing match ID: 203425 (Haven)
    Got 67 player-round records
    Added 34 kill events from match
  Processing match ID: 203426 (Split)
    Got 190 player-round records
    Added 95 kill events from match
  Processing match ID: 203427 (Ascent)
    Got 315 player-round records
    Added 158 kill events from match
  Processing match ID: 202232 (Split)
    Got 297 player-round records
    Added 149 kill events from match
  Processing match ID: 202233 (Icebox)
    Got 363 player-round records
    Added 183 kill events from matc