In [2]:
import requests
import json
import os
import sys
import pandas as pd
import logging
from urllib.parse import urlencode
import time
from datetime import timedelta, datetime, timezone
from data.fetch_data import bulk_fetch_matches
from typing import Optional, Dict, Any, Iterable, List

logging.basicConfig(level=logging.DEBUG)
logging = logging.getLogger(__name__)



In [3]:
def unix_utc_start(date_str: str) -> int:
    # YYYY-MM-DD at 00:00:00 UTC
    dt = datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)
    dt = dt.replace(hour=0, minute=0, second=0)
    return int(dt.timestamp())

def unix_utc_eod(date_str: str) -> int:
    # YYYY-MM-DD at 23:59:59 UTC (inclusive)
    dt = datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)
    dt = dt.replace(hour=23, minute=59, second=59)
    return int(dt.timestamp())

In [4]:
start_date = "2025-08-19"
end_date = "2025-08-21"
folder_name = f"v2_data//pred_data//test_pred_v2_{start_date}_{end_date}"

In [5]:
def fetch_match_data(
    min_average_badge: int = 100,
    fetch_till_date: int | None = None,
    fetch_from_date: int | None = None,
    m_id: str | None = None,
    include_player_info: bool = True,
    limit: int = 1000
    ) -> json:
    """Fetches match data from the Deadlock API.
    
    Key Parameters:
    - min_average_badge: Minimum average rank to return matches.
    - fetch_till_date: Newest time to filter matches. i.e. matches before yesterday
    - fetch_from_date: Oldest time to filter matches. i.e. matches 3 months ago -> max time
    - m_id: Specific match ID to fetch metadata for.
    - include_player_info: Whether to include player information in the response. this is required for match_player data
    - limit: Maximum number of matches to return.
    Returns:
    - JSON response containing match metadata with 12 players per match.
    """

    logging.debug(f"Fetching match data..")
    base = "https://api.deadlock-api.com/v1/matches"

    # if a specific match ID is given, check player_data and hit that endpoint
    if m_id:
        path = f"{base}/{m_id}/metadata"
        params = {}
        if include_player_info:
            params["include_player_info"] = "true"

        query = urlencode(params)
        full_url = f"{path}?{query}" if query else path
        response = requests.get(full_url)
        if response.status_code != 200:
            print(f"Error: API request failed with status code {response.status_code}")
            print(f"URL: {full_url}")
            return {"error": f"API request failed with status code {response.status_code}"}
        return response.json()

    # Bulk-metadata endpoint
    path = f"{base}/metadata"
    params: dict[str, str] = {}

    if include_player_info:
        params["include_player_info"] = "true"
    
    print(f"Time range: {fetch_from_date} to {fetch_till_date}")

    fetch_from_date = unix_utc_start(fetch_from_date) if fetch_from_date else None
    fetch_till_date = unix_utc_eod(fetch_till_date) if fetch_till_date else None

    if fetch_from_date is not None:
        params["min_unix_timestamp"] = (fetch_from_date)
    if fetch_till_date is not None:
        params["max_unix_timestamp"] = (fetch_till_date)
    if min_average_badge is not None:
        params["min_average_badge"] = str(min_average_badge)
    if limit is not None:
        params["limit"] = str(limit)

    query = urlencode(params)
    full_url = f"{path}?{query}" if query else path
    
    response = requests.get(full_url)
    if response.status_code != 200:
        print(f"Error: API request failed with status code {response.status_code}")
        print(f"URL: {full_url}")
        return {"error": f"API request failed with status code {response.status_code}"}
    return response.json()

def bulk_fetch_matches(start_date, end_date, limit=1000)->list:
    """fetches a batch of matches, 1 day per pull, list of jsons, 1 element per batch.

    batch return is unnormalized, 'players' contains a df of each matches 'players'
    
    min_days = Oldest time barrier (more days ago)
    max_days = Newest time barrier (fewer days ago)
    
    """

    batch_matches = []
    
    # Calculate the starting day (defaults to today)
    current_start = datetime.strptime(start_date, "%Y-%m-%d")
    current_end = datetime.strptime(end_date, "%Y-%m-%d")

    total_batches = (current_end - current_start).days + 1
    batch_num = 1

    while current_start <= current_end:
        fetch_from = current_start.strftime("%Y-%m-%d")
        fetch_till = current_start.strftime("%Y-%m-%d")

        logging.debug(f"\nBatch {batch_num} of {total_batches}: fetching day from {fetch_from} to {fetch_till}")

        # Note: API expects min_unix_timestamp to be OLDER than max_unix_timestamp
        fetched_matches = fetch_match_data(
            fetch_till_date=fetch_from,  # Older timestamp (more days ago)
            fetch_from_date=fetch_till,  # Newer timestamp (fewer days ago)
            limit=limit
        )
        
        logging.info(f"fetch matches for day {fetch_from}. total matches found: {len(fetched_matches)}")

        # Check if there was an error in the API response
        if "error" in fetched_matches:
            print(f"Error encountered during batch {batch_num+1}. Skipping this batch.")
        else:
            batch_matches.append(fetched_matches)
            
        # Move backward in time by one day
        current_start += timedelta(days=1)
        batch_num += 1

    return batch_matches

In [6]:
# Fetch batch matches
batch_matches = bulk_fetch_matches(
    start_date=start_date,
    end_date=end_date, 
    limit=1000
)

DEBUG:__main__:
Batch 1 of 3: fetching day from 2025-08-19 to 2025-08-19
DEBUG:__main__:Fetching match data..
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.deadlock-api.com:443


Time range: 2025-08-19 to 2025-08-19


DEBUG:urllib3.connectionpool:https://api.deadlock-api.com:443 "GET /v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755561600&max_unix_timestamp=1755647999&min_average_badge=100&limit=1000 HTTP/1.1" 200 None
INFO:__main__:fetch matches for day 2025-08-19. total matches found: 336
DEBUG:__main__:
Batch 2 of 3: fetching day from 2025-08-20 to 2025-08-20
DEBUG:__main__:Fetching match data..
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.deadlock-api.com:443


Time range: 2025-08-20 to 2025-08-20


DEBUG:urllib3.connectionpool:https://api.deadlock-api.com:443 "GET /v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755648000&max_unix_timestamp=1755734399&min_average_badge=100&limit=1000 HTTP/1.1" 200 None
INFO:__main__:fetch matches for day 2025-08-20. total matches found: 261
DEBUG:__main__:
Batch 3 of 3: fetching day from 2025-08-21 to 2025-08-21
DEBUG:__main__:Fetching match data..
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.deadlock-api.com:443


Time range: 2025-08-21 to 2025-08-21


DEBUG:urllib3.connectionpool:https://api.deadlock-api.com:443 "GET /v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755734400&max_unix_timestamp=1755820799&min_average_badge=100&limit=1000 HTTP/1.1" 200 None
INFO:__main__:fetch matches for day 2025-08-21. total matches found: 275


In [7]:
batch_df = pd.DataFrame(batch_matches).copy()
batch_df.to_csv(f"{folder_name}//batch_matches.csv")

In [None]:
# Compare Players to another data set if relevant

In [8]:
# Converts nested Dict into matches and players dataframes
from data.process_data import separate_match_players

raw_matches, raw_players = separate_match_players(batch_matches)

INFO:root:Normalizing bulk match data
INFO:root:Processing day #0 with 336 matches
INFO:root:Processing day #1 with 261 matches
INFO:root:Processing day #2 with 275 matches


In [9]:
# For each unique player within raw_players, fetch player_hero and calculate player_stats

def fetch_player_hero_stats(account_ids: List[int], fetch_till_date, fetch_from_date=None) -> dict:
    """Fetches hero stats for a specific player from the Deadlock API.
    Generally used in conjunction with run_player_batches and 
    process_player_stats_parallel

    - account_ids: list of Player's account IDs to fetch stats for (can be string or numeric)
    
    Returns:
    - Dict response containing player's hero stats or error dict
    """
    
    base = "https://api.deadlock-api.com/v1/players/hero-stats"
    path = f"{base}"
    params: dict[str, str] = {}

    params["account_ids"] = ",".join(str(i) for i in account_ids)

    fetch_from_date = unix_utc_start(fetch_from_date) if fetch_from_date else None
    fetch_till_date = unix_utc_eod(fetch_till_date) if fetch_till_date else None

    if fetch_from_date is not None:
        params["min_unix_timestamp"] = (fetch_from_date)
    if fetch_till_date is not None:
        params["max_unix_timestamp"] = (fetch_till_date)

    query = urlencode(params)
    full_url = f"{path}?{query}" if query else path

    print(f"**DEBUG** params = {params},full_url = \n\n{full_url}")

    try:
        response = requests.get(full_url)
        if response.status_code != 200:
            logging.error(f"API request failed for list of players with status code {response.status_code}")
            logging.error(f"Response: {response.text}")
            return {"error": f"API request failed with status code {response.status_code}"}
        
        
        return response.json()
    except Exception as e:
        logging.error(f"Exception fetching hero stats for players list: {e}")
        return {"error": str(e)}

# send players in batches of 1,000
def fetch_player_hero_stats_batch(batch_size, account_ids: List[int], fetch_till_date, fetch_from_date=None) -> pd.DataFrame:
    """Fetches hero stats for a batch of players from the Deadlock API.
    Generally used in conjunction with run_player_batches and
    process_player_stats_parallel

    - account_ids: list of Player's account IDs to fetch stats for (can be string or numeric)

    Returns:
    - Dict response containing player's hero stats or error dict
    """

    results = []
    for i in range(0, len(account_ids), batch_size):
        print(len(raw_players["account_id"].unique()))
        batch = account_ids[i:i + batch_size]
        response = fetch_player_hero_stats(batch, fetch_till_date=fetch_till_date, fetch_from_date=fetch_from_date)
        results.extend(format_player_hero_response(response))
    return pd.DataFrame(results)

def format_player_hero_response(players_hero_data: list[Dict]):
    """removes matches nested list, normalizes to player<>hero stat row"""
    ph_stats = []

    for entry in players_hero_data:
        entry = {k: v for k, v in entry.items() if k != "matches"}
        ph_stats.append(entry)
    return ph_stats

In [10]:
bulk_player_hero_stats= fetch_player_hero_stats_batch(
    account_ids=raw_players["account_id"].unique().tolist(),
    fetch_till_date=start_date,
    fetch_from_date=None,
    batch_size=700
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.deadlock-api.com:443


2641
**DEBUG** params = {'account_ids': '923602661,1594163085,1059528967,133308331,223305242,867492119,119437859,1191471475,1841913745,1875602814,195409859,1730138529,177704190,256512296,97652111,838749946,438025075,150882082,248322741,211863131,1141067246,119358588,487852817,1830659492,1711030490,104859703,380394699,288422074,106448300,1830269399,323897568,1890139555,1891938420,1195896640,419694923,911856667,44307921,185989154,363045702,395967846,49971767,327114883,1108205529,1115903684,1016689973,366861640,486959981,846383258,225105863,111750713,396957591,845610743,1557303666,205071683,298305257,318845348,331706904,298155885,1186801441,61295713,1918973451,108130192,54151884,1762695496,375288852,83608685,1743351096,284870226,244109796,1049966963,1885262860,298312890,1728767634,246214695,194833486,154191414,1676299122,87624911,25821887,385202207,1215984137,451711628,1046192556,1012333396,1710092569,69431452,277803056,1736528837,98791792,161334108,360428356,32639412,158043057,1859387891

DEBUG:urllib3.connectionpool:https://api.deadlock-api.com:443 "GET /v1/players/hero-stats?account_ids=923602661%2C1594163085%2C1059528967%2C133308331%2C223305242%2C867492119%2C119437859%2C1191471475%2C1841913745%2C1875602814%2C195409859%2C1730138529%2C177704190%2C256512296%2C97652111%2C838749946%2C438025075%2C150882082%2C248322741%2C211863131%2C1141067246%2C119358588%2C487852817%2C1830659492%2C1711030490%2C104859703%2C380394699%2C288422074%2C106448300%2C1830269399%2C323897568%2C1890139555%2C1891938420%2C1195896640%2C419694923%2C911856667%2C44307921%2C185989154%2C363045702%2C395967846%2C49971767%2C327114883%2C1108205529%2C1115903684%2C1016689973%2C366861640%2C486959981%2C846383258%2C225105863%2C111750713%2C396957591%2C845610743%2C1557303666%2C205071683%2C298305257%2C318845348%2C331706904%2C298155885%2C1186801441%2C61295713%2C1918973451%2C108130192%2C54151884%2C1762695496%2C375288852%2C83608685%2C1743351096%2C284870226%2C244109796%2C1049966963%2C1885262860%2C298312890%2C1728767634%2C2462

2641
**DEBUG** params = {'account_ids': '190169838,842969768,452340799,839468175,230517301,151913358,1049947751,123403438,325973787,1348197276,1691663360,1840902834,200874500,78513236,1289384738,1522309916,248353052,1508349438,1027512840,991044819,153784687,889457024,205286134,924889838,1125557395,1166077212,43104332,1423690634,1695112305,308610633,1120368512,1662419499,1521984193,41212128,884850823,1110347672,141789305,30694523,195563441,188030304,281695706,837436655,1729783760,130143991,1255354474,409930672,108960927,466504733,371698502,89359132,96370882,303986100,66649287,138529848,160954600,920952216,392127070,1534960931,298880426,455559339,312744516,99792010,392086095,72976859,1253533815,129979937,107632839,117651401,1833901250,112724001,105263588,360241120,141680443,354117290,122953761,159356881,1514557202,34218621,993986242,1544988058,1912069048,1008538858,1526032320,1120352291,1144773549,1865470570,211458349,1015929493,25017556,234314999,1144753662,840800894,1826179440,98564266

DEBUG:urllib3.connectionpool:https://api.deadlock-api.com:443 "GET /v1/players/hero-stats?account_ids=190169838%2C842969768%2C452340799%2C839468175%2C230517301%2C151913358%2C1049947751%2C123403438%2C325973787%2C1348197276%2C1691663360%2C1840902834%2C200874500%2C78513236%2C1289384738%2C1522309916%2C248353052%2C1508349438%2C1027512840%2C991044819%2C153784687%2C889457024%2C205286134%2C924889838%2C1125557395%2C1166077212%2C43104332%2C1423690634%2C1695112305%2C308610633%2C1120368512%2C1662419499%2C1521984193%2C41212128%2C884850823%2C1110347672%2C141789305%2C30694523%2C195563441%2C188030304%2C281695706%2C837436655%2C1729783760%2C130143991%2C1255354474%2C409930672%2C108960927%2C466504733%2C371698502%2C89359132%2C96370882%2C303986100%2C66649287%2C138529848%2C160954600%2C920952216%2C392127070%2C1534960931%2C298880426%2C455559339%2C312744516%2C99792010%2C392086095%2C72976859%2C1253533815%2C129979937%2C107632839%2C117651401%2C1833901250%2C112724001%2C105263588%2C360241120%2C141680443%2C354117290%

2641
**DEBUG** params = {'account_ids': '1728463836,105554039,337696170,904667162,866838417,422389983,279125228,104826682,198444926,113552260,374962424,281367264,215398243,39703833,50102427,395867088,1272586127,403098855,35277778,1854526514,420175220,921962455,1153005358,1229219946,343528896,1700511880,1284192363,75526556,172273948,1850022759,1718704309,152074541,1851360525,1754223221,153125220,244471627,144630819,1041717369,1755164043,1854731997,230307735,1299785813,173444968,237746071,1215094284,1300082578,91589411,169588667,1910169214,153741640,957234943,1280179474,158566162,1250845755,108631241,142830496,157142230,244576612,306685585,1260685159,125640589,1576723802,344884238,1056577991,14231343,119035214,1098404253,134768982,1880999000,1835736500,142845480,341420696,837705260,1879231723,1111221230,1153351822,44796801,1660960071,1068913860,431486501,80307157,185368295,1055301178,1894956390,44122052,487005657,1229018896,1904092000,87151422,124024173,1133672306,1014484221,924812291,24

DEBUG:urllib3.connectionpool:https://api.deadlock-api.com:443 "GET /v1/players/hero-stats?account_ids=1728463836%2C105554039%2C337696170%2C904667162%2C866838417%2C422389983%2C279125228%2C104826682%2C198444926%2C113552260%2C374962424%2C281367264%2C215398243%2C39703833%2C50102427%2C395867088%2C1272586127%2C403098855%2C35277778%2C1854526514%2C420175220%2C921962455%2C1153005358%2C1229219946%2C343528896%2C1700511880%2C1284192363%2C75526556%2C172273948%2C1850022759%2C1718704309%2C152074541%2C1851360525%2C1754223221%2C153125220%2C244471627%2C144630819%2C1041717369%2C1755164043%2C1854731997%2C230307735%2C1299785813%2C173444968%2C237746071%2C1215094284%2C1300082578%2C91589411%2C169588667%2C1910169214%2C153741640%2C957234943%2C1280179474%2C158566162%2C1250845755%2C108631241%2C142830496%2C157142230%2C244576612%2C306685585%2C1260685159%2C125640589%2C1576723802%2C344884238%2C1056577991%2C14231343%2C119035214%2C1098404253%2C134768982%2C1880999000%2C1835736500%2C142845480%2C341420696%2C837705260%2C18

2641
**DEBUG** params = {'account_ids': '361289487,137863219,48554332,1917392067,303772766,1854545787,1007596529,256188916,1270067154,163992116,415236503,1174729006,1864773511,224623791,318194745,175041379,380864492,166347564,12896754,1211340519,84291531,33499383,399962998,191262593,277867124,1875798896,1026783630,88617667,898877481,14091677,917884749,932197219,850700636,1103721040,355397886,998798724,159068073,279286466,150105679,200848943,1659551328,119334188,1097736364,106978956,1901353887,162425986,838322891,120420364,328247249,1211008531,220276145,1730618776,53819330,981283069,87286322,103017366,108496442,1122643739,85096185,1625578694,1676690647,844075563,135981230,366664693,1070982426,140515667,426611054,1880861795,1857896411,1108152657,420181684,162632247,1914364898,1901272737,144067642,183362050,141839619,24750365,1110807689,16435195,176020422,1854836768,316642546,76218202,1744140008,114080783,238401462,1741858935,1747170856,1711531859,1755204572,157932082,1754872492,132979748

DEBUG:urllib3.connectionpool:https://api.deadlock-api.com:443 "GET /v1/players/hero-stats?account_ids=361289487%2C137863219%2C48554332%2C1917392067%2C303772766%2C1854545787%2C1007596529%2C256188916%2C1270067154%2C163992116%2C415236503%2C1174729006%2C1864773511%2C224623791%2C318194745%2C175041379%2C380864492%2C166347564%2C12896754%2C1211340519%2C84291531%2C33499383%2C399962998%2C191262593%2C277867124%2C1875798896%2C1026783630%2C88617667%2C898877481%2C14091677%2C917884749%2C932197219%2C850700636%2C1103721040%2C355397886%2C998798724%2C159068073%2C279286466%2C150105679%2C200848943%2C1659551328%2C119334188%2C1097736364%2C106978956%2C1901353887%2C162425986%2C838322891%2C120420364%2C328247249%2C1211008531%2C220276145%2C1730618776%2C53819330%2C981283069%2C87286322%2C103017366%2C108496442%2C1122643739%2C85096185%2C1625578694%2C1676690647%2C844075563%2C135981230%2C366664693%2C1070982426%2C140515667%2C426611054%2C1880861795%2C1857896411%2C1108152657%2C420181684%2C162632247%2C1914364898%2C19012727

In [11]:
def process_player_stats(player_hero_stats:pd.DataFrame)->pd.DataFrame:
    """Creates aggreagate player stats from player_hero stats"""

    columns = ["matches_played", "kills", "deaths", "wins", "assists", "time_played"]
    
    for c in columns:
        if c in player_hero_stats.columns:
            player_hero_stats[c] = pd.to_numeric(player_hero_stats[c], errors='coerce')

    p_stats = player_hero_stats.groupby('account_id', as_index=False).agg(
        p_total_matches_played=('matches_played', 'sum'),
        p_total_kills=('kills', 'sum'),
        p_total_deaths=('deaths', 'sum'),
        p_total_wins=('wins', 'sum'),
        p_total_assists=('assists', 'sum'),
        p_total_time_played=('time_played', 'sum')
    )

    p_stats['p_avg_kills'] = (p_stats['p_total_kills'] / p_stats['p_total_matches_played'].replace(0, pd.NA)).fillna(0)
    p_stats['p_win_rate'] = (p_stats['p_total_wins'] / p_stats['p_total_matches_played'].replace(0, pd.NA)).fillna(0)

    return p_stats

In [12]:
p_stats = process_player_stats(bulk_player_hero_stats)

In [None]:
# p_stats = pd.read_csv(f"{folder_name}//p_stats.csv")
# bulk_player_hero_stats = pd.read_csv(f"{folder_name}//player_hero_stats.csv")
# raw_players = pd.read_csv(f"{folder_name}//raw_players.csv")
# raw_matches = pd.read_csv(f"{folder_name}//matches.csv")

In [15]:
test = raw_players.copy()
testm = raw_matches.copy()

In [None]:
# calculate win column for the account<>match
players = raw_players.merge(
        raw_matches[['match_id', 'winning_team']],
        on='match_id',
        how='left'
    )
players['win'] = players.apply(
    lambda row: 'Y' if row['team'] == row['winning_team'] else 'N',
    axis=1
)
players

Unnamed: 0,account_id,match_id,team,hero_id,kills,deaths,assists,denies,net_worth,winning_team,win
0,923602661,38940475,Team1,18,0,6,4,1,18987,Team0,N
1,1594163085,38940475,Team1,1,6,6,3,6,30463,Team0,N
2,1059528967,38940475,Team1,52,3,1,4,2,26020,Team0,N
3,133308331,38940475,Team0,2,9,2,9,6,35954,Team0,Y
4,223305242,38940475,Team0,14,7,4,10,0,28362,Team0,Y
...,...,...,...,...,...,...,...,...,...,...,...
10459,105363944,39193084,Team1,14,6,7,13,1,29112,Team0,N
10460,321601241,39193084,Team1,20,5,8,7,1,32927,Team0,N
10461,1062898121,39193084,Team1,4,4,7,14,3,27593,Team0,N
10462,31573479,39193084,Team1,63,13,9,7,1,30494,Team0,N


In [23]:
# Aggregate stats at the match->player level
player_match_stats = pd.merge(players, p_stats, on="account_id",how="left")

In [45]:
def drop_pm_columns(player_match_stats):
    """drop columns for a player<>match data. Not used in training/pred"""

    drop_columns = [
        'kills','deaths','assists','denies','net_worth','Unnamed: 0'
    ]
    for col in drop_columns:
        if col in player_match_stats.columns:
            player_match_stats.drop(columns=col, inplace=True)
    
    return player_match_stats

In [46]:
player_match_stats=(drop_pm_columns(player_match_stats))

In [48]:
player_match_stats.to_csv(f"{folder_name}/player_match_stats.csv", index=False)

In [51]:
def fetch_hero_stats(
    min_average_badge: int = 100,
    fetch_till_date: int | None = None,
    fetch_from_date: int | None = None,
    ) -> json:
    """Fetches match data from the Deadlock API.
    
    Key Parameters:
    - min_average_badge: Minimum average rank to capture hero stats
    - fetch_till_date: Newest time to filter hero stats. i.e. matches before yesterday
    - fetch_from_date: Oldest time to filter hero stats. i.e. matches 3 months ago -> max time

    Returns:
    - JSON response, list of dicts
    """

    logging.debug(f"Fetching match data..")
    base = 'https://api.deadlock-api.com/v1/analytics'

    path = f"{base}/hero-stats"
    params: dict[str, str] = {}

    
    print(f"Time range: {fetch_from_date} to {fetch_till_date}")

    fetch_from_date = unix_utc_start(fetch_from_date) if fetch_from_date else None
    fetch_till_date = unix_utc_eod(fetch_till_date) if fetch_till_date else None

    if fetch_from_date is not None:
        params["min_unix_timestamp"] = (fetch_from_date)
    if fetch_till_date is not None:
        params["max_unix_timestamp"] = (fetch_till_date)
    if min_average_badge is not None:
        params["min_average_badge"] = str(min_average_badge)

    query = urlencode(params)
    full_url = f"{path}?{query}" if query else path
    
    response = requests.get(full_url)
    if response.status_code != 200:
        print(f"Error: API request failed with status code {response.status_code}")
        print(f"URL: {full_url}")
        return {"error": f"API request failed with status code {response.status_code}"}
    return response.json()

In [103]:
hero_stats = fetch_hero_stats(fetch_from_date=start_date, fetch_till_date=end_date)

DEBUG:__main__:Fetching match data..
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.deadlock-api.com:443


Time range: 2025-08-19 to 2025-08-21


DEBUG:urllib3.connectionpool:https://api.deadlock-api.com:443 "GET /v1/analytics/hero-stats?min_unix_timestamp=1755561600&max_unix_timestamp=1755820799&min_average_badge=100 HTTP/1.1" 200 None


### Checkpoint ###
p_stats, ph_stats, matches, players, h_stats

In [None]:
p_stats.to_csv(f"{folder_name}/p_stats.csv",index=False)
bulk_player_hero_stats.to_csv(f"{folder_name}/player_hero_stats.csv", index=False)
raw_players.to_csv(f"{folder_name}/raw_players.csv", index=False)
raw_matches.to_csv(f"{folder_name}/raw_matches.csv", index=False)
h_stats.to_csv(f"{folder_name}/h_stats.csv", index=False)

In [104]:
def merge_pm_h_stats(p_m_stats, h_stats)-> pd.DataFrame:
    """merges player_match_stats with hero_stats on hero_id, adds suffix 'h_' to hero_stats columns"""
    h_stats_copy = h_stats.copy()
    
    h_stats_copy = pd.json_normalize(h_stats_copy)
    h_stats_copy = h_stats_copy.add_prefix('h_')
    h_stats_copy = h_stats_copy.rename(columns={'h_hero_id': 'hero_id'})
    
    merged = pd.merge(p_m_stats, h_stats_copy, on="hero_id", suffixes=("h_", ""))
    
    return merged

In [107]:
p_ph_h_stats = merge_pm_h_stats(player_match_stats, hero_stats)

In [108]:
p_ph_h_stats.to_csv(f"{folder_name}/player_hero_match_stats.csv", index=False)

In [None]:
def calculate_ph_stats(p_ph_h_stats: pd.DataFrame) -> pd.DataFrame:
    """
    Create player hero stats by aggregating the player_hero_stats DataFrame.
    Creates aggregate function across all hero_stats for players in df.
    Checks for potential divide by zero errors and sets result to 0 if denominator is zero.
    """

    ph_stats = p_ph_h_stats.copy()

    # Avoid divide by zero for deaths
    ph_stats['ph_total_kd'] = np.where(ph_stats['deaths'] == 0, 0, ph_stats['kills'] / ph_stats['deaths'])

    # Avoid divide by zero for ph_total_kd
    ph_stats['ph_kd_ratio'] = np.where(ph_stats['ph_total_kd'] == 0, 0, ph_stats['h_total_kd']/ ph_stats['ph_total_kd'])

    ph_stats['h_avg_total_time_played'] = (ph_stats.groupby('hero_id')['time_played'].transform("mean"))
    # Avoid divide by zero for h_avg_total_time_played
    ph_stats['ph_time_played_ratio'] = np.where(ph_stats['h_avg_total_time_played'] == 0, 0, ph_stats['time_played']/ ph_stats['h_avg_total_time_played'])

    ph_stats['h_total_damage_per_min'] = (ph_stats.groupby('hero_id')['damage_per_min'].transform("mean"))
    # Avoid divide by zero for h_total_damage_per_min
    ph_stats['ph_damage_per_min_ratio'] = np.where(ph_stats['h_total_damage_per_min'] == 0, 0, ph_stats['damage_per_min']/ ph_stats['h_total_damage_per_min'])

    ph_stats['h_total_assists'] = (ph_stats.groupby('hero_id')['assists'].transform("mean"))
    # Avoid divide by zero for h_total_assists
    ph_stats['ph_assists_ratio'] = np.where(ph_stats['h_total_assists'] == 0, 0, ph_stats['assists']/ ph_stats['h_total_assists'])

    # Avoid divide by zero for matches_played
    ph_stats['ph_win_rate'] = np.where(ph_stats['matches_played'] == 0, 0, ph_stats['wins'] / ph_stats['matches_played'])
    ph_stats['h_total_win_rate'] = (ph_stats.groupby('hero_id')['ph_win_rate'].transform("mean"))
    # Avoid divide by zero for h_total_win_rate
    ph_stats['ph_win_rate_ratio'] = np.where(ph_stats['h_total_win_rate'] == 0, 0, ph_stats['ph_win_rate'] / ph_stats['h_total_win_rate'])

    ph_stats.rename(columns={
        "wins": "ph_wins",
        "kills": "ph_kills",
        "deaths": "ph_deaths",
        "assists": "ph_assists",
        "damage_per_min": "ph_damage_per_min",
        'time_played': 'ph_time_played'
    }, inplace=True)

    return ph_stats

def create_hero_stats(player_hero_stats: pd.DataFrame) -> pd.DataFrame:
    """
    Create hero stats by aggregating the player_hero_stats DataFrame.
    """
    hero_stats = player_hero_stats.copy()

    hero_stats['h_total_kd'] = (hero_stats.groupby('hero_id')['ph_total_kd'].transform("mean"))


    return hero_stats

def create_player_stats(player_match_stats: pd.DataFrame) -> pd.DataFrame:
    """
    Create player stats by aggregating the player_match_stats DataFrame.
    """
    player_stats = player_match_stats.copy()



    return player_stats