In [2]:
import requests
import json
import os
import sys
import pandas as pd
import logging
from urllib.parse import urlencode
import time
from datetime import timedelta, datetime

In [3]:
def get_unix_time(days_ago=0):
    """
    Compute the unix timestamp of the moment `days_ago` days before now.

    Parameters:
    days_ago (int): How many days to step back from the current time (0 = now)

    Returns:
    int: Unix timestamp, in whole seconds, of that past moment
    """
    # Truncate both terms to whole seconds separately before subtracting.
    offset_seconds = int(timedelta(days=days_ago).total_seconds())
    now_seconds = int(time.time())
    return now_seconds - offset_seconds

def get_time_delta(min_unix_time,max_time):
    """
    Return the unix timestamp that lies `max_time` days before `min_unix_time`.

    Parameters:
    min_unix_time (int | None): Reference unix timestamp in seconds. When None,
        the current time is used as the reference.
    max_time (int | float): Number of days to subtract from the reference.

    Returns:
    int: Unix timestamp (seconds), truncated to an integer
    """
    # Previously the argument was unconditionally replaced by the current time,
    # so the caller's reference timestamp was silently ignored.
    if min_unix_time is None:
        min_unix_time = int(time.time())
    return int(min_unix_time - timedelta(days=max_time).total_seconds())

In [4]:
def fetch_match_data(
    min_average_badge: int = 100,
    max_unix_timestamp: int | None = None,
    min_unix_timestamp: int | None = None,
    m_id: str | None = None,
    include_player_info: bool = True,
    limit: int = 1000
    ) -> json:
    """Fetches match data from the Deadlock API.
    
    Key Parameters:
    - min_average_badge: Minimum average rank to return matches.
    - max_unix_timestamp: Newest time to filter matches. i.e. matches before yesterday
    - min_unix_timestamp: Oldest time to filter matches. i.e. matches 3 months ago -> max time
    - m_id: Specific match ID to fetch metadata for.
    - include_player_info: Whether to include player information in the response. this is required for match_player data
    - limit: Maximum number of matches to return.
    Returns:
    - JSON response containing match metadata with 12 players per match.
    """

    logging.debug(f"Fetching match data..")
    base = "https://api.deadlock-api.com/v1/matches"

    # if a specific match ID is given, check player_data and hit that endpoint
    if m_id:
        path = f"{base}/{m_id}/metadata"
        params = {}
        if include_player_info:
            params["include_player_info"] = "true"

        query = urlencode(params)
        full_url = f"{path}?{query}" if query else path
        response = requests.get(full_url)
        if response.status_code != 200:
            print(f"Error: API request failed with status code {response.status_code}")
            print(f"URL: {full_url}")
            return {"error": f"API request failed with status code {response.status_code}"}
        return response.json()

    # Bulk-metadata endpoint
    path = f"{base}/metadata"
    params: dict[str, str] = {}

    if include_player_info:
        params["include_player_info"] = "true"
    
    # Convert days to unix timestamps - ensure max is newer (smaller days_ago) than min
    if min_unix_timestamp is not None:
        older_time = get_unix_time(min_unix_timestamp)
        params["min_unix_timestamp"] = str(older_time)
    if max_unix_timestamp is not None:
        newer_time = get_unix_time(max_unix_timestamp)
        params["max_unix_timestamp"] = str(newer_time)
        
        # Debug info for timestamps
        print(f"Time range: {max_unix_timestamp} days ago to {min_unix_timestamp} days ago")
        print(f"Unix timestamps: {newer_time} to {older_time}")
        
    if min_average_badge is not None:
        params["min_average_badge"] = str(min_average_badge)
    if limit is not None:
        params["limit"] = str(limit)

    query = urlencode(params)
    full_url = f"{path}?{query}" if query else path
    
    print(f"Making request to: {full_url}")
    response = requests.get(full_url)
    if response.status_code != 200:
        print(f"Error: API request failed with status code {response.status_code}")
        print(f"URL: {full_url}")
        return {"error": f"API request failed with status code {response.status_code}"}
    return response.json()

In [5]:
# Smoke test: fetch a small sample (limit=10) through fetch_match_data using its
# default filters, then show the raw response as the cell output.
data = fetch_match_data(limit=10)
data

Making request to: https://api.deadlock-api.com/v1/matches/metadata?include_player_info=true&min_average_badge=100&limit=10


[{'average_badge_team0': 102,
  'average_badge_team1': 102,
  'duration_s': 1267,
  'game_mode': 'Normal',
  'game_mode_version': None,
  'is_high_skill_range_parties': False,
  'low_pri_pool': False,
  'match_id': 28627568,
  'match_mode': 'Unranked',
  'match_outcome': 'TeamWin',
  'new_player_pool': False,
  'players': [{'abandon_match_time_s': 0,
    'ability_points': 20,
    'account_id': 100051626,
    'assigned_lane': 3,
    'assists': 9,
    'deaths': 3,
    'denies': 10,
    'hero_id': 19,
    'kills': 10,
    'last_hits': 94,
    'net_worth': 21578,
    'party': 1,
    'player_level': 25,
    'player_slot': 9,
    'team': 'Team1'},
   {'abandon_match_time_s': 0,
    'ability_points': 22,
    'account_id': 91308342,
    'assigned_lane': 4,
    'assists': 10,
    'deaths': 5,
    'denies': 13,
    'hero_id': 12,
    'kills': 8,
    'last_hits': 150,
    'net_worth': 27204,
    'party': 0,
    'player_level': 27,
    'player_slot': 11,
    'team': 'Team1'},
   {'abandon_match_ti

In [6]:
def bulk_fetch_matches(max_days_fetch=90, min_days=3, max_days=0, limit=500)->list:
    """Fetches matches in one-day windows, walking backward in time.

    Returns a list with one element per successfully fetched day. Each element is the
    raw, un-normalized response of fetch_match_data for that day (match records with
    their nested 'players' entries). Days whose request returned an error are skipped.

    min_days = Oldest time barrier (more days ago)
    max_days = Newest time barrier (fewer days ago)
    max_days_fetch = max days to fetch, starting from max_days
    limit = max matches within a day to pull (a busier day is cut off at this count)

    example:
    bulk_fetch_matches(max_days_fetch=30, min_days=7, max_days=0)
    will fetch data in one-day increments, from today back to 7 days ago,
    or until 30 days of data have been fetched.
    """

    batch_matches = []

    # Nothing to fetch when the newest boundary is already at or past the oldest one;
    # without this guard one out-of-range day would still be requested.
    if max_days >= min_days:
        print(f"Reached configured minimum day boundary ({min_days} days ago)")
        return batch_matches

    # Calculate the starting day (defaults to today)
    current_max = max_days      # Newer boundary (fewer days ago)
    current_min = current_max + 1  # Older boundary (more days ago)

    for batch in range(max_days_fetch):
        logging.debug(f"\nBatch {batch}: fetching day from {current_max} to {current_min} days ago")
        print(f"DEBUG: Fetching matches for day {batch + 1} from {current_max} to {current_min} days ago")

        # Note: API expects min_unix_timestamp to be OLDER than max_unix_timestamp
        fetched_matches = fetch_match_data(
            min_unix_timestamp=current_min,  # Older timestamp (more days ago)
            max_unix_timestamp=current_max,  # Newer timestamp (fewer days ago)
            limit=limit
        )

        # fetch_match_data signals failure with an {"error": ...} dict; a successful
        # bulk response is kept as-is.
        if isinstance(fetched_matches, dict) and "error" in fetched_matches:
            print(f"Error encountered during batch {batch+1}. Skipping this batch.")
        else:
            batch_matches.append(fetched_matches)

        # Move backward in time by one day
        current_max += 1  # Increase days ago for newer boundary
        current_min += 1  # Increase days ago for older boundary

        # Stop if we've reached the minimum days boundary
        if current_max >= min_days:
            print(f"Reached configured minimum day boundary ({min_days} days ago)")
            break

    return batch_matches

In [7]:
# Test fetching a bulk set of matches (min_average_badge=100 by default) in one-day
# windows, from today back to 3 days ago.
# `data` is a list with one entry per fetched day; each entry holds that day's matches.

print("Testing fixed bulk fetch - getting data from today to 3 days ago")
data = bulk_fetch_matches(max_days_fetch=4, min_days=3, max_days=0)
# Show how many matches in each day. Failed days are skipped inside
# bulk_fetch_matches, so every entry here is a successful response.
for i, day_data in enumerate(data):
    print(f"Day {i}: {len(day_data)} matches")
# Summarise instead of dumping every match/player record into the cell output.
print(f"Fetched {len(data)} day(s), {sum(len(day_data) for day_data in data)} matches in total")

Testing fixed bulk fetch - getting data from today to 3 days ago
DEBUG: Fetching matches for day 1 from 0 to 1 days ago
Time range: 0 days ago to 1 days ago
Unix timestamps: 1754666835 to 1754580435
Making request to: https://api.deadlock-api.com/v1/matches/metadata?include_player_info=true&min_unix_timestamp=1754580435&max_unix_timestamp=1754666835&min_average_badge=100&limit=500
DEBUG: Fetching matches for day 2 from 1 to 2 days ago
Time range: 1 days ago to 2 days ago
Unix timestamps: 1754580435 to 1754494035
Making request to: https://api.deadlock-api.com/v1/matches/metadata?include_player_info=true&min_unix_timestamp=1754494035&max_unix_timestamp=1754580435&min_average_badge=100&limit=500
DEBUG: Fetching matches for day 2 from 1 to 2 days ago
Time range: 1 days ago to 2 days ago
Unix timestamps: 1754580435 to 1754494035
Making request to: https://api.deadlock-api.com/v1/matches/metadata?include_player_info=true&min_unix_timestamp=1754494035&max_unix_timestamp=1754580435&min_averag

In [8]:
# List every fetched match, grouped by day, with the account ids of its players.
for day_number, day_matches in enumerate(data):
    print(f"Day {day_number}:")
    for game in day_matches:
        account_ids = [participant['account_id'] for participant in game['players']]
        print(f"  Match ID: {game['match_id']}, Players: {account_ids}")


Day 0:
  Match ID: 38602764, Players: [217021090, 113552260, 845610743, 1902831327, 1100407667, 300886209, 192013690, 80977080, 100906242, 1026895916, 95947783, 392972181]
  Match ID: 38602989, Players: [487005657, 298155885, 106448300, 1110765241, 1710092569, 1278325772, 876346925, 99576706, 36607982, 1012333396, 1423197814, 404144306]
  Match ID: 38603210, Players: [277803056, 180076228, 917191256, 1675013638, 1243852454, 1047948227, 958156650, 344732515, 1006394213, 269017416, 296583733, 1899590665]
  Match ID: 38603331, Players: [236599146, 69321575, 224623791, 54456193, 1611087535, 355397886, 1896932924, 85336255, 215382319, 193258494, 414858274, 65704200]
  Match ID: 38603366, Players: [1238293735, 848524552, 1192612644, 1257294113, 1425284998, 215846827, 1886876070, 120596979, 1144753662, 890100186, 864364265, 24195490]
  Match ID: 38603640, Players: [1803767927, 206238595, 1255354474, 1560494557, 1837860425, 130174860, 1848503388, 396957591, 281768392, 1902831327, 1045169707, 1

In [9]:
# Collect the distinct account ids seen across every match of every fetched day.
unique_players = {
    participant['account_id']
    for day_matches in data
    for game in day_matches
    for participant in game['players']
}
print(f"Unique players identified: {unique_players}")

Unique players identified: {1253367808, 366829568, 199843842, 65552385, 1055031308, 38019084, 165560339, 150454291, 173907991, 1390706713, 188268570, 1697693729, 122953761, 1837383713, 1047035947, 1132011566, 388251702, 121610296, 339976259, 1776173125, 327532620, 145596499, 1270505558, 91373654, 135856218, 846913632, 919887968, 1164034146, 165945463, 111820919, 874692732, 196051074, 327114883, 248881284, 884850823, 1700511880, 1834778761, 900440203, 873169036, 1012457622, 123031, 414269598, 153706657, 1210056867, 1599733928, 404144306, 141222074, 1883365566, 85336255, 49045696, 1676140735, 1027465407, 1062215875, 260112577, 158638283, 200057040, 385482964, 1127203029, 169631959, 71336153, 7831771, 1196875996, 392102114, 207511780, 981508327, 445120743, 1694531820, 878674161, 1809776883, 85328115, 68673784, 117326075, 144449790, 21217538, 1709752581, 1393615111, 403751180, 869892364, 867492119, 1192788248, 1193312537, 1087136027, 185573660, 147743012, 140919080, 1860264234, 196444468, 

In [None]:
def seperate_match_players(
        matches_grouped_by_day: list) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Normalizes bulk match data into two dataframes: matches and players.

    Parameters:
    - matches_grouped_by_day: sequence of per-day batches; each batch is a sequence of
      match dicts carrying the match fields below plus a 'players' list.

    Returns:
    - (df_matches, df_players)
      df_matches: one row per match with match_id, start_time, game_mode, match_mode,
        duration_s, winning_team.
      df_players: one row per player per match with account_id, match_id, team, hero_id,
        kills, deaths, assists, denies, net_worth.
      Both frames always carry these columns, even when they have no rows.

    Notes:
    - A match missing any match field is logged and skipped entirely.
    - A match whose 'players' list does not hold exactly 12 entries is logged; its match
      row is kept but none of its players are added.
    - A player missing a field is logged and skipped individually.
    """
    match_fields = ("match_id", "start_time", "game_mode", "match_mode", "duration_s", "winning_team")
    player_stat_fields = ("team", "hero_id", "kills", "deaths", "assists", "denies", "net_worth")
    match_columns = list(match_fields)
    player_columns = ["account_id", "match_id", *player_stat_fields]

    logging.info("Normalizing bulk match data")
    if not matches_grouped_by_day:
        logging.warning("No match data found — matches_grouped_by_day is empty.")
        return pd.DataFrame(columns=match_columns), pd.DataFrame(columns=player_columns)

    matches = []
    players = []
    for day_idx, day_matches in enumerate(matches_grouped_by_day):
        logging.info(f"Processing day #{day_idx} with {len(day_matches)} matches")

        for match in day_matches:
            try:
                match_row = {field: match[field] for field in match_fields}
            except KeyError as e:
                logging.error(f"Match missing key {e}: {match.get('match_id', 'unknown')}", exc_info=True)
                continue

            matches.append(match_row)
            match_id = match_row["match_id"]  # PK, repeated on every player row as FK

            # Treat a missing or null 'players' entry as an empty roster so len() is safe.
            match_players = match.get("players") or []
            if len(match_players) != 12:
                logging.error(f"Match {match_id} has invalid player count: {len(match_players)}")
                continue

            for player in match_players:
                try:
                    player_row = {"account_id": player["account_id"], "match_id": match_id}
                    player_row.update((field, player[field]) for field in player_stat_fields)
                except KeyError as e:
                    logging.error(f"Player missing key {e}: {player.get('account_id', 'unknown')}", exc_info=True)
                    continue
                players.append(player_row)

    if not matches:
        logging.warning("No matches appended — matches list is empty.")
    if not players:
        logging.warning("No players appended — players list is empty.")

    # Player rows are dicts, which are unhashable, so they cannot go into a set;
    # build a DataFrame as the docstring and the empty-input path already promise.
    df_matches = pd.DataFrame(matches, columns=match_columns)
    df_players = pd.DataFrame(players, columns=player_columns)
    return df_matches, df_players

In [17]:
# Split the per-day batches in `data` into match-level and player-level records,
# then print both results for a quick visual check.
matches, players = seperate_match_players(data)
print(f"matches: {matches}")
print(f"players: {players}")

matches:      match_id           start_time game_mode match_mode  duration_s  \
0    38602764  2025-08-07 15:29:32    Normal   Unranked        1784   
1    38602989  2025-08-07 15:40:06    Normal   Unranked        1824   
2    38603210  2025-08-07 15:50:19    Normal   Unranked        2150   
3    38603331  2025-08-07 15:55:51    Normal   Unranked        1956   
4    38603366  2025-08-07 15:57:37    Normal   Unranked        2091   
..        ...                  ...       ...        ...         ...   
639  38573483  2025-08-06 15:04:54    Normal   Unranked        1369   
640  38573542  2025-08-06 15:08:02    Normal   Unranked        1473   
641  38573590  2025-08-06 15:10:08    Normal   Unranked        1352   
642  38573635  2025-08-06 15:12:33    Normal   Unranked        1634   
643  38573814  2025-08-06 15:20:43    Normal   Unranked        2357   

    winning_team  
0          Team1  
1          Team0  
2          Team1  
3          Team0  
4          Team1  
..           ...  
639  