In [28]:
import requests
import json
import os
import sys
import pandas as pd
import logging
from urllib.parse import urlencode
import time
from datetime import timedelta, datetime, timezone
from data.fetch_data import bulk_fetch_matches
from typing import Optional, Dict, Any, Iterable, List

logging.basicConfig(level=logging.DEBUG)
logging = logging.getLogger(__name__)



In [29]:
def unix_utc_start(date_str: str) -> int:
    # YYYY-MM-DD at 00:00:00 UTC
    dt = datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)
    dt = dt.replace(hour=0, minute=0, second=0)
    return int(dt.timestamp())

def unix_utc_eod(date_str: str) -> int:
    # YYYY-MM-DD at 23:59:59 UTC (inclusive)
    dt = datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)
    dt = dt.replace(hour=23, minute=59, second=59)
    return int(dt.timestamp())

In [30]:
start_date = "2025-08-19"
end_date = "2025-08-21"
folder_name = f"//v2_data//pred_data//test_pred_v2_{start_date}_{end_date}"

In [31]:
def fetch_match_data(
    min_average_badge: int = 100,
    fetch_till_date: int | None = None,
    fetch_from_date: int | None = None,
    m_id: str | None = None,
    include_player_info: bool = True,
    limit: int = 1000
    ) -> json:
    """Fetches match data from the Deadlock API.
    
    Key Parameters:
    - min_average_badge: Minimum average rank to return matches.
    - fetch_till_date: Newest time to filter matches. i.e. matches before yesterday
    - fetch_from_date: Oldest time to filter matches. i.e. matches 3 months ago -> max time
    - m_id: Specific match ID to fetch metadata for.
    - include_player_info: Whether to include player information in the response. this is required for match_player data
    - limit: Maximum number of matches to return.
    Returns:
    - JSON response containing match metadata with 12 players per match.
    """

    logging.debug(f"Fetching match data..")
    base = "https://api.deadlock-api.com/v1/matches"

    # if a specific match ID is given, check player_data and hit that endpoint
    if m_id:
        path = f"{base}/{m_id}/metadata"
        params = {}
        if include_player_info:
            params["include_player_info"] = "true"

        query = urlencode(params)
        full_url = f"{path}?{query}" if query else path
        response = requests.get(full_url)
        if response.status_code != 200:
            print(f"Error: API request failed with status code {response.status_code}")
            print(f"URL: {full_url}")
            return {"error": f"API request failed with status code {response.status_code}"}
        return response.json()

    # Bulk-metadata endpoint
    path = f"{base}/metadata"
    params: dict[str, str] = {}

    if include_player_info:
        params["include_player_info"] = "true"
    
    print(f"Time range: {fetch_from_date} days ago to {fetch_till_date} days ago")

    fetch_from_date = unix_utc_start(fetch_from_date) if fetch_from_date else None
    fetch_till_date = unix_utc_eod(fetch_till_date) if fetch_till_date else None

    print(f"Unix timestamps: {fetch_till_date} to {fetch_from_date}. types:")
    print(fetch_from_date, type(fetch_from_date))
    print(fetch_till_date, type(fetch_till_date))

    if fetch_from_date is not None:
        params["min_unix_timestamp"] = str(fetch_from_date)
    if fetch_till_date is not None:
        params["max_unix_timestamp"] = str(fetch_till_date)
    if min_average_badge is not None:
        params["min_average_badge"] = str(min_average_badge)
    if limit is not None:
        params["limit"] = str(limit)

    query = urlencode(params)
    full_url = f"{path}?{query}" if query else path
    
    print(f"Making request to: {full_url}")
    response = requests.get(full_url)
    if response.status_code != 200:
        print(f"Error: API request failed with status code {response.status_code}")
        print(f"URL: {full_url}")
        return {"error": f"API request failed with status code {response.status_code}"}
    return response.json()

def bulk_fetch_matches(start_date, end_date, limit=1000)->list:
    """fetches a batch of matches, 1 day per pull, list of jsons, 1 element per batch.

    batch return is unnormalized, 'players' contains a df of each matches 'players'
    
    min_days = Oldest time barrier (more days ago)
    max_days = Newest time barrier (fewer days ago)
    
    """

    batch_matches = []
    
    # Calculate the starting day (defaults to today)
    current_start = datetime.strptime(start_date, "%Y-%m-%d")
    current_end = datetime.strptime(end_date, "%Y-%m-%d")
    
    total_batches = (current_end - current_start).days + 1
    batch_num = 1

    while current_start <= current_end:
        fetch_from = current_start.strftime("%Y-%m-%d")
        fetch_till = (current_start + timedelta(days=1)).strftime("%Y-%m-%d")

        logging.debug(f"\nBatch {batch_num} of {total_batches}: fetching day from {fetch_from} to {fetch_till}")

        # Note: API expects min_unix_timestamp to be OLDER than max_unix_timestamp
        fetched_matches = fetch_match_data(
            fetch_till_date=fetch_from,  # Older timestamp (more days ago)
            fetch_from_date=fetch_till,  # Newer timestamp (fewer days ago)
            limit=limit
        )
        
        # Check if there was an error in the API response
        if "error" in fetched_matches:
            print(f"Error encountered during batch {batch_num+1}. Skipping this batch.")
        else:
            batch_matches.append(fetched_matches)
            
        # Move backward in time by one day
        current_start += timedelta(days=1)
        batch_num += 1

    return batch_matches

In [32]:
# Fetch batch matches
batch_matches = bulk_fetch_matches(
    start_date=start_date,
    end_date=end_date, 
    limit=2000
)

DEBUG:__main__:
Batch 1 of 3: fetching day from 2025-08-19 to 2025-08-20
DEBUG:__main__:Fetching match data..
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.deadlock-api.com:443


Time range: 2025-08-20 days ago to 2025-08-19 days ago
Unix timestamps: 1755647999 to 1755648000. types:
1755648000 <class 'int'>
1755647999 <class 'int'>
Making request to: https://api.deadlock-api.com/v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755648000&max_unix_timestamp=1755647999&min_average_badge=100&limit=2000


DEBUG:urllib3.connectionpool:https://api.deadlock-api.com:443 "GET /v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755648000&max_unix_timestamp=1755647999&min_average_badge=100&limit=2000 HTTP/1.1" 404 None
DEBUG:__main__:
Batch 2 of 3: fetching day from 2025-08-20 to 2025-08-21
DEBUG:__main__:Fetching match data..
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.deadlock-api.com:443


Error: API request failed with status code 404
URL: https://api.deadlock-api.com/v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755648000&max_unix_timestamp=1755647999&min_average_badge=100&limit=2000
Error encountered during batch 2. Skipping this batch.
Time range: 2025-08-21 days ago to 2025-08-20 days ago
Unix timestamps: 1755734399 to 1755734400. types:
1755734400 <class 'int'>
1755734399 <class 'int'>
Making request to: https://api.deadlock-api.com/v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755734400&max_unix_timestamp=1755734399&min_average_badge=100&limit=2000


DEBUG:urllib3.connectionpool:https://api.deadlock-api.com:443 "GET /v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755734400&max_unix_timestamp=1755734399&min_average_badge=100&limit=2000 HTTP/1.1" 404 None
DEBUG:__main__:
Batch 3 of 3: fetching day from 2025-08-21 to 2025-08-22
DEBUG:__main__:Fetching match data..
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.deadlock-api.com:443


Error: API request failed with status code 404
URL: https://api.deadlock-api.com/v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755734400&max_unix_timestamp=1755734399&min_average_badge=100&limit=2000
Error encountered during batch 3. Skipping this batch.
Time range: 2025-08-22 days ago to 2025-08-21 days ago
Unix timestamps: 1755820799 to 1755820800. types:
1755820800 <class 'int'>
1755820799 <class 'int'>
Making request to: https://api.deadlock-api.com/v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755820800&max_unix_timestamp=1755820799&min_average_badge=100&limit=2000


DEBUG:urllib3.connectionpool:https://api.deadlock-api.com:443 "GET /v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755820800&max_unix_timestamp=1755820799&min_average_badge=100&limit=2000 HTTP/1.1" 404 None


Error: API request failed with status code 404
URL: https://api.deadlock-api.com/v1/matches/metadata?include_player_info=true&min_unix_timestamp=1755820800&max_unix_timestamp=1755820799&min_average_badge=100&limit=2000
Error encountered during batch 4. Skipping this batch.
