# XC Result Collector

This script collects events and event results from https://www.athletic.net/

It is focused on Massachusetts High School XC

# Config

In [12]:
import os
import requests
import datetime

import pandas as pd
import numpy as np


In [13]:
# Location of the data directory (relative to the working directory) and the
# names of the CSV files kept inside it.
path_data = os.path.join(os.pardir, 'data')
fn_events, fn_athletes, fn_results = (
    'db_events.csv',
    'db_athletes.csv',
    'db_results.csv',
)

In [14]:
# When True, the merged events table is written back to the events CSV at the
# end of the notebook; set to False to run without touching the file.
FLAG_SAVE = True

# Load preexisting data

## Define schemas

In [15]:
# Schema

# Column name -> target dtype for the events table.
#
# Used in two ways in this notebook: to build an empty, correctly typed
# DataFrame when no events CSV exists yet, and as the schema handed to
# df_fix_types() to coerce loaded / freshly fetched rows.
# Values are the builtin types int / str / float / bool or the string
# 'datetime64[ns]' -- exactly the values df_fix_types() compares against.
dict_schema_events = {
    'IDMeet': int,
    'Sport': str,
    'MeetName': str,
    'SeasonId': int,
    'StartDate': 'datetime64[ns]',
    'EndDate': 'datetime64[ns]',
    'Virtual': bool,
    'LocationName': str,
    'StreetAddress': str,  # nullable
    'City': str,
    'PostalCode': str,
    'State': str,
    'OwnerID': int,
    'CalendarLock': int,
    'UCalendarLock': int,
    'RegEnd': 'datetime64[ns]',  # nullable
    'HasResults': int,
    'LevelMask': int,
    'HostName': str,  # nullable
    'MascotUrl': str,  # nullable
    'Lat': float,
    'Long': float,
    'score': float,
    'CalCount': int,
    'OffDays': str,  # nullable, appears to be JSON string
    'Data': str,  # nullable
    'rsUrl': str,  # nullable
    'LiveID': int,  # nullable
    'LivePublished': bool,
    'VideoURL': str,  # nullable
    'Website': str,
    'Country': str,
    
    # Additional columns:
    # (presumably not returned by the API -- create_events_df() fills any
    # datetime column missing from the response with the current time)
    
    'dt_retrieved': 'datetime64[ns]'
}

# Results

# Column name -> dtype for the results table.
#
# Used to build an empty, typed df_results when no results CSV exists yet.
# NOTE(review): the numeric entries are NumPy types (np.int32 / np.float64),
# which do not compare equal to the builtin int / float that df_fix_types()
# checks for, and 'timedelta64[ns]' has no branch there either. Passing this
# schema to df_fix_types() would therefore leave those columns unconverted --
# confirm before reusing that helper for results.
dict_schema_results ={
    'Date': 'datetime64[ns]',
    'EventID': np.int32, 
    'Gender': str, 
    'RaceID': np.int32, 
    'Location': np.int32,
    'Grade': np.int32,
    'AthleteID': np.int32,
    'First Name': str,
    'Last Name': str,
    'Name': str,
    'Time': 'timedelta64[ns]',
    'School': str,
    'Seconds': np.float64, 
    'Minutes': np.float64,
    'Team Count': np.int32, 
    'Team Position': np.int32, 
    'Place': np.int32, 
    'Points': np.int32
}

## Load data

In [16]:
# Events
#
# Load the saved events table when the CSV exists and coerce its columns to
# the schema dtypes; otherwise start from an empty frame whose columns and
# dtypes follow dict_schema_events.
# NOTE: df_fix_types() is defined in a later cell of this notebook; that cell
# must have been executed before this one.

pe = os.path.join(path_data, fn_events)

if os.path.isfile(pe):
    df_events = pd.read_csv(pe)
    df_events = df_fix_types(df_events, dict_schema_events)
else:
    df_events = pd.DataFrame({col: pd.Series(dtype=dtype) for col, dtype in dict_schema_events.items()})

# Results
#
# Same pattern for the results table: read the CSV if present, else create an
# empty frame typed according to dict_schema_results.

pr = os.path.join(path_data, fn_results)

if os.path.isfile(pr):
    # Results get their own frame so the events loaded above are not
    # overwritten and df_results is defined on both branches.
    df_results = pd.read_csv(pr)
else:
    df_results = pd.DataFrame({col: pd.Series(dtype=dtype) for col, dtype in dict_schema_results.items()})
    


In [17]:
df_events

Unnamed: 0,IDMeet,Sport,MeetName,SeasonId,StartDate,EndDate,Virtual,LocationName,StreetAddress,City,...,CalCount,OffDays,Data,rsUrl,LiveID,LivePublished,VideoURL,Website,Country,dt_retrieved


In [18]:
def _coerce_bool_series(series):
    """Return *series* as plain ``bool``, reading 'true'/'false' text as booleans."""
    if pd.api.types.is_object_dtype(series) or pd.api.types.is_string_dtype(series):
        # A bare astype(bool) treats every non-empty string as True, so the
        # text 'False' would become True. Translate the textual spellings
        # first; any other value is left for astype(bool) to handle as before.
        text_to_bool = {'true': True, 'false': False}
        values = [
            text_to_bool.get(value.strip().lower(), value) if isinstance(value, str) else value
            for value in series
        ]
        series = pd.Series(values, index=series.index, dtype=object)
    return series.astype(bool)


def df_fix_types(df, dict_schema):
    """
    Coerce the columns of a DataFrame to the dtypes named in a schema.

    Args:
        df: pandas.DataFrame to convert. It is modified in place.
        dict_schema: dict mapping column names to a target type. Recognised
            values are the builtins ``str``, ``int``, ``float``, ``bool`` and
            the string ``'datetime64[ns]'``; any other value is ignored.

    Returns:
        pandas.DataFrame: the same ``df`` object, after conversion.

    Raises:
        Exception: whatever the failing conversion raised; the column name is
            printed before the exception is re-raised.

    Conversions:
        str   -> pandas 'string' dtype
        int   -> nullable 'Int64' (unparseable values become <NA>)
        float -> numeric (unparseable values become NaN)
        bool  -> bool, with 'true'/'false' text (any case) read as booleans
        'datetime64[ns]' -> parsed with ``format='ISO8601'``

    Schema columns that are absent from ``df`` are skipped.
    """
    for col, dtype in dict_schema.items():
        if col not in df.columns:
            continue
        try:
            if dtype == str:
                df[col] = df[col].astype('string')
            elif dtype == int:
                # Go through to_numeric so bad values become missing instead
                # of raising, then use the nullable integer dtype to hold them.
                df[col] = pd.to_numeric(df[col], errors='coerce').astype('Int64')
            elif dtype == float:
                df[col] = pd.to_numeric(df[col], errors='coerce')
            elif dtype == bool:
                df[col] = _coerce_bool_series(df[col])
            elif dtype == 'datetime64[ns]':
                df[col] = pd.to_datetime(df[col], format='ISO8601')
        except Exception as e:
            print(f"Error converting column {col} to {dtype}: {str(e)}")
            raise

    return df

In [19]:
def create_events_df(response, schema):
    """
    Build a typed events DataFrame from an Events API response.

    Args:
        response: response object whose ``.json()`` returns a mapping with an
            ``'events'`` entry (one record per event).
        schema: dict of column name -> target dtype, in the form accepted by
            ``df_fix_types``.

    Returns:
        pandas.DataFrame: one row per event, with columns coerced by
        ``df_fix_types`` according to ``schema``.
    """
    payload = response.json()
    events = pd.DataFrame(payload['events'])

    # Datetime columns the schema expects but the payload did not provide
    # are filled with the current time.
    absent_datetime_cols = [
        name
        for name, kind in schema.items()
        if kind == 'datetime64[ns]' and name not in events.columns
    ]
    for name in absent_datetime_cols:
        events[name] = pd.Timestamp.now()

    return df_fix_types(events, schema)


In [20]:
#def add_events(df_events, state, dt_start, dt_end, country_2='US', level=4)

# Fetch the events listed for one state and date range from the athletic.net
# Events endpoint and turn the response into a typed DataFrame
# (df_events_new). On a non-200 response the status and body are printed and
# df_events_new is NOT (re)assigned.

country_2 = 'US'
state_2 = 'MA'
state_name = 'Massachusetts'

# Inclusive date window sent to the API, as ISO dates.
dt_start = '2024-11-01'
dt_end = '2024-11-30'

url = 'https://www.athletic.net/api/v1/Event/Events'

# Seconds to wait for the server before giving up. Without a timeout,
# requests waits indefinitely and a stalled connection hangs the notebook.
request_timeout = 30

params = {
    "start":dt_start,
    "end":dt_end,
    "levelMask":0,
    "sportMask":0,
    "country":country_2,
    "state":state_2,
    "location":"",
    "distanceKM":0,
    "filterTerm":""}

# Make the POST request
headers = {
    'content-type': 'application/json',
    'authority': 'www.athletic.net',
    'accept': 'application/json, text/plain, */*',
    'accept-encoding': 'gzip, deflate, br, zstd',
    'accept-language': 'en-US,en;q=0.9',
    
    # Remaining browser headers, kept for reference only; they are not sent.
    # 'anet-appinfo': 'web:web:0:300',
    # 'dnt': '1',
    # 'origin': 'https://www.athletic.net',
    # 'pageguid': 'c73bf291-0b5a-4062-a158-321e0c72c0f4',
    # 'priority': 'u=1, i',
    # 'referer': 'https://www.athletic.net/events/usa/massachusetts/2024-11-16;level=4',
    # 'sec-ch-ua': '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
    # 'sec-ch-ua-mobile': '?0',
    # 'sec-ch-ua-platform': '"macOS"',
    # 'sec-fetch-dest': 'empty',
    # 'sec-fetch-mode': 'cors',
    # 'sec-fetch-site': 'same-origin',
    # 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36'
}

# The parameters are sent as a JSON body (json=), not as a query string.
response = requests.post(url, json=params, headers=headers, timeout=request_timeout)

# Check the response
if response.status_code == 200:
    print("Request successful!")
    
    df_events_new = create_events_df(response, dict_schema_events)
    
else:
    print(f"Request failed with status code: {response.status_code}")
    print(f"Response: {response.text}")


Request successful!


In [21]:
df_events


Unnamed: 0,IDMeet,Sport,MeetName,SeasonId,StartDate,EndDate,Virtual,LocationName,StreetAddress,City,...,CalCount,OffDays,Data,rsUrl,LiveID,LivePublished,VideoURL,Website,Country,dt_retrieved


In [22]:
# Merge the freshly fetched events into df_events.
#
# Steps, in order: append the new rows, drop rows that are entirely empty,
# sort by retrieval time so that for each IDMeet the most recently retrieved
# row is the one kept, then re-apply the schema dtypes.

df_events = (
    pd.concat([df_events, df_events_new], axis=0)
    .dropna(how='all')
    .sort_values('dt_retrieved', ascending=True)
    .drop_duplicates(subset=['IDMeet'], keep='last')
    .pipe(df_fix_types, dict_schema_events)
)

  df_events = pd.concat([df_events, df_events_new], axis=0)


In [23]:
# Write the merged events table back to the events CSV (path `pe`), without
# the DataFrame index column. Skipped entirely when FLAG_SAVE is falsy.
if FLAG_SAVE:
    df_events.to_csv(pe, index=False)

In [24]:
# https://www.athletic.net/api/v1/public/GetStatesCountries2
