In [1]:
from racecards import fetch_from_api
from dotenv import load_dotenv
import os
import json
import requests



In [2]:
# Read the local .env file so the API settings below are available as
# environment variables.
load_dotenv()

# NOTE(review): USERNAME is commonly pre-set by the OS/shell, and load_dotenv()
# presumably leaves already-defined variables alone unless override=True is
# passed -- confirm the value picked up here is the API login from .env.
result = fetch_from_api(
    url=os.environ.get("API_URL"),
    username=os.environ.get("USERNAME"),
    password=os.environ.get("PASSWORD"),
    params={"region_codes": "gb"},
)

In [22]:
# HTTP basic-auth credentials, read from the environment.
auth = (os.getenv("USERNAME"), os.getenv("PASSWORD"))

# Query for today's racecards; no region filter is sent.
# To filter by region, add e.g. "region_codes": "gb" to this dict.
params = {"day": "today"}

# Make the request. The timeout (seconds) stops the cell from blocking forever
# when the server never answers -- requests applies no timeout by default.
response = requests.get(
    os.getenv("API_URL"), auth=auth, params=params, timeout=30
)

# Raise an exception if the request was unsuccessful
response.raise_for_status()

response

<Response [200]>

In [None]:
# Display the decoded JSON body of the racecards response for inspection.
response.json()

In [23]:
import pandas as pd

# Columns kept in the flattened table: race-level fields first, then the
# runner-level fields that come from expanding `runners`.
selected_columns = [
    'race_id', 'course_id', 'off_dt', 'distance_f',
    'region', 'type', 'age_band', 'field_size', 'weather', 'going',
    'surface', 'big_race', 'is_abandoned',
    'horse_id', 'horse', 'age', 'sex_code', 'horse_region',
    'dam_id', 'sire_id', 'damsire_id', 'trainer_id', 'owner_id',
    'lbs', 'form', 'jockey_id'
]

# Per-racecard frames are collected here and concatenated once after the loop;
# growing a DataFrame with pd.concat on every iteration re-copies all earlier
# rows each time.
racecard_frames = []

for racecard in response.json()["racecards"]:
    # Presumably one row per entry of the racecard's `runners` list, with the
    # race-level values repeated on each row -- verify against the payload.
    racecard_df = pd.DataFrame.from_dict(racecard)

    # Expand each runner dict into its own columns. The runner's "region" is
    # renamed so it does not collide with the race-level "region" column.
    runners_flat = pd.json_normalize(racecard_df['runners']).rename(
        columns={"region": "horse_region"}
    )

    # Explicit copy: the column assignments below must not target a slice.
    df_final = pd.concat(
        [racecard_df.drop(columns=['runners']), runners_flat], axis=1
    )[selected_columns].copy()

    df_final["is_abandoned"] = df_final["is_abandoned"].astype(int)

    # Exact-match flags only: a going such as "Good To Soft" sets neither.
    going = df_final["going"].str.lower()
    df_final["going_soft"] = (going == "soft").astype(int)
    df_final["going_good"] = (going == "good").astype(int)
    df_final = df_final.drop(columns=['going'])

    racecard_frames.append(df_final)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# response contained no racecards.
total_df = (
    pd.concat(racecard_frames, ignore_index=True)
    if racecard_frames
    else pd.DataFrame()
)

In [19]:
# Write the flattened runner table to total_df.csv (relative path), omitting
# the DataFrame index.
total_df.to_csv("total_df.csv", index=False)

# Fetch Results

In [35]:
# Make the request for race results, using the `auth` credentials tuple
# defined in the racecards request cell. The timeout (seconds) prevents the
# cell from blocking forever if the server never answers -- requests applies
# no timeout by default.
results_response = requests.get(os.getenv("API_RES_URL"), auth=auth, timeout=30)

# Raise an exception if the request was unsuccessful
results_response.raise_for_status()

results_response

<Response [200]>

In [36]:
# Display the decoded JSON body of the results response for inspection.
results_response.json()

{'results': [{'race_id': 'rac_11685778',
   'date': '2025-07-28',
   'region': 'GB',
   'course': 'Yarmouth',
   'course_id': 'crs_2704',
   'off': '3:15',
   'off_dt': '2025-07-28T15:15:00+01:00',
   'race_name': 'Free Digital Racecard At raceday-ready.com Handicap',
   'type': 'Flat',
   'class': 'Class 5',
   'pattern': '',
   'rating_band': '0-75',
   'age_band': '3yo+',
   'sex_rest': '',
   'dist': '1m6f',
   'dist_y': '3097',
   'dist_m': '2832',
   'dist_f': '14f',
   'going': 'Good To Soft',
   'surface': 'Turf',
   'jumps': '',
   'runners': [{'horse_id': 'hrs_46233838',
     'horse': 'String Of Pearls (GB)',
     'sp': '4/6F',
     'sp_dec': '1.67',
     'number': '4',
     'position': '1',
     'draw': '1',
     'btn': '0',
     'ovr_btn': '0',
     'age': '3',
     'sex': 'F',
     'weight': '9-5',
     'weight_lbs': '131',
     'headgear': '',
     'time': '3:3.83',
     'or': '73',
     'rpr': '–',
     'tsr': '–',
     'prize': '4396.56',
     'jockey': 'Luke Morris',
 

In [None]:
# Flatten the results payload to one record per (race, runner). Building the
# record list first and constructing the frame once avoids re-copying the
# accumulated rows on every race, and a race with an empty `runners` list
# simply contributes no rows.
result_records = [
    {
        'race_id': race_result['race_id'],
        'horse_id': runner['horse_id'],
        'position': runner['position'],
    }
    for race_result in results_response.json()['results']
    for runner in race_result['runners']
]

# Explicit columns keep the frame well-formed even when there are no records.
final_results = pd.DataFrame(
    result_records, columns=['race_id', 'horse_id', 'position']
)

# Positions arrive as strings; anything that cannot be parsed as a number
# becomes NaN.
final_results['position'] = pd.to_numeric(final_results['position'], errors='coerce')

# Model Training Preparation

In [25]:
from sklearn.preprocessing import LabelEncoder

# Drop ID columns for training data
id_columns = [
    'race_id', 'course_id', 'horse_id', 'horse', 'dam_id', 'sire_id',
    'damsire_id', 'trainer_id', 'owner_id', 'jockey_id',
]
training_df = total_df.drop(columns=id_columns)

# Keep only races that were not abandoned, then drop the race-level columns
# that are no longer needed.
training_df = training_df[training_df['is_abandoned'] == 0]
training_df = training_df.drop(columns=['off_dt', 'big_race', 'is_abandoned'])

# Convert age_band to minimum age: keep the first run of digits
# (e.g. '3yo+' -> '3'); the numeric conversion happens below.
training_df['age_band'] = training_df['age_band'].str.extract(r'(\d+)', expand=False)
training_df = training_df.rename(columns={'age_band': 'min_age'})

# Convert horse_region to horse_is_local: 1 when the horse's region equals the
# race's region, else 0.
training_df['horse_is_local'] = (training_df['horse_region'] == training_df['region']).astype(int)
training_df = training_df.drop(columns=['horse_region'])

# Convert numeric columns; unparseable values become NaN.
for col in ['distance_f', 'age', 'lbs', 'field_size', 'min_age']:
    training_df[col] = pd.to_numeric(training_df[col], errors='coerce')

# Label encode sex_code, surface, weather, and type.
# Each column gets its own fitted encoder, stored in `label_encoders`, so the
# code -> label mapping can be recovered or applied to new data later.
# Refitting a single shared encoder would keep only the last column's classes.
# astype(str) stringifies missing values, so they are encoded as a label too.
label_encoders = {}
for col in ['sex_code', 'surface', 'weather', 'type']:
    label_encoder = LabelEncoder()
    training_df[col] = label_encoder.fit_transform(training_df[col].astype(str))
    label_encoders[col] = label_encoder

# Encode categorical variables: one-hot encode the race region, dropping the
# first level.
training_df = pd.get_dummies(training_df, columns=['region'], drop_first=True)

training_df.head()

Unnamed: 0,distance_f,type,min_age,field_size,weather,surface,age,sex_code,lbs,form,going_soft,going_good,horse_is_local,region_IRE
0,5.5,0,2,4,0,1,2,0,133,63.0,0,1,0,False
1,5.5,0,2,4,0,1,2,0,133,,0,1,0,False
2,5.5,0,2,4,0,1,2,0,133,,0,1,0,False
3,5.5,0,2,4,0,1,2,1,128,6.0,0,1,1,False
4,5.5,0,2,4,0,1,2,1,128,,0,1,0,False


In [27]:
# Convert form into a score 

import numpy as np

def form_to_score(form):
    """Convert a form string into a weighted score between 0 and 1.

    The string is read left to right. Every character that represents a run
    is scored and weighted; at most ``len(weights)`` (5) runs are counted.

    * Digits ``1``-``9`` are finishing positions: 1st earns ``max_points``,
      each place below earns one point fewer, never less than 0.
    * Digit ``0`` earns 0 points. Feeding it through the position formula
      would award ``max_points + 1`` and push the score above 1.
      (Presumably ``0`` denotes an unplaced finish -- confirm with the feed.)
    * ``P``, ``F`` and ``U`` (any case) count as a run worth 0 points.
    * Any other character (e.g. ``-`` or ``/``) is skipped and does not use
      up a weight, so the weights applied always match the weights used for
      normalisation.

    NOTE(review): the leftmost counted run receives the largest weight. If
    the feed lists the oldest run first, the string would need reversing for
    "most recent = higher weight" to hold -- confirm against the API docs.

    Args:
        form: Form figures as a string, e.g. ``"63"`` or ``"1-2P4"``.

    Returns:
        float: weighted score in ``[0, 1]``, or ``np.nan`` when ``form`` is
        not a string, is blank, or contains no counted runs.
    """
    if not isinstance(form, str) or form.strip() == "":
        return np.nan

    max_points = 8  # 1st place worth 8 points

    # Weights per counted run, in reading order (first counted run = highest).
    weights = [1.5, 1.2, 1.0, 0.8, 0.6]  # for up to 5 runs
    non_finish_codes = {'P', 'F', 'U'}

    points = []
    for ch in form:
        if len(points) == len(weights):
            break  # already counted the maximum number of runs

        if ch.isdecimal():  # isdecimal() guarantees int(ch) succeeds
            pos = int(ch)
            # score for position (1st=8, 2nd=7, ...); '0' earns nothing
            score = 0 if pos == 0 else max(0, max_points - (pos - 1))
        elif ch.upper() in non_finish_codes:
            score = 0  # a run that earns no points
        else:
            continue  # separator / unknown code: not a run

        # Weight by run index, not character index, so skipped characters
        # cannot shift a run onto a smaller weight.
        points.append(score * weights[len(points)])

    if not points:
        return np.nan

    # Normalize: divide by (max possible for count of runs considered)
    max_possible = max_points * sum(weights[:len(points)])
    return sum(points) / max_possible


In [28]:
# Score every form string and store the result in a new `form_score` column,
# then remove the raw `form` column from the same frame.
training_df['form_score'] = training_df['form'].apply(form_to_score)
del training_df['form']