# Horse Recent Form & Performance Indicators


Incorporate Recent Performance Indicators

LEFT JOIN LATERAL (
    SELECT
        rr.race_date AS last_race_date,
        rr.finish_position AS last_finish_position,
        rr.speed_figure AS last_speed_figure
    FROM
        race_results rr
    WHERE
        rr.axciskey = vr2.axciskey
        AND rr.race_date < vr2.race_date
    ORDER BY
        rr.race_date DESC
    LIMIT 1
) lr ON TRUE



lr.last_race_date,
lr.last_finish_position,
lr.last_speed_figure,
(vr2.race_date - lr.last_race_date) AS days_since_last_race

In [1]:
# Environment setup

import logging
import os
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import text
import geopandas as gpd
from datetime import datetime
import configparser
from src.data_ingestion.ingestion_utils import (
    get_db_connection, update_tracking, load_processed_files
)
from src.data_ingestion.eqb_ppData import process_pluspro_data
from src.data_ingestion.eqb_resultsCharts import process_resultscharts_data
from src.data_ingestion.tpd_datasets import (
    process_tpd_sectionals_data,
    process_tpd_gpsdata_data
)

# Load the configuration file
config = configparser.ConfigParser()
config.read('/home/exx/myCode/horse-racing/FoxRiverAIRacing/config.ini')

# Set up logging for consistent logging behavior in Notebook
logging.basicConfig(level=logging.INFO)

# Retrieve database credentials from config file
# Retrieve database credentials from config file
db_host = config['database']['host']
db_port = config['database']['port']
db_name = config['database']['dbname']  # Corrected from 'name' to 'dbname'
db_user = config['database']['user']

# Establish connection using get_db_connection
conn = get_db_connection(config)

# Create the SQLAlchemy engine
engine = create_engine(f'postgresql+psycopg2://{db_user}@{db_host}:{db_port}/{db_name}')


In [7]:
query = """
WITH horse_races AS (
    SELECT
        r.axciskey,
        r.race_date,
        re.official_fin AS finish_position,
        re.speed_rating,
        ROW_NUMBER() OVER (PARTITION BY r.axciskey ORDER BY r.race_date) AS race_seq
    FROM
        v_runners r
    JOIN
        v_results_entries re ON re.course_cd = r.course_cd
                             AND re.race_date = r.race_date
                             AND re.race_number = r.race_number
                             AND re.program_num = r.saddle_cloth_number
    WHERE
        re.official_fin IS NOT NULL
)
INSERT INTO horse_recent_form (
    axciskey, race_date, last_race_date, last_finish_position,
    last_speed_figure, days_since_last_race, avg_finish_position_last_3, top3_finishes_last_3
)
SELECT
    hr.axciskey,
    hr.race_date,
    LAG(hr.race_date) OVER w AS last_race_date,
    LAG(hr.finish_position) OVER w AS last_finish_position,
    LAG(hr.speed_rating) OVER w AS last_speed_figure,
    (hr.race_date - LAG(hr.race_date) OVER w) AS days_since_last_race,
    AVG(hr.finish_position) OVER w3 AS avg_finish_position_last_3,
    SUM(CASE WHEN hr.finish_position <= 3 THEN 1 ELSE 0 END) OVER w3 AS top3_finishes_last_3
FROM
    horse_races hr
WINDOW
    w AS (PARTITION BY hr.axciskey ORDER BY hr.race_date),
    w3 AS (PARTITION BY hr.axciskey ORDER BY hr.race_date ROWS BETWEEN 3 PRECEDING AND 1 PRECEDING)
ORDER BY
    hr.axciskey, hr.race_date
ON CONFLICT (axciskey, race_date) DO UPDATE SET
    last_race_date = EXCLUDED.last_race_date,
    last_finish_position = EXCLUDED.last_finish_position,
    last_speed_figure = EXCLUDED.last_speed_figure,
    days_since_last_race = EXCLUDED.days_since_last_race,
    avg_finish_position_last_3 = EXCLUDED.avg_finish_position_last_3,
    top3_finishes_last_3 = EXCLUDED.top3_finishes_last_3;
"""


try:
    with engine.begin() as connection:
        connection.execute(text(query))
        logging.info("Data inserted/updated successfully.")
except Exception as e:
    print(f"An error occurred: {e}")
    traceback.print_exc()
    traceback.print_exc()

In [10]:
# View sample from jock_accum_stats
query = """
SELECT
    axciskey,
    race_date,
    last_race_date,
    last_finish_position,
    last_speed_figure,
    days_since_last_race,
    avg_finish_position_last_3
    
FROM
    horse_recent_form
ORDER BY
    race_date
LIMIT 10;
"""

df = pd.read_sql_query(query, engine)

# Display the DataFrame
df.head()

Unnamed: 0,axciskey,race_date,last_race_date,last_finish_position,last_speed_figure,days_since_last_race,avg_finish_position_last_3
0,049052050051055061056060,2022-01-01,,,,,
1,049052050051057053058064,2022-01-01,,,,,
2,049051050053058056061064,2022-01-01,,,,,
3,049052050051054062058058,2022-01-01,,,,,
4,049051050051053058058063,2022-01-01,,,,,
