# Horse Lifetime Accumulative Stats

Target table: `horse_accum_stats`


In [3]:
# Environment setup

import logging
import os
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import text
import geopandas as gpd
from datetime import datetime
import configparser
from src.data_ingestion.ingestion_utils import (
    get_db_connection, update_tracking, load_processed_files
)
from src.data_ingestion.eqb_ppData import process_pluspro_data
from src.data_ingestion.eqb_resultsCharts import process_resultscharts_data
from src.data_ingestion.tpd_datasets import (
    process_tpd_sectionals_data,
    process_tpd_gpsdata_data
)
import traceback

# Load the configuration file.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so verify that list here instead of failing
# further down with an opaque KeyError on config['database'].
config_path = '/home/exx/myCode/horse-racing/FoxRiverAIRacing/config.ini'
config = configparser.ConfigParser()
if not config.read(config_path):
    raise FileNotFoundError(f"Could not read configuration file: {config_path}")

# Set up logging for consistent logging behavior in Notebook
logging.basicConfig(level=logging.INFO)

# Retrieve database credentials from config file
db_host = config['database']['host']
db_port = config['database']['port']
db_name = config['database']['dbname']  # Corrected from 'name' to 'dbname'
db_user = config['database']['user']

# Establish connection using get_db_connection
# NOTE(review): `conn` is not used by the cells visible here; confirm whether a
# later cell relies on it before removing.
conn = get_db_connection(config)

# Create the SQLAlchemy engine.
# NOTE(review): the URL carries no password; presumably authentication is
# handled outside the URL (e.g. ~/.pgpass or trust auth) - TODO confirm.
engine = create_engine(f'postgresql+psycopg2://{db_user}@{db_host}:{db_port}/{db_name}')

In [5]:
# Rebuild the per-horse running totals in horse_accum_stats.
#
# Pipeline of the SQL statement below:
#   race_results     - joins v_runners to v_results_entries on course, date,
#                      race number and program/saddle-cloth number, keeping only
#                      rows that have an official finish position.
#   daily_stats      - collapses those rows to one row per
#                      (axciskey, stat_type, as_of_date) with that day's counts.
#   cumulative_stats - running sums per (axciskey, stat_type). The window frame
#                      ends at "1 PRECEDING", so each row holds the totals from
#                      strictly earlier dates and excludes the as_of_date itself.
#                      The first row of a partition has an empty frame (NULL),
#                      which the final SELECT turns into 0 via COALESCE.
#   INSERT ... ON CONFLICT - upserts on (axciskey, stat_type, as_of_date) so the
#                      statement can be re-run without creating duplicates.
#
# Each payoff term is COALESCEd individually: in SQL, `a + b + c` is NULL as
# soon as any operand is NULL, which would otherwise discard the non-NULL
# payoffs of that row when daily_earnings is summed.
query = """
WITH race_results AS (
    SELECT
        r.axciskey,
        r.stat_type,
        r.race_date AS as_of_date,
        re.official_fin AS finish_position,
        COALESCE(re.win_payoff, 0)
            + COALESCE(re.place_payoff, 0)
            + COALESCE(re.show_payoff, 0) AS total_earnings
    FROM
        v_runners r
    JOIN
        v_results_entries re ON re.course_cd = r.course_cd
                           AND re.race_date = r.race_date
                           AND re.race_number = r.race_number
                           AND re.program_num = r.saddle_cloth_number
    WHERE
        re.official_fin IS NOT NULL
),
daily_stats AS (
    SELECT
        axciskey,
        stat_type,
        as_of_date,
        COUNT(*) AS daily_starts,
        SUM(CASE WHEN finish_position = 1 THEN 1 ELSE 0 END) AS daily_win,
        SUM(CASE WHEN finish_position = 2 THEN 1 ELSE 0 END) AS daily_place,
        SUM(CASE WHEN finish_position = 3 THEN 1 ELSE 0 END) AS daily_show,
        SUM(CASE WHEN finish_position = 4 THEN 1 ELSE 0 END) AS daily_fourth,
        SUM(COALESCE(total_earnings, 0)) AS daily_earnings
    FROM
        race_results
    GROUP BY
        axciskey,
        stat_type,
        as_of_date
),
cumulative_stats AS (
    SELECT
        axciskey,
        stat_type,
        as_of_date,
        SUM(daily_starts) OVER (
            PARTITION BY axciskey, stat_type
            ORDER BY as_of_date
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS starts,
        SUM(daily_win) OVER (
            PARTITION BY axciskey, stat_type
            ORDER BY as_of_date
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS win,
        SUM(daily_place) OVER (
            PARTITION BY axciskey, stat_type
            ORDER BY as_of_date
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS place,
        SUM(daily_show) OVER (
            PARTITION BY axciskey, stat_type
            ORDER BY as_of_date
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS show,
        SUM(daily_fourth) OVER (
            PARTITION BY axciskey, stat_type
            ORDER BY as_of_date
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS fourth,
        SUM(daily_earnings) OVER (
            PARTITION BY axciskey, stat_type
            ORDER BY as_of_date
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS earnings
    FROM
        daily_stats
)
INSERT INTO horse_accum_stats (
    axciskey, stat_type, as_of_date, starts, win, place, show, fourth, earnings
)
SELECT
    axciskey,
    stat_type,
    as_of_date,
    COALESCE(starts, 0) AS starts,
    COALESCE(win, 0) AS win,
    COALESCE(place, 0) AS place,
    COALESCE(show, 0) AS show,
    COALESCE(fourth, 0) AS fourth,
    COALESCE(earnings, 0) AS earnings
FROM
    cumulative_stats
ON CONFLICT (axciskey, stat_type, as_of_date) DO UPDATE SET
    starts = EXCLUDED.starts,
    win = EXCLUDED.win,
    place = EXCLUDED.place,
    show = EXCLUDED.show,
    fourth = EXCLUDED.fourth,
    earnings = EXCLUDED.earnings;
"""


try:
    # engine.begin() commits when the block exits cleanly and rolls back if an
    # exception escapes it.
    with engine.begin() as connection:
        connection.execute(text(query))
except Exception as e:
    # Notebook-level boundary: report the failure (message plus traceback) via
    # logging, matching the success path, and let the notebook keep running.
    logging.exception("An error occurred: %s", e)
else:
    # Logged only after the transaction has actually committed.
    logging.info("Data inserted/updated successfully.")

In [6]:
# Preview a few rows of horse_accum_stats, earliest as_of_date first.
query = """
SELECT
    axciskey,
    stat_type,
    as_of_date,
    starts,
    win,
    place,
    show,
    fourth,
    win_percentage,
    itm_percentage,
    top4_percentage,
    earnings_per_start
FROM
    horse_accum_stats
ORDER BY
    as_of_date
LIMIT 10;
"""

# Run the SELECT through the SQLAlchemy engine and load the rows into pandas.
df = pd.read_sql_query(sql=query, con=engine)

# Leaving the expression last in the cell makes the notebook render it.
df.head()

Unnamed: 0,axciskey,stat_type,as_of_date,starts,win,place,show,fourth,win_percentage,itm_percentage,top4_percentage,earnings_per_start
0,049052050051055061056060,ALL_WEATHR,2022-01-01,0,0,0,0,0,,,,
1,049052050051057053058064,DIRT_RTE,2022-01-01,0,0,0,0,0,,,,
2,049051050053058056061064,ALL_WEATHR,2022-01-01,0,0,0,0,0,,,,
3,049052050051054062058058,DIRT_RTE,2022-01-01,0,0,0,0,0,,,,
4,049051050051053058058063,DIRT_RTE,2022-01-01,0,0,0,0,0,,,,
