# Benchmark

In [8]:
import pandas as pd
import psycopg2
from psycopg2 import Error
import sqlalchemy
from sqlalchemy import create_engine
import matplotlib as plt
import pickle
import os

In [9]:
# Output directory, relative to the working directory, used when the
# benchmark is pickled at the end of the notebook.
DATA_DIR = "../data"

In [10]:
# Postgres connection settings.
# Each value is read from an environment variable of the same name so that
# credentials do not have to live in the notebook; the fallbacks are the
# local development values this notebook has always used.
POSTGRES_ADDRESS = os.environ.get('POSTGRES_ADDRESS', 'localhost')
POSTGRES_PORT = os.environ.get('POSTGRES_PORT', '5432')
POSTGRES_USERNAME = os.environ.get('POSTGRES_USERNAME', 'postgres')
POSTGRES_PASSWORD = os.environ.get('POSTGRES_PASSWORD', 'postgres')
POSTGRES_DBNAME = os.environ.get('POSTGRES_DBNAME', 'greyhounds')

# SQLAlchemy URL used by pandas.
# NOTE: a username or password containing characters such as '@' or '/'
# must be URL-encoded before being placed in this string.
postgres_str = f"postgresql://{POSTGRES_USERNAME}:{POSTGRES_PASSWORD}@{POSTGRES_ADDRESS}:{POSTGRES_PORT}/{POSTGRES_DBNAME}"

# Create the connection
cnx = create_engine(postgres_str)

# Keyword/value connection string for direct psycopg2 connections, derived
# from the same constants so it cannot drift from the SQLAlchemy URL above.
cnx_str = (
    f"dbname='{POSTGRES_DBNAME}' user='{POSTGRES_USERNAME}' "
    f"host='{POSTGRES_ADDRESS}' port='{POSTGRES_PORT}' "
    f"password='{POSTGRES_PASSWORD}'"
)

Identifies the bookies' favourite for each race.
 In the case of joint favourites, we take the dog whose name comes first alphabetically 
 as a proxy for a random selection.

In [2]:
def favourite_1(connect_str="dbname='greyhounds' user='postgres' host='localhost' password='postgres'"):
    """Create the intermediate ``favourite_1`` table from ``positions``.

    The table is a copy of every ``positions`` row plus two flag columns:

    * ``favourite``       -- 1 when ``sp`` matches the regex ``'F'``, else 0.
    * ``joint_favourite`` -- 1 when ``sp`` matches the regex ``'JF'``, else 0.

    Because ``'F'`` also matches inside ``'JF'``, every joint favourite is
    flagged as a favourite too. Any existing ``favourite_1`` table is dropped
    first, and an index on ``race_id`` is created afterwards.

    Parameters
    ----------
    connect_str : str, optional
        psycopg2 connection string. Defaults to the local ``greyhounds``
        database that this notebook has always used.

    Returns
    -------
    None
        Progress and any error are reported with ``print``; exceptions are
        caught and printed rather than propagated.
    """
    # Bind both handles up front so the ``finally`` clause cannot raise
    # UnboundLocalError (and hide the real error) when connecting fails.
    conn_psql = None
    cursor = None

    try:
        print("Atempting to create the favourite_1 table")

        # Connect to database
        conn_psql = psycopg2.connect(connect_str)
        cursor = conn_psql.cursor()

        # Create table.
        # The quoted '1' in the CASE branches is resolved to an integer by
        # PostgreSQL because the ELSE branch is the integer 0.
        cursor.execute("""
        DROP TABLE IF EXISTS favourite_1;
        CREATE TABLE favourite_1 AS
        SELECT *, 
        CASE
            WHEN sp ~ 'F' THEN '1'
            ELSE 0
        END as favourite,
        CASE
            WHEN sp ~ 'JF' THEN '1'
            ELSE 0
        END as joint_favourite
        FROM positions;
        CREATE INDEX idx_favourite_1 ON favourite_1(race_id);
            
            """)
        conn_psql.commit()
        print("The favourite_1 table has been created")

    except Exception as error:
        # psycopg2.DatabaseError derives from Exception, so this one clause
        # covers database failures as well as everything else.
        print("ERROR occured whilst creating the favourite_1 table")
        print(error)

    finally:
        # Release only what was actually acquired.
        if cursor is not None:
            cursor.close()
        if conn_psql is not None:
            conn_psql.close()

In [3]:
def favourite(connect_str="dbname='greyhounds' user='postgres' host='localhost' password='postgres'"):
    """Create the ``favourite`` table: one favourite dog per race.

    Rows of ``favourite_1`` with ``favourite = 1`` are left-joined to ``dogs``
    on ``dog_id`` and numbered within each ``race_id`` in ``dog_name`` order.
    Only the first row per race is kept, so joint favourites are resolved to
    the dog whose name sorts first. The resulting table holds ``race_id``,
    ``dog_id``, ``favourite``, ``box`` and ``fin`` and is indexed on
    ``(race_id, dog_id)``. Any existing ``favourite`` table is dropped first.

    Requires the ``favourite_1`` and ``dogs`` tables to exist.

    Parameters
    ----------
    connect_str : str, optional
        psycopg2 connection string. Defaults to the local ``greyhounds``
        database that this notebook has always used.

    Returns
    -------
    None
        Progress and any error are reported with ``print``; exceptions are
        caught and printed rather than propagated.
    """
    # Bind both handles up front so the ``finally`` clause cannot raise
    # UnboundLocalError (and hide the real error) when connecting fails.
    conn_psql = None
    cursor = None

    try:
        print("Atempting to create the favourite table")

        # Connect to database
        conn_psql = psycopg2.connect(connect_str)
        cursor = conn_psql.cursor()

        # Create table
        cursor.execute("""
        DROP TABLE IF EXISTS favourite;
        CREATE TABLE favourite AS
        SELECT race_id, dog_id, favourite, box, fin
        FROM
            (SELECT f.*, d.dog_name, ROW_NUMBER() OVER (PARTITION BY race_id ORDER BY d.dog_name) AS rn
            FROM favourite_1 f
            LEFT JOIN dogs d ON
                d.dog_id = f.dog_id
            WHERE f.favourite = 1) subquery
        WHERE subquery.rn = 1;
        CREATE INDEX idx_favourite ON favourite(race_id, dog_id);
            """)
        conn_psql.commit()
        print("The favourite table has been created")

    except Exception as error:
        # psycopg2.DatabaseError derives from Exception, so this one clause
        # covers database failures as well as everything else.
        print("ERROR occured whilst creating the favourite table")
        print(error)

    finally:
        # Release only what was actually acquired.
        if cursor is not None:
            cursor.close()
        if conn_psql is not None:
            conn_psql.close()

In [4]:
def favourite_clear_up(connect_str="dbname='greyhounds' user='postgres' host='localhost' password='postgres'"):
    """Drop the intermediate ``favourite_1`` table.

    Uses ``DROP TABLE IF EXISTS``, so calling this when the table is already
    gone is not an error.

    Parameters
    ----------
    connect_str : str, optional
        psycopg2 connection string. Defaults to the local ``greyhounds``
        database that this notebook has always used.

    Returns
    -------
    None
        Progress and any error are reported with ``print``; exceptions are
        caught and printed rather than propagated.
    """
    # Bind both handles up front so the ``finally`` clause cannot raise
    # UnboundLocalError (and hide the real error) when connecting fails.
    conn_psql = None
    cursor = None

    try:
        print("Attempting to delete the intermediate tables used to create the favourite table")

        # Connect to database
        conn_psql = psycopg2.connect(connect_str)
        cursor = conn_psql.cursor()

        # Drop the intermediate table
        cursor.execute("""
        DROP TABLE IF EXISTS favourite_1;
            """)

        conn_psql.commit()
        print("The intermediate tables used to create the favourite table have been deleted")

    except Exception as error:
        # psycopg2.DatabaseError derives from Exception, so this one clause
        # covers database failures as well as everything else.
        print("ERROR occured whilst deleting the intermediate tables")
        print(error)

    finally:
        # Release only what was actually acquired.
        if cursor is not None:
            cursor.close()
        if conn_psql is not None:
            conn_psql.close()

In [5]:
# Run the pipeline in dependency order: build the flagged intermediate
# table, derive the per-race favourite from it, then drop the intermediate.
for pipeline_step in (favourite_1, favourite, favourite_clear_up):
    pipeline_step()

Atempting to create the favourite_1 table
The favourite_1 table has been created
Atempting to create the favourite table
The favourite table has been created
Attempting to delete the intermediate tables used to create the favourite table
The intermediate tables used to create the favourite table have been deleted


In [15]:
# Load the favourite's `box` for every race, exposed as the `benchmark`
# column and indexed by race_id.
benchmark_sql = '''
        SELECT race_id, box as benchmark
        FROM favourite 
        '''
benchmark = pd.read_sql_query(sql=benchmark_sql, con=cnx, index_col='race_id')
benchmark

Unnamed: 0_level_0,benchmark
race_id,Unnamed: 1_level_1
16022,1
16809,2
16895,4
16896,5
16897,2
...,...
4642020,4
4642021,3
4642022,6
4642023,3


# Store Benchmark

In [16]:
# Write through a context manager so the file handle is flushed and closed
# as soon as the dump finishes, instead of being left open.
with open(os.path.join(DATA_DIR, 'benchmark.p'), 'wb') as benchmark_file:
    pickle.dump(benchmark, benchmark_file)