In [1]:
import os
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Dict

import numpy as np
import pandas as pd
import polars as pl
import pyarrow
import statsmodels.api as sm
from prefect.blocks.system import Secret
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler, Normalizer

from catnip.fla_redshift import FLA_Redshift

In [2]:
def get_redshift_credentials() -> Dict:
    """Assemble the keyword arguments used to construct ``FLA_Redshift``.

    Every sensitive value is read from a Prefect ``Secret`` block at call
    time; the port, S3 subdirectory and verbosity flag are fixed literals.

    Returns:
        Dict: Redshift connection settings (``dbname``, ``host``, ``port``,
        ``user``, ``password``), S3 settings (``aws_access_key_id``,
        ``aws_secret_access_key``, ``bucket``, ``subdirectory``) and
        ``verbose``.
    """

    def _read_secret(block_name):
        # Load the named Prefect Secret block and unwrap its stored value.
        return Secret.load(block_name).get()

    redshift_settings = {
        "dbname": _read_secret("stellar-redshift-db-name"),
        "host": _read_secret("stellar-redshift-host"),
        "port": 5439,
        "user": _read_secret("stellar-redshift-user-name"),
        "password": _read_secret("stellar-redshift-password"),
    }

    s3_settings = {
        "aws_access_key_id": _read_secret("fla-s3-aws-access-key-id-east-1"),
        "aws_secret_access_key": _read_secret("fla-s3-aws-secret-access-key-east-1"),
        "bucket": _read_secret("fla-s3-bucket-name-east-1"),
        "subdirectory": "us-east-1",
    }

    return {**redshift_settings, **s3_settings, "verbose": False}

# Fetch the credentials on a single worker thread and block until they are ready.
# NOTE(review): presumably done so the Prefect secret lookup runs outside the
# notebook's own event loop — confirm before simplifying to a direct call.
with ThreadPoolExecutor(max_workers=1) as pool:
    rs_creds = pool.submit(get_redshift_credentials).result()

In [3]:
# Tickets, Nightly Suites, Turnstile

In [4]:
# Single-game ('Singles') playoff ticket sales for the 2022-23, 2023-24 and
# 2024-25 seasons, one CTE per season, unioned together.
# Each row carries the season, playoff round, event name, days between the
# purchase date and the event date (negative differences are reported as 0),
# and the summed gross revenue and paid seats.
# The result is ordered by season, round, event_name and days_out DESC; the
# running totals computed from ticket_df afterwards accumulate in this order.
q = """
WITH playoffs_22_23 AS (
    SELECT
        '2022-23' AS season,
        LEFT(RIGHT(event_name, 4), 2) AS round,
        event_name,
        CASE
            WHEN DATEDIFF('days', DATE(add_datetime), DATE(event_date)) >= 0
                THEN DATEDIFF('days', DATE(add_datetime), DATE(event_date))
            ELSE 0
        END AS days_out,
        SUM(block_purchase_price) AS gross_revenue,
        SUM(paid_seats) AS paid_seats
    FROM
        custom.cth_ticket_expanded_all_playoffs_2223
    WHERE
        event_name IN ('23POR1G1', '23POR1G2', '23POR1G3', '23POR2G1', '23POR2G2', '23POR3G1', '23POR3G2', '23POR4G1', '23POR4G2')
        AND ticket_type IN ('Singles')
    GROUP BY
        event_name,
        event_date,
        days_out,
        ticket_type
),
playoffs_23_24 AS (
    SELECT
        '2023-24' AS season,
        RIGHT(LEFT(product_description, 6), 2) AS round,
        LEFT(product_description, 8) AS event_name,
        CASE
            WHEN DATEDIFF('days', DATE(transaction_date), DATE(event_datetime)) >= 0
                THEN DATEDIFF('days', DATE(transaction_date), DATE(event_datetime))
            ELSE 0
        END AS days_out,
        SUM(gross_revenue) AS gross_revenue,
        SUM(paid_seats) AS paid_seats
    FROM
        custom.cth_v_ticket_2324_playoffs
    WHERE
        ticket_type_playoffs IN ('Singles')
        AND event_name != '23-24 Pl'
    GROUP BY
        product_description,
        days_out,
        ticket_type_playoffs
),
playoffs_24_25 AS (
    SELECT
        '2024-25' AS season,
        RIGHT(LEFT(product_description, 6), 2) AS round,
        LEFT(product_description, 8) AS event_name,
        CASE
            WHEN DATEDIFF('days', DATE(transaction_date), DATE(event_datetime)) >= 0
                THEN DATEDIFF('days', DATE(transaction_date), DATE(event_datetime))
            ELSE 0
        END AS days_out,
        SUM(gross_revenue) AS gross_revenue,
        SUM(paid_seats) AS paid_seats
    FROM
        custom.cth_v_ticket_2425_playoffs
    WHERE
        ticket_type_playoffs IN ('Singles')
        AND event_name != '24-25 Pl'
    GROUP BY
        product_description,
        days_out,
        ticket_type_playoffs
)
SELECT
    *
FROM
    playoffs_22_23
UNION ALL
SELECT
    *
FROM
    playoffs_23_24
UNION ALL
SELECT
    *
FROM
    playoffs_24_25
ORDER BY
    season,
    round,
    event_name,
    days_out DESC
"""

ticket_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [5]:
# predicted singles
#
# Build the cumulative sales curve for every playoff game: running revenue and
# seats as the event approaches, expressed as a share of the game's final total.

# Put the rows in running-total order (furthest-out sales first) explicitly, so
# the cumulative sums below do not silently depend on the order the warehouse
# happened to return.
ticket_df = ticket_df.sort_values(
    by=['season', 'round', 'event_name', 'days_out'],
    ascending=[True, True, True, False],
    kind='stable',
).reset_index(drop=True)

# Closest-to-event bucket of each game. Vectorised: the previous row-wise apply
# rescanned the whole frame once per row.
ticket_df['min_days_out'] = ticket_df.groupby('event_name')['days_out'].transform('min')

# Running totals per game (the deprecated ``axis`` argument is no longer passed).
cumdf = (
    ticket_df
    .groupby(by=['round', 'event_name'])[['gross_revenue', 'paid_seats']]
    .cumsum()
    .rename(columns={'gross_revenue': 'cum_gross_rev', 'paid_seats': 'cum_num_seats'})
)

dfdf = pd.concat([ticket_df, cumdf], axis=1)

# A game's final totals are its running totals on the closest-to-event row.
final_totals = dfdf.loc[dfdf['days_out'] == dfdf['min_days_out']].set_index('event_name')
assert final_totals.index.is_unique, "expected exactly one row per event at its minimum days_out"

dfdf['final_seats'] = dfdf['event_name'].map(final_totals['cum_num_seats'])

# Share of final seats already sold at each days_out bucket.
dfdf['per_seats_in'] = dfdf['cum_num_seats'] / dfdf['final_seats']

dfdf['final_rev'] = dfdf['event_name'].map(final_totals['cum_gross_rev'])

# Share of final revenue already booked at each days_out bucket.
dfdf['per_rev_in'] = dfdf['cum_gross_rev'] / dfdf['final_rev']

dfdf = dfdf[['season','round', 'event_name', 'days_out','gross_revenue','paid_seats', 'cum_gross_rev','cum_num_seats','per_seats_in','per_rev_in']]

  cumdf = ticket_df.groupby(by = ['round','event_name'], axis = 0)[['gross_revenue','paid_seats']].cumsum().rename(columns = {'gross_revenue':'cum_gross_rev', 'paid_seats':'cum_num_seats'})


In [6]:
# Season being forecast; every other season is treated as history.
forecast_season = '2024-25'

df_train = dfdf.loc[dfdf['season'] != forecast_season]

df_2425 = dfdf.loc[dfdf['season'] == forecast_season]

# Historical average share of final seats / revenue already in, per playoff
# round and days-out bucket.
df_avgs = (
    df_train
    .groupby(by=['round', 'days_out'])[['per_seats_in', 'per_rev_in']]
    .mean()
    .rename(columns={'per_seats_in': 'avg_per_seats_in', 'per_rev_in': 'avg_per_rev_in'})
    .reset_index()
)

In [7]:
# Attach the historical average "share in" for the matching round / days-out
# bucket, then scale the current running totals up to a projected final total.
# Note that paid_seats and gross_revenue are overwritten with the projections.
df_merged = df_2425.merge(df_avgs, on=['round', 'days_out'], how='left').assign(
    paid_seats=lambda frame: frame['cum_num_seats'] / frame['avg_per_seats_in'],
    gross_revenue=lambda frame: frame['cum_gross_rev'] / frame['avg_per_rev_in'],
)

# Row label of the closest-to-event bucket of every game.
min_indices = df_merged.groupby('event_name')['days_out'].idxmin()

result = df_merged.loc[min_indices].assign(
    ticket_type_playoffs='Singles',
    # Characters 5-6 of the 8-character event name, e.g. 'R1'.
    tier=lambda frame: frame['event_name'].str.slice(start=-4).str.slice(stop=2),
)

result = result[['event_name','tier','ticket_type_playoffs','paid_seats','gross_revenue']]

In [8]:
# Current 2024-25 playoff totals for every ticket type except 'Singles',
# per event and ticket type, limited to events dated on or before 2025-05-02.
# NOTE(review): the cut-off date is hard-coded in the SQL — confirm it is still
# the intended horizon before re-running.
q = """
SELECT
    LEFT(product_description, 8) AS event_name,
    RIGHT(LEFT(product_description,6),2) AS tier,
    ticket_type_playoffs,
    sum(gross_revenue) as gross_revenue,
    sum(paid_seats) as paid_seats
FROM
    custom.cth_v_ticket_2425_playoffs
WHERE
    date(event_datetime) <= '2025-05-02'
    and ticket_type_playoffs != 'Singles'
GROUP BY
    event_datetime,
    ticket_type_playoffs,
    product_description
"""

current_in = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [9]:
# Stack the projected 'Singles' rows on top of the current totals of the other
# ticket types. Row labels are kept as-is, so the index may contain duplicates.
df_final = pd.concat([result,current_in])

In [13]:
# get weighted show_rate averages over last 4 seasons

# get historical show rate data
#
# Returns one row per season and playoff tier (R1, R2, R3, SC) with the comp
# and paid show rates: attended seats divided by total seats, NULL when the
# seat total is 0.

q = """
WITH historical AS (
    SELECT
        game_desc.season,
        game_desc.tier,
        ticket.event_date::date,
        ticket.comp_seats::float,
        ticket.paid_seats::float,
        CASE
            WHEN ticket.is_comp = TRUE AND ticket.did_attended = TRUE THEN 1
            ELSE 0
        END AS "comp_seats_attended",
        CASE
            WHEN is_comp = FALSE AND did_attended = TRUE THEN 1
            ELSE 0
        END AS "paid_seats_attended"
    FROM
        custom.cth_v_historical_ticket ticket
    INNER JOIN
        custom.cth_game_descriptions game_desc
            ON ticket.event_datetime::date = game_desc.event_datetime::date
            AND game_desc.season IN ('2021-22', '2022-23', '2023-24', '2024-25')
            AND game_desc.event_datetime < current_date
),
tier_show_rate AS (
    SELECT
        season,
        tier,
        sum(historical.comp_seats_attended)::float / nullif(sum(historical.comp_seats),0) AS "comp_show_rate",
        sum(historical.paid_seats_attended)::float / nullif(sum(historical.paid_seats),0) AS "paid_show_rate"
    FROM
        historical
    GROUP BY
        season,
        tier
)
SELECT
    *
FROM
    tier_show_rate
where
    tier in ('R1','R2','R3','SC')
"""

show_rate = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

# Season weights for the show-rate average: more recent seasons count more.
# NOTE(review): the show-rate query also returns '2021-22', which has no entry
# here, so .map() leaves its weight as NaN — confirm that season is meant to be
# left out of the weighted average.
weights = {'2022-23': .5, '2023-24':1,'2024-25':1.5}

show_rate['weights'] = show_rate['season'].map(weights)

def weighted_paid_average(group):
    """Weighted mean of ``paid_show_rate`` for one group of season rows.

    Args:
        group: DataFrame with ``paid_show_rate`` and ``weights`` columns.

    Returns:
        pd.Series: a single entry ``weighted_paid_average``. It is NaN when no
        row has both a rate and a weight, or when the usable weights sum to 0.
    """
    rates = group['paid_show_rate']
    season_weights = group['weights']

    # Only rows with both a rate and a weight may contribute. Otherwise a row
    # with a missing rate is skipped in the numerator (sum ignores NaN) while
    # its weight still inflates the denominator, biasing the average low.
    usable = rates.notna() & season_weights.notna()

    weight_sum = season_weights[usable].sum()
    if weight_sum == 0:
        return pd.Series({'weighted_paid_average': float('nan')})

    weighted_sum = (rates[usable] * season_weights[usable]).sum()

    return pd.Series({
        'weighted_paid_average': weighted_sum / weight_sum
    })

# Weighted paid show rate per playoff tier. Only the two columns the helper
# reads are handed to ``apply``; this keeps the grouping column out of each
# group frame, which recent pandas versions warn about.
paid_tiers = (
    show_rate
    .groupby(by=['tier'])[['paid_show_rate', 'weights']]
    .apply(weighted_paid_average)
    .reset_index()
)

def weighted_comp_average(group):
    """Weighted mean of ``comp_show_rate`` for one group of season rows.

    Args:
        group: DataFrame with ``comp_show_rate`` and ``weights`` columns.

    Returns:
        pd.Series: a single entry ``weighted_comp_average``. It is NaN when no
        row has both a rate and a weight, or when the usable weights sum to 0.
    """
    rates = group['comp_show_rate']
    season_weights = group['weights']

    # Only rows with both a rate and a weight may contribute. Otherwise a row
    # with a missing rate is skipped in the numerator (sum ignores NaN) while
    # its weight still inflates the denominator, biasing the average low.
    usable = rates.notna() & season_weights.notna()

    weight_sum = season_weights[usable].sum()
    if weight_sum == 0:
        return pd.Series({'weighted_comp_average': float('nan')})

    weighted_sum = (rates[usable] * season_weights[usable]).sum()

    return pd.Series({
        'weighted_comp_average': weighted_sum / weight_sum
    })

# Weighted comp show rate per playoff tier. Only the two columns the helper
# reads are handed to ``apply``; this keeps the grouping column out of each
# group frame, which recent pandas versions warn about.
comp_tiers = (
    show_rate
    .groupby(by=['tier'])[['comp_show_rate', 'weights']]
    .apply(weighted_comp_average)
    .reset_index()
)

# One row per tier carrying both weighted show rates.
tiers = pd.merge(paid_tiers, comp_tiers, on = ['tier'], how = 'left')

  paid_tiers = show_rate.groupby(by = ['tier']).apply(weighted_paid_average).reset_index()
  comp_tiers = show_rate.groupby(by = ['tier']).apply(weighted_comp_average).reset_index()


In [20]:
# merge onto 24/25 data and predict attendance

# Attach each tier's weighted show rates (rows whose tier has no match are
# dropped by the inner merge), then scale paid seats by the paid show rate.
final_df = df_final.merge(tiers, on='tier').assign(
    total_attendance=lambda frame: frame['paid_seats'] * frame['weighted_paid_average']
)

# Per-event totals across all ticket types.
final_tickets_and_attendance = (
    final_df
    .groupby(by=['event_name'])[['paid_seats', 'gross_revenue', 'total_attendance']]
    .sum()
)

In [None]:
# Merch, F&B

In [26]:
# Historical F&B summary rows for playoff-tier games (R1, R2, R3, SC), joined
# to the game descriptions and to a per-event attendance count.
# NOTE(review): the attendance CTE reads only the 2023-24 playoff attendance
# table, so games from other seasons would come back with NULL attendance.
q = """
with attendance as
    (select
        event_datetime,
        count(*) as attendance
    from
        custom.cth_v_attendance_2324_playoffs
    group by
        event_datetime)
select
    cth_game_descriptions.season,
    cth_game_descriptions.event_date,
    tier,
    day_of_week,
    start_time,
    attendance,
    gross_revenue,
    num_orders,
    quantity_sold
from
    custom.cheq_v_hockey_summary
left join
    custom.cth_game_descriptions on date(cheq_v_hockey_summary.event_date) = date(cth_game_descriptions.event_date)
left join
    attendance on date(attendance.event_datetime) = date(cheq_v_hockey_summary.event_date)
where
    tier in ('R1','R2','R3','SC')
"""

historical_f_and_b = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [27]:
# Display the historical playoff F&B frame for a visual sanity check.
historical_f_and_b

Unnamed: 0,season,event_date,tier,day_of_week,start_time,attendance,gross_revenue,num_orders,quantity_sold
0,2023-24,2024-05-06,R2,Mon,8:00 PM,18053,564314.235335,17851,33245
1,2023-24,2024-05-14,R2,Tue,7:00 PM,19163,691262.165332,19782,39841
2,2023-24,2024-05-26,R3,Sun,3:00 PM,18949,720159.86,22283,43583
3,2023-24,2024-05-28,R3,Tue,8:00 PM,18999,716579.98,21726,42196
4,2023-24,2024-06-24,SC,Mon,8:00 PM,19186,914254.46,25488,51328
5,2023-24,2024-04-21,R1,Sun,12:30 PM,17119,517888.0,16208,31409
6,2023-24,2024-04-23,R1,Tue,7:30 PM,18177,630547.6,20002,37272
7,2023-24,2024-04-29,R1,Mon,7:00 PM,18489,605455.56,18584,35383
8,2023-24,2024-05-08,R2,Wed,7:30 PM,18829,643016.67,18900,36772
9,2023-24,2024-06-01,R3,Sat,8:00 PM,19156,806433.96,23701,47303


In [None]:
# Encode the categorical game descriptors of the F&B history as the numeric
# features used by the F&B model. Values missing from a mapping become 0.
# The lookups are vectorised: a row-wise ``apply`` is slow and does not return
# a Series when the frame is empty.

# 1 = Friday to Sunday, 0 = Monday to Thursday.
day_map = {
    'Fri': 1,
    'Sat': 1,
    'Sun': 1,
    'Mon': 0,
    'Tue': 0,
    'Wed': 0,
    'Thu': 0
}

historical_f_and_b['weekend'] = (
    historical_f_and_b['day_of_week'].map(day_map).fillna(0).astype(int)
)

# 1 = afternoon start, 2 = evening start.
start_time_map = {
    '12:30 PM': 1,
    '1:00 PM' : 1,
    '3:00 PM': 1,
    '7:00 PM': 2,
    '7:30 PM': 2,
    '8:00 PM': 2
}

historical_f_and_b['start_time_num'] = (
    historical_f_and_b['start_time'].map(start_time_map).fillna(0).astype(int)
)

# Playoff tiers in increasing order, later rounds higher.
tier_mapping = {
    'SC': 4,
    'R3': 3,
    'R2': 2,
    'R1': 1
}

historical_f_and_b['tier_num'] = (
    historical_f_and_b['tier'].map(tier_mapping).fillna(0).astype(int)
)

In [29]:
# Historical merchandise sales per playoff-tier game (R1, R2, R3, SC) for the
# 2023-24 and 2024-25 seasons: summed gross revenue and quantity plus the
# number of distinct invoices, joined to game descriptions and attendance.
# NOTE(review): the attendance CTE reads only the 2023-24 playoff attendance
# table, so 2024-25 games would come back with NULL attendance.
q = """
with attendance as
    (select
        event_datetime,
        count(*) as attendance
    from
        custom.cth_v_attendance_2324_playoffs
    group by
        event_datetime)
select
    cth_game_descriptions.season,
    cth_game_descriptions.event_date,
    tier,
    is_premier,
    original_six_plus_extra,
    day_of_week,
    start_time,
    attendance,
    sum(gross_revenue) as gross_revenue,
    sum(qty) as quantity,
    count(distinct invoice_id) as num_orders
from
    custom.retailpro_v_invoice_items
left join
    custom.cth_game_descriptions on retailpro_v_invoice_items.event_date = cth_game_descriptions.event_date
left join
    attendance on retailpro_v_invoice_items.event_date = date(attendance.event_datetime)
where
    season in ('2023-24','2024-25')
    and tier in ('R1','R2','R3','SC')
group by
    cth_game_descriptions.season,
    cth_game_descriptions.event_date,
    tier,
    is_premier,
    original_six_plus_extra,
    day_of_week,
    start_time,
    attendance
"""


historical_merch = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [30]:
# Display the historical playoff merchandise frame for a visual sanity check.
historical_merch

Unnamed: 0,season,event_date,tier,is_premier,original_six_plus_extra,day_of_week,start_time,attendance,gross_revenue,quantity,num_orders
0,2023-24,2024-06-10,SC,False,0.0,Mon,8:00 PM,19032,507179.3993,10924,4105
1,2023-24,2024-06-08,SC,False,0.0,Sat,8:00 PM,18845,603499.087,13106,4112
2,2023-24,2024-04-29,R1,False,1.0,Mon,7:00 PM,18489,189103.179,3530,1934
3,2023-24,2024-06-24,SC,False,0.0,Mon,8:00 PM,19186,451898.3252,10157,3424
4,2023-24,2024-05-26,R3,False,1.0,Sun,3:00 PM,18949,293148.526,5618,2381
5,2023-24,2024-05-28,R3,False,1.0,Tue,8:00 PM,18999,194328.171,3725,1932
6,2023-24,2024-05-08,R2,False,1.0,Wed,7:30 PM,18829,186568.757,3327,1839
7,2023-24,2024-06-18,SC,False,0.0,Tue,8:00 PM,19387,427095.892,7849,3004
8,2023-24,2024-04-23,R1,False,1.0,Tue,7:30 PM,18177,196274.903,3400,1951
9,2023-24,2024-04-21,R1,False,1.0,Sun,12:30 PM,17119,203726.5015,4110,2123


In [31]:
# Encode the categorical game descriptors of the merch history as the numeric
# features used by the merch model. Values missing from a mapping become 0.
# The lookups are vectorised: a row-wise ``apply`` is slow and does not return
# a Series when the frame is empty.

# 1 = Friday to Sunday, 0 = Monday to Thursday.
day_map = {
    'Fri': 1,
    'Sat': 1,
    'Sun': 1,
    'Mon': 0,
    'Tue': 0,
    'Wed': 0,
    'Thu': 0
}

historical_merch['weekend'] = (
    historical_merch['day_of_week'].map(day_map).fillna(0).astype(int)
)

# 1 = afternoon start, 2 = evening start.
start_time_map = {
    '12:30 PM': 1,
    '1:00 PM' : 1,
    '3:00 PM': 1,
    '7:00 PM': 2,
    '7:30 PM': 2,
    '8:00 PM': 2
}

historical_merch['start_time_num'] = (
    historical_merch['start_time'].map(start_time_map).fillna(0).astype(int)
)

# The merch query returns only the playoff tiers R1, R2, R3 and SC. The former
# A-E mapping matched none of them, so tier_num was 0 on every row and the
# feature carried no information.
tier_mapping = {
    'SC': 4,
    'R3': 3,
    'R2': 2,
    'R1': 1
}

historical_merch['tier_num'] = (
    historical_merch['tier'].map(tier_mapping).fillna(0).astype(int)
)

In [None]:
# Descriptors of every game dated today or later.
q = """
select
    season,
    event_date,
    tier,
    is_premier,
    original_six_plus_extra,
    day_of_week,
    start_time
from
    custom.cth_game_descriptions
where
    event_date >= current_date
"""

future_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

# Location of the predicted-attendance CSV. It can be overridden with the
# PREDICTED_ATTENDANCE_CSV environment variable so the notebook is not tied to
# one user's desktop; the default is the path that was previously hard-coded.
PREDICTED_ATTENDANCE_CSV = os.environ.get(
    'PREDICTED_ATTENDANCE_CSV',
    'C:\\Users\\riffere\\Desktop\\output.csv',
)

predicted_attendance = pd.read_csv(PREDICTED_ATTENDANCE_CSV)

# NOTE(review): the CSV's event_date is read without date parsing — confirm it
# has the same type and format as the warehouse column, otherwise no row will
# match in the left merge.
future_game_info = future_game_info.merge(predicted_attendance, how = 'left', on = 'event_date')

In [None]:
# Encode the upcoming games with the same numeric features the F&B and merch
# models are trained on. Values missing from a mapping become 0.

# 1 = Friday to Sunday, 0 = Monday to Thursday.
day_map = {
    'Fri': 1,
    'Sat': 1,
    'Sun': 1,
    'Mon': 0,
    'Tue': 0,
    'Wed': 0,
    'Thu': 0
}

future_game_info['weekend'] = (
    future_game_info['day_of_week'].map(day_map).fillna(0).astype(int)
)

# 1 = afternoon start, 2 = late-afternoon / evening start.
# The 7:00, 7:30 and 8:00 PM starts are encoded as 2 in the historical F&B and
# merch frames; they were missing here, so the same start time was encoded
# differently for training (2) and for prediction (0).
start_time_map = {
    '12:30 PM': 1,
    '12:45 PM': 1,
    '1:00 PM': 1,
    '3:00 PM': 1,
    '3:30 PM': 1,
    '4:00 PM': 2,
    '5:00 PM': 2,
    '6:00 PM': 2,
    '7:00 PM': 2,
    '7:30 PM': 2,
    '8:00 PM': 2
}

future_game_info['start_time_num'] = (
    future_game_info['start_time'].map(start_time_map).fillna(0).astype(int)
)

# The historical F&B and merch queries return only playoff tiers (R1, R2, R3,
# SC), so upcoming games must use that same scale; the former A-E mapping
# turned every playoff tier into 0.
tier_mapping = {
    'SC': 4,
    'R3': 3,
    'R2': 2,
    'R1': 1
}

future_game_info['tier_num'] = (
    future_game_info['tier'].map(tier_mapping).fillna(0).astype(int)
)

In [None]:
def run_f_and_b_model(df, df_future):
    """Fit a linear regression of F&B gross revenue and predict future games.

    Args:
        df: Historical frame with the feature columns ``attendance``,
            ``weekend``, ``start_time_num``, ``tier_num`` and the target
            ``gross_revenue``.
        df_future: Frame of games to predict, with the same four feature
            columns.

    Returns:
        numpy.ndarray: Predicted gross revenue truncated to integers, one
        value per row of ``df_future`` (empty when ``df_future`` has no rows).
    """
    feature_cols = ['attendance', 'weekend', 'start_time_num', 'tier_num']

    x_train = df[feature_cols]
    y_train = np.array(df[['gross_revenue']]).ravel()

    x_test = df_future[feature_cols]

    # Nothing to predict: return early instead of handing an empty matrix to
    # the scaler, which rejects it.
    if len(x_test) == 0:
        return np.array([], dtype=int)

    scalar = StandardScaler()
    train_features = scalar.fit_transform(x_train)

    model = LinearRegression().fit(train_features, y_train)

    # Reuse the statistics learned from the training data. Refitting the scaler
    # on the prediction rows would standardise them with their own mean and
    # variance, so the coefficients would be applied on a different scale.
    test_features = scalar.transform(x_test)

    return model.predict(test_features).astype(int)

In [None]:
# Predicted F&B gross revenue for each upcoming game, from a model trained on
# historical_f_and_b.
future_game_info['predicted_f_and_b_gross_rev'] = run_f_and_b_model(historical_f_and_b, future_game_info)

In [None]:
def run_merch_model(df, df_future):
    """Fit a linear regression of merch gross revenue and predict future games.

    Args:
        df: Historical frame with the feature columns ``attendance``,
            ``weekend``, ``start_time_num``, ``tier_num`` and the target
            ``gross_revenue``.
        df_future: Frame of games to predict, with the same four feature
            columns.

    Returns:
        numpy.ndarray: Predicted gross revenue truncated to integers, one
        value per row of ``df_future`` (empty when ``df_future`` has no rows).
    """
    feature_cols = ['attendance', 'weekend', 'start_time_num', 'tier_num']

    x_train = df[feature_cols]
    y_train = np.array(df[['gross_revenue']]).ravel()

    x_test = df_future[feature_cols]

    # Nothing to predict: return early instead of handing an empty matrix to
    # the scaler, which rejects it.
    if len(x_test) == 0:
        return np.array([], dtype=int)

    scalar = StandardScaler()
    train_features = scalar.fit_transform(x_train)

    model = LinearRegression().fit(train_features, y_train)

    # Reuse the statistics learned from the training data. Refitting the scaler
    # on the prediction rows would standardise them with their own mean and
    # variance, so the coefficients would be applied on a different scale.
    test_features = scalar.transform(x_test)

    return model.predict(test_features).astype(int)

In [None]:
# Predicted merch gross revenue for each upcoming game, from a model trained on
# historical_merch.
future_game_info['predicted_merch_gross_rev'] = run_merch_model(historical_merch, future_game_info)

In [None]:
# Parking

In [None]:
# get historical show rates by tier and lot
#
# prepaid / prepaid_agg: number of prepaid passes per event and lot for past
#   Panthers events (2023-24 and 2024-25 ticket tables).
# scans / scans_agg: per event and lot, the number of parking transactions with
#   paid_amount = 0, which the query counts as scans.
# Final select: show_rate = num_scans / prepaid_passes, keeping only event/lot
#   pairs that have prepaid passes.

q = """
with prepaid as
    (select
         event_datetime,
         location_group
    from
        custom.ctp_v_ticket_2324
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date
    UNION ALL
    select
        event_datetime,
        location_group
    from
        custom.ctp_v_ticket_2425
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date),
prepaid_agg as
    (select
        event_datetime,
        location_group,
        count(*) as prepaid_passes
    from
        prepaid
    group by
        event_datetime, location_group),
scans as
    (select
        season,
        cth_game_descriptions.event_datetime,
        tier,
        location_group,
        case
            when paid_amount = 0 then 1
            else 0 end as num_scans
    from
        custom.parkhub_v_transactions
    left join
        custom.cth_game_descriptions on parkhub_v_transactions.event_datetime = cth_game_descriptions.event_datetime
    where
        cth_game_descriptions.event_datetime is not Null
        and season in ('2023-24','2024-25')),
scans_agg as
    (select
        season,
        event_datetime,
        tier,
        location_group,
        sum(num_scans) as num_scans
    from
        scans
    group by
        season,
        event_datetime,
        tier,
        location_group)
select
    season,
    prepaid_agg.event_datetime,
    tier,
    prepaid_agg.location_group,
    prepaid_passes,
    num_scans,
    num_scans*1.0/prepaid_passes::float as show_rate
from
    scans_agg
left join
    prepaid_agg on scans_agg.event_datetime = prepaid_agg.event_datetime
    and scans_agg.location_group = prepaid_agg.location_group
where
    prepaid_agg.event_datetime is not Null
"""

show_rate_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [None]:
# weigh this past season more heavily

# Per-season weights for the parking show-rate average. This rebinds the
# notebook-level name ``weights``; seasons absent from the dict map to NaN.
weights = {'2023-24':1.25,'2024-25':2.75}

show_rate_df['weights'] = show_rate_df['season'].map(weights)

def weighted_paid_average(group):
    """Weighted mean of the parking ``show_rate`` for one group of rows.

    This definition replaces any earlier notebook function of the same name;
    it reads ``show_rate`` and returns ``weighted_average``.

    Args:
        group: DataFrame with ``show_rate`` and ``weights`` columns.

    Returns:
        pd.Series: a single entry ``weighted_average``. It is NaN when no row
        has both a rate and a weight, or when the usable weights sum to 0.
    """
    rates = group['show_rate']
    season_weights = group['weights']

    # Only rows with both a rate and a weight may contribute. Otherwise a row
    # with a missing rate is skipped in the numerator (sum ignores NaN) while
    # its weight still inflates the denominator, biasing the average low.
    usable = rates.notna() & season_weights.notna()

    weight_sum = season_weights[usable].sum()
    if weight_sum == 0:
        return pd.Series({'weighted_average': float('nan')})

    weighted_sum = (rates[usable] * season_weights[usable]).sum()

    return pd.Series({
        'weighted_average': weighted_sum / weight_sum
    })

# Weighted parking show rate per tier and lot. This rebinds the notebook-level
# name ``paid_tiers``. Only the two columns the helper reads are handed to
# ``apply``; this keeps the grouping columns out of each group frame, which
# recent pandas versions warn about.
paid_tiers = (
    show_rate_df
    .groupby(by=['tier', 'location_group'])[['show_rate', 'weights']]
    .apply(weighted_paid_average)
    .reset_index()
)

In [None]:
# gather historical prepaid parking data
#
# One row per event, lot and transaction date for past Panthers events, with
# the number of passes bought that day (reported as 0 when bought 150 or more
# days out). Rows are ordered by event, lot and transaction date descending,
# i.e. the purchases closest to the event come first within each event/lot.

q = """
with prepaid as
    (select
         event_datetime,
         location_group,
         date(transaction_date) as transaction_date
    from
        custom.ctp_v_ticket_2324
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date
    UNION ALL
    select
        event_datetime,
        location_group,
        date(transaction_date) as transaction_date
    from
        custom.ctp_v_ticket_2425
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date)
select
    date(event_datetime) as event_date,
    location_group,
    'prepaid' as parking_type,
    datediff('days',transaction_date, event_datetime) as days_out,
    case
        when days_out >= 150 then 0
        else count(*) 
    end as num_passes
from
    prepaid
where
    days_out >= 0
group by
    event_datetime,
    location_group,
    transaction_date
order by
    event_datetime,
    location_group,
    transaction_date desc
"""

historical_prepaid_parking_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

# Numeric code per parking lot; lots missing from the mapping become 0.
location_map = {
    'Club': 4,
    'Garage': 3,
    'General': 2,
    'Valet': 1
}

# Vectorised lookup: a row-wise ``apply`` is slow and does not return a Series
# when the frame is empty.
historical_prepaid_parking_info['location_num'] = (
    historical_prepaid_parking_info['location_group'].map(location_map).fillna(0).astype(int)
)

# Running pass count per event and lot, accumulated in the frame's current row
# order (the query returns the purchases closest to the event first).
historical_prepaid_parking_info['cumulative_num_passes']  = historical_prepaid_parking_info.groupby(['event_date', 'location_group'])['num_passes'].cumsum()

In [None]:
# gather historical game data (ie tier, dow, and start time)
#
# One row per attendance-summary game whose tier is A, B, C, D or E, with its
# day of week, start time and total tickets.

q = """
SELECT
    date(cth_game_descriptions.event_date) as event_date,
    tier,
    day_of_week,
    start_time,
    total_tickets
FROM
    custom.cth_v_historical_attendance_summary
LEFT JOIN
    custom.cth_game_descriptions on cth_v_historical_attendance_summary.event_date = cth_game_descriptions.event_date
WHERE
    tier in ('A','B','C','D','E')
"""

all_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

# Encode day of week and start time as the numeric features used by the
# parking models. Values missing from a mapping become 0.
# The lookups are vectorised: a row-wise ``apply`` is slow and does not return
# a Series when the frame is empty.

# 1 = Friday to Sunday, 0 = Monday to Thursday.
day_map = {
    'Fri': 1,
    'Sat': 1,
    'Sun': 1,
    'Mon': 0,
    'Tue': 0,
    'Wed': 0,
    'Thu': 0
}

all_game_info['weekend'] = (
    all_game_info['day_of_week'].map(day_map).fillna(0).astype(int)
)

# NOTE(review): start times from 7:00 PM onwards are not listed and therefore
# fall into the 0 bucket — confirm that is the intended encoding.
start_time_map = {
    '12:30 PM': 1,
    '12:45 PM': 1,
    '1:00 PM': 1,
    '3:00 PM': 1,
    '3:30 PM': 1,
    '4:00 PM': 2,
    '5:00 PM': 2,
    '6:00 PM': 2
}

all_game_info['start_time_num'] = (
    all_game_info['start_time'].map(start_time_map).fillna(0).astype(int)
)

In [None]:
# gather upcoming game data including current prepaid totals by game and lot
#
# One row per upcoming game and lot: days until the event, lot capacity,
# prepaid cars so far (paid + comp seats), gross revenue so far and the
# capacity left after those prepaid cars.

q = """
select
    date(cth_game_descriptions.event_datetime) as event_date,
    datediff('day', current_date, cth_game_descriptions.event_datetime) as days_out,
    ctp_v_ticket_2425.location_group,
    capacity::int,
    sum(paid_seats)+sum(comp_seats) as prepaid_cars,
    sum(gross_revenue) as current_gross_revenue,
    capacity::int - prepaid_cars as cap_remaining
from
    custom.ctp_v_ticket_2425
left join
    custom.ctp_parking_capacities on ctp_v_ticket_2425.location_group = ctp_parking_capacities.location_group
left join
    custom.cth_game_descriptions on ctp_v_ticket_2425.event_datetime = cth_game_descriptions.event_datetime
where
    cth_game_descriptions.event_datetime is not null and
    cth_game_descriptions.event_datetime >= current_date
group by
    cth_game_descriptions.event_datetime,
    ctp_v_ticket_2425.location_group,
    capacity
order by
    cth_game_descriptions.event_datetime,
    ctp_v_ticket_2425.location_group
"""

upcoming_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [None]:
# gather upcoming pricing data by game and lot
#
# Per tier and lot: the latest transaction date and the highest adjusted price
# among non-comp 2024-25 parking tickets whose price type starts with 'IA'.

q = """
SELECT
    tier,
    location_group,
    max(transaction_date) AS "transaction_date",
    max(adjusted_price) AS "highest_price"
FROM
    custom.ctp_v_ticket_2425
LEFT JOIN             
    custom.cth_game_descriptions on ctp_v_ticket_2425.event_datetime = cth_game_descriptions.event_datetime
WHERE
    is_comp = FALSE
    AND price_type ILIKE 'IA%'
GROUP BY
    tier, 
    location_group
"""

pricing_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [None]:
# create a model to predict remaining prepaid cars

def run_prepaid_model(df, df_future, lot):
    """Fit a per-lot linear regression of cumulative prepaid passes and predict.

    Args:
        df: Historical frame with ``location_group``, the feature columns
            ``days_out``, ``weekend``, ``start_time_num`` and the target
            ``cumulative_num_passes``.
        df_future: Frame of upcoming games with ``location_group`` and the
            same three feature columns.
        lot: Value of ``location_group`` to train on and predict for.

    Returns:
        numpy.ndarray: Predictions truncated to integers, one per row of
        ``df_future`` belonging to ``lot`` (empty when there are none).
    """
    feature_cols = ['days_out', 'weekend', 'start_time_num']

    lot_history = df[df['location_group'] == lot]
    lot_future = df_future[df_future['location_group'] == lot]

    x_train = lot_history[feature_cols]
    y_train = np.array(lot_history[['cumulative_num_passes']]).ravel()

    x_test = lot_future[feature_cols]

    # Nothing to predict for this lot: return early instead of handing an
    # empty matrix to the scaler, which rejects it.
    if len(x_test) == 0:
        return np.array([], dtype=int)

    scalar = StandardScaler()
    train_features = scalar.fit_transform(x_train)

    model = LinearRegression().fit(train_features, y_train)

    # Reuse the statistics learned from the training data. Refitting the scaler
    # on the prediction rows would standardise them with their own mean and
    # variance, so the coefficients would be applied on a different scale.
    test_features = scalar.transform(x_test)

    return model.predict(test_features).astype(int)

In [None]:
# merge historical prepaid parking data with historical game data for training the model

# Tiers kept for the parking models.
modelled_tiers = ['A','B','C','D','E']

total_table = historical_prepaid_parking_info.merge(all_game_info, on='event_date', how='left')
total_table = total_table.loc[total_table['tier'].isin(modelled_tiers)]

# merge upcoming parking data with historical game data for testing the model

total_future_table = upcoming_game_info.merge(all_game_info, on='event_date', how='left')
total_future_table = total_future_table.loc[total_future_table['tier'].isin(modelled_tiers)]

In [None]:
# only predicting for general and garage, not club, valet or executive

lots = ['General','Garage']

# Per-lot frames are collected and concatenated once, rather than growing a
# DataFrame inside the loop.
lot_frames = []

for lot in lots:

    # Explicit copy: the new column is written to an independent frame instead
    # of a slice of total_future_table.
    temp = total_future_table[total_future_table['location_group'] == lot].copy()

    # No upcoming rows for this lot, so there is nothing to predict.
    if temp.empty:
        continue

    temp['predicted_parking'] = run_prepaid_model(total_table, total_future_table, lot)

    lot_frames.append(temp)

final_df = pd.concat(lot_frames, ignore_index=True) if lot_frames else pd.DataFrame()

In [None]:
# concat club totals for onsite model next

# No additional prepaid parking is predicted for the Club lot. ``assign``
# builds a new frame, so the column is not written to a slice of
# total_future_table.
club_totals = total_future_table[total_future_table['location_group'] == 'Club'].assign(
    predicted_parking=0
)

# NOTE: re-running this cell appends the Club rows to final_df again.
final_df = pd.concat([final_df, club_totals], axis=0)

In [None]:
# make sure no negative predictions are made

final_df['predicted_prepaid_additional_parking'] = final_df['predicted_parking'].clip(lower=0)

# get total prepaid tickets (current + predicted additional)
# The floored prediction is used here; adding the raw model output let a
# negative prediction reduce the number of cars that are already prepaid.

final_df['total_predicted_prepaid_cars'] = final_df['prepaid_cars'] + final_df['predicted_prepaid_additional_parking']

# get number of parked cars using historical show rates

final_df = final_df.merge(paid_tiers, how =  'left', on = ['tier', 'location_group'])

# A tier/lot pair without a historical show rate is treated as a show rate of
# 1, the same fallback used for executive and valet parking, instead of
# failing on the integer cast of NaN.
prepaid_show_rate = final_df['weighted_average'].fillna(1)
final_df['prepaid_cars_parked'] = (final_df['total_predicted_prepaid_cars'] * prepaid_show_rate).astype(int)

In [None]:
# find the capacity remaining

final_df['cap_remaining'] = final_df['capacity'] - final_df['prepaid_cars_parked']

# if predicted cars over capacity subtract overflow out
# (cap_remaining is negative on those rows, so adding it removes the overflow)

over_capacity = final_df['cap_remaining'] < 0

# The additional prediction is floored at 0 after the overflow is removed.
final_df['predicted_prepaid_additional_parking'] = np.where(
    over_capacity,
    (final_df['predicted_prepaid_additional_parking'] + final_df['cap_remaining']).clip(lower=0),
    final_df['predicted_prepaid_additional_parking'],
)
final_df['prepaid_cars_parked'] = np.where(
    over_capacity,
    final_df['prepaid_cars_parked'] + final_df['cap_remaining'],
    final_df['prepaid_cars_parked'],
)

# Rows over capacity have nothing left; every other row keeps its remaining
# capacity. The previous else-branch wrote prepaid_cars_parked into this
# column, which is later used as a model feature.
final_df['cap_remaining'] = np.where(over_capacity, 0, final_df['cap_remaining'])

In [None]:
# Keep only the columns used from here on, in a fixed order.
prepaid_output_columns = [
    'event_date',
    'days_out',
    'tier',
    'start_time_num',
    'weekend',
    'location_group',
    'capacity',
    'prepaid_cars',
    'current_gross_revenue',
    'predicted_prepaid_additional_parking',
    'total_predicted_prepaid_cars',
    'prepaid_cars_parked',
    'cap_remaining',
]

final_df = final_df[prepaid_output_columns]

In [None]:
# get historical onsite parking data
#
# Per event and lot for the 2023-24 and 2024-25 seasons: the number of parking
# transactions with paid_amount > 0 (counted as onsite cars) and the lot
# capacity minus the transactions with paid_amount = 0 (counted as prepaid
# cars). days_out is the constant 0.

q = """
with onsite as
    (select
        date(cth_game_descriptions.event_datetime) as event_date,
        location_group,
        0 as days_out,
        case
            when paid_amount > 0 then 1
        else 0
        end as num_onsite_cars,
        case
            when paid_amount = 0 then 1
        else 0
        end as num_prepaid_cars
    from
        custom.parkhub_v_transactions
    left join
        custom.cth_game_descriptions on parkhub_v_transactions.event_datetime = cth_game_descriptions.event_datetime
    where
        season in ('2023-24','2024-25'))
select
    onsite.event_date,
    onsite.location_group,
    'onsite' as parking_type,
    days_out,
    sum(num_onsite_cars) as num_cars,
    capacity - sum(num_prepaid_cars) as cap_remaining
from
    onsite
left join
    custom.ctp_parking_capacities on onsite.location_group = ctp_parking_capacities.location_group
where
    days_out >= 0
group by
    onsite.event_date,
    onsite.location_group,
    parking_type,
    days_out,
    capacity
"""

historical_onsite_parking_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [None]:
# merge historical onsite parking data with historical game data for training the onsite model

total_onsite_table = (
    historical_onsite_parking_info
    .merge(all_game_info, on='event_date', how='left')
    .loc[lambda frame: frame['tier'].isin(['A','B','C','D','E'])]
)

In [None]:
# create a model to predict onsite cars

def run_onsite_model(df, df_future, lot):
    """Fit a per-lot linear regression of onsite cars and predict upcoming games.

    Args:
        df: Historical frame with ``location_group``, the feature columns
            ``cap_remaining``, ``weekend``, ``start_time_num`` and the target
            ``num_cars``.
        df_future: Frame of upcoming games with ``location_group`` and the
            same three feature columns.
        lot: Value of ``location_group`` to train on and predict for.

    Returns:
        numpy.ndarray: Predictions truncated to integers, one per row of
        ``df_future`` belonging to ``lot`` (empty when there are none).
    """
    feature_cols = ['cap_remaining', 'weekend', 'start_time_num']

    x_train_table = df[df['location_group'] == lot]
    x_test_table = df_future[df_future['location_group'] == lot]

    x_train = x_train_table[feature_cols]
    y_train = np.array(x_train_table[['num_cars']]).ravel()

    x_test = x_test_table[feature_cols]

    # Nothing to predict for this lot: return early instead of handing an
    # empty matrix to the scaler, which rejects it.
    if len(x_test) == 0:
        return np.array([], dtype=int)

    scalar = StandardScaler()
    train_features = scalar.fit_transform(x_train)

    model = LinearRegression().fit(train_features, y_train)

    # Reuse the statistics learned from the training data. Refitting the scaler
    # on the prediction rows would standardise them with their own mean and
    # variance, so the coefficients would be applied on a different scale.
    test_features = scalar.transform(x_test)

    return model.predict(test_features).astype(int)

In [None]:
# only predicting for general, garage, and club not valet or executive

lots = ['General','Garage','Club']

# Per-lot frames are collected and concatenated once, rather than growing a
# DataFrame inside the loop.
onsite_lot_frames = []

for lot in lots:

    # Explicit copy: the new column is written to an independent frame instead
    # of a slice of final_df.
    temp = final_df[final_df['location_group'] == lot].copy()

    # No upcoming rows for this lot, so there is nothing to predict.
    if temp.empty:
        continue

    temp['predicted_onsite_parking'] = run_onsite_model(total_onsite_table, final_df, lot)

    onsite_lot_frames.append(temp)

final_df_onsite = pd.concat(onsite_lot_frames, ignore_index=True) if onsite_lot_frames else pd.DataFrame()

In [None]:
# add back executive and valet parking and match fields from final_df

# No additional prepaid or onsite parking is predicted for these lots, so the
# totals equal the cars that are already prepaid. The frame is built with
# ``assign`` and ``merge``, which return new objects, so no column is written
# to a slice of total_future_table.
exec_and_valet = (
    total_future_table[total_future_table['location_group'].isin(['Executive','Valet'])]
    .assign(
        predicted_prepaid_additional_parking=0,
        total_predicted_prepaid_cars=lambda frame: frame['prepaid_cars'],
    )
    .merge(paid_tiers, how='left', on=['tier', 'location_group'])
)

# A tier/lot pair without a historical show rate is treated as a show rate of 1.
exec_and_valet['weighted_average'] = exec_and_valet['weighted_average'].fillna(1)

exec_and_valet['prepaid_cars_parked'] = (exec_and_valet['total_predicted_prepaid_cars'] * exec_and_valet['weighted_average']).astype(int)
exec_and_valet['predicted_onsite_parking'] = 0
exec_and_valet['total_parking'] = exec_and_valet['prepaid_cars_parked']

exec_and_valet = exec_and_valet[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking',
                                       'total_predicted_prepaid_cars', 'prepaid_cars_parked',
                                       'predicted_onsite_parking','total_parking']]

In [None]:
# if predicted total over capacity subtract overflow out

# Onsite cars cannot exceed the remaining capacity, and a negative output of
# the linear model is floored at 0 (a lot cannot park a negative number of
# cars).
capped_onsite = np.minimum(
    final_df_onsite['predicted_onsite_parking'],
    final_df_onsite['cap_remaining'],
)
final_df_onsite['predicted_onsite_parking'] = capped_onsite.clip(lower=0)

final_df_onsite['total_parking'] = final_df_onsite['prepaid_cars_parked'] + final_df_onsite['predicted_onsite_parking']

final_parking_model = final_df_onsite[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking','total_predicted_prepaid_cars',
                                       'prepaid_cars_parked','predicted_onsite_parking','total_parking']]

# merge with executive and valet parking info

final_parking_model_df = pd.concat([final_parking_model, exec_and_valet], axis=0)

In [None]:
# Attach the highest observed price per tier and lot, then turn the predicted
# car counts into revenue.
# NOTE: re-running this cell merges pricing_info onto the frame a second time.
final_parking_model_df = final_parking_model_df.merge(pricing_info, how = 'left', on = ['tier', 'location_group'])

# Onsite parking is priced at this multiple of the highest prepaid price.
ONSITE_PRICE_MULTIPLIER = 1.25

# Rows without a price contribute 0 predicted revenue.
final_parking_model_df['predicted_prepaid_additional_gross_revenue'] = (final_parking_model_df['predicted_prepaid_additional_parking'] * final_parking_model_df['highest_price']).fillna(0)

# Onsite revenue is driven by the predicted onsite cars. It was previously
# computed from the predicted additional prepaid cars, a copy of the line above.
final_parking_model_df['predicted_onsite_parking_gross_revenue'] = (final_parking_model_df['predicted_onsite_parking'] * final_parking_model_df['highest_price'] * ONSITE_PRICE_MULTIPLIER).fillna(0)

final_parking_model_df['predicted_gross_revenue'] = final_parking_model_df['predicted_prepaid_additional_gross_revenue'] + final_parking_model_df['predicted_onsite_parking_gross_revenue'] + final_parking_model_df['current_gross_revenue']

final_parking_model_df = final_parking_model_df[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking', 'predicted_prepaid_additional_gross_revenue',
                                       'total_predicted_prepaid_cars', 'prepaid_cars_parked', 'predicted_onsite_parking',
                                       'predicted_onsite_parking_gross_revenue','total_parking', 'predicted_gross_revenue']]