In [1]:
import pandas as pd
import numpy as np
from prefect.blocks.system import Secret
from catnip.fla_redshift import FLA_Redshift
from typing import Dict
from concurrent.futures import ThreadPoolExecutor
from datetime import date

from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

from datetime import datetime
import statsmodels.api as sm

import polars as pl
import pyarrow

In [2]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

In [3]:
# Tickets, Nightly Suites, Turnstile

In [3]:
# get past singles data

q = """
WITH playoffs_22_23 AS (
    SELECT
        '2022-23' AS season,
        LEFT(RIGHT(event_name, 4), 2) AS round,
        event_name,
        date(event_date) as event_date,
        CASE
            WHEN DATEDIFF('days', DATE(add_datetime), DATE(event_date)) >= 0
                THEN DATEDIFF('days', DATE(add_datetime), DATE(event_date))
            ELSE 0
        END AS days_out,
        SUM(block_purchase_price) AS gross_revenue,
        SUM(paid_seats) AS paid_seats
    FROM
        custom.cth_ticket_expanded_all_playoffs_2223
    WHERE
        event_name IN ('23POR1G1', '23POR1G2', '23POR1G3', '23POR2G1', '23POR2G2', '23POR3G1', '23POR3G2', '23POR4G1', '23POR4G2')
        AND ticket_type IN ('Singles')
    GROUP BY
        event_name,
        event_date,
        days_out,
        ticket_type
),
playoffs_23_24 AS (
    SELECT
        '2023-24' AS season,
        RIGHT(LEFT(product_description, 6), 2) AS round,
        LEFT(product_description, 8) AS event_name,
        date(event_datetime) as event_date,
        CASE
            WHEN DATEDIFF('days', DATE(transaction_date), DATE(event_datetime)) >= 0
                THEN DATEDIFF('days', DATE(transaction_date), DATE(event_datetime))
            ELSE 0
        END AS days_out,
        SUM(gross_revenue) AS gross_revenue,
        SUM(paid_seats) AS paid_seats
    FROM
        custom.cth_v_ticket_2324_playoffs
    WHERE
        ticket_type_playoffs IN ('Singles')
        AND event_name != '23-24 Pl'
    GROUP BY
        product_description,
        event_date,
        days_out,
        ticket_type_playoffs
),
playoffs_24_25 AS (
    SELECT
        '2024-25' AS season,
        RIGHT(LEFT(product_description, 6), 2) AS round,
        LEFT(product_description, 8) AS event_name,
        date(event_datetime) as event_date,
        CASE
            WHEN DATEDIFF('days', DATE(transaction_date), DATE(event_datetime)) >= 0
                THEN DATEDIFF('days', DATE(transaction_date), DATE(event_datetime))
            ELSE 0
        END AS days_out,
        SUM(gross_revenue) AS gross_revenue,
        SUM(paid_seats) AS paid_seats
    FROM
        custom.cth_v_ticket_2425_playoffs
    WHERE
        ticket_type_playoffs IN ('Singles')
        AND event_name != '24-25 Pl'
    GROUP BY
        product_description,
        event_date,
        days_out,
        ticket_type_playoffs
)
SELECT
    *
FROM
    playoffs_22_23
UNION ALL
SELECT
    *
FROM
    playoffs_23_24
UNION ALL
SELECT
    *
FROM
    playoffs_24_25
ORDER BY
    season,
    round,
    event_name,
    days_out DESC
"""

ticket_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [4]:
# get average tickets sold by days out from previous seasons

ticket_df['min_days_out'] = ticket_df.apply(lambda row: ticket_df[(ticket_df['event_name'] == row['event_name'])]['days_out'].min(), axis = 1)

cumdf = ticket_df.groupby(by = ['round','event_name','event_date'], axis = 0)[['gross_revenue','paid_seats']].cumsum().rename(columns = {'gross_revenue':'cum_gross_rev', 'paid_seats':'cum_num_seats'})

ticket_df = pd.concat([ticket_df,cumdf], axis = 1)

ticket_df['final_seats'] = ticket_df.apply(lambda row: ticket_df[(ticket_df['event_name'] == row['event_name'])&
                                                  (ticket_df['days_out'] == row['min_days_out'])]['cum_num_seats'].item(), axis = 1)

ticket_df['per_seats_in'] = [x/y for x,y in zip(ticket_df['cum_num_seats'],ticket_df['final_seats'])]

ticket_df['final_rev'] = ticket_df.apply(lambda row: ticket_df[(ticket_df['event_name'] == row['event_name'])&
                                                  (ticket_df['days_out'] == row['min_days_out'])]['cum_gross_rev'].item(), axis = 1)

ticket_df['per_rev_in'] = [x/y for x,y in zip(ticket_df['cum_gross_rev'],ticket_df['final_rev'])]

ticket_df = ticket_df[['season','round', 'event_name', 'event_date','days_out','gross_revenue','paid_seats', 'cum_gross_rev','cum_num_seats','per_seats_in','per_rev_in']]

  cumdf = ticket_df.groupby(by = ['round','event_name','event_date'], axis = 0)[['gross_revenue','paid_seats']].cumsum().rename(columns = {'gross_revenue':'cum_gross_rev', 'paid_seats':'cum_num_seats'})


In [5]:
# merge averages back to 24/25 season

df_train = ticket_df[ticket_df['season'] != '2024-25']

df_2425 = ticket_df[ticket_df['season'] == '2024-25']

df_avgs = df_train.groupby(by = ['round','days_out'])[['per_seats_in','per_rev_in']].mean().rename(columns = {'per_seats_in':'avg_per_seats_in','per_rev_in':'avg_per_rev_in'}).reset_index()

In [6]:
# predict final singles totals for 24/25

df_merged = df_2425.merge(right = df_avgs, how = 'left', on = ['round','days_out'])

df_merged['paid_seats'] = df_merged['cum_num_seats']/df_merged['avg_per_seats_in']

df_merged['gross_revenue'] = df_merged['cum_gross_rev']/df_merged['avg_per_rev_in']

min_indices = df_merged.groupby('event_name')['days_out'].idxmin()

result = df_merged.loc[min_indices]

result['ticket_type_playoffs'] = 'Singles'

result['tier'] = result['event_name'].str[-4:].str[:2]

result = result[['event_name','event_date','tier','ticket_type_playoffs','paid_seats','gross_revenue']]

In [7]:
result

Unnamed: 0,event_name,event_date,tier,ticket_type_playoffs,paid_seats,gross_revenue
20,25POR1G1,2025-04-26,R1,Singles,1821.0,374553.2
42,25POR1G2,2025-04-28,R1,Singles,1518.0,281894.2
51,25POR2G1,2025-05-09,R2,Singles,1628.0,872777.0
62,25POR2G2,2025-05-11,R2,Singles,1740.0,809061.0
78,25POR2G3,2025-05-16,R2,Singles,1258.0,445490.7
84,25POR3G1,2025-05-24,R3,Singles,1815.0,546105.9
92,25POR3G2,2025-05-26,R3,Singles,1830.0,549828.8
94,25POR4G1,2025-06-09,R4,Singles,1318.550406,1530094.0
96,25POR4G2,2025-06-12,R4,Singles,1440.935999,1894088.0
98,25POR4G3,2025-06-17,R4,Singles,1102.377317,3532696.0


In [8]:
# get current in from other ticket types

q = """
SELECT
    LEFT(product_description, 8) AS event_name,
    RIGHT(LEFT(product_description,6),2) AS tier,
    date(event_datetime) as event_date,
    ticket_type_playoffs,
    sum(gross_revenue) as gross_revenue,
    sum(paid_seats) as paid_seats
FROM
    custom.cth_v_ticket_2425_playoffs
WHERE
    ticket_type_playoffs != 'Singles'
GROUP BY
    product_description,
    event_date,
    ticket_type_playoffs
"""

current_in = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [9]:
# get weighted show_rate avergaes over last 4 seasons

# get historical show rate data

q = """
WITH historical AS (
    SELECT
        game_desc.season,
        game_desc.tier,
        ticket.event_date::date,
        ticket.comp_seats::float,
        ticket.paid_seats::float,
        CASE
            WHEN ticket.is_comp = TRUE AND ticket.did_attended = TRUE THEN 1
            ELSE 0
        END AS "comp_seats_attended",
        CASE
            WHEN is_comp = FALSE AND did_attended = TRUE THEN 1
            ELSE 0
        END AS "paid_seats_attended"
    FROM
        custom.cth_v_historical_ticket ticket
    INNER JOIN
        custom.cth_game_descriptions game_desc
            ON ticket.event_datetime::date = game_desc.event_datetime::date
            AND game_desc.season IN ('2022-23', '2023-24', '2024-25')
            AND game_desc.event_datetime < current_date
),
tier_show_rate AS (
    SELECT
        season,
        tier,
        sum(historical.comp_seats_attended)::float / nullif(sum(historical.comp_seats),0) AS "comp_show_rate",
        sum(historical.paid_seats_attended)::float / nullif(sum(historical.paid_seats),0) AS "paid_show_rate"
    FROM
        historical
    GROUP BY
        season,
        tier
)
SELECT
    *
FROM
    tier_show_rate
where
    tier in ('R1','R2','R3','SC')
"""

show_rate = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

weights = {'2022-23': .5, '2023-24':1,'2024-25':1.5}

show_rate['weights'] = show_rate['season'].map(weights)

def weighted_paid_average(group):
    # Calculate the weighted sum
    weighted_sum = (group['paid_show_rate'] * group['weights']).sum()
    
    # Calculate the weight sum
    weight_sum = group['weights'].sum()
    
    # Calculate the weighted average
    wavg = weighted_sum / weight_sum
    
    return pd.Series({
        'weighted_paid_average': wavg
    })

paid_tiers = show_rate.groupby(by = ['tier']).apply(weighted_paid_average).reset_index()

def weighted_comp_average(group):
    # Calculate the weighted sum
    weighted_sum = (group['comp_show_rate'] * group['weights']).sum()
    
    # Calculate the weight sum
    weight_sum = group['weights'].sum()
    
    # Calculate the weighted average
    wavg = weighted_sum / weight_sum
    
    return pd.Series({
        'weighted_comp_average': wavg
    })

comp_tiers = show_rate.groupby(by = ['tier']).apply(weighted_comp_average).reset_index()

tiers = pd.merge(paid_tiers, comp_tiers, on = ['tier'], how = 'left')

  paid_tiers = show_rate.groupby(by = ['tier']).apply(weighted_paid_average).reset_index()
  comp_tiers = show_rate.groupby(by = ['tier']).apply(weighted_comp_average).reset_index()


In [10]:
# merge onto 24/25 data and predict attendance

df_final = pd.concat([result,current_in])

final_tickets_and_attendance = df_final.groupby(by = ['event_name','event_date', 'tier'])[['paid_seats','gross_revenue']].sum().reset_index()

final_tickets_and_attendance = final_tickets_and_attendance.merge(tiers, how = 'left', on = 'tier')

final_tickets_and_attendance['total_attendance'] = final_tickets_and_attendance['paid_seats'] * final_tickets_and_attendance['weighted_paid_average']

In [11]:
final_tickets_and_attendance = final_tickets_and_attendance[['event_name','event_date','tier','paid_seats','gross_revenue','total_attendance']]

In [13]:
final_tickets_and_attendance

Unnamed: 0,event_name,event_date,tier,paid_seats,gross_revenue,total_attendance
0,2024-25,2025-06-30,-2,0.0,0.0,
1,25POR1G1,2025-04-26,R1,18800.0,2571834.0,17783.725212
2,25POR1G2,2025-04-28,R1,18758.0,2495216.0,17743.995613
3,25POR2G1,2025-05-09,R2,19139.0,3362859.0,18342.064202
4,25POR2G2,2025-05-11,R2,19158.0,3268124.0,18360.273054
5,25POR2G3,2025-05-16,R2,18952.0,3381040.0,18162.850763
6,25POR3G1,2025-05-24,R3,19048.0,4067036.0,18404.560898
7,25POR3G2,2025-05-26,R3,19162.0,4133486.0,18514.709992
8,25POR4G1,2025-06-09,R4,18008.550406,6681228.0,
9,25POR4G2,2025-06-12,R4,18145.935999,7081371.0,


In [14]:
# get nightly suite tickets

q = """
WITH sold_suites AS (

    WITH comp_temp AS (
        SELECT
            product_id,
            section,
            product_id || '-' || section AS "id",
            'COMP'::varchar AS "status",
            NULL::varchar AS "locks",
            'Comp'::varchar AS "allocations",
            sum(gross_revenue) AS "gross_revenue"
        FROM
            custom.cth_v_ticket_status_2425_playoffs
        WHERE
            (pc_one IN ('U', 'V', 'W') OR section = 'House')
            AND status = 'SOLD'
        GROUP BY
            product_id,
            section
        HAVING
            sum(gross_revenue) = 0
    )

    -- sold suites
    SELECT
        product_id,
        section,
        product_id || '-' || section AS "id",
        'SOLD'::varchar AS "status",
        NULL::varchar AS "locks",
        'Sold'::varchar AS "allocations",
        sum(gross_revenue) AS "gross_revenue"
    FROM
        custom.cth_v_ticket_status_2425_playoffs
    WHERE
        (pc_one IN ('U', 'V', 'W') OR section = 'House')
        AND status = 'SOLD'
    GROUP BY
        product_id,
        section
    HAVING
        sum(gross_revenue) > 0
    UNION ALL

    -- killed suites
    SELECT
        product_id,
        section,
        product_id || '-' || section AS "id",
        'SOLD'::varchar AS "status",
        NULL::varchar AS "locks",
        'Sold'::varchar AS "allocations",
        sum(gross_revenue) AS "gross_revenue"
    FROM
        custom.cth_v_ticket_status_2425_playoffs
    WHERE
        (
            pc_one IN ('U', 'V', 'W')
            OR section = 'House'
        )
        AND (
            allocations ilike '%kill%'
            OR locks ilike '%kill%'
            OR allocations ilike '%panthers players%'
            OR allocations ilike '%owner%'
            OR allocations ilike '%hockey operations%'
            OR allocations ilike '%visiting team%'
        )
        AND "id" NOT IN (SELECT ct.id FROM comp_temp ct)
    GROUP BY
        product_id,
        section
    UNION ALL

    -- comp suites
    SELECT * FROM comp_temp
),
held_suites AS (
    SELECT
        product_id,
        section,
        product_id || '-' || section AS "id",
        'HELD'::varchar AS "status",
        NULL::varchar AS "locks",
        'Held'::varchar AS "allocations",
        sum(gross_revenue) AS "gross_revenue"
    FROM
        custom.cth_v_ticket_status_2425_playoffs
    WHERE
        (pc_one IN ('U', 'V', 'W') OR section = 'House')
        AND status = 'HELD'
        AND "id" NOT IN (SELECT s.id FROM sold_suites s)
    GROUP BY
        product_id,
        section
),
-- SELECT * FROM held_suites;
available_suites AS (
    SELECT
        product_id,
        section,
        product_id || '-' || section AS "id",
        'AVAIL'::varchar AS "status",
        LISTAGG(DISTINCT locks, ', ') AS "locks",
        LISTAGG(DISTINCT allocations, ', ') WITHIN GROUP (ORDER BY allocations) AS "allocations",
        sum(gross_revenue) AS "gross_revenue"
    FROM
        custom.cth_v_ticket_status_2425_playoffs
    WHERE
        (pc_one IN ('U', 'V', 'W') OR section = 'House')
        AND status = 'AVAIL'
        AND "id" NOT IN (SELECT s.id FROM sold_suites s)
        AND "id" NOT IN (SELECT h.id FROM held_suites h)
        AND (allocations <> '["Standing Room Only"]' OR allocations IS NULL)
    GROUP BY
        product_id,
        section
),
temp AS (
    SELECT * FROM sold_suites
    UNION ALL
    SELECT * FROM held_suites
    UNION ALL
    SELECT * FROM available_suites
)
SELECT
    split_part(products.product_description, ' - ', 1) AS "event_name",
    CASE
        WHEN RIGHT(LEFT(product_description,6),2) = 'R4' THEN 'SC'
        ELSE RIGHT(LEFT(product_description,6),2)
    END AS tier,
    event_date,
    temp.*
FROM
    temp
LEFT JOIN
    custom.seatgeek_v_products products ON temp.product_id = products.product_id
ORDER BY
    "event_name",
    event_date,
    section
"""

current_nightly_suites = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [20]:
current_nightly_suites

Unnamed: 0,event_name,event_date,tier,AVAIL,COMP,SOLD,days_out_from_event,avg_sold,cumulative_avg_sold,is_going_to_sellout
0,2024-25 Playoff Inventory Management,2025-06-30 00:00:00,-2,17.0,58.0,4.0,30,,,False
1,25POR1G1,2025-04-26 13:00:00,R1,,15.0,64.0,-34,,,False
2,25POR1G2,2025-04-28 19:00:00,R1,,11.0,68.0,-32,,,False
3,25POR1G3,2025-05-02 00:00:00,R1,72.0,,7.0,-29,,,False
4,25POR1G4,2025-05-04 00:00:00,R1,72.0,,7.0,-27,,,False
5,25POR2G1,2025-05-09 19:00:00,R2,,9.0,70.0,-21,,,False
6,25POR2G2,2025-05-11 19:30:00,R2,,10.0,69.0,-19,,,False
7,25POR2G3,2025-05-16 20:00:00,R2,,15.0,64.0,-14,,,False
8,25POR2G4,2025-05-18 00:00:00,R2,73.0,,6.0,-13,,,False
9,25POR3G1,2025-05-24 20:00:00,R3,,13.0,68.0,-6,,,False


In [15]:
current_nightly_suites.groupby(by = ['event_name','event_date','status'])[['section']].count().reset_index()

current_nightly_suites = current_nightly_suites.pivot_table(index=['event_name','event_date', 'tier'], columns='status', 
             values='section', aggfunc='count').reset_index()

current_nightly_suites['event_date'] = pd.to_datetime(current_nightly_suites['event_date'])

current_nightly_suites['days_out_from_event'] = (current_nightly_suites['event_date'] - datetime.now()).dt.days

In [16]:
q = """
SELECT
    *
FROM
    custom.forecasting_hockey_nightly_suites_playoffs
"""

forecasting_nightly_suites = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [17]:
current_nightly_suites = current_nightly_suites.merge(forecasting_nightly_suites, how = 'left', on = ['tier', 'days_out_from_event'])

In [18]:
current_nightly_suites['is_going_to_sellout'] = current_nightly_suites['cumulative_avg_sold'] > current_nightly_suites['AVAIL']


In [19]:
current_nightly_suites = current_nightly_suites[['event_name','event_date','days_out_from_event','tier','AVAIL','COMP','HELD','SOLD','cumulative_avg_sold','is_going_to_sellout']]

KeyError: "['HELD'] not in index"

In [70]:
# Merch, F&B

In [None]:
q = """
WITH attendance AS (
    SELECT
        event_datetime,
        COUNT(*) AS attendance
    FROM
        custom.cth_v_attendance_2324_playoffs
    GROUP BY
        event_datetime
)
SELECT
    cth_game_descriptions.season,
    cth_game_descriptions.event_date,
    tier,
    day_of_week,
    start_time,
    attendance,
    gross_revenue,
    num_orders,
    quantity_sold
FROM
    custom.cheq_v_hockey_summary
LEFT JOIN
    custom.cth_game_descriptions 
    ON DATE(cheq_v_hockey_summary.event_date) = DATE(cth_game_descriptions.event_date)
LEFT JOIN
    attendance 
    ON DATE(attendance.event_datetime) = DATE(cheq_v_hockey_summary.event_date)
WHERE
    tier IN ('R1', 'R2', 'R3', 'SC')
"""

historical_f_and_b = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [None]:
def get_model_columns(df):

    day_map = {
    'Fri': 1,
    'Sat': 1,
    'Sun': 1,
    'Mon': 0,
    'Tue': 0,
    'Wed': 0,
    'Thu': 0
}

    df['weekend'] = df.apply(
        lambda row: day_map.get(row['day_of_week'], 0), axis=1)

    start_time_map = {
        '12:30 PM': 1,
        '12:45 PM': 1,
        '1:00 PM': 1,
        '3:00 PM': 1,
        '3:30 PM': 1,
        '4:00 PM': 2,
        '5:00 PM': 2,
        '6:00 PM': 2
    }

    df['start_time_num'] = df.apply(
        lambda row: start_time_map.get(row['start_time'], 0),
        axis=1)

    tier_mapping = {
        'SC': 4,
        'R3': 3,
        'R2': 2,
        'R1': 1
    }

    df['tier_num'] = df.apply(
        lambda row: tier_mapping.get(row['tier'], 0),
        axis=1) 
    
    return df

In [52]:
historical_f_and_b = get_model_columns(historical_f_and_b)

In [None]:
q = """
WITH attendance AS
    (SELECT
        event_datetime,
        COUNT(*) AS attendance
    FROM
        custom.cth_v_attendance_2324_playoffs
    GROUP BY
        event_datetime)
SELECT
    cth_game_descriptions.season,
    cth_game_descriptions.event_date,
    tier,
    day_of_week,
    start_time,
    attendance,
    SUM(gross_revenue) AS gross_revenue,
    SUM(qty) AS quantity,
    COUNT(distinct invoice_id) AS num_orders
FROM
    custom.retailpro_v_invoice_items
LEFT JOIN
    custom.cth_game_descriptions ON retailpro_v_invoice_items.event_date = cth_game_descriptions.event_date
LEFT JOIN
    attendance ON retailpro_v_invoice_items.event_date = date(attendance.event_datetime)
WHERE
    season IN ('2023-24','2024-25')
    AND tier IN ('R1','R2','R3','SC')
GROUP BY
    cth_game_descriptions.season,
    cth_game_descriptions.event_date,
    tier,
    is_premier,
    original_six_plus_extra,
    day_of_week,
    start_time,
    attendance
"""

historical_merch = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [54]:
historical_merch = get_model_columns(historical_merch)

In [None]:
q = """
SELECT
    season,
    date(event_date) AS event_date,
    day_of_week,
    tier,
    start_time
FROM
    custom.cth_game_descriptions
WHERE
    event_date >= current_date
"""

future_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

future_game_info = future_game_info.merge(final_tickets_and_attendance, how = 'left', on = ['event_date', 'tier'])

future_game_info = get_model_columns(future_game_info)

In [60]:
def run_f_and_b_model(df, df_future):

    x_train = df[['attendance','weekend','start_time_num','tier_num']]
    y_train = df[['gross_revenue']]

    x_test = df_future[['total_attendance','weekend','start_time_num','tier_num']]

    #scalar = PolynomialFeatures(degree=2, include_bias=False)
    #poly_features = scalar.fit_transform(x_train)
    scalar = StandardScaler()
    poly_features = scalar.fit_transform(x_train)

    polynomial = LinearRegression().fit(poly_features, np.array(y_train).ravel())
    #predicted_train = polynomial.predict(poly_features)

    poly_features2 = scalar.fit_transform(x_test)

    return polynomial.predict(poly_features2).astype(int)

In [61]:
future_game_info['predicted_f_and_b_gross_rev'] = run_f_and_b_model(historical_f_and_b, future_game_info)

In [62]:
def run_merch_model(df, df_future):

    x_train = df[['attendance','weekend','start_time_num','tier_num']]
    y_train = df[['gross_revenue']]

    x_test = df_future[['total_attendance','weekend','start_time_num','tier_num']]

    #scalar = PolynomialFeatures(degree=2, include_bias=False)
    #poly_features = scalar.fit_transform(x_train)
    scalar = StandardScaler()
    poly_features = scalar.fit_transform(x_train)

    polynomial = LinearRegression().fit(poly_features, np.array(y_train).ravel())
    #predicted_train = polynomial.predict(poly_features)

    poly_features2 = scalar.fit_transform(x_test)

    return polynomial.predict(poly_features2).astype(int)

In [63]:
future_game_info['predicted_merch_gross_rev'] = run_merch_model(historical_merch, future_game_info)

In [106]:
merch_and_f_and_b_rev = future_game_info[['event_date','event_name','total_attendance','predicted_f_and_b_gross_rev', 'predicted_merch_gross_rev']]

In [None]:
# Parking

In [69]:
# get historical show rates by tier and lot

q = """
WITH prepaid AS (
    SELECT
        event_datetime,
        location_group
    FROM
        custom.ctp_v_ticket_2324
    WHERE
        event_type ILIKE '%panthers%'
        AND event_datetime < CURRENT_DATE
    UNION ALL
    SELECT
        event_datetime,
        location_group
    FROM
        custom.ctp_v_ticket_2425
    WHERE
        event_type ILIKE '%panthers%'
        AND event_datetime < CURRENT_DATE
),
prepaid_agg AS (
    SELECT
        event_datetime,
        location_group,
        COUNT(*) AS prepaid_passes
    FROM
        prepaid
    GROUP BY
        event_datetime, 
        location_group
),
scans AS (
    SELECT
        season,
        cth_game_descriptions.event_datetime,
        tier,
        location_group,
        CASE
            WHEN paid_amount = 0 THEN 1
            ELSE 0 
        END AS num_scans
    FROM
        custom.parkhub_v_transactions
    LEFT JOIN
        custom.cth_game_descriptions 
        ON parkhub_v_transactions.event_datetime = cth_game_descriptions.event_datetime
    WHERE
        cth_game_descriptions.event_datetime IS NOT NULL
        AND season IN ('2023-24', '2024-25')
),
scans_agg AS (
    SELECT
        season,
        event_datetime,
        tier,
        location_group,
        SUM(num_scans) AS num_scans
    FROM
        scans
    GROUP BY
        season,
        event_datetime,
        tier,
        location_group
)
SELECT
    season,
    prepaid_agg.event_datetime,
    tier,
    prepaid_agg.location_group,
    prepaid_passes,
    num_scans,
    num_scans * 1.0 / prepaid_passes::FLOAT AS show_rate
FROM
    scans_agg
LEFT JOIN
    prepaid_agg 
    ON scans_agg.event_datetime = prepaid_agg.event_datetime
    AND scans_agg.location_group = prepaid_agg.location_group
WHERE
    prepaid_agg.event_datetime IS NOT NULL
    AND tier IN ('R1','R2','R3','SC')
"""

show_rate_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [71]:
# weigh this past season more heavily

weights = {'2023-24':1.25,'2024-25':2.75}

show_rate_df['weights'] = show_rate_df['season'].map(weights)

def weighted_paid_average(group):
    # Calculate the weighted sum
    weighted_sum = (group['show_rate'] * group['weights']).sum()
    
    # Calculate the weight sum
    weight_sum = group['weights'].sum()
    
    # Calculate the weighted average
    wavg = weighted_sum / weight_sum
    
    return pd.Series({
        'weighted_average': wavg
    })

paid_tiers = show_rate_df.groupby(by = ['tier','location_group']).apply(weighted_paid_average).reset_index()

  paid_tiers = show_rate_df.groupby(by = ['tier','location_group']).apply(weighted_paid_average).reset_index()


In [72]:
# gather historical prepaid parking data

q = """
WITH prepaid AS (
    SELECT
        event_datetime,
        location_group,
        DATE(transaction_date) AS transaction_date
    FROM
        custom.ctp_v_ticket_2324
    WHERE
        event_type ILIKE '%panthers%'
        AND event_datetime < CURRENT_DATE
    UNION ALL
    SELECT
        event_datetime,
        location_group,
        DATE(transaction_date) AS transaction_date
    FROM
        custom.ctp_v_ticket_2425
    WHERE
        event_type ILIKE '%panthers%'
        AND event_datetime < CURRENT_DATE
)
SELECT
    DATE(prepaid.event_datetime) AS event_date,
    location_group,
    'prepaid' AS parking_type,
    DATEDIFF('days', transaction_date, prepaid.event_datetime) AS days_out,
    CASE
        WHEN DATEDIFF('days', transaction_date, prepaid.event_datetime) >= 150 THEN 0
        ELSE COUNT(*)
    END AS num_passes
FROM
    prepaid
LEFT JOIN
    custom.cth_game_descriptions 
    ON prepaid.event_datetime = cth_game_descriptions.event_datetime
WHERE
    DATEDIFF('days', transaction_date, prepaid.event_datetime) >= 0
    AND tier IN ('R1', 'R2', 'R3', 'SC')
GROUP BY
    prepaid.event_datetime,
    location_group,
    transaction_date
ORDER BY
    prepaid.event_datetime,
    location_group,
    transaction_date DESC
"""

historical_prepaid_parking_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

location_map = {
    'Club': 4,
    'Garage': 3,
    'General': 2,
    'Valet': 1
}

historical_prepaid_parking_info['location_num'] = historical_prepaid_parking_info.apply(
    lambda row: location_map.get(row['location_group'], 0), axis=1)

historical_prepaid_parking_info['cumulative_num_passes']  = historical_prepaid_parking_info.groupby(['event_date', 'location_group'])['num_passes'].cumsum()

In [73]:
# gather historical game data (ie tier, dow, and start time)

q = """
SELECT
    date(cth_game_descriptions.event_date) AS event_date,
    tier,
    day_of_week,
    start_time,
    total_tickets
FROM
    custom.cth_v_historical_attendance_summary
LEFT JOIN
    custom.cth_game_descriptions ON cth_v_historical_attendance_summary.event_date = cth_game_descriptions.event_date
WHERE
    tier IN ('R1','R2','R3','SC')
"""

all_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

day_map = {
    'Fri': 1,
    'Sat': 1,
    'Sun': 1,
    'Mon': 0,
    'Tue': 0,
    'Wed': 0,
    'Thu': 0
}

all_game_info['weekend'] = all_game_info.apply(
    lambda row: day_map.get(row['day_of_week'], 0), axis=1)

start_time_map = {
    '12:30 PM': 1,
    '12:45 PM': 1,
    '1:00 PM': 1,
    '3:00 PM': 1,
    '3:30 PM': 1,
    '4:00 PM': 2,
    '5:00 PM': 2,
    '6:00 PM': 2
}

all_game_info['start_time_num'] = all_game_info.apply(
    lambda row: start_time_map.get(row['start_time'], 0),
    axis=1)

In [74]:
# gather upcoming game data including current prepaid totals by game and lot

q = """
SELECT
    DATE(cth_game_descriptions.event_datetime) AS event_date,
    DATEDIFF('day', CURRENT_DATE, cth_game_descriptions.event_datetime) AS days_out,
    ctp_v_ticket_2425.location_group,
    capacity::INT,
    SUM(paid_seats) + SUM(comp_seats) AS prepaid_cars,
    SUM(gross_revenue) AS current_gross_revenue,
    capacity::INT - (SUM(paid_seats) + SUM(comp_seats)) AS cap_remaining
FROM
    custom.ctp_v_ticket_2425
LEFT JOIN
    custom.ctp_parking_capacities 
    ON ctp_v_ticket_2425.location_group = ctp_parking_capacities.location_group
LEFT JOIN
    custom.cth_game_descriptions 
    ON ctp_v_ticket_2425.event_datetime = cth_game_descriptions.event_datetime
WHERE
    cth_game_descriptions.event_datetime IS NOT NULL 
    AND cth_game_descriptions.event_datetime >= CURRENT_DATE
GROUP BY
    cth_game_descriptions.event_datetime,
    ctp_v_ticket_2425.location_group,
    capacity
ORDER BY
    cth_game_descriptions.event_datetime,
    ctp_v_ticket_2425.location_group
"""

upcoming_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [75]:
# gather upcoming pricing data by game and lot

q = """
SELECT
    tier,
    location_group,
    max(transaction_date) AS "transaction_date",
    max(adjusted_price) AS "highest_price"
FROM
    custom.ctp_v_ticket_2425
LEFT JOIN             
    custom.cth_game_descriptions on ctp_v_ticket_2425.event_datetime = cth_game_descriptions.event_datetime
WHERE
    is_comp = FALSE
    AND price_type ILIKE 'IA%'
GROUP BY
    tier, 
    location_group
"""

pricing_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [76]:
# create a model to predict remaining prepaid cars

def run_prepaid_model(df, df_future, lot):

    total_table = df[df['location_group'] == lot]

    x_train = total_table[['days_out','weekend','start_time_num']]
    y_train = total_table[['cumulative_num_passes']]

    total_future_table = df_future[df_future['location_group'] == lot]

    x_test = total_future_table[['days_out','weekend','start_time_num']]

    #scalar = PolynomialFeatures(degree=2, include_bias=False)
    #poly_features = scalar.fit_transform(x_train)
    scalar = StandardScaler()
    poly_features = scalar.fit_transform(x_train)

    polynomial = LinearRegression().fit(poly_features, np.array(y_train).ravel())
    #predicted_train = polynomial.predict(poly_features)

    poly_features2 = scalar.fit_transform(x_test)

    return polynomial.predict(poly_features2).astype(int)

In [86]:
# merge hisotrical prepaid parking data with hisorical game data for training model

total_table = historical_prepaid_parking_info.merge(all_game_info, how = 'left', on = 'event_date')
total_table = total_table[total_table['tier'].isin(['R1','R2','R3','SC'])]

# merge upcoming parking data with hisorical game data for testing model

total_future_table = upcoming_game_info.merge(future_game_info, how = 'left', on = 'event_date')
total_future_table = total_future_table[total_future_table['tier'].isin(['R1','R2','R3','SC'])]

In [88]:
# only predicting for general and garage not  club, valet or executive

lots = ['General','Garage']

final_df = pd.DataFrame()

for lot in lots:

    temp = total_future_table[total_future_table['location_group'] == lot]

    temp['predicted_parking'] = run_prepaid_model(total_table, total_future_table, lot)

    final_df = pd.concat([final_df,temp], ignore_index= True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_parking'] = run_prepaid_model(total_table, total_future_table, lot)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_parking'] = run_prepaid_model(total_table, total_future_table, lot)


In [89]:
# concat club totals for onsite model next

club_totals = total_future_table[total_future_table['location_group'] == 'Club']
club_totals['predicted_parking'] = 0

final_df = pd.concat([final_df, club_totals], axis=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  club_totals['predicted_parking'] = 0


In [90]:
# make sure no negative predictions are made

final_df['predicted_prepaid_additional_parking'] = np.where(final_df['predicted_parking'] < 0, 0, final_df['predicted_parking'])

# get total prepaid tickets (current + predicted additional)

final_df['total_predicted_prepaid_cars'] = final_df['prepaid_cars'] + final_df['predicted_parking']

# get number of parked cars using historical show rates

final_df = final_df.merge(paid_tiers, how =  'left', on = ['tier', 'location_group'])
final_df['prepaid_cars_parked'] = (final_df['total_predicted_prepaid_cars'] * final_df['weighted_average']).astype(int)

In [91]:
# find the capacity remaining 

final_df['cap_remaining'] = final_df['capacity'] - final_df['prepaid_cars_parked']

# if predicted cars over capacity subtract overflow out

final_df['predicted_prepaid_additional_parking'] = np.where(final_df['cap_remaining'] < 0, final_df['predicted_prepaid_additional_parking']+final_df['cap_remaining'], final_df['predicted_prepaid_additional_parking'])
final_df['prepaid_cars_parked'] = np.where(final_df['cap_remaining'] < 0, final_df['prepaid_cars_parked']+final_df['cap_remaining'], final_df['prepaid_cars_parked'])
final_df['cap_remaining'] = np.where(final_df['cap_remaining'] < 0, 0, final_df['prepaid_cars_parked'])

In [92]:
final_df = final_df[['event_date','days_out','tier', 'start_time_num','weekend',
                     'location_group','capacity','prepaid_cars','current_gross_revenue', 
                     'predicted_prepaid_additional_parking', 'total_predicted_prepaid_cars',
                     'prepaid_cars_parked','cap_remaining']]

In [93]:
# get hisotrical onsite parking data

q = """
with onsite as
    (select
        date(cth_game_descriptions.event_datetime) as event_date,
        location_group,
        0 as days_out,
        case
            when paid_amount > 0 then 1
        else 0
        end as num_onsite_cars,
        case
            when paid_amount = 0 then 1
        else 0
        end as num_prepaid_cars
    from
        custom.parkhub_v_transactions
    left join
        custom.cth_game_descriptions on parkhub_v_transactions.event_datetime = cth_game_descriptions.event_datetime
    where
        season in ('2023-24','2024-25'))
select
    onsite.event_date,
    onsite.location_group,
    'onsite' as parking_type,
    days_out,
    sum(num_onsite_cars) as num_cars,
    capacity - sum(num_prepaid_cars) as cap_remaining
from
    onsite
left join
    custom.ctp_parking_capacities on onsite.location_group = ctp_parking_capacities.location_group
where
    days_out >= 0
group by
    onsite.event_date,
    onsite.location_group,
    parking_type,
    days_out,
    capacity
"""

historical_onsite_parking_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [94]:
# merge hisotrical prepaid parking data with hisorical game data for training model

total_onsite_table = historical_onsite_parking_info.merge(all_game_info, how = 'left', on = 'event_date')
total_onsite_table = total_onsite_table[total_onsite_table['tier'].isin(['R1','R2','R3','SC'])]

In [96]:
# create a model to predict onsite cars

def run_onsite_model(df, df_future, lot):

    x_train_table = df[df['location_group'] == lot]

    x_train = x_train_table[['cap_remaining','weekend','start_time_num']]
    y_train = x_train_table[['num_cars']]

    x_test_table = df_future[df_future['location_group'] == lot]

    x_test = x_test_table[['cap_remaining','weekend','start_time_num']]

    #scalar = PolynomialFeatures(degree=2, include_bias=False)
    #poly_features = scalar.fit_transform(x_train)
    scalar = StandardScaler()
    poly_features = scalar.fit_transform(x_train)

    polynomial = LinearRegression().fit(poly_features, np.array(y_train).ravel())
    #predicted_train = polynomial.predict(poly_features)

    poly_features2 = scalar.fit_transform(x_test)

    #predicted_test = polynomial.predict(poly_features2)

    return polynomial.predict(poly_features2).astype(int)

In [97]:
# only predicting for general, garage, and club not valet or executive

lots = ['General','Garage','Club']

final_df_onsite = pd.DataFrame()

for lot in lots:

    temp = final_df[final_df['location_group'] == lot]

    temp['predicted_onsite_parking'] = run_onsite_model(total_onsite_table, final_df, lot)

    final_df_onsite = pd.concat([final_df_onsite,temp], ignore_index= True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_onsite_parking'] = run_onsite_model(total_onsite_table, final_df, lot)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_onsite_parking'] = run_onsite_model(total_onsite_table, final_df, lot)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_onsite_parking'

In [98]:
# add back executive and valet parking and match fields from final_df

exec_and_valet = total_future_table[total_future_table['location_group'].isin(['Executive','Valet'])]

exec_and_valet['predicted_prepaid_additional_parking'] = 0
exec_and_valet['total_predicted_prepaid_cars'] = exec_and_valet['prepaid_cars']

exec_and_valet = exec_and_valet.merge(paid_tiers, how =  'left', on = ['tier', 'location_group'])
exec_and_valet['weighted_average'] = exec_and_valet['weighted_average'].fillna(1)

exec_and_valet['prepaid_cars_parked'] = (exec_and_valet['total_predicted_prepaid_cars'] * exec_and_valet['weighted_average']).astype(int)
exec_and_valet['predicted_onsite_parking'] = 0
exec_and_valet['total_parking'] = exec_and_valet['prepaid_cars_parked'] 

exec_and_valet = exec_and_valet[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking',
                                       'total_predicted_prepaid_cars', 'prepaid_cars_parked',
                                       'predicted_onsite_parking','total_parking']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exec_and_valet['predicted_prepaid_additional_parking'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exec_and_valet['total_predicted_prepaid_cars'] = exec_and_valet['prepaid_cars']


In [99]:
# if predicted total over capacity subtract overflow out

final_df_onsite['predicted_onsite_parking'] = [pred_onsite if pred_onsite <= cap_remaining else cap_remaining for pred_onsite, cap_remaining in zip(final_df_onsite['predicted_onsite_parking'], final_df_onsite['cap_remaining'])]

final_df_onsite['total_parking'] = final_df_onsite['prepaid_cars_parked'] + final_df_onsite['predicted_onsite_parking']

final_parking_model = final_df_onsite[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking','total_predicted_prepaid_cars',
                                       'prepaid_cars_parked','predicted_onsite_parking','total_parking']]

# merge with executive and valet parking info

final_parking_model_df = pd.concat([final_parking_model, exec_and_valet], axis=0)

In [100]:
final_parking_model_df = final_parking_model_df.merge(pricing_info, how = 'left', on = ['tier', 'location_group'])

final_parking_model_df['predicted_prepaid_additional_gross_revenue'] = (final_parking_model_df['predicted_prepaid_additional_parking'] * final_parking_model_df['highest_price']).fillna(0)

final_parking_model_df['predicted_onsite_parking_gross_revenue'] = (final_parking_model_df['predicted_prepaid_additional_parking'] * final_parking_model_df['highest_price']*1.25).fillna(0)

final_parking_model_df['predicted_gross_revenue'] = final_parking_model_df['predicted_prepaid_additional_gross_revenue'] + final_parking_model_df['predicted_onsite_parking_gross_revenue'] + final_parking_model_df['current_gross_revenue']

final_parking_model_df = final_parking_model_df[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking', 'predicted_prepaid_additional_gross_revenue',
                                       'total_predicted_prepaid_cars', 'prepaid_cars_parked', 'predicted_onsite_parking',
                                       'predicted_onsite_parking_gross_revenue','total_parking', 'predicted_gross_revenue']]