In [None]:
# Parking

In [1]:
import pandas as pd
import numpy as np
from prefect.blocks.system import Secret
from catnip.fla_redshift import FLA_Redshift
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

from datetime import datetime
import statsmodels.api as sm

import polars as pl
import pyarrow

In [2]:
def get_redshift_credentials() -> Dict:
    """Build the keyword arguments that are splatted into ``FLA_Redshift(**rs_creds)``.

    Sensitive values are read from Prefect ``Secret`` blocks; the port, the S3
    subdirectory and the ``verbose`` flag are fixed literals.

    Returns:
        Dict: database settings (``dbname``, ``host``, ``port``, ``user``, ``password``),
        S3 settings (``aws_access_key_id``, ``aws_secret_access_key``, ``bucket``,
        ``subdirectory``) and ``verbose``.
    """

    def _read_secret(block_name):
        # Load the named Prefect Secret block and unwrap the stored value.
        return Secret.load(block_name).get()

    return {
        # Redshift connection
        "dbname": _read_secret("stellar-redshift-db-name"),
        "host": _read_secret("stellar-redshift-host"),
        "port": 5439,
        "user": _read_secret("stellar-redshift-user-name"),
        "password": _read_secret("stellar-redshift-password"),
        # S3 access
        "aws_access_key_id": _read_secret("fla-s3-aws-access-key-id-east-1"),
        "aws_secret_access_key": _read_secret("fla-s3-aws-secret-access-key-east-1"),
        "bucket": _read_secret("fla-s3-bucket-name-east-1"),
        "subdirectory": "us-east-1",
        # Client behaviour
        "verbose": False,
    }

# Resolve the credentials on a single worker thread and wait for the result.
# NOTE(review): presumably this keeps the Prefect block loading off the notebook's own
# event loop - confirm before simplifying to a direct call.
with ThreadPoolExecutor(max_workers=1) as pool:
    rs_creds = pool.submit(get_redshift_credentials).result()

In [None]:
# Tickets, Nightly Suites, Turnstile

In [None]:
# get game info data
# One row per 2023-24 game from custom.cth_game_descriptions: event date, tier, premier flag,
# and original_six_plus_extra multiplied by 100 and cast to int.

q = """
select 
    date(event_date) as event_date,
    tier,
    is_premier, 
    cast(original_six_plus_extra*100 as int) as original_six_plus_extra
from  
    custom.cth_game_descriptions
where 
    season = '2023-24'
"""
# Run the query through FLA_Redshift, then keep a polars copy of the result
# (pl.from_pandas is applied to what query_warehouse returns).
tier_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)
pl_tier_df = pl.from_pandas(tier_df)

In [None]:
# get 23/24 playoff ticket data
#
# Query outline:
#   arena_levels    - maps each row's pc_one code to an arena level and flags it as
#                     capacity 1, or 0 when allocations/locks contain 'Kill'.
#   arena_level_agg - sums that capacity per event_date and arena level.
#   ticket_info     - sums paid_seats and gross_revenue per event, transaction date and
#                     arena level, with days_out = days from transaction date to event date.
#   final select    - left-joins sales onto capacity (levels with no sales keep null
#                     days_out/paid_seats) and zeroes paid_seats for sales made more than
#                     80 (Lowers), 100 (Uppers), 50 (Clubs) or 30 (Premium) days out.
# Rows come back ordered by event_date, arena_level_internal, days_out.

#df_2324 = pl.read_csv("C:\\Users\\riffere\\Florida Panthers\\SP-BS - Documents\\Data Science\\Resources\\Files\\emily_ticket_sales_model_data_final.csv")  

q = """
with arena_levels as
    (select
         event_date,
        CASE
            WHEN pc_one IN ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', '1', '2', '3', '4', '5', '6', '7', '8') THEN 'Lowers'
            WHEN pc_one IN ('K', 'L', 'M') THEN 'Clubs'
            WHEN pc_one IN ('N', 'O', 'P', 'Q', 'R', 'S', 'T', 'Y') THEN 'Uppers'
            WHEN pc_one IN ('U', 'V', 'W') THEN 'Suites'
            WHEN pc_one IN ('X', 'Z') THEN 'Premium'
            ELSE 'Unknown'
        END AS arena_level_internal,
        case
            when allocations like '%Kill%' or locks like '%Kill%' then 0
            else 1
        end as capacity
    from
        custom.cth_v_ticket_status_2324_playoffs),
arena_level_agg as
    (select
         event_date,
         arena_level_internal,
         sum(capacity) as capacity
    from
        arena_levels
    group by
        event_date,
        arena_level_internal),
ticket_info as
    (select
        event_datetime,
        datediff('days',date(transaction_date), date(event_datetime)) as days_out,
        arena_level_internal,
        sum(paid_seats) as paid_seats,
        sum(gross_revenue) as gross_revenue
    from
        custom.cth_v_ticket_2324_playoffs
    group by
        event_datetime,
        date(transaction_date),
        arena_level_internal)
select
    date(arena_level_agg.event_date) as event_date,
    ticket_info.days_out,
    arena_level_agg.arena_level_internal,
    capacity,
    case
        when arena_level_agg.arena_level_internal = 'Lowers' AND days_out > 80 THEN 0
        when arena_level_agg.arena_level_internal = 'Uppers' AND days_out > 100 THEN 0
        when arena_level_agg.arena_level_internal = 'Clubs' AND days_out > 50 THEN 0
        when arena_level_agg.arena_level_internal = 'Premium' AND days_out > 30 THEN 0
        else paid_seats
        end as paid_seats
from
    arena_level_agg
left join
    ticket_info on date(arena_level_agg.event_date) = date(ticket_info.event_datetime)
    and arena_level_agg.arena_level_internal = ticket_info.arena_level_internal
order by
    event_date,
    arena_level_internal,
    days_out
"""

# Fetch the result and convert it to polars for the transformations that follow.
df_2324 = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)
df_2324 = pl.from_pandas(df_2324)

# Note: gross_revenue is aggregated in ticket_info but is not part of the final select.
### coalesce(gross_revenue::int,0) as gross_revenue

In [None]:
# Running total of paid_seats inside each (event_date, arena_level_internal) group,
# accumulated in the frame's current row order and stored as Int16.
df_2324 = df_2324.with_columns(
    cumulative_tickets=(
        pl.col("paid_seats")
        .cum_sum()
        .over(["event_date", "arena_level_internal"])
        .cast(pl.Int16)
    )
)

In [None]:
# Attach the per-game descriptors (tier, is_premier, original_six_plus_extra) to the
# 23/24 ticket rows. Left join: ticket rows without a matching event_date are kept
# with nulls in the descriptor columns.
df_2324 = df_2324.join(other=pl_tier_df, how="left", on="event_date")

In [None]:
# Downcast every Int64 column of the 23/24 frame to Int16 (done for speed, per the
# original author's note).
int64_columns = [name for name, dtype in df_2324.schema.items() if dtype == pl.Int64]

df_2324 = df_2324.with_columns(
    [pl.col(name).cast(pl.Int16) for name in int64_columns]
)

In [None]:
# Model columns for the 23/24 frame: dow, tier_num, arena_level_num and cap_remaining,
# followed by a filter that keeps only tiers A-E, non-negative days_out and the five
# known arena levels.

# Ordinal encodings; any value missing from a mapping is encoded as 0.
tier_mapping = dict(zip('ABCDE', (5, 4, 3, 2, 1)))

arena_level_mapping = {
    'Clubs': 5,
    'Lowers': 4,
    'Uppers': 3,
    'Suites': 2,
    'Premium': 1,
}

df_2324 = (
    df_2324
    .with_columns(
        # weekday number of the event date
        dow=pl.col('event_date').dt.weekday().cast(pl.Int16),
        tier_num=pl.col('tier').replace(tier_mapping, default=0).cast(pl.Int16),
        arena_level_num=(
            pl.col('arena_level_internal')
            .replace(arena_level_mapping, default=0)
            .cast(pl.Int16)
        ),
        # capacity minus the running total of paid seats
        cap_remaining=pl.col('capacity') - pl.col('cumulative_tickets'),
    )
    .filter(
        pl.col('tier').is_in(list(tier_mapping))
        & (pl.col('days_out') >= 0)
        & pl.col('arena_level_internal').is_in(list(arena_level_mapping))
    )
)

In [None]:
# get 24/25 playoff ticket data
#
# Same query shape as the 23/24 cell, but reading the 2425 playoff tables. The result is
# stored in df_2425: the original cell assigned it to df_2324, which overwrote the
# training frame and left df_2425 (used by every cell below) undefined.
#
# Query outline:
#   arena_levels    - maps pc_one to an arena level; capacity flag is 0 when
#                     allocations/locks contain 'Kill', else 1.
#   arena_level_agg - capacity per event_date and arena level.
#   ticket_info     - paid_seats / gross_revenue per event, transaction date and level.
#   final select    - left join of sales onto capacity; paid_seats is zeroed for sales
#                     more than 80/100/50/30 days out (Lowers/Uppers/Clubs/Premium).
#
# NOTE(review): the cells below read `tier` and `is_premier` from df_2425, but this query
# does not return them and no join with game descriptions is visible for this frame -
# confirm where those columns are meant to come from.

q = """
with arena_levels as
    (select
         event_date,
        CASE
            WHEN pc_one IN ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', '1', '2', '3', '4', '5', '6', '7', '8') THEN 'Lowers'
            WHEN pc_one IN ('K', 'L', 'M') THEN 'Clubs'
            WHEN pc_one IN ('N', 'O', 'P', 'Q', 'R', 'S', 'T', 'Y') THEN 'Uppers'
            WHEN pc_one IN ('U', 'V', 'W') THEN 'Suites'
            WHEN pc_one IN ('X', 'Z') THEN 'Premium'
            ELSE 'Unknown'
        END AS arena_level_internal,
        case
            when allocations like '%Kill%' or locks like '%Kill%' then 0
            else 1
        end as capacity
    from
        custom.cth_v_ticket_status_2425_playoffs),
arena_level_agg as
    (select
         event_date,
         arena_level_internal,
         sum(capacity) as capacity
    from
        arena_levels
    group by
        event_date,
        arena_level_internal),
ticket_info as
    (select
        event_datetime,
        datediff('days',date(transaction_date), date(event_datetime)) as days_out,
        arena_level_internal,
        sum(paid_seats) as paid_seats,
        sum(gross_revenue) as gross_revenue
    from
        custom.cth_v_ticket_2425_playoffs
    group by
        event_datetime,
        date(transaction_date),
        arena_level_internal)
select
    date(arena_level_agg.event_date) as event_date,
    ticket_info.days_out,
    arena_level_agg.arena_level_internal,
    capacity,
    case
        when arena_level_agg.arena_level_internal = 'Lowers' AND days_out > 80 THEN 0
        when arena_level_agg.arena_level_internal = 'Uppers' AND days_out > 100 THEN 0
        when arena_level_agg.arena_level_internal = 'Clubs' AND days_out > 50 THEN 0
        when arena_level_agg.arena_level_internal = 'Premium' AND days_out > 30 THEN 0
        else paid_seats
        end as paid_seats
from
    arena_level_agg
left join
    ticket_info on date(arena_level_agg.event_date) = date(ticket_info.event_datetime)
    and arena_level_agg.arena_level_internal = ticket_info.arena_level_internal
order by
    event_date,
    arena_level_internal,
    days_out
"""

# Fetch the 24/25 result and convert it to polars.
df_2425 = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)
df_2425 = pl.from_pandas(df_2425)

### coalesce(gross_revenue::int,0) as gross_revenue

In [None]:
# Model columns for the 24/25 frame: dow, tier_num and arena_level_num, followed by a
# filter that keeps only tiers A-E, non-negative days_out and the five known arena levels.
# cap_remaining is NOT derived here (its derivation was left disabled for this frame).

# Ordinal encodings; any value missing from a mapping is encoded as 0.
tier_mapping = dict(zip('ABCDE', (5, 4, 3, 2, 1)))

arena_level_mapping = {
    'Clubs': 5,
    'Lowers': 4,
    'Uppers': 3,
    'Suites': 2,
    'Premium': 1,
}

df_2425 = (
    df_2425
    .with_columns(
        # weekday number of the event date
        dow=pl.col('event_date').dt.weekday().cast(pl.Int16),
        tier_num=pl.col('tier').replace(tier_mapping, default=0).cast(pl.Int16),
        arena_level_num=(
            pl.col('arena_level_internal')
            .replace(arena_level_mapping, default=0)
            .cast(pl.Int16)
        ),
    )
    .filter(
        pl.col('tier').is_in(list(tier_mapping))
        & (pl.col('days_out') >= 0)
        & pl.col('arena_level_internal').is_in(list(arena_level_mapping))
    )
)

In [None]:
# Downcast every Int64 column of the 24/25 frame to Int16 (done for speed, per the
# original author's note).
int64_columns = [name for name, dtype in df_2425.schema.items() if dtype == pl.Int64]

df_2425 = df_2425.with_columns(
    [pl.col(name).cast(pl.Int16) for name in int64_columns]
)

In [None]:
# create model

# def run_model(df, df_future, arena_level):

#     x_train_table = df.filter(
#         (pl.col("arena_level_internal").is_in([arena_level])))

#     X_train = x_train_table.select(['tier_num', 'arena_level_num', 'days_out', 'cap_remaining'])
#     y_train = x_train_table.select(['cumulative_tickets'])

#     x_test_table = df_future.filter(
#         (pl.col("arena_level_internal").is_in([arena_level])))
    
#     X_test = x_test_table.select(['tier_num', 'arena_level_num', 'days_out', 'cap_remaining'])

#     ss = StandardScaler()
#     x_train_scaled = ss.fit_transform(X_train)
#     x_test_scaled = ss.fit_transform(X_test)

#     polynomial = LinearRegression().fit(x_train_scaled, np.array(y_train).ravel())

#     return polynomial.predict(x_test_scaled)

def run_model(df, df_future, arena_level):
    """Fit a log-linear model of cumulative tickets for one arena level and score future rows.

    Both frames are filtered to ``arena_level``. The features ``tier_num``,
    ``arena_level_num``, ``days_out`` and ``is_premier`` are standardised with statistics
    learned from the training rows only, and a ``LinearRegression`` is fitted to the
    natural log of ``cumulative_tickets``.

    Args:
        df: polars DataFrame of historical rows; must contain ``arena_level_internal``,
            the four feature columns and ``cumulative_tickets``.
        df_future: polars DataFrame of rows to score; must contain
            ``arena_level_internal`` and the four feature columns.
        arena_level: value of ``arena_level_internal`` to model.

    Returns:
        numpy.ndarray: one prediction per ``df_future`` row of that arena level, in the
        filtered frame's row order. Values are on the LOG scale; callers apply ``exp``.
    """
    feature_cols = ['tier_num', 'arena_level_num', 'days_out', 'is_premier']
    level_rows = pl.col("arena_level_internal").is_in([arena_level])

    train_rows = df.filter(level_rows)
    X_train = train_rows.select(feature_cols)
    y_train = train_rows.select(['cumulative_tickets'])

    X_test = df_future.filter(level_rows).select(feature_cols)

    # Fit the scaler on the training features only and reuse it for the test features.
    # Re-fitting on X_test (as the previous fit_transform call did) scales the two sets
    # with different means/variances, so the fitted coefficients no longer apply.
    ss = StandardScaler()
    x_train_scaled = ss.fit_transform(X_train)
    x_test_scaled = ss.transform(X_test)

    # log(0) is -inf and log of a negative/missing value is nan; all of these are mapped to 0.0.
    with np.errstate(divide='ignore', invalid='ignore'):
        y_log = np.nan_to_num(
            np.log(np.array(y_train).ravel()), nan=0.0, posinf=0.0, neginf=0.0
        )

    model = LinearRegression().fit(x_train_scaled, y_log)

    return model.predict(x_test_scaled)

In [None]:
# run by arena_level NOT SUITES!
# For each modelled arena level, take that level's 24/25 rows, append the model output as
# an extra column, and stack everything under an empty, explicitly typed frame.

arena_levels = ['Lowers','Premium','Uppers','Clubs']

# Empty frame that fixes the expected column names and dtypes of the stacked result.
final_df = pl.DataFrame(
    schema={
        'event_date': pl.Date,
        'arena_level_internal': pl.String,
        'days_out': pl.Int16,
        'tier': pl.String,
        'original_six_plus_extra': pl.Int16,
        'is_premier': pl.Boolean,
        'capacity': pl.Int16,
        'gross_revenue': pl.Float64,
        'paid_seats': pl.Int16,
        'cap_remaining': pl.Int16,
        'dow': pl.Int16,
        'tier_num': pl.Int16,
        'arena_level_num': pl.Int16,
        'literal': pl.Float64,
    }
)

scored_levels = []
for arena_level in arena_levels:
    temp = df_2425.filter(pl.col("arena_level_internal").is_in([arena_level]))
    result = run_model(df_2324, df_2425, arena_level)
    # The prediction array is added as-is; the schema above expects it under 'literal'.
    scored_levels.append(temp.with_columns([result]))

final_df = pl.concat([final_df, *scored_levels], how='vertical')

In [None]:
# Suites are not modelled: their rows receive a placeholder value of 0 in the 'literal'
# column, are stacked under the modelled levels, and the column is then renamed.
suite_df = (
    df_2425
    .filter(pl.col("arena_level_internal").is_in(['Suites']))
    .with_columns(pl.zeros(pl.count()).alias('literal'))
)

final_df = (
    pl.concat([final_df, suite_df], how='vertical')
    .rename({'literal': 'cumulative_tickets_predicted'})
)

In [None]:
# get total tickets prediction

# run_model is fitted on log(cumulative_tickets), so modelled rows are mapped back with
# exp(). Suites rows carry a placeholder 0 (they are not modelled); exponentiating that
# placeholder would turn it into 1 predicted ticket, so Suites are held at 0 instead.
final_df = final_df.with_columns(
    pl.when(pl.col('arena_level_internal') == 'Suites')
    .then(0.0)
    .otherwise(pl.col('cumulative_tickets_predicted').exp())
    .alias('cumulative_tickets_predicted')
)

# clamp cumulative_tickets_predicted so it is at least 0 and at most cap_remaining
# (rows where cap_remaining is null fall through to the unclamped prediction)

final_df = final_df.with_columns(
    pl.when(pl.col("cumulative_tickets_predicted") < 0)
    .then(0)
    .when(pl.col("cap_remaining") < pl.col("cumulative_tickets_predicted"))
    .then(pl.col("cap_remaining"))
    .otherwise(pl.col("cumulative_tickets_predicted"))
    .alias("cumulative_tickets_predicted")
)

# total = seats already paid on the row + predicted tickets
final_df = final_df.with_columns(
    (pl.col('paid_seats') + pl.col('cumulative_tickets_predicted'))
    .alias('total_predicted_tickets')
)

In [None]:
# get historical show rate data
#
# Query outline:
#   historical     - rows of custom.cth_v_historical_ticket inner-joined to game
#                    descriptions (seasons 2021-22 .. 2024-25, games before today), with
#                    0/1 flags for attended comp rows and attended paid rows.
#   tier_show_rate - per season, tier and arena level: attended flags summed and divided by
#                    the summed comp/paid seats; nullif turns a zero denominator into NULL.
# The final select drops tier 'F'.

q = """
WITH historical AS (
    SELECT
    
        game_desc.season,
        game_desc.tier,
        arena_level_internal,
        ticket.event_date::date,
        ticket.comp_seats::float,
        ticket.paid_seats::float,
        CASE
            WHEN ticket.is_comp = TRUE AND ticket.did_attended = TRUE THEN 1
            ELSE 0
        END AS "comp_seats_attended",
        CASE
            WHEN is_comp = FALSE AND did_attended = TRUE THEN 1
            ELSE 0
        END AS "paid_seats_attended"
    FROM
        custom.cth_v_historical_ticket ticket
    INNER JOIN
        custom.cth_game_descriptions game_desc
            ON ticket.event_datetime::date = game_desc.event_datetime::date
            AND game_desc.season IN ('2021-22', '2022-23', '2023-24', '2024-25')
            AND game_desc.event_datetime < current_date
),
tier_show_rate AS (
    SELECT
        season,
        tier,
        arena_level_internal,
        sum(historical.comp_seats_attended)::float / nullif(sum(historical.comp_seats),0) AS "comp_show_rate",
        sum(historical.paid_seats_attended)::float / nullif(sum(historical.paid_seats),0) AS "paid_show_rate"
    FROM
        historical
    GROUP BY
        season, tier, arena_level_internal
)
SELECT
    *
FROM
    tier_show_rate
where 
    tier != 'F'
"""

# Result columns: season, tier, arena_level_internal, comp_show_rate, paid_show_rate.
show_rate = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [None]:
# get weighted show_rate averages over the last 4 seasons

# Season weights: the more recent the season, the larger its weight.
# Seasons missing from this dict are mapped to NaN.
weights = {
    '2021-22': 0.5,
    '2022-23': 0.75,
    '2023-24': 1.25,
    '2024-25': 1.5,
}

show_rate['weights'] = show_rate['season'].map(weights)

def weighted_paid_average(group):
    """Weighted mean of ``paid_show_rate`` for one group, weighted by the ``weights`` column.

    Rows whose rate or weight is missing are excluded from BOTH the numerator and the
    denominator. Previously a missing rate was skipped in the numerator while its weight
    was still counted in the denominator, which pulled the average towards zero.

    Args:
        group: DataFrame with ``paid_show_rate`` and ``weights`` columns.

    Returns:
        pd.Series with the single entry ``weighted_paid_average``; NaN when the group has
        no usable rows or the usable weights sum to zero.
    """
    rates = group['paid_show_rate']
    row_weights = group['weights']

    # Only rows with both a rate and a weight can contribute.
    usable = rates.notna() & row_weights.notna()

    weight_sum = row_weights[usable].sum()
    if weight_sum == 0:
        wavg = float('nan')
    else:
        wavg = (rates[usable] * row_weights[usable]).sum() / weight_sum

    return pd.Series({
        'weighted_paid_average': wavg
    })

# One weighted paid show rate per (tier, arena level), combining the seasons.
paid_tiers = (
    show_rate
    .groupby(by=['tier', 'arena_level_internal'])
    .apply(weighted_paid_average)
    .reset_index()
)

def weighted_comp_average(group):
    """Weighted mean of ``comp_show_rate`` for one group, weighted by the ``weights`` column.

    Rows whose rate or weight is missing are excluded from BOTH the numerator and the
    denominator. Previously a missing rate was skipped in the numerator while its weight
    was still counted in the denominator, which pulled the average towards zero.

    Args:
        group: DataFrame with ``comp_show_rate`` and ``weights`` columns.

    Returns:
        pd.Series with the single entry ``weighted_comp_average``; NaN when the group has
        no usable rows or the usable weights sum to zero.
    """
    rates = group['comp_show_rate']
    row_weights = group['weights']

    # Only rows with both a rate and a weight can contribute.
    usable = rates.notna() & row_weights.notna()

    weight_sum = row_weights[usable].sum()
    if weight_sum == 0:
        wavg = float('nan')
    else:
        wavg = (rates[usable] * row_weights[usable]).sum() / weight_sum

    return pd.Series({
        'weighted_comp_average': wavg
    })

# One weighted comp show rate per (tier, arena level).
comp_tiers = (
    show_rate
    .groupby(by=['tier', 'arena_level_internal'])
    .apply(weighted_comp_average)
    .reset_index()
)

# Paid and comp rates side by side (left join keeps every paid row), converted to polars.
tiers = pl.from_pandas(
    paid_tiers.merge(comp_tiers, how='left', on=['tier', 'arena_level_internal'])
)

In [None]:
# Attach the weighted show rates to the predictions (inner join on tier and arena level)
# and convert predicted tickets into predicted attendance using the paid show rate.
final_df = (
    final_df
    .join(tiers, on=['tier', 'arena_level_internal'], how='inner')
    .with_columns(
        total_attendance=(
            pl.col('total_predicted_tickets') * pl.col('weighted_paid_average')
        )
    )
)

In [None]:
# Merch, F&B

In [None]:
# Historical food & beverage summary per game, with game descriptors and attendance.
#   attendance CTE - row counts per event_datetime from the 2324 and 2425 playoff
#                    attendance tables.
#   main select    - custom.cheq_v_hockey_summary left-joined (by date) to game descriptions
#                    and to the attendance counts; the tier filter then drops rows without
#                    a tier A-E game-description match.
# NOTE(review): attendance comes only from the two playoff attendance tables, so rows for
# any other date would have NULL attendance - confirm, since attendance is a model feature.
q = """
with attendance as
    (select
        event_datetime,
        count(*) as attendance
    from
        custom.cth_v_attendance_2324_playoffs
    group by
        event_datetime
    UNION ALL
    select
        event_datetime,
        count(*) as attendance
    from
        custom.cth_v_attendance_2425_playoffs
    group by
        event_datetime)
select
    cth_game_descriptions.season,
    cth_game_descriptions.event_date,
    tier,
    is_premier,
    original_six_plus_extra,
    day_of_week,
    start_time,
    attendance,
    gross_revenue,
    num_orders,
    quantity_sold
from
    custom.cheq_v_hockey_summary
left join
    custom.cth_game_descriptions on date(cheq_v_hockey_summary.event_date) = date(cth_game_descriptions.event_date)
left join
    attendance on date(attendance.event_datetime) = date(cheq_v_hockey_summary.event_date)
where
    tier in ('A','B','C','D','E')
"""

historical_f_and_b = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [None]:
# Encode the game descriptors of historical_f_and_b as numeric model features.
# The lookups use Series.map on the single source column instead of a row-wise
# DataFrame.apply(axis=1): same values, far less per-row overhead, and it still produces
# a column when the frame is empty (apply(axis=1) returns a DataFrame in that case).

# Fri/Sat/Sun count as weekend (1); every other or unknown day is 0.
day_map = {
    'Fri': 1,
    'Sat': 1,
    'Sun': 1,
    'Mon': 0,
    'Tue': 0,
    'Wed': 0,
    'Thu': 0
}

# 12:30 PM - 3:30 PM starts -> 1, 4:00 PM - 6:00 PM starts -> 2, any unlisted start time -> 0.
start_time_map = {
    '12:30 PM': 1,
    '12:45 PM': 1,
    '1:00 PM': 1,
    '3:00 PM': 1,
    '3:30 PM': 1,
    '4:00 PM': 2,
    '5:00 PM': 2,
    '6:00 PM': 2
}

# Tier A is the highest (5) down to E (1); any other tier -> 0.
tier_mapping = {
    'A': 5,
    'B': 4,
    'C': 3,
    'D': 2,
    'E': 1
}

historical_f_and_b['weekend'] = historical_f_and_b['day_of_week'].map(
    lambda day: day_map.get(day, 0))

historical_f_and_b['start_time_num'] = historical_f_and_b['start_time'].map(
    lambda start: start_time_map.get(start, 0))

historical_f_and_b['tier_num'] = historical_f_and_b['tier'].map(
    lambda tier: tier_mapping.get(tier, 0))

In [None]:
# Historical merchandise sales per game, with game descriptors and attendance.
#   attendance CTE - row counts per event_datetime from the 2324 and 2425 playoff
#                    attendance tables.
#   main select    - custom.retailpro_v_invoice_items left-joined to game descriptions and
#                    attendance, restricted to seasons 2023-24 and 2024-25, then aggregated
#                    per game: summed gross_revenue and qty, distinct invoice count.
# NOTE(review): attendance comes only from the two playoff attendance tables, so games on
# any other date would have NULL attendance - confirm, since attendance is a model feature.
q = """
with attendance as
    (select
        event_datetime,
        count(*) as attendance
    from
        custom.cth_v_attendance_2324_playoffs
    group by
        event_datetime
    UNION ALL
    select
        event_datetime,
        count(*) as attendance
    from
        custom.cth_v_attendance_2425_playoffs
    group by
        event_datetime)
select
    cth_game_descriptions.season,
    cth_game_descriptions.event_date,
    tier,
    is_premier,
    original_six_plus_extra,
    day_of_week,
    start_time,
    attendance,
    sum(gross_revenue) as gross_revenue,
    sum(qty) as quantity,
    count(distinct invoice_id) as num_orders
from
    custom.retailpro_v_invoice_items
left join
    custom.cth_game_descriptions on retailpro_v_invoice_items.event_date = cth_game_descriptions.event_date
left join
    attendance on retailpro_v_invoice_items.event_date = date(attendance.event_datetime)
where
    season in ('2023-24','2024-25')
group by
    cth_game_descriptions.season,
    cth_game_descriptions.event_date,
    tier,
    is_premier,
    original_six_plus_extra,
    day_of_week,
    start_time,
    attendance
"""

historical_merch = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [None]:
# Encode the game descriptors of historical_merch as numeric model features.
# The lookups use Series.map on the single source column instead of a row-wise
# DataFrame.apply(axis=1): same values, far less per-row overhead, and it still produces
# a column when the frame is empty (apply(axis=1) returns a DataFrame in that case).

# Fri/Sat/Sun count as weekend (1); every other or unknown day is 0.
day_map = {
    'Fri': 1,
    'Sat': 1,
    'Sun': 1,
    'Mon': 0,
    'Tue': 0,
    'Wed': 0,
    'Thu': 0
}

# 12:30 PM - 3:30 PM starts -> 1, 4:00 PM - 6:00 PM starts -> 2, any unlisted start time -> 0.
start_time_map = {
    '12:30 PM': 1,
    '12:45 PM': 1,
    '1:00 PM': 1,
    '3:00 PM': 1,
    '3:30 PM': 1,
    '4:00 PM': 2,
    '5:00 PM': 2,
    '6:00 PM': 2
}

# Tier A is the highest (5) down to E (1); any other tier -> 0.
tier_mapping = {
    'A': 5,
    'B': 4,
    'C': 3,
    'D': 2,
    'E': 1
}

historical_merch['weekend'] = historical_merch['day_of_week'].map(
    lambda day: day_map.get(day, 0))

historical_merch['start_time_num'] = historical_merch['start_time'].map(
    lambda start: start_time_map.get(start, 0))

historical_merch['tier_num'] = historical_merch['tier'].map(
    lambda tier: tier_mapping.get(tier, 0))

In [None]:
# Upcoming games (event_date on or after today) with their descriptors.
q = """
select
    season,
    event_date,
    tier,
    is_premier,
    original_six_plus_extra,
    day_of_week,
    start_time
from
    custom.cth_game_descriptions
where
    event_date >= current_date
"""

future_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

# NOTE(review): hard-coded absolute path on one user's desktop; this cell cannot run on any
# other machine. Presumably this file holds the attendance predictions produced by the
# ticket section above - confirm and move the location into configuration.
predicted_attendance = pd.read_csv('C:\\Users\\riffere\\Desktop\\output.csv')

# Left join keeps every upcoming game, matched or not.
# NOTE(review): event_date read from the CSV is not parsed as a date here; verify that its
# dtype matches the warehouse event_date column, otherwise the merge will not match rows.
future_game_info = future_game_info.merge(predicted_attendance, how = 'left', on = 'event_date')

In [None]:
# Encode the game descriptors of future_game_info as numeric model features.
# The lookups use Series.map on the single source column instead of a row-wise
# DataFrame.apply(axis=1): same values, far less per-row overhead, and it still produces
# a column when the frame is empty (apply(axis=1) returns a DataFrame in that case).

# Fri/Sat/Sun count as weekend (1); every other or unknown day is 0.
day_map = {
    'Fri': 1,
    'Sat': 1,
    'Sun': 1,
    'Mon': 0,
    'Tue': 0,
    'Wed': 0,
    'Thu': 0
}

# 12:30 PM - 3:30 PM starts -> 1, 4:00 PM - 6:00 PM starts -> 2, any unlisted start time -> 0.
start_time_map = {
    '12:30 PM': 1,
    '12:45 PM': 1,
    '1:00 PM': 1,
    '3:00 PM': 1,
    '3:30 PM': 1,
    '4:00 PM': 2,
    '5:00 PM': 2,
    '6:00 PM': 2
}

# Tier A is the highest (5) down to E (1); any other tier -> 0.
tier_mapping = {
    'A': 5,
    'B': 4,
    'C': 3,
    'D': 2,
    'E': 1
}

future_game_info['weekend'] = future_game_info['day_of_week'].map(
    lambda day: day_map.get(day, 0))

future_game_info['start_time_num'] = future_game_info['start_time'].map(
    lambda start: start_time_map.get(start, 0))

future_game_info['tier_num'] = future_game_info['tier'].map(
    lambda tier: tier_mapping.get(tier, 0))

In [None]:
def run_f_and_b_model(df, df_future):
    """Fit a linear model of F&B gross revenue and predict it for future games.

    Features are ``attendance``, ``weekend``, ``start_time_num`` and ``tier_num``. They
    are standardised with statistics learned from the training frame only; the same
    fitted scaler is then applied to the future frame.

    Args:
        df: pandas DataFrame of historical games with the four feature columns and
            ``gross_revenue``.
        df_future: pandas DataFrame of games to score with the four feature columns.

    Returns:
        numpy.ndarray of int: one predicted gross revenue per ``df_future`` row
        (fractional part truncated by ``astype(int)``).
    """
    feature_cols = ['attendance', 'weekend', 'start_time_num', 'tier_num']

    x_train = df[feature_cols]
    y_train = df[['gross_revenue']]
    x_test = df_future[feature_cols]

    # Fit the scaler on the training data only. Re-fitting it on x_test (the previous
    # fit_transform call) standardised the future games with their own mean/variance, so
    # the regression coefficients were applied to differently scaled inputs.
    scalar = StandardScaler()
    train_scaled = scalar.fit_transform(x_train)
    test_scaled = scalar.transform(x_test)

    model = LinearRegression().fit(train_scaled, np.array(y_train).ravel())

    return model.predict(test_scaled).astype(int)

In [None]:
# Score the upcoming games with the F&B model trained on the historical F&B frame.
future_game_info['predicted_f_and_b_gross_rev'] = run_f_and_b_model(
    df=historical_f_and_b,
    df_future=future_game_info,
)

In [None]:
def run_merch_model(df, df_future):
    """Fit a linear model of merchandise gross revenue and predict it for future games.

    Features are ``attendance``, ``weekend``, ``start_time_num`` and ``tier_num``. They
    are standardised with statistics learned from the training frame only; the same
    fitted scaler is then applied to the future frame.

    Args:
        df: pandas DataFrame of historical games with the four feature columns and
            ``gross_revenue``.
        df_future: pandas DataFrame of games to score with the four feature columns.

    Returns:
        numpy.ndarray of int: one predicted gross revenue per ``df_future`` row
        (fractional part truncated by ``astype(int)``).
    """
    feature_cols = ['attendance', 'weekend', 'start_time_num', 'tier_num']

    x_train = df[feature_cols]
    y_train = df[['gross_revenue']]
    x_test = df_future[feature_cols]

    # Fit the scaler on the training data only. Re-fitting it on x_test (the previous
    # fit_transform call) standardised the future games with their own mean/variance, so
    # the regression coefficients were applied to differently scaled inputs.
    scalar = StandardScaler()
    train_scaled = scalar.fit_transform(x_train)
    test_scaled = scalar.transform(x_test)

    model = LinearRegression().fit(train_scaled, np.array(y_train).ravel())

    return model.predict(test_scaled).astype(int)

In [None]:
# Score the upcoming games with the merch model trained on the historical merch frame.
future_game_info['predicted_merch_gross_rev'] = run_merch_model(
    df=historical_merch,
    df_future=future_game_info,
)

In [None]:
# Parking

In [None]:
# get historical show rates by tier and lot
#
# Query outline:
#   prepaid     - past Panthers rows from the 2324 and 2425 parking ticket tables.
#   prepaid_agg - number of prepaid passes per event and location group.
#   scans       - parkhub transactions matched to a game (seasons 2023-24 / 2024-25);
#                 a transaction counts as a scan (1) when paid_amount = 0.
#   scans_agg   - scans summed per season, event, tier and location group.
#   final       - show_rate = num_scans / prepaid_passes; only event/lot pairs present on
#                 both sides survive (left join followed by an IS NOT NULL filter).

q = """
with prepaid as
    (select
         event_datetime,
         location_group
    from
        custom.ctp_v_ticket_2324
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date
    UNION ALL
    select
        event_datetime,
        location_group
    from
        custom.ctp_v_ticket_2425
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date),
prepaid_agg as
    (select
        event_datetime,
        location_group,
        count(*) as prepaid_passes
    from
        prepaid
    group by
        event_datetime, location_group),
scans as
    (select
        season,
        cth_game_descriptions.event_datetime,
        tier,
        location_group,
        case
            when paid_amount = 0 then 1
            else 0 end as num_scans
    from
        custom.parkhub_v_transactions
    left join
        custom.cth_game_descriptions on parkhub_v_transactions.event_datetime = cth_game_descriptions.event_datetime
    where
        cth_game_descriptions.event_datetime is not Null
        and season in ('2023-24','2024-25')),
scans_agg as
    (select
        season,
        event_datetime,
        tier,
        location_group,
        sum(num_scans) as num_scans
    from
        scans
    group by
        season,
        event_datetime,
        tier,
        location_group)
select
    season,
    prepaid_agg.event_datetime,
    tier,
    prepaid_agg.location_group,
    prepaid_passes,
    num_scans,
    num_scans*1.0/prepaid_passes::float as show_rate
from
    scans_agg
left join
    prepaid_agg on scans_agg.event_datetime = prepaid_agg.event_datetime
    and scans_agg.location_group = prepaid_agg.location_group
where
    prepaid_agg.event_datetime is not Null
"""

show_rate_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [None]:
# Weight the most recent season (2024-25) more heavily than 2023-24.
# Seasons missing from this dict are mapped to NaN.
weights = {
    '2023-24': 1.25,
    '2024-25': 2.75,
}

show_rate_df['weights'] = show_rate_df['season'].map(weights)

def weighted_paid_average(group):
    """Weighted mean of ``show_rate`` for one group, weighted by the ``weights`` column.

    Rows whose rate or weight is missing are excluded from BOTH the numerator and the
    denominator, so a missing value on one side cannot skew the average.

    NOTE(review): this redefines a function of the same name declared earlier in the
    notebook (that one reads ``paid_show_rate``); consider giving this one its own name.

    Args:
        group: DataFrame with ``show_rate`` and ``weights`` columns.

    Returns:
        pd.Series with the single entry ``weighted_average``; NaN when the group has no
        usable rows or the usable weights sum to zero.
    """
    rates = group['show_rate']
    row_weights = group['weights']

    # Only rows with both a rate and a weight can contribute.
    usable = rates.notna() & row_weights.notna()

    weight_sum = row_weights[usable].sum()
    if weight_sum == 0:
        wavg = float('nan')
    else:
        wavg = (rates[usable] * row_weights[usable]).sum() / weight_sum

    return pd.Series({
        'weighted_average': wavg
    })

# One weighted parking show rate per (tier, location group), combining both seasons.
paid_tiers = (
    show_rate_df
    .groupby(by=['tier', 'location_group'])
    .apply(weighted_paid_average)
    .reset_index()
)

In [None]:
# gather historical prepaid parking data
#
# Query outline:
#   prepaid - past Panthers rows from the 2324 and 2425 parking ticket tables with the
#             transaction date truncated to a day.
#   final   - passes sold per event, location group and transaction day, with days_out =
#             days from transaction date to event; counts for days_out >= 150 are reported
#             as 0 and negative days_out are excluded.
# Rows are ordered by event, location group and transaction date descending.

q = """
with prepaid as
    (select
         event_datetime,
         location_group,
         date(transaction_date) as transaction_date
    from
        custom.ctp_v_ticket_2324
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date
    UNION ALL
    select
        event_datetime,
        location_group,
        date(transaction_date) as transaction_date
    from
        custom.ctp_v_ticket_2425
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date)
select
    date(event_datetime) as event_date,
    location_group,
    'prepaid' as parking_type,
    datediff('days',transaction_date, event_datetime) as days_out,
    case
        when days_out >= 150 then 0
        else count(*) 
    end as num_passes
from
    prepaid
where
    days_out >= 0
group by
    event_datetime,
    location_group,
    transaction_date
order by
    event_datetime,
    location_group,
    transaction_date desc
"""

historical_prepaid_parking_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

# Ordinal code for each parking location group; any other group is encoded as 0.
location_map = {
    'Club': 4,
    'Garage': 3,
    'General': 2,
    'Valet': 1
}

# Series.map on the single source column replaces the row-wise DataFrame.apply(axis=1):
# same values, far less per-row overhead, and it still yields a column on an empty frame.
historical_prepaid_parking_info['location_num'] = (
    historical_prepaid_parking_info['location_group'].map(
        lambda lot: location_map.get(lot, 0))
)

# Running total of passes within each (event_date, location_group) group. The total is
# accumulated in the frame's current row order, i.e. the order the query returned.
historical_prepaid_parking_info['cumulative_num_passes'] = (
    historical_prepaid_parking_info
    .groupby(['event_date', 'location_group'])['num_passes']
    .cumsum()
)

In [None]:
# gather historical game data (ie tier, dow, and start time)
# Attendance-summary rows left-joined to game descriptions on event_date; the tier filter
# then keeps only rows that matched a tier A-E game.

q = """
SELECT
    date(cth_game_descriptions.event_date) as event_date,
    tier,
    day_of_week,
    start_time,
    total_tickets
FROM
    custom.cth_v_historical_attendance_summary
LEFT JOIN
    custom.cth_game_descriptions on cth_v_historical_attendance_summary.event_date = cth_game_descriptions.event_date
WHERE
    tier in ('A','B','C','D','E')
"""

all_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

# Encode day of week and start time of all_game_info as numeric model features.
# The lookups use Series.map on the single source column instead of a row-wise
# DataFrame.apply(axis=1): same values, far less per-row overhead, and it still produces
# a column when the frame is empty (apply(axis=1) returns a DataFrame in that case).

# Fri/Sat/Sun count as weekend (1); every other or unknown day is 0.
day_map = {
    'Fri': 1,
    'Sat': 1,
    'Sun': 1,
    'Mon': 0,
    'Tue': 0,
    'Wed': 0,
    'Thu': 0
}

# 12:30 PM - 3:30 PM starts -> 1, 4:00 PM - 6:00 PM starts -> 2, any unlisted start time -> 0.
start_time_map = {
    '12:30 PM': 1,
    '12:45 PM': 1,
    '1:00 PM': 1,
    '3:00 PM': 1,
    '3:30 PM': 1,
    '4:00 PM': 2,
    '5:00 PM': 2,
    '6:00 PM': 2
}

all_game_info['weekend'] = all_game_info['day_of_week'].map(
    lambda day: day_map.get(day, 0))

all_game_info['start_time_num'] = all_game_info['start_time'].map(
    lambda start: start_time_map.get(start, 0))

In [None]:
# gather upcoming game data including current prepaid totals by game and lot
# Per upcoming game (event_datetime on or after today) and location group: days until the
# event, lot capacity, prepaid cars (paid + comp seats), gross revenue so far, and
# cap_remaining = capacity - prepaid cars.

q = """
select
    date(cth_game_descriptions.event_datetime) as event_date,
    datediff('day', current_date, cth_game_descriptions.event_datetime) as days_out,
    ctp_v_ticket_2425.location_group,
    capacity::int,
    sum(paid_seats)+sum(comp_seats) as prepaid_cars,
    sum(gross_revenue) as current_gross_revenue,
    capacity::int - prepaid_cars as cap_remaining
from
    custom.ctp_v_ticket_2425
left join
    custom.ctp_parking_capacities on ctp_v_ticket_2425.location_group = ctp_parking_capacities.location_group
left join
    custom.cth_game_descriptions on ctp_v_ticket_2425.event_datetime = cth_game_descriptions.event_datetime
where
    cth_game_descriptions.event_datetime is not null and
    cth_game_descriptions.event_datetime >= current_date
group by
    cth_game_descriptions.event_datetime,
    ctp_v_ticket_2425.location_group,
    capacity
order by
    cth_game_descriptions.event_datetime,
    ctp_v_ticket_2425.location_group
"""

upcoming_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [None]:
# gather upcoming pricing data by game and lot
# Per tier and location group: latest transaction date and highest adjusted price among
# non-comp rows whose price_type starts with 'IA' (case-insensitive).
# NOTE(review): the result is grouped by tier, not by individual game, and is not limited
# to upcoming events - confirm this matches the intent stated in the heading above.

q = """
SELECT
    tier,
    location_group,
    max(transaction_date) AS "transaction_date",
    max(adjusted_price) AS "highest_price"
FROM
    custom.ctp_v_ticket_2425
LEFT JOIN             
    custom.cth_game_descriptions on ctp_v_ticket_2425.event_datetime = cth_game_descriptions.event_datetime
WHERE
    is_comp = FALSE
    AND price_type ILIKE 'IA%'
GROUP BY
    tier, 
    location_group
"""

pricing_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [None]:
# create a model to predict remaining prepaid cars

def run_prepaid_model(df, df_future, lot):
    """Predict prepaid cars for the upcoming games of a single lot.

    Fits a linear regression on the rows of ``df`` whose ``location_group``
    equals ``lot`` (features ``days_out``, ``weekend``, ``start_time_num``;
    target ``cumulative_num_passes``) and scores the matching rows of
    ``df_future``.

    Parameters
    ----------
    df : pandas.DataFrame
        Training data. Must contain ``location_group``, the three feature
        columns and ``cumulative_num_passes``.
    df_future : pandas.DataFrame
        Rows to score. Must contain ``location_group`` and the three feature
        columns.
    lot
        ``location_group`` value to model (e.g. ``'General'``).

    Returns
    -------
    numpy.ndarray
        Integer predictions (``astype(int)`` truncates toward zero), one per
        ``df_future`` row for ``lot``, in row order. Empty if ``df_future``
        has no rows for ``lot``.
    """
    feature_cols = ['days_out', 'weekend', 'start_time_num']

    train_rows = df[df['location_group'] == lot]
    future_rows = df_future[df_future['location_group'] == lot]

    # Nothing to score for this lot: return an empty prediction vector instead
    # of letting the scaler raise on a zero-row input.
    if future_rows.empty:
        return np.array([], dtype=int)

    x_train = train_rows[feature_cols]
    y_train = np.array(train_rows[['cumulative_num_passes']]).ravel()
    x_test = future_rows[feature_cols]

    scaler = StandardScaler()
    x_train_scaled = scaler.fit_transform(x_train)

    model = LinearRegression().fit(x_train_scaled, y_train)

    # Reuse the mean/std learned from the training data. Calling fit_transform
    # here would re-centre the future rows on their own statistics, so the
    # regression would see features on a different scale than it was fitted on.
    x_test_scaled = scaler.transform(x_test)

    return model.predict(x_test_scaled).astype(int)

In [None]:
# training frame: historical prepaid parking data joined to historical game data
# on event_date, restricted to games in tiers A-E

total_table = (
    pd.merge(historical_prepaid_parking_info, all_game_info, how='left', on='event_date')
    .loc[lambda merged: merged['tier'].isin(['A','B','C','D','E'])]
)

# scoring frame: upcoming parking data joined to the same game data on event_date,
# restricted to games in tiers A-E

total_future_table = (
    pd.merge(upcoming_game_info, all_game_info, how='left', on='event_date')
    .loc[lambda merged: merged['tier'].isin(['A','B','C','D','E'])]
)

In [None]:
# only predicting for general and garage, not club, valet or executive

lots = ['General','Garage']

# collect one scored frame per lot and concatenate once at the end,
# rather than growing a DataFrame inside the loop
lot_frames = []

for lot in lots:

    # .copy() so the new column is written to an independent frame and not to a
    # filtered slice of total_future_table (avoids SettingWithCopyWarning)
    temp = total_future_table[total_future_table['location_group'] == lot].copy()

    temp['predicted_parking'] = run_prepaid_model(total_table, total_future_table, lot)

    lot_frames.append(temp)

final_df = pd.concat(lot_frames, ignore_index= True)

In [None]:
# concat club totals for onsite model next
# (club is not run through the prepaid model, so its predicted additional parking is 0)

# .copy() so the column assignment below writes to an independent frame and not
# to a filtered slice of total_future_table (avoids SettingWithCopyWarning)
club_totals = total_future_table[total_future_table['location_group'] == 'Club'].copy()
club_totals['predicted_parking'] = 0

final_df = pd.concat([final_df, club_totals], axis=0)

In [None]:
# make sure no negative predictions are made

final_df['predicted_prepaid_additional_parking'] = final_df['predicted_parking'].clip(lower=0)

# get total prepaid tickets (current + predicted additional)
# use the clamped column: adding the raw 'predicted_parking' would let a negative
# prediction pull the total below the cars that are already prepaid

final_df['total_predicted_prepaid_cars'] = final_df['prepaid_cars'] + final_df['predicted_prepaid_additional_parking']

# get number of parked cars using historical show rates
# ('weighted_average' comes from paid_tiers, matched on tier and location_group)

final_df = final_df.merge(paid_tiers, how =  'left', on = ['tier', 'location_group'])
final_df['prepaid_cars_parked'] = (final_df['total_predicted_prepaid_cars'] * final_df['weighted_average']).astype(int)

In [None]:
# find the capacity remaining

final_df['cap_remaining'] = final_df['capacity'] - final_df['prepaid_cars_parked']

# if predicted cars over capacity subtract overflow out
# (cap_remaining is negative for those rows, so adding it removes the overflow)

over_capacity = final_df['cap_remaining'] < 0

final_df['predicted_prepaid_additional_parking'] = np.where(over_capacity, final_df['predicted_prepaid_additional_parking']+final_df['cap_remaining'], final_df['predicted_prepaid_additional_parking'])
final_df['prepaid_cars_parked'] = np.where(over_capacity, final_df['prepaid_cars_parked']+final_df['cap_remaining'], final_df['prepaid_cars_parked'])

# full lots have nothing left; every other row keeps its computed remaining capacity
# (previously this branch wrote prepaid_cars_parked into cap_remaining by mistake)
final_df['cap_remaining'] = np.where(over_capacity, 0, final_df['cap_remaining'])

In [None]:
# restrict final_df to this fixed set of columns, in this order
prepaid_output_columns = [
    'event_date',
    'days_out',
    'tier',
    'start_time_num',
    'weekend',
    'location_group',
    'capacity',
    'prepaid_cars',
    'current_gross_revenue',
    'predicted_prepaid_additional_parking',
    'total_predicted_prepaid_cars',
    'prepaid_cars_parked',
    'cap_remaining',
]

final_df = final_df[prepaid_output_columns]

In [None]:
# get historical onsite parking data
#
# The "onsite" CTE reads parkhub transactions for seasons 2023-24 and 2024-25 and
# marks each one with num_onsite_cars = 1 when paid_amount > 0 and
# num_prepaid_cars = 1 when paid_amount = 0.
# The outer query aggregates per event_date and location_group:
#   num_cars      - sum of num_onsite_cars
#   cap_remaining - lot capacity minus the sum of num_prepaid_cars
# days_out is hard-coded to 0 in the CTE, so the "days_out >= 0" filter keeps every row.

q = """
with onsite as
    (select
        date(cth_game_descriptions.event_datetime) as event_date,
        location_group,
        0 as days_out,
        case
            when paid_amount > 0 then 1
        else 0
        end as num_onsite_cars,
        case
            when paid_amount = 0 then 1
        else 0
        end as num_prepaid_cars
    from
        custom.parkhub_v_transactions
    left join
        custom.cth_game_descriptions on parkhub_v_transactions.event_datetime = cth_game_descriptions.event_datetime
    where
        season in ('2023-24','2024-25'))
select
    onsite.event_date,
    onsite.location_group,
    'onsite' as parking_type,
    days_out,
    sum(num_onsite_cars) as num_cars,
    capacity - sum(num_prepaid_cars) as cap_remaining
from
    onsite
left join
    custom.ctp_parking_capacities on onsite.location_group = ctp_parking_capacities.location_group
where
    days_out >= 0
group by
    onsite.event_date,
    onsite.location_group,
    parking_type,
    days_out,
    capacity
"""

# run the query against the warehouse; the result is merged with all_game_info later on
historical_onsite_parking_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [None]:
# training frame for the onsite model: historical onsite parking data joined to
# historical game data on event_date, restricted to games in tiers A-E

total_onsite_table = (
    pd.merge(historical_onsite_parking_info, all_game_info, how='left', on='event_date')
    .loc[lambda merged: merged['tier'].isin(['A','B','C','D','E'])]
)

In [None]:
# create a model to predict onsite cars

def run_onsite_model(df, df_future, lot):
    """Predict onsite (day-of) cars for the upcoming games of a single lot.

    Fits a linear regression on the rows of ``df`` whose ``location_group``
    equals ``lot`` (features ``cap_remaining``, ``weekend``,
    ``start_time_num``; target ``num_cars``) and scores the matching rows of
    ``df_future``.

    Parameters
    ----------
    df : pandas.DataFrame
        Training data. Must contain ``location_group``, the three feature
        columns and ``num_cars``.
    df_future : pandas.DataFrame
        Rows to score. Must contain ``location_group`` and the three feature
        columns.
    lot
        ``location_group`` value to model (e.g. ``'General'``).

    Returns
    -------
    numpy.ndarray
        Integer predictions (``astype(int)`` truncates toward zero), one per
        ``df_future`` row for ``lot``, in row order. Empty if ``df_future``
        has no rows for ``lot``.
    """
    feature_cols = ['cap_remaining', 'weekend', 'start_time_num']

    train_rows = df[df['location_group'] == lot]
    future_rows = df_future[df_future['location_group'] == lot]

    # Nothing to score for this lot: return an empty prediction vector instead
    # of letting the scaler raise on a zero-row input.
    if future_rows.empty:
        return np.array([], dtype=int)

    x_train = train_rows[feature_cols]
    y_train = np.array(train_rows[['num_cars']]).ravel()
    x_test = future_rows[feature_cols]

    scaler = StandardScaler()
    x_train_scaled = scaler.fit_transform(x_train)

    model = LinearRegression().fit(x_train_scaled, y_train)

    # Reuse the mean/std learned from the training data. Calling fit_transform
    # here would re-centre the future rows on their own statistics, so the
    # regression would see features on a different scale than it was fitted on.
    x_test_scaled = scaler.transform(x_test)

    return model.predict(x_test_scaled).astype(int)

In [None]:
# only predicting for general, garage, and club not valet or executive

lots = ['General','Garage','Club']

# collect one scored frame per lot and concatenate once at the end,
# rather than growing a DataFrame inside the loop
onsite_lot_frames = []

for lot in lots:

    # .copy() so the new column is written to an independent frame and not to a
    # filtered slice of final_df (avoids SettingWithCopyWarning)
    temp = final_df[final_df['location_group'] == lot].copy()

    temp['predicted_onsite_parking'] = run_onsite_model(total_onsite_table, final_df, lot)

    onsite_lot_frames.append(temp)

final_df_onsite = pd.concat(onsite_lot_frames, ignore_index= True)

In [None]:
# add back executive and valet parking and match fields from final_df
# (these lots are not modelled: predicted additional prepaid and onsite cars are set to 0)

# .copy() so the column assignments below write to an independent frame and not
# to a filtered slice of total_future_table (avoids SettingWithCopyWarning)
exec_and_valet = total_future_table[total_future_table['location_group'].isin(['Executive','Valet'])].copy()

exec_and_valet['predicted_prepaid_additional_parking'] = 0
exec_and_valet['total_predicted_prepaid_cars'] = exec_and_valet['prepaid_cars']

# attach the show rate; where paid_tiers has no row for a tier/lot pair, fall back to 1
exec_and_valet = exec_and_valet.merge(paid_tiers, how =  'left', on = ['tier', 'location_group'])
exec_and_valet['weighted_average'] = exec_and_valet['weighted_average'].fillna(1)

exec_and_valet['prepaid_cars_parked'] = (exec_and_valet['total_predicted_prepaid_cars'] * exec_and_valet['weighted_average']).astype(int)
exec_and_valet['predicted_onsite_parking'] = 0
exec_and_valet['total_parking'] = exec_and_valet['prepaid_cars_parked']

# same columns, in the same order, as the modelled lots so the frames can be concatenated
exec_and_valet = exec_and_valet[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking',
                                       'total_predicted_prepaid_cars', 'prepaid_cars_parked',
                                       'predicted_onsite_parking','total_parking']]

In [None]:
# if predicted total over capacity subtract overflow out

# cap each onsite prediction at the capacity still available (element-wise minimum),
# then floor at 0: the linear model can produce negative values, and a negative
# number of cars would otherwise reduce total_parking below the prepaid cars parked
capped_onsite = np.minimum(
    final_df_onsite['predicted_onsite_parking'].to_numpy(),
    final_df_onsite['cap_remaining'].to_numpy(),
)
final_df_onsite['predicted_onsite_parking'] = np.maximum(capped_onsite, 0)

final_df_onsite['total_parking'] = final_df_onsite['prepaid_cars_parked'] + final_df_onsite['predicted_onsite_parking']

final_parking_model = final_df_onsite[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking','total_predicted_prepaid_cars',
                                       'prepaid_cars_parked','predicted_onsite_parking','total_parking']]

# merge with executive and valet parking info

final_parking_model_df = pd.concat([final_parking_model, exec_and_valet], axis=0)

In [None]:
# attach the highest observed price per tier and lot, then turn car predictions into revenue

final_parking_model_df = final_parking_model_df.merge(pricing_info, how = 'left', on = ['tier', 'location_group'])

# onsite cars are priced at 1.25x the highest prepaid price
# NOTE(review): the 1.25 multiplier is carried over unchanged - confirm it with the business owner
onsite_price_multiplier = 1.25

# rows without a matching price (highest_price is NaN) contribute 0 revenue
final_parking_model_df['predicted_prepaid_additional_gross_revenue'] = (final_parking_model_df['predicted_prepaid_additional_parking'] * final_parking_model_df['highest_price']).fillna(0)

# onsite revenue is driven by the predicted ONSITE cars
# (this previously reused predicted_prepaid_additional_parking, a copy-paste slip)
final_parking_model_df['predicted_onsite_parking_gross_revenue'] = (final_parking_model_df['predicted_onsite_parking'] * final_parking_model_df['highest_price']*onsite_price_multiplier).fillna(0)

final_parking_model_df['predicted_gross_revenue'] = final_parking_model_df['predicted_prepaid_additional_gross_revenue'] + final_parking_model_df['predicted_onsite_parking_gross_revenue'] + final_parking_model_df['current_gross_revenue']

# final column set and order (drops the helper columns brought in by the pricing merge)
final_parking_model_df = final_parking_model_df[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking', 'predicted_prepaid_additional_gross_revenue',
                                       'total_predicted_prepaid_cars', 'prepaid_cars_parked', 'predicted_onsite_parking',
                                       'predicted_onsite_parking_gross_revenue','total_parking', 'predicted_gross_revenue']]