In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from prefect.blocks.system import Secret
from catnip.fla_redshift import FLA_Redshift
from typing import Dict
from concurrent.futures import ThreadPoolExecutor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

In [2]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

In [84]:
# 1. Pricing - Dynamic Pricing Model

# seperate file (pricing_model_v1.py)

In [3]:
# 2. Predicted Budget

q = """
select 
    date(event_date)::varchar as event_date, sum(budget_goal) as budget_goal
from  
    custom.cth_budget_summary_2324
group by
    event_date
order by 
    event_date
"""
budget_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [4]:
# 3. Predicted Revenue & Tickets

q = """
select
    cth_ticket_sales_model_2324.event_date, tier,
    sum(paid_seats+expected_additional_paid_seats) as expected_final_paid_seats,
    sum(comp_seats+expected_additional_comp_seats) as expected_final_comp_seats,
    expected_final_paid_seats+expected_final_comp_seats as expected_final_seats,
    sum(gross_revenue+expected_additional_revenue) as expected_final_gross_revenue
from
    custom.cth_ticket_sales_model_2324
left join
    custom.cth_game_descriptions on cth_ticket_sales_model_2324.event_date = cth_game_descriptions.event_date
group by
    cth_ticket_sales_model_2324.event_date, tier, abbreviation, start_time_tableau, day_of_week
order by
    cth_ticket_sales_model_2324.event_date
"""
predicted_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [13]:
# 4. Predicted Attendance

# mulitply projected tickets by historical show rate

q = """
WITH a as
    (SELECT 
        h.paid_seats, h.did_attend, h.event_date
    FROM 
        custom.cth_historical_all_1718_2223 h
    WHERE 
        h.season in ('2019-20', '2021-22', '2022-23')
        and h.paid_seats != 0)

SELECT 
    g.tier, sum(a.did_attend)/sum(a.paid_seats) as paid_rate
FROM 
    a
LEFT JOIN 
    custom.cth_game_descriptions g on date(a.event_date) = date(g.event_date)
GROUP BY 
    tier
ORDER BY 
    tier"""

paid_seats = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

q = """
WITH a as
    (SELECT
        h.comp_seats, h.did_attend, h.event_date
    FROM
        custom.cth_historical_all_1718_2223 h
    WHERE
        h.season in ('2019-20', '2021-22', '2022-23')
        and h.comp_seats != 0)

SELECT
    g.tier, sum(a.did_attend)/sum(a.comp_seats) as comp_rate
FROM
    a
LEFT JOIN
    custom.cth_game_descriptions g on date(a.event_date) = date(g.event_date)
GROUP BY
    tier
ORDER BY
    tier"""

comp_seats = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

attendance_df = paid_seats.merge(comp_seats, how = 'left', on = 'tier')

final_attendance_df = predicted_df.merge(right = attendance_df, how = 'left', on = 'tier')
final_attendance_df['predicted_attendance'] = [(w*x)+(y*z) for w,x,y,z, in zip(final_attendance_df['expected_final_paid_seats'],
                                                              final_attendance_df['paid_rate'], final_attendance_df['expected_final_comp_seats'], final_attendance_df['comp_rate'])]
just_attendance = final_attendance_df[['event_date', 'predicted_attendance', 'expected_final_gross_revenue', 'expected_final_seats']]

In [30]:
# 5. Predicted Parking

# polynomial model (built by Pavan), multiply by avg atp to get rev

q = """
WITH attendance as
    (SELECT
        '2022-23' as season, event_date, sum(entry) as attendance
    FROM
        custom.cth_attendance_scans_2223
    GROUP BY
        event_date
    ORDER BY
        event_date)

SELECT
    attendance.event_date, tier, coalesce(attendance,0) as ticket_scans,
    coalesce(sum(paid_amount),0) as parking_paid_amount, count(*)-1 as num_parking_transactions
FROM
    attendance
LEFT JOIN
    custom.Parkhub_v_transactions on attendance.event_date = Parkhub_v_transactions.event_date
LEFT JOIN
    custom.cth_game_descriptions on attendance.event_date = cth_game_descriptions.event_date
WHERE
    attendance.event_date > '2022-10-01'
    and tier in ('A','B','C','D','E')
    and attendance.event_date != '2022-11-09'
GROUP BY
    attendance.event_date, tier, ticket_scans
ORDER BY
    attendance.event_date, tier
"""

df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

#Historical Parking Data Clean Up 
df['weekend'] = df.apply(lambda row: 1 if datetime.weekday(datetime.strptime(row['event_date'], '%Y-%m-%d').date()) == 5 
    or datetime.weekday(datetime.strptime(row['event_date'], '%Y-%m-%d').date()) == 6 else 0, axis = 1)
df['tier_num'] = df.apply(lambda row: 5 if row['tier'] == 'A' else (4 if row['tier'] == 'B' else (3 if row['tier'] == 'C' else 
    (2 if row['tier'] == 'D' else (1 if row['tier'] == 'E' else 0)))), axis = 1)

#2023-2024 Season Data Clean Up 
q = """
select 
    event_date, tier, day_of_week
from 
    custom.cth_game_descriptions
where 
    season = '2023-24'
"""

games = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

games['weekend'] = games.apply(lambda row: 1 if row['day_of_week'] =='Fri' or row['day_of_week']=='Sat'  or row['day_of_week']=='Sun'  else 0 , axis=1)
games['tier_num'] = games.apply(lambda row: 5 if row['tier'] == 'A' else (4 if row['tier'] == 'B' else (3 if row['tier'] == 'C' else 
    (2 if row['tier'] == 'D' else (1 if row['tier'] == 'E' else 0)))), axis = 1)
games = games.merge(right = just_attendance, how = 'left', on = 'event_date')

#Training Data 
xdf = df[['ticket_scans', 'weekend', 'tier_num']]
y = df[['num_parking_transactions']]

# #Test Data 
xdf2 = games[['predicted_attendance', 'weekend', 'tier_num']] ## GET PREDICTED ATTENDANCE FROM ABOVE CODE!

#Scaling Data
poly = PolynomialFeatures(degree=4, include_bias=False)
poly_features2 = poly.fit_transform(xdf2)
poly_features = poly.fit_transform(xdf)

polynomial = LinearRegression().fit(poly_features, np.array(y).ravel())
predicted = polynomial.predict(poly_features2)

games['predicted_parking'] = predicted

In [31]:
q = """
select 
    cast(date(event_datetime) as varchar) as event_date, count(*) as current_parking, sum(gross_revenue) as current_gross_rev
from 
    custom.ctp_v_ticket
where 
    event_type = '2023-24 Panthers Parking'
    and price_type_group not like 'Comp%'
group by 
    event_date
"""

current_parking = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)
games = games.merge(current_parking, how = 'left', on = 'event_date')
games['remaining_parking'] = [x-y for x,y in zip(games['predicted_parking'], games['current_parking'])]
games['predicted_parking_revenue'] = [42.8*x+y for x,y in zip(games['remaining_parking'], games['current_gross_rev'])]

In [32]:
# 6. Predicted F&B & Merch

# avg F&B & Merch per caps by tier (add DOW for drink deals and stuff, simply regression model)

q = """
with a as
    (select bypass_orderitems_2223.event_date, sum(line_item_gross_revenue) as fandb_rev
    from custom.bypass_orderitems_2223
    where event_type = 'Hockey'
    group by bypass_orderitems_2223.event_date
    order by bypass_orderitems_2223.event_date),
b as
    (select event_date, sum(entry) as entry
     from custom.cth_attendance_scans_2223
     group by event_date
     UNION
     select event_date, sum(entry) as entry
     from custom.cth_attendance_scans_playoffs_2223
     group by event_date),
c as
    (select event_date, sum(line_item_gross_revenue) as merch_rev
     from custom.retailpro_invoice_items
     group by event_date),

temp as
    (select a.event_date, tier,
        CASE
            when date_part('dw', date(a.event_date)) in (5,6,7) then 1
            else 0
        end as weekend, fandb_rev, entry, merch_rev, fandb_rev/entry as fandb_percap, merch_rev/entry as merch_percap
    from a
    left join b on a.event_date = b.event_date
    left join c on a.event_date = c.event_date
    left join custom.cth_game_descriptions on a.event_date = cth_game_descriptions.event_date
    where a.event_date not like '2022-11-15')

select tier, weekend, avg(fandb_percap) as fandb_percap, avg(merch_percap) as merch_percap
from temp
where tier in ('A','B','C','D','E')
group by tier, weekend
order by tier, weekend
"""

df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

final = games.merge(right = df, how = 'left', on = ['weekend', 'tier'])
final['predicted_fandb'] = [x*y for x,y in zip(final['predicted_attendance'], final['fandb_percap'])]
final['predicted_merch'] = [x*y for x,y in zip(final['predicted_attendance'], final['merch_percap'])]

In [33]:
final = final.merge(budget_df, how = 'left', on = 'event_date')
final = final[['event_date', 'tier', 'budget_goal', 'expected_final_gross_revenue', 'expected_final_seats',
               'predicted_attendance', 'predicted_parking', 'predicted_parking_revenue','predicted_fandb', 'predicted_merch']]

In [37]:
final['predicted_parking_revenue'].sum()

4955870.679880997