In [4]:
import pandas as pd
import numpy as np
from datetime import datetime
from prefect.blocks.system import Secret
from catnip.fla_redshift import FLA_Redshift
from typing import Dict
from concurrent.futures import ThreadPoolExecutor
from fractions import Fraction

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from collections import Counter

from sklearn.metrics import confusion_matrix, accuracy_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

In [10]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

In [None]:
# 1. Pricing - Dynamic Pricing Model

# seperate file (pricing_model_v1.py)

In [8]:
# 2. Predicted Budget

df = pd.read_excel(r"C:\Users\\riffere\\Desktop\\test_budget_by_ticket_by_game.xlsx", sheet_name = 'sum_table')

In [21]:
# 3. Predicted Revenue & Tickets

# classification model for tickets then use avg (loc, ticket type, tier) atp for revenue

# revenue pacing based on current ATP and days out based on historical numbers
# q = """
# with a as
#     (select 
#         cth_ticket_expanded_all.event_date, tier, cast(date_diff('day', cast(cth_ticket_expanded_all.event_date as date), getdate()) as int) as days_out, sum(block_purchase_price) as gross_rev,
#         sum(paid_seats) as paid_seats,
#         CASE
#             when sum(paid_seats) = 0 then 0
#             else gross_rev/sum(paid_seats)
#         end as avg_atp
#     from 
#         custom.cth_ticket_expanded_all
#     left join 
#         custom.cth_game_descriptions 
#         on cth_ticket_expanded_all.event_date = cth_game_descriptions.event_date
#     group by 
#         cth_ticket_expanded_all.event_date, tier)

# select 
#     *
# from 
#     a
# left join 
#     custom.cth_expected_singles_012723
#     on a.tier = cth_expected_singles_012723.tier
#     and a.days_out = cth_expected_singles_012723.days_out
# order by 
#     event_date
# """

# df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [13]:
# 4. Predicted Attendance

# mulitply projected tickets by historical show rate

q = """
WITH a as
    (SELECT 
        h.paid_seats, h.did_attend, h.event_date,
        CASE
        WHEN h.pc_one IN ('A','B','C','D','E','F','G','H','I','J','1','2','3','4','5','6','7','8') THEN 'Lowers'
        WHEN h.pc_one IN ('K','L','M') THEN 'Clubs'
        WHEN h.pc_one IN ('N','O','P','Q','R','S','T') THEN 'Uppers'
        WHEN h.pc_one IN ('U','V','W') THEN 'Suites'
        WHEN h.pc_one IN ('X') THEN 'Lounge 954'
        WHEN h.pc_one IN ('Y') THEN 'Loft'
        WHEN h.pc_one IN ('Z') THEN 'Corona'
        END as "location"
    FROM 
        custom.cth_historical_all_1718_2223 h
    WHERE 
        h.season in ('2019-20', '2021-22', '2022-23')
        and h.paid_seats != 0)

SELECT 
    g.tier, a.location, sum(a.paid_seats)/count(distinct(a.event_date)) as paid_seats, sum(a.did_attend)/count(distinct(a.event_date)) as did_attend, 
    sum(a.did_attend)/sum(a.paid_seats)
FROM 
    a
LEFT JOIN 
    custom.cth_game_descriptions g on a.event_date = g.event_date
GROUP BY 
    tier, location
ORDER BY 
    tier, location"""

paid_seats = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

q = """
WITH a as
    (SELECT 
        h.comp_seats, h.did_attend, h.event_date,
        CASE
        WHEN h.pc_one IN ('A','B','C','D','E','F','G','H','I','J','1','2','3','4','5','6','7','8') THEN 'Lowers'
        WHEN h.pc_one IN ('K','L','M') THEN 'Clubs'
        WHEN h.pc_one IN ('N','O','P','Q','R','S','T') THEN 'Uppers'
        WHEN h.pc_one IN ('U','V','W') THEN 'Suites'
        WHEN h.pc_one IN ('X') THEN 'Lounge 954'
        WHEN h.pc_one IN ('Y') THEN 'Loft'
        WHEN h.pc_one IN ('Z') THEN 'Corona'
        END as "location"
    FROM 
        custom.cth_historical_all_1718_2223 h
    WHERE 
        h.season in ('2019-20', '2021-22', '2022-23')
        and h.comp_seats != 0)

SELECT 
    g.tier, a.location, sum(a.comp_seats)/count(distinct(a.event_date)) as paid_seats, sum(a.did_attend)/count(distinct(a.event_date)) as did_attend,
    sum(a.did_attend)/sum(a.comp_seats)
FROM 
    a
LEFT JOIN 
    custom.cth_game_descriptions g on a.event_date = g.event_date
GROUP BY 
    tier, location
ORDER BY 
    tier, location"""

comp_seats = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)
comp_seats

Unnamed: 0,tier,location,paid_seats,did_attend,?column?
0,A,Clubs,10,6.666667,0.625
1,A,Corona,3,3.0,0.818182
2,A,Lounge 954,12,9.666667,0.763158
3,A,Lowers,263,195.666667,0.74398
4,A,Suites,177,133.666667,0.752345
5,A,Uppers,315,268.0,0.850794
6,B,Clubs,35,29.357143,0.830303
7,B,Corona,3,3.285714,0.884615
8,B,Loft,11,11.0,1.0
9,B,Lounge 954,12,11.428571,0.909091


In [32]:
# 5. Predicted Parking

# polynomial model (built by Pavan), multiply by avg atp to get rev

q = """
WITH attendance as
    (SELECT 
        season, event_date, sum(did_attend) as attendance
    FROM 
        custom.cth_historical_all_1718_2122
    GROUP BY 
        event_date, season
    UNION
    SELECT 
        '2021-22' as season, event_date, sum(num_seats) as attendance
    FROM 
        custom.cth_playoffs_2122_expanded
    GROUP BY 
        event_date
    UNION
    SELECT 
        '2022-23' as season, event_date, sum(entry) as attendance
    FROM 
        custom.cth_attendance_scans_2223
    GROUP BY 
        event_date
    ORDER BY 
        event_date)

SELECT 
    attendance.event_date, tier, coalesce(attendance,0) as ticket_scans, 
    coalesce(sum(paid_amount),0) as parking_paid_amount, count(*)-1 as num_parking_transactions
FROM 
    attendance
LEFT JOIN 
    custom.parkhub_reporting on attendance.event_date = parkhub_reporting.event_date
LEFT JOIN 
    custom.cth_game_descriptions on attendance.event_date = cth_game_descriptions.event_date
    --and df.time_bucket = attendance.action_time_bucket
WHERE 
    attendance.event_date > '2022-10-01'
    and event_time != ''
    and tier in ('A','B','C','D','E')
    and attendance.event_date != '2022-11-09'
GROUP BY 
    attendance.event_date, tier, ticket_scans
ORDER BY 
    attendance.event_date, tier
"""

df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

#Historical Parking Data Clean Up 
df['weekend'] = df.apply(lambda row: 1 if datetime.weekday(datetime.strptime(row['event_date'], '%Y-%m-%d').date()) == 5 
    or datetime.weekday(datetime.strptime(row['event_date'], '%Y-%m-%d').date()) == 6 else 0, axis = 1)
df['tier_num'] = df.apply(lambda row: 5 if row['tier'] == 'A' else (4 if row['tier'] == 'B' else (3 if row['tier'] == 'C' else 
    (2 if row['tier'] == 'D' else (1 if row['tier'] == 'E' else 0)))), axis = 1)

#2023-2024 Season Data Clean Up 
games['weekend'] = games.apply(lambda row: 1 if row['dow']=='Sat'  or row['dow']=='Sun'  else 0 , axis=1)
games['tier_num'] = games.apply(lambda row: 5 if row['Tier'] == 'A' else (4 if row['Tier'] == 'B' else (3 if row['Tier'] == 'C' else 
    (2 if row['Tier'] == 'D' else (1 if row['Tier'] == 'E' else 0)))), axis = 1)

#Training Data 
xdf = df[['ticket_scans', 'weekend', 'tier_num']]
y = df[['num_parking_transactions']]

#Test Data 
xdf2 = games[['predicted_attendance', 'weekend', 'tier_num']] ## GET PREDICTED ATTENDANCE FROM ABOVE CODE!

#Scaling Data
poly = PolynomialFeatures(degree=4, include_bias=False)
poly_features2 = poly.fit_transform(xdf2)
poly_features = poly.fit_transform(xdf)

polynomial = LinearRegression().fit(poly_features, np.array(y).ravel())
predicted = polynomial.predict(poly_features2)

games['predicted_parking'] = predicted
games

KeyError: "['predicted_attendance'] not in index"

In [12]:
# 6. Predicted F&B & Merch

# avg F&B & Merch per caps by tier (add DOW for drink deals and stuff, simply regression model)

q = """
with a as
    (select bypass_orderitems_2223.event_date, sum(line_item_gross_revenue) as fandb_rev
    from custom.bypass_orderitems_2223
    where event_type = 'Hockey'
    group by bypass_orderitems_2223.event_date
    order by bypass_orderitems_2223.event_date),
b as
    (select event_date, sum(entry) as entry
     from custom.cth_attendance_scans_2223
     group by event_date
     UNION
     select event_date, sum(entry) as entry
     from custom.cth_attendance_scans_playoffs_2223
     group by event_date),
c as
    (select event_date, sum(line_item_gross_revenue) as merch_rev
     from custom.retailpro_invoice_items
     group by event_date),

temp as
    (select a.event_date, date_part('dw',cast(a.event_date as date)) as weekday, tier, fandb_rev, entry, merch_rev, fandb_rev/entry as fandb_percap, merch_rev/entry as merch_percap
    from a
    left join b on a.event_date = b.event_date
    left join c on a.event_date = c.event_date
    left join custom.cth_game_descriptions on a.event_date = cth_game_descriptions.event_date
    where a.event_date not like '2022-11-15')

select tier, weekday, avg(fandb_percap) as fandb_percap, avg(merch_percap) as merch_percap
from temp
group by tier, weekday
order by tier, weekday
"""

df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)
df

Unnamed: 0,tier,weekday,fandb_percap,merch_percap
0,A,4.0,22.555028,8.385843
1,A,6.0,29.790659,6.055374
2,B,0.0,24.230363,6.952212
3,B,4.0,25.042488,6.211725
4,B,5.0,29.146634,10.89286
5,B,6.0,27.954689,9.780764
6,C,1.0,26.085917,11.192126
7,C,3.0,27.765611,12.363794
8,C,4.0,25.559875,8.635926
9,C,6.0,24.958902,12.266062


In [None]:
# 7. All-in-total

# ticket+parking+f&b+merch