In [1]:
import pandas as pd
import numpy as np
from catnip.fla_redshift import FLA_Redshift
from sqlalchemy import null
from datetime import datetime

from prefect.blocks.system import Secret
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

In [2]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

In [3]:
q = """
with prepaid as
    (select
         event_datetime,
         location_group
    from
        custom.ctp_v_ticket_2324
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date
    UNION ALL
    select
        event_datetime,
        location_group
    from
        custom.ctp_v_ticket_2425
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date),
prepaid_agg as
    (select
        event_datetime,
        location_group,
        count(*) as prepaid_passes
    from
        prepaid
    group by
        event_datetime, location_group),
scans as
    (select
        season,
        cth_game_descriptions.event_datetime,
        tier,
        location_group,
        case
            when paid_amount = 0 then 1
            else 0 end as num_scans
    from
        custom.parkhub_v_transactions
    left join
        custom.cth_game_descriptions on parkhub_v_transactions.event_datetime = cth_game_descriptions.event_datetime
    where
        cth_game_descriptions.event_datetime is not Null
        and season in ('2023-24','2024-25')),
scans_agg as
    (select
        season,
        event_datetime,
        tier,
        location_group,
        sum(num_scans) as num_scans
    from
        scans
    group by
        season,
        event_datetime,
        tier,
        location_group)
select
    season,
    prepaid_agg.event_datetime,
    tier,
    prepaid_agg.location_group,
    prepaid_passes,
    num_scans,
    num_scans*1.0/prepaid_passes::float as show_rate
from
    scans_agg
left join
    prepaid_agg on scans_agg.event_datetime = prepaid_agg.event_datetime
    and scans_agg.location_group = prepaid_agg.location_group
where
    prepaid_agg.event_datetime is not Null
"""

show_rate_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [4]:
weights = {'2023-24':1.25,'2024-25':2.75}

show_rate_df['weights'] = show_rate_df['season'].map(weights)

def weighted_paid_average(group):
    # Calculate the weighted sum
    weighted_sum = (group['show_rate'] * group['weights']).sum()
    
    # Calculate the weight sum
    weight_sum = group['weights'].sum()
    
    # Calculate the weighted average
    wavg = weighted_sum / weight_sum
    
    return pd.Series({
        'weighted_average': wavg
    })

paid_tiers = show_rate_df.groupby(by = ['tier','location_group']).apply(weighted_paid_average).reset_index()

  paid_tiers = show_rate_df.groupby(by = ['tier','location_group']).apply(weighted_paid_average).reset_index()


In [5]:
q = """
with prepaid as
    (select
         event_datetime,
         location_group,
         date(transaction_date) as transaction_date
    from
        custom.ctp_v_ticket_2324
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date
    UNION ALL
    select
        event_datetime,
        location_group,
        date(transaction_date) as transaction_date
    from
        custom.ctp_v_ticket_2425
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date)
select
    date(event_datetime) as event_date,
    location_group,
    'prepaid' as parking_type,
    datediff('days',transaction_date, event_datetime) as days_out,
    case
        when days_out >= 200 then 0
        else count(*) 
    end as num_cars
from
    prepaid
where
    days_out >= 0
group by
    event_datetime,
    location_group,
    transaction_date
order by
    event_datetime,
    location_group,
    transaction_date desc
"""

historical_prepaid_parking_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

historical_prepaid_parking_info['location_num'] = historical_prepaid_parking_info.apply(lambda row: 4 if row['location_group'] == 'Club' else (3 if row['location_group'] == 'Garage' 
    else (2 if row['location_group'] == 'General' else (1 if row['location_group'] == 'Valet' else 0))), axis = 1)

historical_prepaid_parking_info['cumulative_num_cars']  = historical_prepaid_parking_info.groupby(['event_date', 'location_group'])['num_cars'].cumsum()

In [6]:
q = """
SELECT
    date(cth_game_descriptions.event_date) as event_date,
    tier,
    day_of_week,
    start_time,
    total_tickets
FROM
    custom.cth_v_historical_attendance_summary
LEFT JOIN
    custom.cth_game_descriptions on cth_v_historical_attendance_summary.event_date = cth_game_descriptions.event_date
WHERE
    tier in ('A','B','C','D','E')
"""

all_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

all_game_info['weekend'] = all_game_info.apply(lambda row: 1 if row['day_of_week'] =='Fri' or row['day_of_week']=='Sat'  or row['day_of_week']=='Sun'  else 0 , axis=1)
all_game_info['start_time_num'] = all_game_info.apply(lambda row: 1 if row['start_time'] =='12:30 PM' or row['start_time']=='12:45 PM' 
    or row['start_time']=='1:00 PM' or row['start_time']=='3:00 PM' or row['start_time']=='3:30 PM' else (2 if row['start_time'] =='4:00 PM'
     or row['start_time'] =='5:00 PM' or row['start_time'] =='6:00 PM' else 0) , axis=1)

In [7]:
q = """
select
    date(event_datetime) as event_date,
    datediff('day', current_date, event_datetime) as days_out,
    ctp_v_ticket_2425.location_group,
    capacity::int,
    sum(paid_seats)+sum(comp_seats) as prepaid_cars,
    capacity::int - prepaid_cars as cap_remaining
from
    custom.ctp_v_ticket_2425
left join
    custom.ctp_parking_capacities on ctp_v_ticket_2425.location_group = ctp_parking_capacities.location_group
where
    event_datetime >= current_date
group by
    event_datetime,
    ctp_v_ticket_2425.location_group,
    capacity
"""

upcoming_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [8]:
def run_model(df, df_future, lot):

    total_table = df[df['location_group'] == lot]

    x_train = total_table[['days_out','weekend','start_time_num']]
    y_train = total_table[['cumulative_num_cars']]

    total_future_table = df_future[df_future['location_group'] == lot]

    x_test = total_future_table[['days_out','weekend','start_time_num']]
    #y_test = total_future_table[['prepaid_cars']] 

    #scalar = PolynomialFeatures(degree=2, include_bias=False)
    #poly_features = scalar.fit_transform(x_train)
    scalar = StandardScaler()
    poly_features = scalar.fit_transform(x_train)

    polynomial = LinearRegression().fit(poly_features, np.array(y_train).ravel())
    #predicted_train = polynomial.predict(poly_features)

    #total_table['predicted_parking'] = predicted

    poly_features2 = scalar.fit_transform(x_test)

    predicted_test = polynomial.predict(poly_features2)

    return polynomial.predict(poly_features2)

    #total_future_table['predicted_parking'] = predicted

In [9]:
total_table = historical_prepaid_parking_info.merge(all_game_info, how = 'left', on = 'event_date')
total_table = total_table[total_table['tier'].isin(['A','B','C','D','E'])]

total_future_table = upcoming_game_info.merge(all_game_info, how = 'left', on = 'event_date')
total_future_table = total_future_table[total_future_table['tier'].isin(['A','B','C','D','E'])]

In [10]:
lots = ['General','Garage','Club']

final_df = pd.DataFrame()

for lot in lots:

    temp = total_future_table[total_future_table['location_group'] == lot]

    temp['predicted_parking'] = run_model(total_table, total_future_table, lot)

    final_df = pd.concat([final_df,temp], ignore_index= True)

    # for everything but Valet maybe? add together, mulitply by show rate to get number of cars then subtract from capacity to get onsite limit

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_parking'] = run_model(total_table, total_future_table, lot)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_parking'] = run_model(total_table, total_future_table, lot)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_parking'] = run_model(total_table, to

In [11]:
final_df['predicted_prepaid_additional_parking'] = np.where(final_df['predicted_parking'] < 0, 0, final_df['predicted_parking'])

final_df['total'] = final_df['prepaid_cars'] + final_df['predicted_parking']

final_df = final_df.merge(paid_tiers, how = 'left', on = ['tier', 'location_group'])
final_df['prepaid_cars_final'] = final_df['total'] * final_df['weighted_average']

In [12]:
final_df['cap_remaining'] = final_df['capacity'] - final_df['prepaid_cars_final']         

In [13]:
final_df = final_df[['event_date','days_out','tier', 'start_time_num','weekend',
                     'location_group','capacity','prepaid_cars','predicted_prepaid_additional_parking',
                     'total','prepaid_cars_final','cap_remaining']].rename({'total':'total_predicted_prepaid_parking','prepaid_cars_final' : 'prepaid_cars_parked'})

In [22]:
final_df

Unnamed: 0,event_date,days_out,tier,start_time_num,weekend,location_group,capacity,prepaid_cars,predicted_prepaid_additional_parking,total,prepaid_cars_final,cap_remaining
0,2024-12-30,69,A,0.0,0.0,General,4360,1957,845.088376,2802.088376,2263.990904,2096.009096
1,2025-03-01,130,C,1.0,1.0,General,4360,1924,1160.329641,3084.329641,2458.995887,1901.004113
2,2025-01-11,81,B,1.0,1.0,General,4360,1984,937.721140,2921.721140,2346.670752,2013.329248
3,2024-11-30,39,C,2.0,1.0,General,4360,1933,765.797981,2698.797981,2151.629011,2208.370989
4,2025-04-08,168,B,0.0,0.0,General,4360,1937,1294.848407,3231.848407,2595.759064,1764.240936
...,...,...,...,...,...,...,...,...,...,...,...,...
106,2024-11-07,16,E,0.0,0.0,Club,1861,1220,64.223381,1284.223381,838.073069,1022.926931
107,2024-12-07,46,D,2.0,1.0,Club,1861,1439,282.768055,1721.768055,1214.156422,646.843578
108,2025-04-10,170,D,0.0,0.0,Club,1861,1208,986.655081,2194.655081,1547.626902,313.373098
109,2025-03-08,137,C,2.0,1.0,Club,1861,1443,827.841332,2270.841332,1503.430406,357.569594


In [23]:
q = """
with onsite as
    (select
        date(cth_game_descriptions.event_datetime) as event_date,
        location_group,
        0 as days_out,
        case
            when paid_amount > 0 then 1
        else 0
        end as num_onsite_cars,
        case
            when paid_amount = 0 then 1
        else 0
        end as num_prepaid_cars
    from
        custom.parkhub_v_transactions
    left join
        custom.cth_game_descriptions on parkhub_v_transactions.event_datetime = cth_game_descriptions.event_datetime
    where
        season in ('2023-24','2024-25'))
select
    onsite.event_date,
    onsite.location_group,
    'onsite' as parking_type,
    days_out,
    sum(num_onsite_cars) as num_cars,
    capacity - sum(num_prepaid_cars) as cap_remaining
from
    onsite
left join
    custom.ctp_parking_capacities on onsite.location_group = ctp_parking_capacities.location_group
where
    days_out >= 0
group by
    onsite.event_date,
    onsite.location_group,
    parking_type,
    days_out,
    capacity
"""

historical_onsite_parking_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [25]:
historical_onsite_parking_info

Unnamed: 0,event_date,location_group,parking_type,days_out,num_cars,cap_remaining
0,2024-10-08,Club,onsite,0,259,285
1,2024-05-28,Club,onsite,0,388,598
2,2024-02-27,General,onsite,0,929,2619
3,2024-10-08,General,onsite,0,1089,1861
4,2023-11-12,General,onsite,0,1361,2410
...,...,...,...,...,...,...
240,2024-03-28,Unknown,onsite,0,4,
241,2024-10-22,General,onsite,0,0,4358
242,2024-10-22,Club,onsite,0,1,1860
243,2024-10-22,Garage,onsite,0,0,199


In [26]:
total_onsite_table = historical_onsite_parking_info.merge(all_game_info, how = 'left', on = 'event_date')
total_onsite_table = total_onsite_table[total_onsite_table['tier'].isin(['A','B','C','D','E'])]

In [28]:
total_future_table

Unnamed: 0,event_date,days_out,location_group,capacity,prepaid_cars,cap_remaining,tier,day_of_week,start_time,total_tickets,weekend,start_time_num
0,2025-03-28,157,Club,1861,1186,675,D,Fri,7:00 PM,14493.0,1.0,0.0
1,2024-11-09,18,Club,1861,1472,389,C,Sat,6:00 PM,18327.0,1.0,2.0
2,2024-11-14,23,Club,1861,1206,655,D,Thu,7:00 PM,15648.0,0.0,0.0
3,2024-12-30,69,General,4360,1957,2403,A,Mon,7:00 PM,17751.0,0.0,0.0
4,2025-01-18,88,Club,1861,1439,422,D,Sat,6:00 PM,17195.0,1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...
230,2024-11-12,21,Executive,79,46,33,E,Tue,7:30 PM,15639.0,0.0,0.0
231,2024-11-12,21,Garage,200,96,104,E,Tue,7:30 PM,15639.0,0.0,0.0
232,2025-02-27,128,General,4360,1471,2889,C,Thu,7:30 PM,15832.0,0.0,0.0
247,2024-10-22,0,General,4360,2017,2343,E,Tue,6:30 PM,17609.0,0.0,0.0


In [15]:
def run_onsite_model(df, df_future, lot):

    total_onsite_table = df[df['location_group'] == lot]

    x_train = total_onsite_table[['cap_remaining','weekend','start_time_num']]
    y_train = total_onsite_table[['num_cars']]

    total_future_table = df_future[df_future['location_group'] == lot]

    x_test = total_future_table[['cap_remaining','weekend','start_time_num']]
    #y_test = total_future_table[['prepaid_cars']] 

    #scalar = PolynomialFeatures(degree=2, include_bias=False)
    #poly_features = scalar.fit_transform(x_train)
    scalar = StandardScaler()
    poly_features = scalar.fit_transform(x_train)

    polynomial = LinearRegression().fit(poly_features, np.array(y_train).ravel())
    #predicted_train = polynomial.predict(poly_features)

    #total_table['predicted_parking'] = predicted

    poly_features2 = scalar.fit_transform(x_test)

    predicted_test = polynomial.predict(poly_features2)

    return polynomial.predict(poly_features2)

    #total_future_table['predicted_parking'] = predicted

In [16]:
lots = ['General','Garage','Club']

final_df_onsite = pd.DataFrame()

for lot in lots:

    temp = total_future_table[total_future_table['location_group'] == lot]

    temp['predicted_parking'] = run_onsite_model(final_df, total_future_table, lot)

    final_df_onsite = pd.concat([final_df,temp], ignore_index= True)


KeyError: "None of [Index(['num_cars'], dtype='object')] are in the [columns]"

In [46]:
final_df_onsite

Unnamed: 0,event_date,days_out,location_group,capacity,prepaid_cars,cap_remaining,tier,day_of_week,start_time,total_tickets,weekend,start_time_num,predicted_parking
0,2024-11-07,16,General,4360,1479,2881,E,Thu,7:00 PM,16248.0,0.0,0.0,747.810635
1,2025-02-02,103,General,4360,1918,2442,D,Sun,6:00 PM,17429.0,1.0,2.0,971.592136
2,2025-02-08,109,General,4360,1925,2435,C,Sat,7:00 PM,17586.0,1.0,0.0,1063.154872
3,2025-03-23,152,General,4360,1922,2438,C,Sun,6:00 PM,17187.0,1.0,2.0,974.124029
4,2024-11-27,36,General,4360,1940,2420,B,Wed,7:30 PM,17498.0,0.0,0.0,1039.611299
...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,2024-12-20,59,Club,1861,1205,656,C,Fri,7:00 PM,14971.0,1.0,0.0,250.030450
107,2024-11-16,25,Club,1861,1446,415,D,Sat,7:00 PM,17559.0,1.0,0.0,332.818809
108,2024-11-12,21,Club,1861,1199,662,E,Tue,7:30 PM,15639.0,0.0,0.0,226.270091
109,2025-02-27,128,Club,1861,1230,631,C,Thu,7:30 PM,15832.0,0.0,0.0,236.919216
