In [1]:
import pandas as pd
import numpy as np
from catnip.fla_redshift import FLA_Redshift
from sqlalchemy import null
from datetime import datetime

from prefect.blocks.system import Secret
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

In [2]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

In [3]:
# get historical show rates by tier and lot

q = """
with prepaid as
    (select
         event_datetime,
         location_group
    from
        custom.ctp_v_ticket_2324
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date
    UNION ALL
    select
        event_datetime,
        location_group
    from
        custom.ctp_v_ticket_2425
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date),
prepaid_agg as
    (select
        event_datetime,
        location_group,
        count(*) as prepaid_passes
    from
        prepaid
    group by
        event_datetime, location_group),
scans as
    (select
        season,
        cth_game_descriptions.event_datetime,
        tier,
        location_group,
        case
            when paid_amount = 0 then 1
            else 0 end as num_scans
    from
        custom.parkhub_v_transactions
    left join
        custom.cth_game_descriptions on parkhub_v_transactions.event_datetime = cth_game_descriptions.event_datetime
    where
        cth_game_descriptions.event_datetime is not Null
        and season in ('2023-24','2024-25')),
scans_agg as
    (select
        season,
        event_datetime,
        tier,
        location_group,
        sum(num_scans) as num_scans
    from
        scans
    group by
        season,
        event_datetime,
        tier,
        location_group)
select
    season,
    prepaid_agg.event_datetime,
    tier,
    prepaid_agg.location_group,
    prepaid_passes,
    num_scans,
    num_scans*1.0/prepaid_passes::float as show_rate
from
    scans_agg
left join
    prepaid_agg on scans_agg.event_datetime = prepaid_agg.event_datetime
    and scans_agg.location_group = prepaid_agg.location_group
where
    prepaid_agg.event_datetime is not Null
"""

show_rate_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [4]:
# weigh this past season more heavily

weights = {'2023-24':1.25,'2024-25':2.75}

show_rate_df['weights'] = show_rate_df['season'].map(weights)

def weighted_paid_average(group):
    # Calculate the weighted sum
    weighted_sum = (group['show_rate'] * group['weights']).sum()
    
    # Calculate the weight sum
    weight_sum = group['weights'].sum()
    
    # Calculate the weighted average
    wavg = weighted_sum / weight_sum
    
    return pd.Series({
        'weighted_average': wavg
    })

paid_tiers = show_rate_df.groupby(by = ['tier','location_group']).apply(weighted_paid_average).reset_index()

  paid_tiers = show_rate_df.groupby(by = ['tier','location_group']).apply(weighted_paid_average).reset_index()


In [5]:
# gather historical prepaid parking data

q = """
with prepaid as
    (select
         event_datetime,
         location_group,
         date(transaction_date) as transaction_date
    from
        custom.ctp_v_ticket_2324
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date
    UNION ALL
    select
        event_datetime,
        location_group,
        date(transaction_date) as transaction_date
    from
        custom.ctp_v_ticket_2425
    where
        event_type ilike '%panthers%'
        and event_datetime < current_date)
select
    date(event_datetime) as event_date,
    location_group,
    'prepaid' as parking_type,
    datediff('days',transaction_date, event_datetime) as days_out,
    case
        when days_out >= 150 then 0
        else count(*) 
    end as num_passes
from
    prepaid
where
    days_out >= 0
group by
    event_datetime,
    location_group,
    transaction_date
order by
    event_datetime,
    location_group,
    transaction_date desc
"""

historical_prepaid_parking_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

location_map = {
    'Club': 4,
    'Garage': 3,
    'General': 2,
    'Valet': 1
}

historical_prepaid_parking_info['location_num'] = historical_prepaid_parking_info.apply(
    lambda row: location_map.get(row['location_group'], 0), axis=1)

historical_prepaid_parking_info['cumulative_num_passes']  = historical_prepaid_parking_info.groupby(['event_date', 'location_group'])['num_passes'].cumsum()

historical_prepaid_parking_info

Unnamed: 0,event_date,location_group,parking_type,days_out,num_passes,location_num,cumulative_num_passes
0,2023-10-07,Club,prepaid,0,41,4,41
1,2023-10-07,Club,prepaid,1,6,4,47
2,2023-10-07,Club,prepaid,2,11,4,58
3,2023-10-07,Club,prepaid,3,10,4,68
4,2023-10-07,Club,prepaid,4,4,4,72
...,...,...,...,...,...,...,...
12790,2024-10-22,Valet,prepaid,74,71,1,168
12791,2024-10-22,Valet,prepaid,75,1,1,169
12792,2024-10-22,Valet,prepaid,81,2,1,171
12793,2024-10-22,Valet,prepaid,88,1,1,172


In [6]:
# gather historical game data (ie tier, dow, and start time)

q = """
SELECT
    date(cth_game_descriptions.event_date) as event_date,
    tier,
    day_of_week,
    start_time,
    total_tickets
FROM
    custom.cth_v_historical_attendance_summary
LEFT JOIN
    custom.cth_game_descriptions on cth_v_historical_attendance_summary.event_date = cth_game_descriptions.event_date
WHERE
    tier in ('A','B','C','D','E')
"""

all_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

day_map = {
    'Fri': 1,
    'Sat': 1,
    'Sun': 1,
    'Mon': 0,
    'Tue': 0,
    'Wed': 0,
    'Thu': 0
}

all_game_info['weekend'] = all_game_info.apply(
    lambda row: day_map.get(row['day_of_week'], 0), axis=1)

start_time_map = {
    '12:30 PM': 1,
    '12:45 PM': 1,
    '1:00 PM': 1,
    '3:00 PM': 1,
    '3:30 PM': 1,
    '4:00 PM': 2,
    '5:00 PM': 2,
    '6:00 PM': 2
}

all_game_info['start_time_num'] = all_game_info.apply(
    lambda row: start_time_map.get(row['start_time'], 0),
    axis=1)

In [7]:
# gather upcoming game data including current prepaid totals by game and lot

q = """
select
    date(cth_game_descriptions.event_datetime) as event_date,
    datediff('day', current_date, cth_game_descriptions.event_datetime) as days_out,
    ctp_v_ticket_2425.location_group,
    capacity::int,
    sum(paid_seats)+sum(comp_seats) as prepaid_cars,
    sum(gross_revenue) as current_gross_revenue,
    capacity::int - prepaid_cars as cap_remaining
from
    custom.ctp_v_ticket_2425
left join
    custom.ctp_parking_capacities on ctp_v_ticket_2425.location_group = ctp_parking_capacities.location_group
left join
    custom.cth_game_descriptions on ctp_v_ticket_2425.event_datetime = cth_game_descriptions.event_datetime
where
    cth_game_descriptions.event_datetime is not null and
    cth_game_descriptions.event_datetime >= current_date
group by
    cth_game_descriptions.event_datetime,
    ctp_v_ticket_2425.location_group,
    capacity
order by
    cth_game_descriptions.event_datetime,
    ctp_v_ticket_2425.location_group
"""

upcoming_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [8]:
# gather upcoming pricing data by game and lot

q = """
SELECT
    date(event_datetime) as event_date,
    location_group,
    max(transaction_date) AS "transaction_date",
    max(adjusted_price) AS "highest_price"
FROM
    custom.ctp_v_ticket_2425
WHERE
    is_comp = FALSE
    AND price_type ILIKE 'IA%'
GROUP BY
    event_datetime, location_group
"""

pricing_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [9]:
# create a model to predict remaining prepaid cars

def run_prepaid_model(df, df_future, lot):

    total_table = df[df['location_group'] == lot]

    x_train = total_table[['days_out','weekend','start_time_num']]
    y_train = total_table[['cumulative_num_passes']]

    total_future_table = df_future[df_future['location_group'] == lot]

    x_test = total_future_table[['days_out','weekend','start_time_num']]

    #scalar = PolynomialFeatures(degree=2, include_bias=False)
    #poly_features = scalar.fit_transform(x_train)
    scalar = StandardScaler()
    poly_features = scalar.fit_transform(x_train)

    polynomial = LinearRegression().fit(poly_features, np.array(y_train).ravel())
    #predicted_train = polynomial.predict(poly_features)

    poly_features2 = scalar.fit_transform(x_test)

    return polynomial.predict(poly_features2).astype(int)

In [10]:
# merge hisotrical prepaid parking data with hisorical game data for training model

total_table = historical_prepaid_parking_info.merge(all_game_info, how = 'left', on = 'event_date')
total_table = total_table[total_table['tier'].isin(['A','B','C','D','E'])]

# merge upcoming parking data with hisorical game data for testing model

total_future_table = upcoming_game_info.merge(all_game_info, how = 'left', on = 'event_date')
total_future_table = total_future_table[total_future_table['tier'].isin(['A','B','C','D','E'])]

In [11]:
# only predicting for general and garage not  club, valet or executive

lots = ['General','Garage']

final_df = pd.DataFrame()

for lot in lots:

    temp = total_future_table[total_future_table['location_group'] == lot]

    temp['predicted_parking'] = run_prepaid_model(total_table, total_future_table, lot)

    final_df = pd.concat([final_df,temp], ignore_index= True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_parking'] = run_prepaid_model(total_table, total_future_table, lot)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_parking'] = run_prepaid_model(total_table, total_future_table, lot)


In [12]:
# concat club totals for onsite model next

club_totals = total_future_table[total_future_table['location_group'] == 'Club']
club_totals['predicted_parking'] = 0

final_df = pd.concat([final_df, club_totals], axis=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  club_totals['predicted_parking'] = 0


In [13]:
# make sure no negative predictions are made

final_df['predicted_prepaid_additional_parking'] = np.where(final_df['predicted_parking'] < 0, 0, final_df['predicted_parking'])

# get total prepaid tickets (current + predicted additional)

final_df['total_predicted_prepaid_cars'] = final_df['prepaid_cars'] + final_df['predicted_parking']

# get number of parked cars using historical show rates

final_df = final_df.merge(paid_tiers, how =  'left', on = ['tier', 'location_group'])
final_df['prepaid_cars_parked'] = (final_df['total_predicted_prepaid_cars'] * final_df['weighted_average']).astype(int)

In [14]:
# find the capacity remaining 

final_df['cap_remaining'] = final_df['capacity'] - final_df['prepaid_cars_parked']

# if predicted cars over capacity subtract overflow out

final_df['predicted_prepaid_additional_parking'] = np.where(final_df['cap_remaining'] < 0, final_df['predicted_prepaid_additional_parking']+final_df['cap_remaining'], final_df['predicted_prepaid_additional_parking'])
final_df['prepaid_cars_parked'] = np.where(final_df['cap_remaining'] < 0, final_df['prepaid_cars_parked']+final_df['cap_remaining'], final_df['prepaid_cars_parked'])
final_df['cap_remaining'] = np.where(final_df['cap_remaining'] < 0, 0, final_df['prepaid_cars_parked'])

In [15]:
final_df = final_df[['event_date','days_out','tier', 'start_time_num','weekend',
                     'location_group','capacity','prepaid_cars','current_gross_revenue', 
                     'predicted_prepaid_additional_parking', 'total_predicted_prepaid_cars',
                     'prepaid_cars_parked','cap_remaining']]

In [16]:
# get hisotrical onsite parking data

q = """
with onsite as
    (select
        date(cth_game_descriptions.event_datetime) as event_date,
        location_group,
        0 as days_out,
        case
            when paid_amount > 0 then 1
        else 0
        end as num_onsite_cars,
        case
            when paid_amount = 0 then 1
        else 0
        end as num_prepaid_cars
    from
        custom.parkhub_v_transactions
    left join
        custom.cth_game_descriptions on parkhub_v_transactions.event_datetime = cth_game_descriptions.event_datetime
    where
        season in ('2023-24','2024-25'))
select
    onsite.event_date,
    onsite.location_group,
    'onsite' as parking_type,
    days_out,
    sum(num_onsite_cars) as num_cars,
    capacity - sum(num_prepaid_cars) as cap_remaining
from
    onsite
left join
    custom.ctp_parking_capacities on onsite.location_group = ctp_parking_capacities.location_group
where
    days_out >= 0
group by
    onsite.event_date,
    onsite.location_group,
    parking_type,
    days_out,
    capacity
"""

historical_onsite_parking_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [17]:
# merge hisotrical prepaid parking data with hisorical game data for training model

total_onsite_table = historical_onsite_parking_info.merge(all_game_info, how = 'left', on = 'event_date')
total_onsite_table = total_onsite_table[total_onsite_table['tier'].isin(['A','B','C','D','E'])]

In [18]:
# create a model to predict onsite cars

def run_onsite_model(df, df_future, lot):

    x_train_table = df[df['location_group'] == lot]

    x_train = x_train_table[['cap_remaining','weekend','start_time_num']]
    y_train = x_train_table[['num_cars']]

    x_test_table = df_future[df_future['location_group'] == lot]

    x_test = x_test_table[['cap_remaining','weekend','start_time_num']]

    #scalar = PolynomialFeatures(degree=2, include_bias=False)
    #poly_features = scalar.fit_transform(x_train)
    scalar = StandardScaler()
    poly_features = scalar.fit_transform(x_train)

    polynomial = LinearRegression().fit(poly_features, np.array(y_train).ravel())
    #predicted_train = polynomial.predict(poly_features)

    poly_features2 = scalar.fit_transform(x_test)

    predicted_test = polynomial.predict(poly_features2)

    return polynomial.predict(poly_features2).astype(int)

In [None]:
# only predicting for general, garage, and club not valet or executive

lots = ['General','Garage','Club']

final_df_onsite = pd.DataFrame()

for lot in lots:

    temp = final_df[final_df['location_group'] == lot]

    temp['predicted_onsite_parking'] = run_onsite_model(total_onsite_table, final_df, lot)

    final_df_onsite = pd.concat([final_df_onsite,temp], ignore_index= True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_onsite_parking'] = run_onsite_model(total_onsite_table, final_df, lot)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_onsite_parking'] = run_onsite_model(total_onsite_table, final_df, lot)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['predicted_onsite_parking'

In [20]:
# add back executive and valet parking and match fields from final_df

exec_and_valet = total_future_table[total_future_table['location_group'].isin(['Executive','Valet'])]

exec_and_valet['predicted_prepaid_additional_parking'] = 0
exec_and_valet['total_predicted_prepaid_cars'] = exec_and_valet['prepaid_cars']

exec_and_valet = exec_and_valet.merge(paid_tiers, how =  'left', on = ['tier', 'location_group'])
exec_and_valet['weighted_average'] = exec_and_valet['weighted_average'].fillna(1)

exec_and_valet['prepaid_cars_parked'] = (exec_and_valet['total_predicted_prepaid_cars'] * exec_and_valet['weighted_average']).astype(int)
exec_and_valet['predicted_onsite_parking'] = 0
exec_and_valet['total_parking'] = exec_and_valet['prepaid_cars_parked'] 

exec_and_valet = exec_and_valet[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking',
                                       'total_predicted_prepaid_cars', 'prepaid_cars_parked',
                                       'predicted_onsite_parking','total_parking']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exec_and_valet['predicted_prepaid_additional_parking'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exec_and_valet['total_predicted_prepaid_cars'] = exec_and_valet['prepaid_cars']


In [21]:
# if predicted total over capacity subtract overflow out

final_df_onsite['predicted_onsite_parking'] = [pred_onsite if pred_onsite <= cap_remaining else cap_remaining for pred_onsite, cap_remaining in zip(final_df_onsite['predicted_onsite_parking'], final_df_onsite['cap_remaining'])]

final_df_onsite['total_parking'] = final_df_onsite['prepaid_cars_parked'] + final_df_onsite['predicted_onsite_parking']

final_parking_model = final_df_onsite[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking','total_predicted_prepaid_cars',
                                       'prepaid_cars_parked','predicted_onsite_parking','total_parking']]

# merge with executive and valet parking info

final_parking_model_df = pd.concat([final_parking_model, exec_and_valet], axis=0)

In [22]:
final_parking_model_df = final_parking_model_df.merge(pricing_info, how = 'left', on = ['event_date', 'location_group'])

final_parking_model_df['predicted_prepaid_additional_gross_revenue'] = (final_parking_model_df['predicted_prepaid_additional_parking'] * final_parking_model_df['highest_price']).fillna(0)

final_parking_model_df['predicted_onsite_parking_gross_revenue'] = (final_parking_model_df['predicted_prepaid_additional_parking'] * final_parking_model_df['highest_price']*1.25).fillna(0)

final_parking_model_df['predicted_gross_revenue'] = final_parking_model_df['predicted_prepaid_additional_gross_revenue'] + final_parking_model_df['predicted_onsite_parking_gross_revenue'] + final_parking_model_df['current_gross_revenue']

final_parking_model_df = final_parking_model_df[['event_date', 'days_out','tier','location_group','capacity','prepaid_cars',
                                       'current_gross_revenue', 'predicted_prepaid_additional_parking', 'predicted_prepaid_additional_gross_revenue',
                                       'total_predicted_prepaid_cars', 'prepaid_cars_parked', 'predicted_onsite_parking',
                                       'predicted_onsite_parking_gross_revenue','total_parking', 'predicted_gross_revenue']]

final_parking_model_df

Unnamed: 0,event_date,days_out,tier,location_group,capacity,prepaid_cars,current_gross_revenue,predicted_prepaid_additional_parking,predicted_prepaid_additional_gross_revenue,total_predicted_prepaid_cars,prepaid_cars_parked,predicted_onsite_parking,predicted_onsite_parking_gross_revenue,total_parking,predicted_gross_revenue
0,2024-11-07,9,E,General,4360,1537,25095.18,651,19530.0,2188,1789,1038,24412.5,2827,69037.68
1,2024-11-09,11,C,General,4360,2027,35281.34,704,21120.0,2731,2177,895,26400.0,3072,82801.34
2,2024-11-12,14,E,General,4360,1487,24042.24,667,20010.0,2154,1761,1046,25012.5,2807,69064.74
3,2024-11-14,16,D,General,4360,1469,23128.92,674,20220.0,2143,1708,1061,25275.0,2769,68623.92
4,2024-11-16,18,D,General,4360,1962,34191.06,693,20790.0,2655,2117,1004,25987.5,3121,80968.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,2025-04-10,163,D,Valet,292,321,1367.46,0,0.0,321,128,0,0.0,128,1367.46
176,2025-04-12,165,C,Executive,79,46,160.71,0,0.0,46,46,0,0.0,46,160.71
177,2025-04-12,165,C,Valet,292,367,1703.44,0,0.0,367,154,0,0.0,154,1703.44
178,2025-04-14,167,B,Executive,79,46,160.71,0,0.0,46,46,0,0.0,46,160.71


In [23]:
final_parking_model_df.to_csv('C:\\Users\\riffere\\Desktop\\output_2.csv')

In [24]:
final_parking_model_df.groupby(by = 'event_date').sum()[['total_parking', 'predicted_gross_revenue']]

Unnamed: 0_level_0,total_parking,predicted_gross_revenue
event_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-11-07,4196,87284.3
2024-11-09,4583,107037.42
2024-11-12,4167,86384.43
2024-11-14,4164,86023.25
2024-11-16,4679,104632.66
2024-11-23,4570,107973.59
2024-11-25,4203,89576.37
2024-11-27,4695,104834.94
2024-11-30,4562,109040.8
2024-12-07,4608,109880.56
