In [1]:
import pandas as pd
import numpy as np
from catnip.fla_redshift import FLA_Redshift
from sqlalchemy import null
from datetime import datetime

from prefect.blocks.system import Secret
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

In [2]:
def get_redshift_credentials() -> Dict:
    """Build the settings dictionary for the Redshift/S3 connection.

    Every sensitive value is read from a Prefect ``Secret`` block; the port,
    S3 subdirectory and verbosity flag are fixed literals.

    Returns:
        Dict: keys ``dbname``, ``host``, ``port``, ``user``, ``password``,
        ``aws_access_key_id``, ``aws_secret_access_key``, ``bucket``,
        ``subdirectory`` and ``verbose``.
    """

    def _secret(block_name: str):
        # Load the named Secret block and return its stored value.
        return Secret.load(block_name).get()

    return {
        # Warehouse connection
        "dbname": _secret("stellar-redshift-db-name"),
        "host": _secret("stellar-redshift-host"),
        "port": 5439,
        "user": _secret("stellar-redshift-user-name"),
        "password": _secret("stellar-redshift-password"),
        # S3 access
        "aws_access_key_id": _secret("fla-s3-aws-access-key-id-east-1"),
        "aws_secret_access_key": _secret("fla-s3-aws-secret-access-key-east-1"),
        "bucket": _secret("fla-s3-bucket-name-east-1"),
        "subdirectory": "us-east-1",
        # Client behaviour
        "verbose": False,
    }

# Resolve the credentials on a single worker thread and block until done.
# NOTE(review): presumably this keeps Secret.load() off the notebook's own
# thread/event loop — confirm that is the reason for the indirection.
with ThreadPoolExecutor(max_workers=1) as pool:
    credentials_future = pool.submit(get_redshift_credentials)
    rs_creds = credentials_future.result()

In [3]:
# Historical parking: the CTE counts 'prepaid' and 'credit' transactions per
# event datetime / lot group (joined to lot capacity); the outer query sums
# them into prepaid_cars, onsite_cars and total_cars, dropping rows with no
# event datetime or an 'Unknown' lot group.
q = """
with transaction_type as
    (select
        event_datetime,
        parkhub_v_transactions.location_group,
        capacity,
        transaction_type,
        case
            when transaction_type = 'prepaid' then count(*)
            else 0
        end as prepaid_cars,
        case
            when transaction_type = 'credit' then count(*)
            else 0
        end as onsite_cars
    from
        custom.parkhub_v_transactions
    left join
        custom.ctp_parking_capacities on parkhub_v_transactions.location_group = ctp_parking_capacities.location_group
    group by
        event_datetime,
        parkhub_v_transactions.location_group,
        capacity,
        transaction_type)
select
    date(event_datetime) as event_date,
    location_group,
    capacity,
    sum(prepaid_cars) as prepaid_cars,
    sum(onsite_cars) as onsite_cars,
    sum(prepaid_cars)+sum(onsite_cars) as total_cars
from
    transaction_type
where
    event_datetime is not Null
    and location_group != 'Unknown'
group by
    event_datetime,
    location_group,
    capacity
order by
    event_datetime,
    location_group
"""

historical_parking_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

# Integer code per lot group; any group not listed (including missing values)
# is coded 0. Done as a vectorized lookup instead of a row-by-row apply().
# NOTE(review): 'Executive' is not listed here and therefore codes to 0, while
# the upcoming-games cell codes it as 5 — confirm this difference is intended.
historical_location_codes = {'Club': 4, 'Garage': 3, 'General': 2, 'Valet': 1}
historical_parking_info['location_num'] = (
    historical_parking_info['location_group']
    .map(historical_location_codes)
    .fillna(0)
    .astype('int64')
)

In [4]:
# Game metadata (tier, day of week, start time, total tickets) for games whose
# tier is one of A-E, keyed by event date.
q = """
SELECT
    date(cth_game_descriptions.event_date) as event_date,
    tier,
    day_of_week,
    start_time,
    total_tickets
FROM
    custom.cth_v_historical_attendance_summary
LEFT JOIN
    custom.cth_game_descriptions on cth_v_historical_attendance_summary.event_date = cth_game_descriptions.event_date
WHERE
    tier in ('A','B','C','D','E')
"""

all_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

# weekend = 1 for Fri/Sat/Sun games, 0 otherwise (vectorized membership test).
all_game_info['weekend'] = (
    all_game_info['day_of_week'].isin(['Fri', 'Sat', 'Sun']).astype('int64')
)

# start_time_num buckets the start time: 1 for the 12:30 PM-3:30 PM starts
# listed below, 2 for the 4:00/5:00/6:00 PM starts, 0 for every other value
# (including missing ones).
start_time_codes = {
    '12:30 PM': 1, '12:45 PM': 1, '1:00 PM': 1, '3:00 PM': 1, '3:30 PM': 1,
    '4:00 PM': 2, '5:00 PM': 2, '6:00 PM': 2,
}
all_game_info['start_time_num'] = (
    all_game_info['start_time']
    .map(start_time_codes)
    .fillna(0)
    .astype('int64')
)

In [5]:
# Upcoming events (event_datetime >= current_date) from
# custom.ctp_v_ticket_2425: per event and lot group, prepaid_cars is the sum of
# paid and comp seats, with lot capacity joined in.
q = """
select
    date(event_datetime) as event_date,
    ctp_v_ticket_2425.location_group,
    capacity,
    sum(paid_seats)+sum(comp_seats) as prepaid_cars
from
    custom.ctp_v_ticket_2425
left join
    custom.ctp_parking_capacities on ctp_v_ticket_2425.location_group = ctp_parking_capacities.location_group
where
    event_datetime >= current_date
group by
    event_datetime,
    ctp_v_ticket_2425.location_group,
    capacity
"""

upcoming_game_info = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

# Integer code per lot group; any group not listed (including missing values)
# is coded 0. Done as a vectorized lookup instead of a row-by-row apply().
upcoming_location_codes = {
    'Executive': 5, 'Club': 4, 'Garage': 3, 'General': 2, 'Valet': 1,
}
upcoming_game_info['location_num'] = (
    upcoming_game_info['location_group']
    .map(upcoming_location_codes)
    .fillna(0)
    .astype('int64')
)

In [6]:
# Attach game metadata to every historical parking row (left join on date).
# NOTE(review): assumes all_game_info has at most one row per event_date;
# duplicates there would multiply parking rows — confirm.
total_table = historical_parking_info.merge(all_game_info, how='left', on='event_date')

# Keep only tier A-E games and the four modelled lot groups. Rows with no
# matching game have a missing tier and are dropped by the tier filter.
training_rows = (
    total_table['tier'].isin(['A', 'B', 'C', 'D', 'E'])
    & total_table['location_group'].isin(['Club', 'Garage', 'General', 'Valet'])
)
# Explicit copy: a column is added to this frame later, and writing to a
# boolean-filtered slice raises SettingWithCopyWarning on pandas < 3.
total_table = total_table.loc[training_rows].copy()

# Model inputs and target.
x_train = total_table[['capacity', 'location_num', 'total_tickets', 'weekend', 'start_time_num']]
y_train = total_table[['prepaid_cars']]

In [7]:
# Attach game metadata to every upcoming parking row (left join on date).
# NOTE(review): assumes all_game_info has at most one row per event_date;
# duplicates there would multiply parking rows — confirm.
total_future_table = upcoming_game_info.merge(all_game_info, how='left', on='event_date')

# Keep only tier A-E games and the four modelled lot groups. Rows with no
# matching game have a missing tier and are dropped by the tier filter.
future_rows = (
    total_future_table['tier'].isin(['A', 'B', 'C', 'D', 'E'])
    & total_future_table['location_group'].isin(['Club', 'Garage', 'General', 'Valet'])
)
# Explicit copy: a column is added to this frame later, and writing to a
# boolean-filtered slice raises SettingWithCopyWarning on pandas < 3.
total_future_table = total_future_table.loc[future_rows].copy()

# Same feature columns as the training set, plus the currently known prepaid count.
x_test = total_future_table[['capacity', 'location_num', 'total_tickets', 'weekend', 'start_time_num']]
y_test = total_future_table[['prepaid_cars']]

In [24]:
# Fit a degree-2 polynomial regression of prepaid_cars on the training features.
# NOTE: despite its name, `scalar` is a PolynomialFeatures expander, not a
# scaler (an earlier StandardScaler line was left disabled here); the name is
# kept because the prediction cell reuses it.
scalar = PolynomialFeatures(degree=2, include_bias=False)
poly_features = scalar.fit_transform(x_train)

# Flatten the single-column target frame to the 1-D array used for fitting.
train_target = y_train.to_numpy().ravel()

polynomial = LinearRegression()
polynomial.fit(poly_features, train_target)

# In-sample predictions, stored next to the actuals for inspection.
predicted = polynomial.predict(poly_features)
total_table['predicted_parking'] = predicted

In [25]:
# Expand the upcoming-game features with the transformer that was fitted on
# the training data. Use transform(), not fit_transform(): preprocessing must
# never be re-fitted on the data being predicted, otherwise a data-dependent
# transformer (e.g. a scaler) would produce features inconsistent with the
# ones the regression was trained on.
poly_features2 = scalar.transform(x_test)

# Predict with the fitted regression and store the result per upcoming row.
predicted = polynomial.predict(poly_features2)

total_future_table['predicted_parking'] = predicted

In [26]:
# Preview: the first 30 upcoming rows in event-date order.
total_future_table.sort_values('event_date').iloc[:30]

Unnamed: 0,event_date,location_group,capacity,prepaid_cars,location_num,tier,day_of_week,start_time,total_tickets,weekend,start_time_num,predicted_parking
25,2024-10-19,General,4360,2344,2,D,Sat,6:00 PM,18604.0,1.0,2.0,1626.688094
219,2024-10-19,Garage,200,102,3,D,Sat,6:00 PM,18604.0,1.0,2.0,111.290992
148,2024-10-19,Club,1861,1488,4,D,Sat,6:00 PM,18604.0,1.0,2.0,933.920106
169,2024-10-19,Valet,292,368,1,D,Sat,6:00 PM,18604.0,1.0,2.0,109.036899
255,2024-10-22,Valet,292,321,1,E,Tue,6:30 PM,15676.0,0.0,0.0,81.705453
253,2024-10-22,Club,1861,1250,4,E,Tue,6:30 PM,15676.0,0.0,0.0,627.393724
124,2024-10-22,General,4360,1545,2,E,Tue,6:30 PM,15676.0,0.0,0.0,1169.853089
254,2024-10-22,Garage,200,96,3,E,Tue,6:30 PM,15676.0,0.0,0.0,5.831593
23,2024-11-07,Club,1861,1216,4,E,Thu,7:00 PM,16051.0,0.0,0.0,657.208414
20,2024-11-07,General,4360,1474,2,E,Thu,7:00 PM,16051.0,0.0,0.0,1219.402543


1. Get historical totals by lot.
    Also include for each lot: lot_number, day_of_week, start_time, ticket_out, historical_tier_show_rate, current_prepaid, spots_remaining

3. Onsite Model
    