In [5]:
import pandas as pd
import numpy as np
from datetime import datetime
from prefect.blocks.system import Secret
from catnip.fla_redshift import FLA_Redshift
from typing import Dict
from concurrent.futures import ThreadPoolExecutor
import openpyxl 
import calendar

In [2]:
def get_redshift_credentials() -> Dict:
    """Build the keyword arguments that get unpacked into ``FLA_Redshift``.

    Most values are read from Prefect ``Secret`` blocks; the port, the
    ``subdirectory`` and the ``verbose`` flag are fixed literals.

    Returns:
        Dict: mapping with the keys ``dbname``, ``host``, ``port``, ``user``,
        ``password``, ``aws_access_key_id``, ``aws_secret_access_key``,
        ``bucket``, ``subdirectory`` and ``verbose``.
    """
    def _secret(block_name):
        # Load the named Secret block and unwrap the value it stores.
        return Secret.load(block_name).get()

    credentials = {}

    # Warehouse connection settings.
    credentials["dbname"] = _secret("stellar-redshift-db-name")
    credentials["host"] = _secret("stellar-redshift-host")
    credentials["port"] = 5439
    credentials["user"] = _secret("stellar-redshift-user-name")
    credentials["password"] = _secret("stellar-redshift-password")

    # AWS / bucket settings.
    credentials["aws_access_key_id"] = _secret("fla-s3-aws-access-key-id-east-1")
    credentials["aws_secret_access_key"] = _secret("fla-s3-aws-secret-access-key-east-1")
    credentials["bucket"] = _secret("fla-s3-bucket-name-east-1")
    credentials["subdirectory"] = "us-east-1"

    credentials["verbose"] = False

    return credentials

# Resolve the credentials on a single worker thread and block until they are ready.
# NOTE(review): presumably done so the Prefect secret loader runs outside the
# notebook's own event loop -- confirm before simplifying to a direct call.
with ThreadPoolExecutor(max_workers=1) as executor:
    credentials_future = executor.submit(get_redshift_credentials)
    rs_creds = credentials_future.result()

In [4]:
# One row per opponent with the largest ``original_six_plus_extra`` value found
# in custom.cth_game_descriptions, ordered by opponent name. The aggregate is
# not aliased; the result column is addressed as ``max`` below.
q = """
select 
    full_opponent, max(original_six_plus_extra)
from 
    custom.cth_game_descriptions
group by 
    full_opponent
order by 
    full_opponent"""

# Run the query through the warehouse client built from the stored credentials.
tier_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)
# Manual override: force the ``max`` value of the row labelled 29 to zero.
# NOTE(review): the row is addressed by index label, so which opponent this hits
# depends on the query result -- confirm the intended team and prefer selecting
# it by ``full_opponent``.
tier_df.at[29,'max'] = 0.00

In [48]:
# Load the 'final' sheet of the budget-by-game workbook.
# NOTE(review): absolute path on one user's desktop -- the notebook only runs on
# that machine; consider a configurable data directory.
df = pd.read_excel("C:\\Users\\riffere\\Desktop\\new_budget_by_game_2324.xlsx", sheet_name = 'final')

# game day info
# Disabled start-time features (early vs. evening); nothing below is executed.
# df['start_time_num'] = df.apply(lambda row: 1 if datetime.strptime(row['start_time'][:-3], "%H:%M") <= datetime.strptime('5:00', "%H:%M")
#                     or datetime.strptime(row['start_time'][:-3], "%H:%M") == datetime.strptime('12:30', "%H:%M") else 2, axis = 1)
# df['is_early'] = df.apply(lambda row: 1 if row['start_time_num'] == 1 else 0, axis = 1)
# df['is_evening'] = df.apply(lambda row: 1 if row['start_time_num'] == 2 else 0, axis = 1)

# Normalise ``event_date`` to plain ``datetime.date`` objects.
df['event_date'] = pd.to_datetime(df['event_date']).dt.date

# Datetime view of the same column, used only to derive the features below.
event_ts = pd.to_datetime(df['event_date'])

# week_day buckets: 1 = Saturday, 2 = Friday or Sunday, 3 = Monday-Thursday.
# ``dt.dayofweek`` counts Monday as 0, so Friday/Saturday/Sunday are 4/5/6;
# every day missing from the mapping falls through to bucket 3.
df['week_day'] = event_ts.dt.dayofweek.map({5: 1, 4: 2, 6: 2}).fillna(3).astype('int64')
df['is_fri_sun'] = (df['week_day'] == 2).astype('int64')
df['is_sat'] = (df['week_day'] == 1).astype('int64')
df['is_weekday'] = (df['week_day'] == 3).astype('int64')

# trimester buckets: 1 = September-November, 2 = December-January,
# 3 = every other month (February-August).
df['trimester'] = event_ts.dt.month.map({9: 1, 10: 1, 11: 1, 12: 2, 1: 2}).fillna(3).astype('int64')
df['is_trimester_1'] = (df['trimester'] == 1).astype('int64')
df['is_trimester_2'] = (df['trimester'] == 2).astype('int64')
df['is_trimester_3'] = (df['trimester'] == 3).astype('int64')

# Second copy of the game date under the name the density step reads.
# (The original cell also recomputed ``week_day`` and ``trimester`` here with
# identical formulas; computing them once gives the same columns.)
df['event_date_date'] = event_ts.dt.date

def _thanksgiving_date(year):
    """Return US Thanksgiving (the fourth Thursday of November) for ``year`` as a date."""
    # Day of month of the first Thursday, then three weeks on.
    first_thursday = 1 + (calendar.THURSDAY - calendar.weekday(year, 11, 1)) % 7
    return datetime(year, 11, first_thursday + 21).date()


def _in_thanksgiving_window(game_day):
    """Return 1 when ``game_day`` is within three days of that year's Thanksgiving, else 0."""
    # Real date subtraction, so a Nov 28 Thanksgiving also reaches Dec 1.
    return int(abs((game_day - _thanksgiving_date(game_day.year)).days) <= 3)


# Day-of-month lookup kept under its original name with the same 2017-2026
# contents; the flags below no longer depend on it, so other years work too.
thanksgiving = {year: _thanksgiving_date(year).day for year in range(2017, 2027)}

game_days = df['event_date']

df['is_thanksgiving'] = game_days.map(_in_thanksgiving_window).astype('int64')

# Fixed windows: December 21-27 for Christmas, December 29 - January 4 for New Year.
df['is_christmas'] = game_days.map(
    lambda d: int(d.month == 12 and 21 <= d.day <= 27)
).astype('int64')
df['is_nye'] = game_days.map(
    lambda d: int((d.month == 12 and 29 <= d.day <= 31) or (d.month == 1 and 1 <= d.day <= 4))
).astype('int64')

# A game is a holiday game when any of the three flags is set.
df['is_holiday'] = (df[['is_thanksgiving', 'is_christmas', 'is_nye']] == 1).any(axis=1).astype('int64')

# density
# One 'MM-DD' label per row for a 365-day year ('01-01' ... '12-31').
# 2023 is used only because it is not a leap year, so no '02-29' label appears.
month_day_labels = list(
    pd.date_range('2023-01-01', '2023-12-31', freq='D').strftime('%m-%d')
)
calendar_df = pd.DataFrame({'days_of_the_year': month_day_labels})
# Array of every game date in the frame; get_range reads this module-level name
# when it counts the games around a given date.
event_dates = df['event_date_date'].values
def get_range(date : datetime.date, calendar_df_active, events = None):
    """Count the games that fall within three days either side of ``date``.

    The window is the seven calendar days from ``date - 3`` to ``date + 3``
    inclusive, so a game on ``date`` itself is counted.

    Args:
        date: the game date the window is centred on.
        calendar_df_active: unused; kept so existing calls that pass the
            month-day lookup table keep working.
        events: collection of game dates to search. Defaults to the
            module-level ``event_dates``.

    Returns:
        int: number of days in the window that appear in ``events``.
    """
    # Local import: the notebook's import cell only brings in ``datetime``.
    from datetime import timedelta

    if events is None:
        events = event_dates

    # Real date arithmetic keeps the window correct across New Year (the days
    # before Jan 1 belong to the previous year, the days after Dec 31 to the
    # next one) and in leap years, with no month-day string handling.
    return sum(
        1
        for offset in range(-3, 4)
        if (date + timedelta(days=offset)) in events
    )
# Games in the window around each game date, as counted by get_range.
df['density'] = df['event_date'].map(lambda game_day: get_range(game_day, calendar_df))

# A game is "dense" when at least three games share its window.
crowded = df['density'] >= 3
df['is_dense'] = crowded.astype('int64')
df['is_not_dense'] = (~crowded).astype('int64')

# Manual override: mark the row labelled 0 as a holiday game.
df.at[0, 'is_holiday'] = 1

# Rename the opponent column to the key used by tier_df, then left-join so
# every game row is kept and picks up the per-opponent ``max`` column.
df = (
    df.rename(columns={'Away': 'full_opponent'})
      .merge(tier_df, how='left', on='full_opponent')
)
df

Unnamed: 0,event_date,full_opponent,dow,tier,Premier,total_tickets,predicted_attendance,predicted_parking,predicted_revenue,week_day,...,is_trimester_3,event_date_date,is_thanksgiving,is_christmas,is_nye,is_holiday,density,is_dense,is_not_dense,max
0,2023-10-07,Tampa Bay Lightning,Sat,E,,10000.0,7500.0,2000.0,542433,1,...,0,2023-10-07,0,0,0,1,1,0,1,1.0
1,2023-10-19,Toronto Maple Leafs,Thu,B,P,18438.947202,15858.783478,4278.630325,1762950,3,...,0,2023-10-19,0,0,0,0,2,0,1,1.5
2,2023-10-21,Vancouver Canucks,Sat,D,P,15352.125554,12547.960776,3443.835909,1153420,1,...,0,2023-10-21,0,0,0,0,3,1,0,0.0
3,2023-10-24,San Jose Sharks,Tue,E,,12011.0,9480.923138,3134.454349,642828,3,...,0,2023-10-24,0,0,0,0,2,0,1,0.0
4,2023-10-28,Seattle Kraken,Sat,C,P,16821.002576,14611.468643,3810.502458,1345504,1,...,0,2023-10-28,0,0,0,0,1,0,1,0.0
5,2023-11-06,Columbus Blue Jackets,Mon,E,,12011.0,9480.923138,3134.454349,667828,3,...,0,2023-11-06,0,0,0,0,1,0,1,0.0
6,2023-11-10,Carolina Hurricanes,Fri,D,P,15352.125554,12547.960776,3293.989542,1178420,2,...,0,2023-11-10,0,0,0,0,2,0,1,0.0
7,2023-11-12,Chicago Blackhawks,Sun,D,P,15352.125554,12547.960776,3443.835909,1253420,2,...,0,2023-11-12,0,0,0,0,2,0,1,1.0
8,2023-11-20,Edmonton Oilers,Mon,D,,14714.677329,12023.368347,3182.808072,881301,3,...,0,2023-11-20,1,0,0,1,2,0,1,0.0
9,2023-11-22,Boston Bruins,Wed,B,P,18438.947202,15858.783478,4278.630325,1762950,3,...,0,2023-11-22,1,0,0,1,3,1,0,1.0


In [51]:
# Per-opponent attributes. Each list is positional:
#   [conf, div, original_six_plus_extra, twitter_followers]
teams = {
    'Boston Bruins': [1, 1, 1, 1.5],
    'Buffalo Sabres': [1, 1, 0, 0.889],
    'Detroit Red Wings': [1, 1, 0.75, 1.2],
    'Florida Panthers': [1, 1, 0, 0.426],
    'Montreal Canadiens': [1, 1, 0.75, 1.6],
    'Ottawa Senators': [1, 1, 0, 0.608],
    'Tampa Bay Lightning': [1, 1, 1.5, 0.807],
    'Toronto Maple Leafs': [1, 1, 1, 2],
    'Carolina Hurricanes': [1, 2, 0, 0.543],
    'Columbus Blue Jackets': [1, 2, 0, 0.514],
    'New Jersey Devils': [1, 2, 0, 0.755],
    'New York Islanders': [1, 2, 0, 0.587],
    'New York Rangers': [1, 2, 1, 1.4],
    'Philadelphia Flyers': [1, 2, 1, 1.5],
    'Pittsburgh Penguins': [1, 2, 0.5, 1.8],
    'Washington Capitals': [1, 2, 0.75, 0.809],
    'Arizona Coyotes': [0, 3, 0, 0.399],
    'Chicago Blackhawks': [0, 3, 1, 2.4],
    'Colorado Avalanche': [0, 3, 0, 0.634],
    'Dallas Stars': [0, 3, 0, 1.1],
    'Minnesota Wild': [0, 3, 0, 0.739],
    'Nashville Predators': [0, 3, 0, 0.638],
    'St. Louis Blues': [0, 3, 0, 0.799],
    'Winnipeg Jets': [0, 3, 0, 0.581],
    'Anaheim Ducks': [0, 4, 0, 0.611],
    'Calgary Flames': [0, 4, 0, 0.675],
    'Edmonton Oilers': [0, 4, 0, 0.908],
    'Los Angeles Kings': [0, 4, 0, 1.1],
    'San Jose Sharks': [0, 4, 0, 0.817],
    'Seattle Kraken': [0, 4, 0, 0.322],
    'Vancouver Canucks': [0, 4, 0, 1],
    'Vegas Golden Knights': [0, 4, 0.75, 0.520],
}

# Column names in the same order as the positions inside each list above.
team_columns = ['conf', 'div', 'original_six_plus_extra', 'twitter_followers']

# Look every game's opponent up in the table; an opponent that is missing from
# ``teams`` raises KeyError rather than producing a blank value.
for position, column in enumerate(team_columns):
    df[column] = df['full_opponent'].map(
        lambda opponent, position=position: teams[opponent][position]
    )

print(df[team_columns])

    conf  div  original_six_plus_extra  twitter_followers
0      1    1                     1.50              0.807
1      1    1                     1.00              2.000
2      0    4                     0.00              1.000
3      0    4                     0.00              0.817
4      0    4                     0.00              0.322
5      1    2                     0.00              0.514
6      1    2                     0.00              0.543
7      0    3                     1.00              2.400
8      0    4                     0.00              0.908
9      1    1                     1.00              1.500
10     0    3                     0.00              0.581
11     1    2                     0.00              0.587
12     0    3                     0.00              1.100
13     1    2                     0.50              1.800
14     0    3                     0.00              0.799
15     0    4                     0.75              0.520
16     1    2 