In [1]:
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Dict

import numpy as np
import pandas as pd
from prefect.blocks.system import Secret
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sqlalchemy import null

from catnip.fla_redshift import FLA_Redshift

In [2]:
def get_redshift_credentials() -> Dict:
    """Assemble the keyword arguments that are splatted into ``FLA_Redshift``.

    Every sensitive value is read from a Prefect ``Secret`` block at call
    time; the port, the S3 subdirectory and the verbosity flag are literals.

    Returns:
        Dict: warehouse settings (``dbname``, ``host``, ``port``, ``user``,
        ``password``), S3 settings (``aws_access_key_id``,
        ``aws_secret_access_key``, ``bucket``, ``subdirectory``) and
        ``verbose``.
    """

    def _secret(block_name):
        # load the named Prefect Secret block and unwrap its value
        return Secret.load(block_name).get()

    warehouse = {
        "dbname": _secret("stellar-redshift-db-name"),
        "host": _secret("stellar-redshift-host"),
        "port": 5439,
        "user": _secret("stellar-redshift-user-name"),
        "password": _secret("stellar-redshift-password"),
    }

    s3 = {
        "aws_access_key_id": _secret("fla-s3-aws-access-key-id-east-1"),
        "aws_secret_access_key": _secret("fla-s3-aws-secret-access-key-east-1"),
        "bucket": _secret("fla-s3-bucket-name-east-1"),
        "subdirectory": "us-east-1",
    }

    return {**warehouse, **s3, "verbose": False}

# Resolve the credentials on a single worker thread and block until they are ready.
# NOTE(review): presumably done off the main thread because Secret.load does not
# behave synchronously inside the notebook's running event loop -- confirm.
with ThreadPoolExecutor(max_workers = 1) as pool:
    rs_creds = pool.submit(get_redshift_credentials).result()

In [94]:
# Nightly

def extract_nightly():
    """Query nightly ticket sales by season, event date and arena level.

    The query stacks the historical table (seasons 2021-22 and 2022-23) on top
    of the 2023-24 ticket view, keeping only 'Singles', 'Nightly Suites' and
    'Flex' ticket types, and maps each ``pc_one`` code to an arena level
    (codes that match no list become NULL). It is then joined to
    ``custom.cth_game_descriptions`` by date, restricted to tiers A-E, and
    aggregated. ``is_day_of_week`` is 2 for 'Sat', 1 for 'Fri'/'Sun', else 0.

    Returns:
        Whatever ``FLA_Redshift.query_warehouse`` returns for the query (used
        downstream as a DataFrame), ordered by season, arena level, event date.
    """

    nightly_sql = """
    WITH temp AS
        (SELECT
            event_date, paid_seats, comp_seats, gross_revenue,
            case
                when pc_one in ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', '1', '2', '3', '4', '5', '6', '7', '8') then 'Lowers'
                when pc_one in ('K', 'L', 'M') then 'Clubs'
                when pc_one in ('N', 'O', 'P', 'Q', 'R', 'S', 'T', 'Y') then 'Uppers'
                when pc_one in ('U', 'V', 'W') then 'Suites'
                when pc_one in ('X', 'Z') then 'Premium'
                else Null
            end as arena_level_internal
        FROM
            custom.cth_historical_all_1718_2223
        WHERE
            ticket_type in ('Singles', 'Nightly Suites', 'Flex')
            and season in ('2021-22', '2022-23')
        UNION ALL
        SELECT
            date(event_datetime), paid_seats, comp_seats, gross_revenue,
            case
                when pc_one in ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', '1', '2', '3', '4', '5', '6', '7', '8') then 'Lowers'
                when pc_one in ('K', 'L', 'M') then 'Clubs'
                when pc_one in ('N', 'O', 'P', 'Q', 'R', 'S', 'T', 'Y') then 'Uppers'
                when pc_one in ('U', 'V', 'W') then 'Suites'
                when pc_one in ('X', 'Z') then 'Premium'
                else Null
            end as arena_level_internal
        FROM
            custom.cth_v_ticket_2324
        WHERE
            ticket_type in ('Singles', 'Nightly Suites', 'Flex'))

    SELECT
        season, temp.event_date, arena_level_internal, sum(paid_seats) AS "paid_seats", sum(comp_seats) AS "comp_seats",
        sum(gross_revenue) AS "gross_revenue", trimester, original_six_plus_extra, is_dense, is_holiday,
        case
            when day_of_week = 'Sat' then 2
            when day_of_week in ('Fri','Sun') then 1
            else 0
        end as is_day_of_week
    FROM
        temp
    LEFT JOIN
        custom.cth_game_descriptions on date(temp.event_date) = date(cth_game_descriptions.event_date)
    WHERE
        tier in ('A','B','C','D','E')
    GROUP BY
        season, temp.event_date, arena_level_internal, trimester, original_six_plus_extra, is_dense, is_holiday, is_day_of_week
    ORDER BY
        season, arena_level_internal, event_date
    """

    # run the query against the warehouse and hand the result straight back
    warehouse = FLA_Redshift(**rs_creds)
    return warehouse.query_warehouse(sql_string = nightly_sql)

def run_model(total_df, loc):
    """Fit an SVR on past seasons for one arena level and project 2023-24 paid seats.

    Args:
        total_df: nightly sales frame containing ``arena_level_internal``,
            ``season``, ``paid_seats`` and the five feature columns listed below.
        loc: arena level to model (compared against ``arena_level_internal``).

    Returns:
        DataFrame with one row per 2023-24 row of ``total_df`` at ``loc``, in
        the same order and with a fresh 0..n-1 index. Column ``0`` holds the
        projected paid seats, ``type`` is always ``'nightly'`` and ``loc``
        echoes the argument.
    """
    feature_cols = ['is_day_of_week', 'trimester', 'original_six_plus_extra', 'is_dense', 'is_holiday']

    # restrict to the requested arena level, then split by season
    level_df = total_df[total_df['arena_level_internal'] == loc]
    history = level_df[level_df['season'].isin(['2021-22', '2022-23'])]
    upcoming = level_df[level_df['season'] == '2023-24']

    # the target is modelled on a log1p scale and mapped back with expm1 below
    log_target = np.log1p(history[['paid_seats']])

    model = SVR(gamma = 'auto').fit(history[feature_cols], np.array(log_target).ravel())
    log_projection = model.predict(upcoming[feature_cols])

    projection = pd.DataFrame(np.expm1(log_projection))
    projection['type'] = 'nightly'
    projection['loc'] = loc

    return projection

def transform_nightly(df_nightly):
    """Project 2023-24 nightly paid seats per game from the per-level SVR models.

    ``run_model`` is called once per arena level and its projections are
    written onto the matching 2023-24 rows by arena-level mask. This does not
    depend on ``df_nightly`` being sorted in the same order as the location
    list; rows whose arena level is not in the list (e.g. NULL) keep a missing
    projection and therefore contribute 0 to the per-game sum.

    Args:
        df_nightly: frame from ``extract_nightly`` (needs ``season``,
            ``event_date``, ``arena_level_internal`` plus the columns
            ``run_model`` reads).

    Returns:
        DataFrame with one row per 2023-24 ``event_date`` and the columns
        ``event_date``, ``projected_tickets`` (sum over arena levels) and
        ``type`` (always ``'nightly'``).

    Raises:
        ValueError: if ``run_model`` returns a different number of projections
            than there are 2023-24 rows for that arena level.
    """
    locations = ['Clubs', 'Lowers', 'Premium', 'Suites', 'Uppers']

    # fresh 0..n-1 index so the boolean masks below address rows unambiguously
    df2324 = df_nightly[df_nightly['season'] == '2023-24'].reset_index(drop = True)
    df2324['projected_tickets'] = np.nan

    # run ticket model by location
    for loc in locations:
        is_loc = df2324['arena_level_internal'] == loc
        if not is_loc.any():
            # no 2023-24 rows at this level, so there is nothing to predict
            continue
        proj = run_model(df_nightly, loc)
        # run_model keeps the row order of df_nightly for this level and season,
        # which is the order of the masked rows here (column 0 = projection)
        df2324.loc[is_loc, 'projected_tickets'] = proj[0].to_numpy()

    # total projection per game across arena levels (missing values are skipped)
    df2324_final = df2324.groupby(by = 'event_date')[['projected_tickets']].sum().reset_index()
    df2324_final['type'] = 'nightly'

    return df2324_final

In [118]:
# Plans

def extract_plans():
    """Query current 2023-24 plan sales per game, arena level and tier.

    Only 'Full', 'Premier', 'Annual Suites', 'Sponsor' and 'Trade' ticket types
    on tier A-E games are included. ``num_seats`` is ``count(*)`` of the
    matching ticket rows and ``current_ATP`` is revenue divided by that count.

    Returns:
        Whatever ``FLA_Redshift.query_warehouse`` returns for the query (used
        downstream as a DataFrame).
    """

    plans_sql = """ 
    SELECT
        date(c.event_datetime) as event_date, c.arena_level_internal,
        g.tier, count(*) as "num_seats", sum(gross_revenue) as "gross_revenue", sum(gross_revenue)/count(*) as "current_ATP"
    FROM
        custom.cth_v_ticket_2324 c
    LEFT JOIN
        custom.cth_game_descriptions g on date(c.event_datetime) = date(g.event_date)
    WHERE
        c.ticket_type in ('Full', 'Premier', 'Annual Suites', 'Sponsor', 'Trade')
        and tier in ('A','B','C','D','E')
    GROUP BY
        c.event_datetime, c.arena_level_internal, g.tier
    ORDER BY
        c.event_datetime;
    """

    # run the query against the warehouse and hand the result straight back
    warehouse = FLA_Redshift(**rs_creds)
    return warehouse.query_warehouse(sql_string = plans_sql)

def get_budget_proj():
    """Fetch the summed 2023-24 budget goal for the plan ticket types.

    Returns:
        The first row (``.iloc[0]``) of the query result. The query selects a
        single ``sum(budget_goal)`` column, so callers index into the returned
        row to get the number (presumably a one-element Series -- confirm
        against ``query_warehouse``).
    """

    budget_sql = """
    SELECT
        sum(budget_goal)
    FROM
        custom.cth_budget_summary_2324
    WHERE
        ticket_type IN ('Full', 'Half/Premier', 'Annual Suites', 'Sponsor', 'Trade')
    """

    # total budgeted plan revenue; only the first result row is of interest
    budget_rows = FLA_Redshift(**rs_creds).query_warehouse(sql_string = budget_sql)
    first_row = budget_rows.iloc[0]

    return first_row

def transform_plans(df_plans):
    """Spread the remaining plan budget over games and arena levels.

    The remaining budget (``get_budget_proj()`` minus the revenue already in
    ``df_plans``) is allocated to each row as
    ``left * count_per_tier * percent_test * count_per_loc`` and added to the
    row's current plan revenue.

    Two warehouse queries are issued: the budget total and the per-game
    ``days_out`` share. ``days_out`` is computed with ``getdate()``, so the
    result depends on the day the function is run.

    Args:
        df_plans: frame from ``extract_plans`` (needs ``event_date``, ``tier``,
            ``arena_level_internal`` and ``gross_revenue``).

    Returns:
        ``df_plans`` with ``gross_revenue`` renamed to ``current_plans`` and the
        added columns ``num_per_tier``, ``count_per_tier``, ``count_per_loc``,
        ``left``, ``days_out``, ``percent_test`` and ``total_expected_revenue``.
        Because both merge inputs carry ``tier``, the tier columns come back as
        ``tier_x`` / ``tier_y``.
    """

    # get number of games by tier
    num_tier = pd.DataFrame({
        'tier': ['A', 'B', 'C', 'D', 'E'],
        'num_per_tier': [2, 6, 14, 10, 9],
    })

    # get percent revenue by tier
    count_df = pd.DataFrame({
        'tier': ['A', 'B', 'C', 'D', 'E'],
        'count_per_tier': [0.07, 0.16, 0.3, 0.25, 0.22],
    })

    # get percent revenue by game by location
    loc_per = pd.DataFrame({
        'arena_level_internal': ['Suites', 'Clubs', 'Uppers', 'Premium', 'Lowers'],
        'count_per_loc': [0.1821, 0.0851, 0.0571, 0.2509, 0.4247],
    })

    # budget still to be sold; get_budget_proj returns a one-element row, so
    # flatten to a plain scalar before using it in column arithmetic
    remaining_budget = get_budget_proj() - df_plans['gross_revenue'].sum()
    remaining_budget = float(np.ravel(remaining_budget)[0])

    # merge on plan info
    df_final = (
        df_plans
        .merge(num_tier, how = 'left', on = 'tier')
        .merge(count_df, how = 'left', on = 'tier')
        .merge(loc_per, how = 'left', on = 'arena_level_internal')
        .rename(columns = {'gross_revenue' : 'current_plans', 'location_specific': 'loc'})
    )
    df_final['left'] = remaining_budget

    # each game's share of the summed days-out within its tier
    q = """
    SELECT
        date(event_date) as event_date, tier, datediff(day, getdate(), cast(event_date as date)) AS "days_out",
        ratio_to_report("days_out") OVER (PARTITION BY tier) AS "percent_test"
    FROM
        custom.cth_game_descriptions
    WHERE
        season = '2023-24'
    ORDER BY
        event_date
    """
    percent_test = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

    # NOTE(review): both frames carry `tier`, so this merge yields tier_x / tier_y.
    # Kept as-is so the output columns do not change; merging on
    # ['event_date', 'tier'] would give a single `tier` column.
    df_final = df_final.merge(percent_test, how = 'left', on = 'event_date')

    # get projected revenue by game (current revenue plus allocated share of the remainder)
    df_final['total_expected_revenue'] = df_final['current_plans'] + (
        remaining_budget * df_final['count_per_tier'] * df_final['percent_test'] * df_final['count_per_loc']
    )

    # TODO: projected ticket counts are not implemented yet. The earlier draft divided
    # total_expected_revenue by current_atp, paced the difference to num_seats per day up
    # to the event date, capped 'Lounge 954' at 621 seats and tagged rows with type 'plans'.

    return df_final

In [119]:
# Pull current 2023-24 plan sales, then display the per-game, per-level plan revenue projection.
df_plans = extract_plans()
transform_plans(df_plans)

Unnamed: 0,event_date,arena_level_internal,tier_x,num_seats,current_plans,current_atp,num_per_tier,count_per_tier,count_per_loc,left,tier_y,days_out,percent_test,total_expected_revenue
0,2023-10-19,Clubs,B,1384,110672.00,79.965318,6,0.16,0.0851,-6578354.93,B,-207,0.316031,82364.866702
1,2023-10-19,Lowers,B,6172,590025.02,95.597054,6,0.16,0.4247,-6578354.93,B,-207,0.316031,448755.460519
2,2023-10-19,Premium,B,1244,349027.34,280.568601,6,0.16,0.2509,-6578354.93,B,-207,0.316031,265569.528665
3,2023-10-19,Suites,B,1192,158367.66,132.858775,6,0.16,0.1821,-6578354.93,B,-207,0.316031,97795.051614
4,2023-10-19,Uppers,B,1953,78316.60,40.100666,6,0.16,0.0571,-6578354.93,B,-207,0.316031,59323.212088
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,2024-04-16,Premium,B,1271,354787.32,279.140299,6,0.16,0.2509,-6578354.93,B,-27,0.041221,343901.518522
201,2024-04-16,Lowers,B,6213,593770.83,95.569102,6,0.16,0.4247,-6578354.93,B,-27,0.041221,575344.365720
202,2024-04-16,Clubs,B,1434,114987.89,80.186813,6,0.16,0.0851,-6578354.93,B,-27,0.041221,111295.655222
203,2024-04-16,Suites,B,1234,166699.13,135.088436,6,0.16,0.1821,-6578354.93,B,-27,0.041221,158798.354993


In [121]:
# Groups

def extract_groups():
    """Query group ticket sales per game, arena level and tier.

    The first half of the UNION reads the 2023-24 ticket view (season is the
    literal '2023-24'); the second half reads the historical table for seasons
    2021-22 and 2022-23 and derives the arena level from ``pc_one``. Games with
    tier 'PS' are excluded from both halves.

    ``tier`` is part of the result because ``transform_groups`` groups and
    merges on it; the query already grouped by ``g.tier``, so adding it to the
    SELECT list does not change the number of rows.

    Returns:
        Whatever ``FLA_Redshift.query_warehouse`` returns for the query (used
        downstream as a DataFrame) with the columns ``season``, ``event_date``,
        ``arena_level_internal``, ``tier``, ``num_seats``, ``gross_revenue``
        and ``current_atp``.
    """

    w = """
    SELECT
        '2023-24' as season, date(c.event_datetime) as event_date, c.arena_level_internal, g.tier,
        sum(paid_seats) as "num_seats", sum(gross_revenue) as "gross_revenue",
        sum(gross_revenue)/sum(paid_seats) as "current_ATP"
    FROM
        custom.cth_v_ticket_2324 c
    LEFT JOIN
        custom.cth_game_descriptions g on date(c.event_datetime) = date(g.event_date)
    WHERE
        c.ticket_type in ('Groups')
    AND
        tier != 'PS'
    GROUP BY
        c.event_datetime, c.arena_level_internal, g.tier
    UNION ALL
    SELECT
        g.season, c.event_date,
        case
            when pc_one in ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', '1', '2', '3', '4', '5', '6', '7', '8') then 'Lowers'
            when pc_one in ('K', 'L', 'M') then 'Clubs'
            when pc_one in ('N', 'O', 'P', 'Q', 'R', 'S', 'T', 'Y') then 'Uppers'
            when pc_one in ('U', 'V', 'W') then 'Suites'
            when pc_one in ('X', 'Z') then 'Premium'
            else Null
        end as arena_level_internal, g.tier,
        sum(paid_seats) as "num_seats", sum(gross_revenue) as "gross_revenue",
        sum(gross_revenue)/sum(paid_seats) as "current_ATP"
    FROM
        custom.cth_historical_all_1718_2223 c
    LEFT JOIN
        custom.cth_game_descriptions g on date(c.event_date) = date(g.event_date)
    WHERE
        c.ticket_type in ('Groups')
        and tier != 'PS'
        and g.season in ('2021-22','2022-23')
    GROUP BY
        c.event_date, arena_level_internal, g.tier, g.season
    ORDER BY
        event_date
    """

    # get groups
    df_groups = FLA_Redshift(**rs_creds).query_warehouse(sql_string = w)

    return df_groups

def transform_groups(df_groups, n_games = 41):
    """Attach historical group-sales averages to every row of ``df_groups``.

    Rows from seasons other than '2023-24' are summed per
    (``tier``, ``arena_level_internal``), divided by ``n_games`` and merged
    back onto all rows, including the 2023-24 ones.

    Args:
        df_groups: frame with ``season``, ``tier``, ``arena_level_internal``,
            ``gross_revenue`` and ``num_seats``.
        n_games: divisor applied to the historical totals. The default 41 is
            the value that used to be hard-coded (presumably home games per
            season -- TODO confirm; the historical rows pulled by
            ``extract_groups`` span two seasons).

    Returns:
        ``df_groups`` with the extra columns ``gross_revenue_avg`` and
        ``num_seats_avg``; they are missing for tier/level combinations that
        have no historical rows.

    Raises:
        ValueError: if ``n_games`` is not positive.
    """
    if n_games <= 0:
        raise ValueError("n_games must be positive")

    keys = ['tier', 'arena_level_internal']

    # totals over past seasons per tier and arena level, scaled by n_games
    df_groups_historical = df_groups[df_groups['season'] != '2023-24']
    df_groups_historical_agg = (
        df_groups_historical
        .groupby(by = keys)[['gross_revenue', 'num_seats']]
        .sum()
        .div(n_games)
        .rename(columns = {'gross_revenue': 'gross_revenue_avg', 'num_seats': 'num_seats_avg'})
        .reset_index()
    )

    return df_groups.merge(df_groups_historical_agg, how = 'left', on = keys)

In [143]:
# Secondary

def extract_secondary():
    """Query current 2023-24 secondary-market sales per game, arena level and tier.

    A fixed ``projected_total_revenue`` per tier (A-E) is supplied by the
    inline ``loge_projections`` CTE and joined on tier. In the result,
    ``gross_revenue`` is renamed to ``current_secondary`` (and
    ``location_specific`` to ``loc`` if such a column is present).

    Returns:
        The renamed query result (used downstream as a DataFrame).
    """

    secondary_sql = """
        WITH loge_projections AS
            (SELECT
                382744 AS "projected_total_revenue", 'A' AS "tier"
            UNION ALL
            SELECT
                248259 AS "projected_total_revenue", 'B' AS "tier"
            UNION ALL
            SELECT
                148164 AS "projected_total_revenue", 'C' AS "tier"
            UNION ALL
            SELECT
                96566 AS "projected_total_revenue", 'D' AS "tier"
            UNION ALL
            SELECT
                57341 AS "projected_total_revenue", 'E' AS "tier")

        SELECT
            date(c.event_datetime) as event_date, g.abbreviation, c.arena_level_internal,
            g.tier, sum(paid_seats) as "num_seats", sum(gross_revenue) as "gross_revenue", projected_total_revenue
        FROM
            custom.cth_v_ticket_2324 c
        LEFT JOIN
            custom.cth_game_descriptions g ON date(c.event_datetime) = date(g.event_date)
        LEFT JOIN
            loge_projections l ON g.tier = l.tier
        WHERE
            c.ticket_type = 'Secondary'
            and g.tier in ('A','B','C','D','E')
        GROUP BY
            c.event_datetime, c.arena_level_internal, g.tier, g.abbreviation, projected_total_revenue
    """

    # pull the raw rows, then apply the column renames on the way out
    secondary_raw = FLA_Redshift(**rs_creds).query_warehouse(sql_string = secondary_sql)
    column_names = {'gross_revenue' : 'current_secondary', 'location_specific': 'loc'}

    return secondary_raw.rename(columns = column_names)

def transform_secondary(df_secondary):
    """Return the projection columns of the secondary-market frame.

    NOTE(review): the step that should derive ``projected_tickets``,
    ``total_expected_revenue``, ``type`` and ``loc`` (group by event date, get
    percent of seats in by day and the daily add) has not been written, and
    the ``extract_secondary`` query does not select columns with those names.
    Until that step exists the function can only succeed on a frame that
    already carries them.

    Args:
        df_secondary: secondary-market frame.

    Returns:
        ``df_secondary`` restricted to ``event_date``, ``projected_tickets``,
        ``total_expected_revenue``, ``type`` and ``loc``.

    Raises:
        KeyError: if any of those columns is absent; the message names them.
    """
    output_cols = ['event_date', 'projected_tickets', 'total_expected_revenue', 'type', 'loc']

    # fail with a message that says what is missing and why, instead of a bare KeyError
    missing = [col for col in output_cols if col not in df_secondary.columns]
    if missing:
        raise KeyError(
            f"transform_secondary: df_secondary is missing {missing}; "
            "the projection step that should create them is not implemented yet"
        )

    return df_secondary[output_cols]

In [145]:
# Pull current secondary-market sales and display the raw frame (transform_secondary is not applied here).
df_secondary = extract_secondary()
df_secondary

Unnamed: 0,event_date,abbreviation,arena_level_internal,tier,num_seats,current_secondary,projected_total_revenue
0,2024-02-29,MTL,Uppers,C,1728,51317.07,148164
1,2023-12-02,NYI,Lowers,C,1255,105500.77,148164
2,2023-12-08,PIT,Lowers,C,1201,114304.39,148164
3,2024-02-08,WSH,Lowers,D,1273,76939.98,96566
4,2023-11-12,CHI,Lowers,D,1245,120604.23,96566
...,...,...,...,...,...,...,...
119,2024-03-28,NYI,Clubs,D,237,16025.33,96566
120,2024-03-30,DET,Clubs,C,237,21277.58,148164
121,2024-02-24,WSH,Clubs,C,237,22922.72,148164
122,2024-03-21,NAS,Clubs,E,237,12991.92,57341


In [None]:
# Concat

In [3]:
# Per-game paid seats across all ticket types: fit an SVR on 2021-22 / 2022-23 and
# predict 2023-24. tier_num runs A=1 ... E=5 (anything else 5); day_of_week_num is
# 1 for Fri/Sun, 2 for Sat, else 0.
q = """
with ticket_info as
    (select event_date, sum(paid_seats) as paid_seats, sum(gross_revenue) as gross_revenue
     from
         custom.cth_historical_all_1718_2223
     where
         season in ('2021-22', '2022-23')
     group by
         event_date
    UNION ALL
    select date(event_datetime) as event_date, sum(paid_seats) as paid_seats, sum(gross_revenue) as gross_revenue
     from
         custom.cth_v_ticket_2324
     group by
         event_date)
select
    season, ticket_info.event_date,
       case
           when tier = 'A' then 1
           when tier = 'B' then 2
           when tier = 'C' then 3
           when tier = 'D' then 4
           else 5
       end as tier_num,
       case
           when day_of_week in ('Fri', 'Sun') then 1
           when day_of_week = 'Sat' then 2
           else 0
       end as day_of_week_num, original_six_plus_extra, trimester, is_holiday, is_dense,
       sum(paid_seats) as paid_seats, sum(gross_revenue) as gross_revenue
from
    ticket_info
left join
        custom.cth_game_descriptions on date(ticket_info.event_date) = date(cth_game_descriptions.event_date)
where
    tier in ('A','B','C','D','E')
group by
    season, ticket_info.event_date, tier_num, day_of_week_num, original_six_plus_extra,
    trimester, is_holiday, is_dense
"""
df = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

feature_cols = ['tier_num', 'day_of_week_num', 'trimester', 'original_six_plus_extra', 'is_dense','is_holiday']
is_history = df['season'].isin(['2021-22','2022-23'])

# training data: past seasons, target on a log1p scale
xtrain = df.loc[is_history, feature_cols]
ytrain = np.log1p(df.loc[is_history, ['paid_seats']])

# rows to predict: the 2023-24 season
xtest_df = df[df['season'] == '2023-24'].reset_index(drop = True)
xtest = xtest_df[feature_cols]

svm = SVR(kernel= 'poly').fit(xtrain, np.array(ytrain).ravel())

# undo the log1p transform on the predictions
df2324_predicted = pd.DataFrame(np.expm1(svm.predict(xtest)))
xtest_df['predicted'] = df2324_predicted[0]

# show the tier B games (tier_num == 2)
xtest_df[xtest['tier_num'] == 2]

In [None]:
# 4. Predicted Attendance

# multiply projected tickets by historical show rate

def show_rate_query(seat_column, rate_alias):
    """Build the per-tier show-rate query for one seat column.

    The rate is ``sum(did_attend) / sum(<seat_column>)`` over historical rows
    (seasons 2019-20, 2021-22, 2022-23) where the seat column is non-zero.

    Args:
        seat_column: seat-count column of the historical table
            ('paid_seats' or 'comp_seats'). Interpolated into the SQL as an
            identifier, so only pass fixed, trusted names.
        rate_alias: name of the resulting rate column.

    Returns:
        The SQL text.
    """
    # The numerator is cast to float so the ratio is not truncated should both
    # sums be integer-typed (the column types are not visible in this notebook).
    return f"""
WITH a as
    (SELECT
        h.{seat_column}, h.did_attend, h.event_date
    FROM
        custom.cth_historical_all_1718_2223 h
    WHERE
        h.season in ('2019-20', '2021-22', '2022-23')
        and h.{seat_column} != 0)

SELECT
    g.tier, cast(sum(a.did_attend) as float)/sum(a.{seat_column}) as {rate_alias}
FROM
    a
LEFT JOIN
    custom.cth_game_descriptions g on date(a.event_date) = date(g.event_date)
GROUP BY
    tier
ORDER BY
    tier"""

q = show_rate_query('paid_seats', 'paid_rate')
paid_seats = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

q = show_rate_query('comp_seats', 'comp_rate')
comp_seats = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

# one row per tier with both show rates
attendance_df = paid_seats.merge(comp_seats, how = 'left', on = 'tier')

# NOTE(review): `predicted_df` is not created by any cell visible in this notebook.
# It must provide `tier`, `event_date`, `expected_final_paid_seats`,
# `expected_final_comp_seats`, `expected_final_gross_revenue` and
# `expected_final_seats`; on a fresh kernel this line raises NameError.
final_attendance_df = predicted_df.merge(right = attendance_df, how = 'left', on = 'tier')

# expected attendance = paid seats * paid show rate + comp seats * comp show rate
final_attendance_df['predicted_attendance'] = (
    final_attendance_df['expected_final_paid_seats'] * final_attendance_df['paid_rate']
    + final_attendance_df['expected_final_comp_seats'] * final_attendance_df['comp_rate']
)
just_attendance = final_attendance_df[['event_date', 'predicted_attendance', 'expected_final_gross_revenue', 'expected_final_seats']]

In [None]:
# 5. Predicted Parking

# polynomial model (built by Pavan), multiply by avg atp to get rev

# NOTE(review): `count(*)-1` yields 0 for a game with no parking rows (the LEFT JOIN
# leaves one NULL row) but N-1 for a game with N parking rows; counting the joined
# table's column instead would give N. Left unchanged pending confirmation.
q = """
WITH attendance as
    (SELECT
        '2022-23' as season, event_date, sum(entry) as attendance
    FROM
        custom.cth_attendance_scans_2223
    GROUP BY
        event_date
    ORDER BY
        event_date)

SELECT
    attendance.event_date, tier, coalesce(attendance,0) as ticket_scans,
    coalesce(sum(paid_amount),0) as parking_paid_amount, count(*)-1 as num_parking_transactions
FROM
    attendance
LEFT JOIN
    custom.Parkhub_v_transactions on attendance.event_date = Parkhub_v_transactions.event_date
LEFT JOIN
    custom.cth_game_descriptions on attendance.event_date = cth_game_descriptions.event_date
WHERE
    attendance.event_date > '2022-10-01'
    and tier in ('A','B','C','D','E')
    and attendance.event_date != '2022-11-09'
GROUP BY
    attendance.event_date, tier, ticket_scans
ORDER BY
    attendance.event_date, tier
"""

df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

# tier -> numeric feature shared by the training and prediction frames (unknown tiers -> 0)
TIER_TO_NUM = {'A': 5, 'B': 4, 'C': 3, 'D': 2, 'E': 1}

# Historical parking data clean up: weekend = Saturday (5) or Sunday (6)
df['weekend'] = pd.to_datetime(df['event_date']).dt.weekday.isin([5, 6]).astype(int)
df['tier_num'] = df['tier'].map(TIER_TO_NUM).fillna(0).astype(int)

# 2023-2024 season data clean up
q = """
select 
    event_date, tier, day_of_week
from 
    custom.cth_game_descriptions
where 
    season = '2023-24'
"""

games = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

# NOTE(review): this used to flag Fri/Sat/Sun while the training frame flags only
# Sat/Sun. The prediction feature now matches the definition the model is fitted on;
# if Friday should count as weekend, change BOTH frames together.
games['weekend'] = games['day_of_week'].isin(['Sat', 'Sun']).astype(int)
games['tier_num'] = games['tier'].map(TIER_TO_NUM).fillna(0).astype(int)
games = games.merge(right = just_attendance, how = 'left', on = 'event_date')

# Training data
xdf = df[['ticket_scans', 'weekend', 'tier_num']]
y = df[['num_parking_transactions']]

# Test data: predicted attendance (from the attendance cell) stands in for ticket scans
xdf2 = games[['predicted_attendance', 'weekend', 'tier_num']]

# Polynomial feature expansion: fit on the training frame, then apply the same
# transformer to the prediction frame. Plain arrays are passed because the first
# column is named differently in the two frames.
poly = PolynomialFeatures(degree=4, include_bias=False)
poly_features = poly.fit_transform(xdf.to_numpy())
poly_features2 = poly.transform(xdf2.to_numpy())

polynomial = LinearRegression().fit(poly_features, np.array(y).ravel())
predicted = polynomial.predict(poly_features2)

games['predicted_parking'] = predicted