In [1]:
import pandas as pd
import numpy as np
from prefect.blocks.system import Secret
from catnip.fla_redshift import FLA_Redshift
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

from datetime import datetime
import statsmodels.api as sm

import polars as pl
import pyarrow

In [None]:
def get_redshift_credentials() -> Dict:
    """Collect the Redshift and S3 settings into a single dictionary.

    Secret values are read from Prefect ``Secret`` blocks by name; the port,
    S3 subdirectory and verbosity flag are fixed literals.

    Returns:
        Dict: mapping with the keys ``dbname``, ``host``, ``port``, ``user``,
        ``password``, ``aws_access_key_id``, ``aws_secret_access_key``,
        ``bucket``, ``subdirectory`` and ``verbose``.
    """

    def read_secret(block_name: str):
        # Load the named Prefect Secret block and unwrap its stored value.
        return Secret.load(block_name).get()

    return dict(
        # Redshift connection settings
        dbname=read_secret("stellar-redshift-db-name"),
        host=read_secret("stellar-redshift-host"),
        port=5439,
        user=read_secret("stellar-redshift-user-name"),
        password=read_secret("stellar-redshift-password"),
        # S3 settings
        aws_access_key_id=read_secret("fla-s3-aws-access-key-id-east-1"),
        aws_secret_access_key=read_secret("fla-s3-aws-secret-access-key-east-1"),
        bucket=read_secret("fla-s3-bucket-name-east-1"),
        subdirectory="us-east-1",
        # Client behaviour
        verbose=False,
    )

# Fetch the credentials on a single worker thread and block until they are ready.
# NOTE(review): presumably this is done off the main thread because the notebook
# kernel already runs an event loop, which interferes with Prefect's
# sync-compatible `Secret.load` — confirm before simplifying to a direct call.
with ThreadPoolExecutor(max_workers=1) as executor:
    creds_future = executor.submit(get_redshift_credentials)
    rs_creds = creds_future.result()

In [19]:
# Per-section, per-event sales summary.
#
# CTEs:
#   nightly_suites - distinct sections that have at least one ticket row with
#                    ticket_type 'Nightly Suites' and pc_one in U/V/W, excluding
#                    the section literal 'Panther Bar ' (the literal carries a
#                    trailing space).
#   events         - every distinct event_datetime in the ticket table.
#   base_table     - cross join of the two, i.e. one row per (section, event)
#                    pair, so events with no matching tickets still appear.
#
# Final SELECT: left-joins the ticket table back onto base_table by section and
# event_datetime and aggregates per pair:
#   gross_rev  - sum of gross_revenue, 0 when there are no matching rows
#   paid_seats - sum of paid_seats, 0 when there are no matching rows
#   ATP        - gross revenue / paid seats; nullif() guards against division
#                by zero and coalesce() turns the resulting NULL into 0
#   is_sold    - 1 when sum(gross_revenue) >= 0; 0 when the sum is NULL (no
#                matching ticket rows) or negative
#
# NOTE(review): the join back to the ticket table is not restricted to
# ticket_type = 'Nightly Suites', so rows of any ticket type in these sections
# contribute to the sums and to is_sold — confirm this is intended.
# NOTE(review): confirm the trailing space in 'Panther Bar ' matches the stored
# section value; otherwise that filter excludes nothing.
q = """
WITH nightly_suites AS (
    SELECT DISTINCT
        section
    FROM
        custom.cth_v_ticket_2425
    WHERE
        ticket_type = 'Nightly Suites'
        AND pc_one IN ('U','V','W')
        AND section != 'Panther Bar '
),
events AS (
    SELECT distinct
        event_datetime
    FROM
        custom.cth_v_ticket_2425
),
base_table AS (
    SELECT
        *
    FROM
        nightly_suites
    CROSS JOIN
         events
)
SELECT
    base_table.section,
    base_table.event_datetime,
    coalesce(sum(gross_revenue),0) AS gross_rev,
    coalesce(sum(paid_seats),0) AS paid_seats,
    coalesce(sum(gross_revenue)/nullif(sum(paid_seats),0),0) AS ATP,
    CASE
        WHEN sum(gross_revenue) >= 0 THEN 1
        ELSE 0
    END AS is_sold
FROM
    base_table
LEFT JOIN
    custom.cth_v_ticket_2425 ON
    base_table.event_datetime = cth_v_ticket_2425.event_datetime AND
    base_table.section = cth_v_ticket_2425.section
GROUP BY
    base_table.section,
    base_table.event_datetime 
"""

# Run the query through FLA_Redshift, built from the credentials dict, and keep the result as `df`.
df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [20]:
# Per-section averages across all events.
# The query groups by (section, event_datetime), so `df` holds one row per pair
# and the per-section sums divided by the event count give per-event averages
# (events with no sales contribute zeros).
# The event count is derived from the data instead of a hard-coded 43, so the
# averages stay correct when events are added to the ticket table.
n_events = df['event_datetime'].nunique()
df.groupby(by='section')[['is_sold', 'gross_rev', 'paid_seats', 'atp']].sum() / n_events

Unnamed: 0_level_0,is_sold,gross_rev,paid_seats,atp
section,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CS02A,0.976744,4188.152326,13.372093,274.026047
CS03A,1.0,2129.15814,15.116279,140.243605
CS03B,0.953488,6478.690698,13.186047,419.885581
CS05,0.837209,9731.995349,25.488372,210.436325
CS14A,0.860465,5002.195349,11.232558,329.68595
CS15A,0.906977,5360.34186,12.139535,349.223566
CS15B,1.0,2060.994186,15.116279,136.180407
CS16A,0.953488,5593.552326,12.488372,376.117907
CS17A,1.0,2246.159302,15.302326,144.971621
CS17B,1.0,2211.604651,15.023256,147.103794
