In [2]:
import pandas as pd
import numpy as np
from catnip.fla_redshift import FLA_Redshift
from sqlalchemy import null
from datetime import datetime

from prefect.blocks.system import Secret
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

In [3]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

In [4]:
df = pd.read_csv("C:\\Users\\riffere\\OneDrive - Florida Panthers\\Documents\\LTP\\ALTP_Combined.csv")
FLA_Redshift(**rs_creds).write_to_warehouse(df = df, table_name= "community_altp_data")

In [5]:
q = """
WITH initial AS (
    SELECT
        LOWER(email) AS email,
        MIN(season) AS season,
        MIN(date(transaction_date)) AS transaction_date
    FROM
        custom.community_altp_data
    GROUP BY
        email
),
acct_rev_info AS (
    SELECT
        contacts.email,
        createdon AS add_date,
        CASE
            WHEN ticket.season LIKE '2021-22' THEN gross_revenue
            ELSE 0
        END AS "gross_rev_2122",
        CASE
            WHEN ticket.season LIKE '2022-23' THEN gross_revenue
            ELSE 0
        END AS "gross_rev_2223",
        CASE
            WHEN ticket.season LIKE '2023-24' THEN gross_revenue
            ELSE 0
        END AS "gross_rev_2324",
        CASE
            WHEN ticket.season LIKE '2024-25' THEN gross_revenue
            ELSE 0
        END AS "gross_rev_2425",
        CASE
            WHEN ticket.season LIKE '2021-22' AND "ticket_type" IN ('Annual Suites', 'Full', 'Half', 'Premier') THEN 1
            ELSE 0
        END AS "is_stm_2122",
        CASE
            WHEN ticket.season LIKE '2022-23' AND "ticket_type" IN ('Annual Suites', 'Full', 'Half', 'Premier') THEN 1
            ELSE 0
        END AS "is_stm_2223",
        CASE
            WHEN ticket.season LIKE '2023-24' AND "ticket_type" IN ('Annual Suites', 'Full', 'Half', 'Premier') THEN 1
            ELSE 0
        END AS "is_stm_2324",
        CASE
            WHEN ticket.season LIKE '2024-25' AND "ticket_type" IN ('Annual Suites', 'Full', 'Half', 'Premier') THEN 1
            ELSE 0
        END AS "is_stm_2425"
    FROM
        custom.cth_v_historical_ticket ticket
    JOIN
        custom.korepss_externalsystemtocontact ext ON ticket.purchaser_ticketing_id = ext.externalcontactid
    JOIN
        custom.korepss_contacts contacts ON ext.crmcontactid = contacts.contactid
),
subs_2526 as (
    SELECT
        email,
        sum(gross_revenue) AS "gross_rev_2526",
        1 AS "is_stm_2526"
    FROM
        custom.cth_v_ticket_subscription_2526 subs
    INNER JOIN
        custom.korepss_externalsystemtocontact ext ON subs.purch_client_crm_id = ext.externalcontactid
    INNER JOIN
        custom.korepss_contacts contacts ON ext.crmcontactid = contacts.contactid
    GROUP BY
        email
),
acct_rev_agg AS (
    SELECT
        email,
        MIN(add_date) AS add_date,
        SUM(gross_rev_2122) AS gross_rev_2122,
        SUM(gross_rev_2223) AS gross_rev_2223,
        SUM(gross_rev_2324) AS gross_rev_2324,
        SUM(gross_rev_2425) AS gross_rev_2425,
        MAX(is_stm_2122) AS is_stm_2122,
        MAX(is_stm_2223) AS is_stm_2223,
        MAX(is_stm_2324) AS is_stm_2324,
        MAX(is_stm_2425) AS is_stm_2425
    FROM
        acct_rev_info
    GROUP BY
        email
)
SELECT
    initial.email,
    transaction_date,
    initial.season,
    add_date,
    COALESCE(gross_rev_2122, 0) AS gross_rev_2122,
    COALESCE(gross_rev_2223, 0) AS gross_rev_2223,
    COALESCE(gross_rev_2324, 0) AS gross_rev_2324,
    COALESCE(gross_rev_2425, 0) AS gross_rev_2425,
    COALESCE(gross_rev_2526, 0) AS gross_rev_2526,
    COALESCE(is_stm_2122, 0) AS is_stm_2122,
    COALESCE(is_stm_2223, 0) AS is_stm_2223,
    COALESCE(is_stm_2324, 0) AS is_stm_2324,
    COALESCE(is_stm_2425, 0) AS is_stm_2425,
    COALESCE(is_stm_2526, 0) AS is_stm_2526
FROM
    initial
LEFT JOIN
    acct_rev_agg ON initial.email = acct_rev_agg.email
LEFT JOIN
    subs_2526 ON initial.email = subs_2526.email
WHERE
    initial.email IS NOT NULL
    AND initial.email <> ''
    AND initial.season IN ('2021-22', '2022-23', '2023-24', '2024-25')
"""

In [6]:
df = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [7]:
df['add_date'].fillna(datetime.strptime('2030-01-01', "%Y-%m-%d"), inplace = True)
df['add_date'] = pd.to_datetime(df['add_date']).dt.date
df['transaction_date'] = pd.to_datetime(df['transaction_date']).dt.date

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['add_date'].fillna(datetime.strptime('2030-01-01', "%Y-%m-%d"), inplace = True)


In [8]:
df['pre_altp_email'] = df.apply(lambda row: 0 if row['transaction_date'] == '' else (1 if row['add_date'] < row['transaction_date'] else 0), axis = 1)
df['post_altp_email'] = df.apply(lambda row: 0 if row['transaction_date'] == '' else (1 if row['add_date'] >= row['transaction_date'] else 0), axis = 1)

In [9]:
season_dict = {
    "2021-22" : 1,
    "2022-23" : 2,
    "2023-24" : 3,
    "2024-25" : 4
}

# sum revenue before/after
for key, value in season_dict.items():
    col_list = list(df.columns[4:(4+value)])
    # print(col_list)
    df.loc[(df['season'] == key), 'pre_altp_revenue'] =  df[col_list].sum(axis = 1)

for key, value in season_dict.items():
    col_list = list(df.columns[(4 + value):9])
    # print(col_list)
    df.loc[(df['season'] == key), 'post_altp_revenue'] =  df[col_list].sum(axis = 1)

# sum stms before/after
for key, value in season_dict.items():
    col_list = list(df.columns[9:(9+value)])
    # print(col_list)
    df.loc[(df['season'] == key), 'pre_altp_tickets'] =  df[col_list].sum(axis = 1)


for key, value in season_dict.items():
    col_list = list(df.columns[(9 + value):14])
    # print(col_list)
    df.loc[(df['season'] == key), 'post_altp_tickets'] =  df[col_list].sum(axis = 1)

In [10]:
df

Unnamed: 0,email,transaction_date,season,add_date,gross_rev_2122,gross_rev_2223,gross_rev_2324,gross_rev_2425,gross_rev_2526,is_stm_2122,is_stm_2223,is_stm_2324,is_stm_2425,is_stm_2526,pre_altp_email,post_altp_email,pre_altp_revenue,post_altp_revenue,pre_altp_tickets,post_altp_tickets
0,albertokan@ymail.com,2022-01-11,2021-22,2018-01-22,5977.00,3450.20,0.00,0.0,0.0,1,1,0,0,0,1,0,5977.00,3450.20,1.0,1.0
1,joshuamittelberg@yahoo.com,2022-01-11,2021-22,2021-05-18,9838.83,12208.92,7603.42,3477.5,4815.0,1,1,1,1,1,1,0,9838.83,28104.84,1.0,4.0
2,cxmbrxwn@gmail.com,2023-03-21,2022-23,2019-12-08,58.00,0.00,62.06,0.0,0.0,0,0,0,0,0,1,0,58.00,62.06,0.0,0.0
3,amarojonathan@hotmail.com,2023-01-17,2022-23,2019-09-23,0.00,1022.00,6467.56,0.0,0.0,0,1,1,0,0,1,0,1022.00,6467.56,1.0,1.0
4,warchole@floridapanthers.com,2022-04-08,2021-22,2021-06-03,731.02,911.00,1044.32,0.0,0.0,1,0,0,0,0,1,0,731.02,1955.32,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
576,harringtonaph@gmail.com,2022-01-11,2021-22,2030-01-01,0.00,0.00,0.00,0.0,0.0,0,0,0,0,0,0,1,0.00,0.00,0.0,0.0
577,e.lindenbaum@hotmail.com,2024-01-19,2023-24,2030-01-01,0.00,0.00,0.00,0.0,0.0,0,0,0,0,0,0,1,0.00,0.00,0.0,0.0
578,jtmurphy78@gmail.com,2022-01-11,2021-22,2030-01-01,0.00,0.00,0.00,0.0,0.0,0,0,0,0,0,0,1,0.00,0.00,0.0,0.0
579,bluesound7@gmail.com,2022-03-09,2021-22,2030-01-01,0.00,0.00,0.00,0.0,0.0,0,0,0,0,0,0,1,0.00,0.00,0.0,0.0


In [11]:
FLA_Redshift(**rs_creds).write_to_warehouse(df = df, table_name= "community_v_altp")