In [1]:
import pandas as pd
import numpy as np
from catnip.fla_redshift import FLA_Redshift
from sqlalchemy import null
from datetime import datetime

from prefect.blocks.system import Secret
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

In [2]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

PrefectHTTPStatusError: Client error '401 Unauthorized' for url 'https://api.prefect.cloud/api/accounts/fbcf5f47-1244-4f4b-8a37-4ea5344727df/workspaces/c24db6e7-9c24-466a-8d86-94154cc57d8d/block_types/slug/secret/block_documents/name/stellar-redshift-db-name?include_secrets=true'
Response: {'detail': 'Unauthorized'}
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/401

In [29]:
df = pd.read_csv("C:\\Users\\riffere\\OneDrive - Florida Panthers\\Documents\\LTP\\Kids_Club.csv")
FLA_Redshift(**rs_creds).write_to_warehouse(df = df, table_name= "community_kidsclub_data")

In [30]:
q = """
WITH initial AS (
    SELECT
        LOWER(parent_email) AS parent_email,
        CASE
            WHEN LISTAGG(kc_group) LIKE '%All-Star%' THEN 'All-Star'
            WHEN LISTAGG(kc_group) LIKE '%Pro%' THEN 'Pro'
            ELSE 'Rookie'
        END AS kc_group_final
    FROM
        custom.community_kidsclub_data
    GROUP BY
        LOWER(parent_email)
),
first_kids_club_season AS (
    SELECT
        initial.parent_email,
        kc_group_final AS kc_group,
        MIN(date(transaction_date)) AS transaction_date,
        MIN(season) AS season
    FROM
        initial
    LEFT JOIN
        custom.community_kidsclub_data
    ON
        LOWER(initial.parent_email) = LOWER(community_kidsclub_data.parent_email)
    GROUP BY
        initial.parent_email,
        kc_group_final
),
acct_rev_info AS (
    SELECT
        contacts.email,
        createdon AS add_date,
        CASE
            WHEN ticket.season LIKE '2021-22' THEN gross_revenue
            ELSE 0
        END AS "gross_rev_2122",
        CASE
            WHEN ticket.season LIKE '2022-23' THEN gross_revenue
            ELSE 0
        END AS "gross_rev_2223",
        CASE
            WHEN ticket.season LIKE '2023-24' THEN gross_revenue
            ELSE 0
        END AS "gross_rev_2324",
        CASE
            WHEN ticket.season LIKE '2024-25' THEN gross_revenue
            ELSE 0
        END AS "gross_rev_2425",
        CASE
            WHEN ticket.season LIKE '2021-22' AND "ticket_type" IN ('Annual Suites', 'Full', 'Half', 'Premier') THEN 1
            ELSE 0
        END AS "is_stm_2122",
        CASE
            WHEN ticket.season LIKE '2022-23' AND "ticket_type" IN ('Annual Suites', 'Full', 'Half', 'Premier') THEN 1
            ELSE 0
        END AS "is_stm_2223",
        CASE
            WHEN ticket.season LIKE '2023-24' AND "ticket_type" IN ('Annual Suites', 'Full', 'Half', 'Premier') THEN 1
            ELSE 0
        END AS "is_stm_2324",
        CASE
            WHEN ticket.season LIKE '2024-25' AND "ticket_type" IN ('Annual Suites', 'Full', 'Half', 'Premier') THEN 1
            ELSE 0
        END AS "is_stm_2425"
    FROM
        custom.cth_v_historical_ticket ticket
    INNER JOIN
        custom.korepss_externalsystemtocontact ext ON ticket.purchaser_ticketing_id = ext.externalcontactid
    INNER JOIN
        custom.korepss_contacts contacts ON ext.crmcontactid = contacts.contactid
),
subs_2526 as (
    SELECT
        email,
        sum(gross_revenue) AS "gross_rev_2526",
        1 AS "is_stm_2526"
    FROM
        custom.cth_v_ticket_subscription_2526 subs
    INNER JOIN
        custom.korepss_externalsystemtocontact ext ON subs.purch_client_crm_id = ext.externalcontactid
    INNER JOIN
        custom.korepss_contacts contacts ON ext.crmcontactid = contacts.contactid
    GROUP BY
        email
),
acct_rev_agg AS (
    SELECT
        acct_rev_info.email,
        MIN(add_date) AS add_date,
        SUM(gross_rev_2122) AS gross_rev_2122,
        SUM(gross_rev_2223) AS gross_rev_2223,
        SUM(gross_rev_2324) AS gross_rev_2324,
        SUM(gross_rev_2425) AS gross_rev_2425,
        MAX(is_stm_2122) AS is_stm_2122,
        MAX(is_stm_2223) AS is_stm_2223,
        MAX(is_stm_2324) AS is_stm_2324,
        MAX(is_stm_2425) AS is_stm_2425
    FROM
        acct_rev_info
    GROUP BY
        acct_rev_info.email
)
SELECT
    parent_email,
    kc_group,
    transaction_date::date,
    first_kids_club_season.season,
    add_date,
    COALESCE(gross_rev_2122, 0) AS gross_rev_2122,
    COALESCE(gross_rev_2223, 0) AS gross_rev_2223,
    COALESCE(gross_rev_2324, 0) AS gross_rev_2324,
    COALESCE(gross_rev_2425, 0) AS gross_rev_2425,
    COALESCE(gross_rev_2526, 0) AS gross_rev_2526,
    COALESCE(is_stm_2122, 0) AS is_stm_2122,
    COALESCE(is_stm_2223, 0) AS is_stm_2223,
    COALESCE(is_stm_2324, 0) AS is_stm_2324,
    COALESCE(is_stm_2425, 0) AS is_stm_2425,
    COALESCE(is_stm_2526, 0) AS is_stm_2526
FROM
    first_kids_club_season
LEFT JOIN
    acct_rev_agg ON first_kids_club_season.parent_email = acct_rev_agg.email
LEFT JOIN
    subs_2526 ON first_kids_club_season.parent_email = subs_2526.email
WHERE
    first_kids_club_season.parent_email IS NOT NULL AND first_kids_club_season.parent_email <> ''
"""

In [31]:
df = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [32]:
df['add_date'].fillna(datetime.strptime('2030-01-01', "%Y-%m-%d"), inplace = True)
df['add_date'] = pd.to_datetime(df['add_date']).dt.date
df['transaction_date'] = pd.to_datetime(df['transaction_date']).dt.date

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['add_date'].fillna(datetime.strptime('2030-01-01', "%Y-%m-%d"), inplace = True)


In [33]:
df['pre_kids_club_email'] = df.apply(lambda row: 0 if row['transaction_date'] == '' else (1 if row['add_date'] < row['transaction_date'] else 0), axis = 1)
df['post_kids_club_email'] = df.apply(lambda row: 0 if row['transaction_date'] == '' else (1 if row['add_date'] >= row['transaction_date'] else 0), axis = 1)

In [34]:
df

Unnamed: 0,email,transaction_date,season,add_date,gross_rev_2122,gross_rev_2223,gross_rev_2324,gross_rev_2425,gross_rev_2526,is_stm_2122,is_stm_2223,is_stm_2324,is_stm_2425,is_stm_2526,pre_kids_club_email,post_kids_club_email
0,mamyers00@gmail.com,2023-01-17,2022-23,2018-01-18,4265.56,1250.00,263.22,0.00,0.00,1,0,0,0,0,1,0
1,tmoy13@gmail.com,2024-12-16,2024-25,2017-11-05,6875.07,13574.00,13159.69,8800.36,12403.44,1,1,1,1,1,1,0
2,jgodinez716@gmail.com,2024-01-19,2023-24,2021-12-30,1320.00,7036.18,9337.08,5029.08,0.00,1,1,1,1,0,1,0
3,tking690@gmail.com,2022-01-11,2021-22,2021-04-19,6258.12,11100.83,7964.66,4079.80,6830.88,1,1,1,1,1,1,0
4,alejandro.mouro@gmail.com,2022-01-21,2021-22,2018-01-18,1137.00,1784.00,7576.73,3873.30,5457.00,0,0,1,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
557,gregoryp1997@aol.com,2023-04-18,2022-23,2030-01-01,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,1
558,max.bagnoli001@gmail.com,2024-12-16,2024-25,2030-01-01,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,1
559,robert.behar001@yahoo.com,2022-05-10,2021-22,2030-01-01,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,1
560,lravelo@wgu.edu,2022-05-01,2021-22,2030-01-01,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,1


In [22]:
season_dict = {
    "2021-22" : 1,
    "2022-23" : 2,
    "2023-24" : 3,
    "2024-25" : 4
}

# sum revenue before/after
for key, value in season_dict.items():
    col_list = list(df.columns[5:(5+value)])
    # print(col_list)
    df.loc[(df['season'] == key), 'pre_kids_club_revenue'] =  df[col_list].sum(axis = 1)

for key, value in season_dict.items():
    col_list = list(df.columns[(5 + value):10])
    # print(col_list)
    df.loc[(df['season'] == key), 'post_kids_club_revenue'] =  df[col_list].sum(axis = 1)

# sum stms before/after
for key, value in season_dict.items():
    col_list = list(df.columns[10:(10+value)])
    # print(col_list)
    df.loc[(df['season'] == key), 'pre_kids_club_tickets'] =  df[col_list].sum(axis = 1)

for key, value in season_dict.items():
    col_list = list(df.columns[(10 + value):15])
    # print(col_list)
    df.loc[(df['season'] == key), 'post_kids_club_tickets'] =  df[col_list].sum(axis = 1)

In [24]:
FLA_Redshift(**rs_creds).write_to_warehouse(df = df, table_name= "community_v_kidsclub")

In [None]:
# Kids Club dash: current # in, tenure, what bucket did they start in (KC, LTP, Floorball, tickets, ect) conversion rate from other buckets, total revenue created