In [1]:
import pandas as pd
import numpy as np
from catnip.fla_redshift import FLA_Redshift
from sqlalchemy import null
from datetime import datetime

from prefect.blocks.system import Secret
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

In [2]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

In [3]:
# Tables Used:

# - korepss_opportunities
# - cth_v_historical_ticket
# - cth_historical_attendance
# - ct_customer (& all extra tables)
# - flateamshop
# - location_ticket_type 
    # location_ticket_type_agg AS (
    # SELECT
    #     purchaser_ticketing_id,
    #     arena_level_internal
    # FROM
    #     (SELECT
    #          purchaser_ticketing_id,
    #          arena_level_internal,
    #          ROW_NUMBER() OVER (PARTITION BY purchaser_ticketing_id ORDER BY num_tickets DESC,
    #              CASE arena_level_internal
    #                  WHEN 'Premium' THEN 1
    #                  WHEN 'Lower' THEN 2
    #                  WHEN 'Club' THEN 3
    #                  ELSE 4
    #              END) AS rn
    #      FROM
    #          location_ticket_type)
    # WHERE rn = 1

In [4]:
q = """
SELECT DISTINCT
    purchaser_ticketing_id AS crm_id
FROM
    custom.cth_v_historical_ticket ticket
UNION
SELECT DISTINCT
    ticketing_account_scanned AS crm_id
FROM
    custom.cth_v_historical_attendance

"""

base_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [7]:
q = """
SELECT
    purchaser_ticketing_id AS crm_id,
    SUM(gross_revenue) AS total_spend,
    SUM(paid_seats) AS total_seats,
    SUM(CASE WHEN season = '2025-26' THEN gross_revenue END) AS spend_2526,
    SUM(CASE WHEN season = '2025-26' THEN paid_seats END) AS seats_2526,
    MAX(transaction_datetime) AS last_purchase_date,
    COUNT(DISTINCT transaction_datetime) AS num_transactions
FROM
    custom.cth_v_historical_ticket
GROUP BY
    purchaser_ticketing_id
"""

ticket_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [8]:
df = base_df.merge(ticket_df, how = 'left', on= 'crm_id')

In [9]:
q = """
SELECT
    ticketing_account_scanned AS crm_id,
    COUNT(*) AS total_attendance,
    COUNT(DISTINCT event_datetime) AS num_games_attended,
    COUNT(CASE WHEN season = '2025-26' THEN event_datetime END) AS attendance_2526,
    COUNT(DISTINCT CASE WHEN season = '2025-26' THEN event_datetime END) AS attendance_2526,
    MAX(event_datetime) AS last_attendance_date
FROM
    custom.cth_v_historical_attendance
GROUP BY
    ticketing_account_scanned
"""

attendance_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [10]:
df = df.merge(attendance_df, how = 'left', on= 'crm_id')

In [None]:
q = """
SELECT
    purchaser_ticketing_id AS crm_id,
    LISTAGG(DISTINCT ticket_type, ', ') AS all_plan_types,
    LISTAGG(DISTINCT CASE WHEN season = '2025-26' THEN ticket_type END, ', ') AS plan_types_2526
FROM
    custom.cth_v_historical_plans
GROUP BY
    purchaser_ticketing_id
"""

plans_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [12]:
df = df.merge(plans_df, how = 'left', on= 'crm_id')

In [13]:
df

Unnamed: 0,crm_id,total_spend,total_seats,spend_2526,seats_2526,last_purchase_date,num_transactions,total_attendance,num_games_attended,attendance_2526,attendance_2526.1,last_attendance_date,all_ticket_types,ticket_types_2526
0,3569707.0,94905.199565,863.0,,,2024-07-03 13:48:59,5.0,217.0,43.0,0.0,0.0,2024-03-30 12:30:00,"Full, Premier",
1,3570556.0,159332.320092,603.0,28312.20,84.0,2025-07-16 18:01:17,49.0,349.0,162.0,38.0,21.0,2026-02-02 19:00:00,Full,Full
2,5096184.0,95754.919888,560.0,16393.26,87.0,2026-01-09 00:24:58,75.0,391.0,172.0,38.0,18.0,2026-02-02 19:00:00,Full,Full
3,5556171.0,16241.439937,248.0,3595.20,42.0,2025-07-16 18:46:40,21.0,195.0,193.0,26.0,26.0,2026-02-04 19:00:00,Full,Full
4,8618199.0,13420.619993,409.0,3415.44,84.0,2025-07-16 17:45:07,32.0,279.0,155.0,33.0,22.0,2026-02-04 19:00:00,Full,Full
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
513303,26174500.0,,,,,NaT,,1.0,1.0,1.0,1.0,2026-02-04 19:00:00,,
513304,26183010.0,,,,,NaT,,1.0,1.0,1.0,1.0,2026-02-04 19:00:00,,
513305,26187767.0,,,,,NaT,,1.0,1.0,1.0,1.0,2026-02-04 19:00:00,,
513306,26183888.0,,,,,NaT,,1.0,1.0,1.0,1.0,2026-02-04 19:00:00,,


In [5]:
q = """
SELECT DISTINCT
    clients.crm_id,
    email,
    addresses.is_local
FROM
    custom.seatgeek_v_clients clients
INNER JOIN
    custom.golden_record_v_addresses addresses USING (email)
"""

is_local_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [None]:
df = df.merge(is_local_df, how = 'left', on= 'crm_id')

In [7]:
q = """
SELECT
    tradable_bits_activities.email,
    crm_id,
    COUNT(tradable_bits_activities.*) AS num_online_activities,
    COUNT(CASE WHEN creation_date >= '2025-07-01' THEN 1 END) AS num_online_activities_last_fiscal
FROM
    custom.tradable_bits_activities
LEFT JOIN
    custom.seatgeek_v_clients ON tradable_bits_activities.email = seatgeek_v_clients.email
GROUP BY
    tradable_bits_activities.email,
    crm_id
"""

tradable_bits_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [8]:
df = df.merge(tradable_bits_df, how = 'left', on= 'crm_id')

In [9]:
q = """
SELECT
    crm_id,
    count(*) AS num_recieved,
    sum(CASE WHEN event = 'Opened' AND is_true_false THEN 1 ELSE 0 END) AS num_opened,
    sum(CASE WHEN event = 'Clicked' AND is_true_false THEN 1 ELSE 0 END) AS num_clicked,
    sum(CASE WHEN event = 'Bounced' AND is_true_false THEN 1 ELSE 0 END) AS num_bounced,
    sum(CASE WHEN event = 'Unsubscribed' AND is_true_false THEN 1 ELSE 0 END) AS num_unsubscribed,
    CASE WHEN max(date_unsubscribed_panthers) IS NOT NULL THEN True ELSE False END AS is_unsubscribed
FROM
    custom.sfmc_v_sent_reporting
LEFT JOIN
    custom.sfmc_v_subscribers ON sfmc_v_sent_reporting.subscriberkey = sfmc_v_subscribers.subscriber_key
LEFT JOIN
    custom.seatgeek_v_clients ON sfmc_v_subscribers.email = seatgeek_v_clients.email
WHERE
    sent_date >= '2025-07-01'
GROUP BY
    crm_id
"""

emails_df = FLA_Redshift(**rs_creds).query_warehouse(sql_string=q)

In [10]:
df = df.merge(emails_df, how = 'left', on= 'crm_id')

In [11]:
df

Unnamed: 0,crm_id,email_x,is_local,email_y,num_online_activities,num_online_activities_last_fiscal,num_recieved,num_opened,num_clicked,num_bounced,num_unsubscribed,is_unsubscribed
0,12221200.0,lisaa@cityfurniture.com,True,,,,76.0,2.0,0.0,0.0,0.0,False
1,12239894.0,josephbarbuto@bellsouth.net,True,,,,,,,,,
2,12234139.0,chaddbenedict@yahoo.com,True,chaddbenedict@yahoo.com,5.0,0.0,12.0,3.0,0.0,0.0,0.0,False
3,12353975.0,kdjorgensen7@gmail.com,True,kdjorgensen7@gmail.com,2.0,0.0,36.0,7.0,0.0,0.0,0.0,False
4,12397806.0,ijreich@aol.com,True,ijreich@aol.com,3.0,0.0,48.0,12.0,0.0,0.0,0.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...
546733,26188415.0,josephwmatthewsjr@yahoo.com,True,,,,4.0,1.0,0.0,0.0,0.0,False
546734,26188295.0,gr@grahamrahal.com,True,,,,4.0,0.0,0.0,0.0,0.0,False
546735,26185434.0,joelafford@hotmail.com,False,,,,12.0,0.0,0.0,0.0,0.0,False
546736,26188532.0,dani.comart@gmail.com,True,,,,4.0,1.0,0.0,0.0,0.0,False
