In [1]:
import pandas as pd
import numpy as np
from catnip.fla_redshift import FLA_Redshift
from sqlalchemy import null
from datetime import datetime

from prefect.blocks.system import Secret
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import random

In [2]:
def get_redshift_credentials() -> Dict:
    """Collect the keyword arguments used to construct ``FLA_Redshift``.

    Every sensitive value is read from a Prefect ``Secret`` block; the port,
    S3 subdirectory and verbosity flag are fixed literals.

    Returns:
        Dict: connection settings (``dbname``, ``host``, ``port``, ``user``,
        ``password``), S3 settings (``aws_access_key_id``,
        ``aws_secret_access_key``, ``bucket``, ``subdirectory``) and
        ``verbose``.
    """

    def _read_secret(block_name: str):
        # Load the named Prefect Secret block and return its stored value.
        return Secret.load(block_name).get()

    connection_settings = {
        "dbname": _read_secret("stellar-redshift-db-name"),
        "host": _read_secret("stellar-redshift-host"),
        "port": 5439,
        "user": _read_secret("stellar-redshift-user-name"),
        "password": _read_secret("stellar-redshift-password"),
    }

    s3_settings = {
        "aws_access_key_id": _read_secret("fla-s3-aws-access-key-id-east-1"),
        "aws_secret_access_key": _read_secret("fla-s3-aws-secret-access-key-east-1"),
        "bucket": _read_secret("fla-s3-bucket-name-east-1"),
        "subdirectory": "us-east-1",
    }

    return {**connection_settings, **s3_settings, "verbose": False}

# Resolve the credentials on a single worker thread and block until they are ready.
# NOTE(review): presumably done so the Prefect Secret calls run outside the
# notebook's already-running event loop -- confirm before simplifying.
with ThreadPoolExecutor(max_workers=1) as executor:
    credentials_future = executor.submit(get_redshift_credentials)
    rs_creds = credentials_future.result()

In [3]:
# One row per (event, purchasing client) for events before today.
# `did_attend` is 1 when at least one joined attendance row has a non-null
# `client_crm_id_scan`, otherwise 0.
q = """
SELECT
    ticket.event_datetime,
    purch_client_crm_id,
    MAX(CASE WHEN client_crm_id_scan IS NOT NULL THEN 1 ELSE 0 END) AS did_attend
FROM
    custom.cth_v_ticket_2526 ticket
LEFT JOIN
    custom.cth_v_attendance_2526 attendance
        ON ticket.event_datetime = attendance.event_datetime
        AND ticket.purch_client_crm_id = attendance.client_crm_id_purchase
WHERE
    ticket.event_datetime < current_date
GROUP BY
    ticket.event_datetime,
    purch_client_crm_id
"""

# Run the query against the warehouse and keep the result frame as `df`.
df = (
    FLA_Redshift(**rs_creds)
    .query_warehouse(sql_string=q)
)

In [4]:
# Compute, for every (client, event) row, the length of the client's current run of
# consecutively attended games.
#
# Start from the frame loaded by the query cell instead of sending the identical query
# to the warehouse a second time; the copy leaves `df` untouched so this cell can be
# re-run without side effects.
df_attended = df.copy()

# Order each client's games chronologically so "previous row" means "previous game".
df_attended = df_attended.sort_values(['purch_client_crm_id', 'event_datetime'])

# Values of the previous row. The shift runs over the whole frame (not per client), so the
# first row of a client sees the prior client's values; the client-equality test below
# prevents those rows from being treated as a continuation.
df_attended['prev_client'] = df_attended['purch_client_crm_id'].shift()
df_attended['prev_event'] = df_attended['event_datetime'].shift()
df_attended['prev_attend'] = df_attended['did_attend'].shift()

# A row continues a streak only when it belongs to the same client as the previous row,
# has the same attendance flag as the previous row, and the two rows are not both misses.
# Comparisons against the NaN produced by the shift evaluate to False, so the very first
# row never counts as a continuation.
continues_streak = (
    (df_attended['purch_client_crm_id'] == df_attended['prev_client'])
    & (df_attended['did_attend'] == df_attended['prev_attend'])
    & ~((df_attended['did_attend'] == 0) & (df_attended['prev_attend'] == 0))
)

# NOTE: despite its name, `new_streak` is True for rows that CONTINUE the previous row's
# streak and False for rows that start a new group. The column name is kept unchanged so
# anything reading this frame keeps working.
df_attended['new_streak'] = continues_streak

# Every non-continuing row opens a new group; the running total gives each group an id.
df_attended['streak_group'] = (~df_attended['new_streak']).cumsum()

# Missed games have streak length 0; attended games get their 1-based position inside
# their (client, streak_group) run.
df_attended['streak_length'] = 0
mask = df_attended['did_attend'] == 1
position_in_group = df_attended.groupby(by=['purch_client_crm_id', 'streak_group']).cumcount() + 1
df_attended.loc[mask, 'streak_length'] = position_in_group

df_attended

Unnamed: 0,event_datetime,purch_client_crm_id,did_attend,prev_client,prev_event,prev_attend,new_streak,streak_group,streak_length
45468,2025-10-07 17:00:00,8,0,,NaT,,False,1,0
43429,2025-09-29 18:00:00,11,0,8.0,2025-10-07 17:00:00,0.0,False,2,0
3561,2025-10-07 17:00:00,218,1,11.0,2025-09-29 18:00:00,0.0,False,3,1
1631,2025-10-04 19:00:00,759,1,218.0,2025-10-07 17:00:00,1.0,False,4,1
7633,2025-10-07 17:00:00,759,1,759.0,2025-10-04 19:00:00,1.0,True,4,2
...,...,...,...,...,...,...,...,...,...
19459,2025-11-01 18:00:00,26112789,1,26112781.0,2025-11-01 18:00:00,1.0,False,19453,1
18431,2025-11-01 18:00:00,26112791,1,26112789.0,2025-11-01 18:00:00,1.0,False,19454,1
18561,2025-11-01 18:00:00,26112797,1,26112791.0,2025-11-01 18:00:00,1.0,False,19455,1
18579,2025-11-01 18:00:00,26112826,1,26112797.0,2025-11-01 18:00:00,1.0,False,19456,1


In [None]:
# Inspect a single client's rows. `purch_client_crm_id` is numeric in this frame, so the
# id must be compared as an integer; comparing against the string '25643885' matches
# nothing and returns an empty frame.
df_attended[df_attended['purch_client_crm_id'] == 25643885]

Unnamed: 0,event_datetime,purch_client_crm_id,did_attend,prev_client,prev_event,prev_attend,new_streak,streak_group,streak_length
