In [1]:
import pandas as pd
import numpy as np
from catnip.fla_redshift import FLA_Redshift
from sqlalchemy import null
from datetime import datetime

from prefect.blocks.system import Secret
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

In [2]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

In [16]:
q = """
WITH formstack_data AS (
    SELECT
        lower(email_address) AS email,
        form_name,
        min(date(submission_timestamp)) AS formstack_date
    FROM
        custom.formstack_v_community_events_all
    GROUP BY
        email,
        form_name),
ticket_data AS (
    SELECT
        lower(email) as email,
        min(date(transaction_datetime)) AS min_ticket_date
    FROM
        custom.cth_v_historical_ticket ticket
    JOIN
        custom.korepss_externalsystemtocontact ext ON ticket.purchaser_ticketing_id = ext.externalcontactid
    JOIN
        custom.korepss_contacts contacts ON ext.crmcontactid = contacts.contactid
    GROUP BY
        email
)
SELECT
    *
FROM
    formstack_data
LEFT JOIN
    ticket_data USING (email)
"""

df = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [18]:
df['min_ticket_date'] = df['min_ticket_date'].fillna(0)

df['pre_formstack_email'] = df.apply(lambda row: 0 if row['min_ticket_date'] == 0 else (1 if row['formstack_date'] < row['min_ticket_date'] else 0), axis = 1)
df['post_formstack_email'] = df.apply(lambda row: 0 if row['min_ticket_date'] == 0 else (1 if row['formstack_date'] >= row['min_ticket_date'] else 0), axis = 1)
df['no_tickets'] = df.apply(lambda row: 1 if row['min_ticket_date'] == 0 else 0, axis = 1)

In [21]:
df['post_formstack_email'].sum()

9928