In [1]:
import pandas as pd
import numpy as np
from catnip.fla_redshift import FLA_Redshift
from sqlalchemy import null
from datetime import datetime

from prefect.blocks.system import Secret
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import random

In [2]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

In [3]:
# get 24-25 Premier data

q = """
WITH zip AS (
    SELECT
        purch_client_crm_id,
        seatgeek_v_clients.email,
        bool_or(is_local) AS is_local
    FROM
        custom.cth_v_ticket_subscription_2425
    LEFT JOIN
        custom.seatgeek_v_clients ON cth_v_ticket_subscription_2425.purch_client_crm_id = seatgeek_v_clients.crm_id
    LEFT JOIN
        custom.golden_record_v_addresses ON seatgeek_v_clients.email = golden_record_v_addresses.email
    WHERE
        ticket_type IN ('Premier')
        and price_type not ilike '%comp%'
    GROUP BY
        purch_client_crm_id,
        seatgeek_v_clients.email
),
touchpoints AS (
    SELECT DISTINCT
        a.sf_contactid
    FROM
        custom.korepss_contacts a
    WHERE
        a.contacttype = 'Member'
),
touchpoints_agg AS (
    SELECT
        a.primary_ticketing_id,
        d.last_activity_date_new__c,
        COUNT(DISTINCT act.activityid) AS touchpoints
    FROM
        custom.korepss_v_contacts a
        LEFT JOIN custom.korepss_contacts b ON a.sf_contactid = b.sf_contactid
        LEFT JOIN custom.korepss_accounts_extension d ON a.sf_accountid = d.sf_accountid
        LEFT JOIN custom.korepss_v_users c ON b.ticketing_service_rep = c.sf_userid
        LEFT JOIN custom.korepss_v_activities act ON a.sf_contactid = act.contactid AND b.ticketing_service_rep = act.ownerid
    WHERE
        a.sf_contactid IN (SELECT sf_contactid FROM touchpoints)
        AND act.completed_on_coalesce >= '7/1/2024'
    GROUP BY
        a.primary_ticketing_id,
        d.last_activity_date_new__c
),
tenure AS (
    SELECT
        purchaser_ticketing_id,
        LISTAGG(DISTINCT season),
        CASE
            WHEN LISTAGG(DISTINCT season) LIKE '2021-222022-232023-242024-25' THEN 4
            WHEN LISTAGG(DISTINCT season) LIKE '2022-232023-242024-25' THEN 3
            WHEN LISTAGG(DISTINCT season) LIKE '2023-242024-25' THEN 2
            WHEN LISTAGG(DISTINCT season) LIKE '%2024-25%' THEN 1
            ELSE 0
        END AS tenure
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Premier')
        AND section_name != 'Panther Bar'
    GROUP BY
        purchaser_ticketing_id
),
attendance_info AS (
    SELECT
        ticketing_id,
        num_games_attended,
        self_show_rate,
        seat_show_rate,
        real_show_rate
    FROM
        custom.cth_v_show_rates
    WHERE
        season = '2024-25'
),
total_spend AS (
    SELECT
        purchaser_ticketing_id,
        SUM(gross_revenue) AS gross_revenue
    FROM
        custom.cth_v_historical_ticket
    WHERE
        season = '2024-25'
    GROUP BY
        purchaser_ticketing_id
),
location_ticket_type AS (
    SELECT
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal,
        COUNT(*) AS num_tickets
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Premier')
        AND season = '2024-25'
    GROUP BY
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal
),
location_ticket_type_agg AS (
    SELECT
        purchaser_ticketing_id,
        arena_level_internal
    FROM
        (SELECT
             purchaser_ticketing_id,
             arena_level_internal,
             ROW_NUMBER() OVER (PARTITION BY purchaser_ticketing_id ORDER BY num_tickets DESC,
                 CASE arena_level_internal
                     WHEN 'Premium' THEN 1
                     WHEN 'Lower' THEN 2
                     WHEN 'Club' THEN 3
                     ELSE 4
                 END) AS rn
         FROM
             location_ticket_type)
    WHERE rn = 1
),
renewal_survey AS (
    SELECT
        formstack_v_stm_renewal_survey_2526.email,
        crm_id,
        MAX(CASE how_likely_are_you_to_renew_your_territory_membership_for_next_season
            WHEN 'Extremely Likely' THEN 3
            WHEN 'Likely' THEN 2
            WHEN 'Neutral' THEN 1
            WHEN 'Unlikely' THEN -1
            WHEN 'Extremely Unlikely' THEN -2
            ELSE 0
        END) AS renewal_likelihood_survey
    FROM
        custom.formstack_v_stm_renewal_survey_2526
    LEFT JOIN
        custom.seatgeek_v_clients on formstack_v_stm_renewal_survey_2526.email = seatgeek_v_clients.email
    GROUP BY
        formstack_v_stm_renewal_survey_2526.email,
        crm_id
)
SELECT DISTINCT
    zip.purch_client_crm_id,
    zip.email,
    CASE
        WHEN zip.is_local = TRUE THEN 1
        ELSE 0
    END AS is_local,
    COALESCE(touchpoints_agg.touchpoints, 0) AS touchpoints,
    tenure.tenure,
    attendance_info.real_show_rate,
    attendance_info.seat_show_rate,
    attendance_info.self_show_rate,
    attendance_info.num_games_attended,
    total_spend.gross_revenue,
    CASE
        WHEN location_ticket_type_agg.arena_level_internal = 'Premium' THEN 1
        WHEN location_ticket_type_agg.arena_level_internal = 'Lowers' THEN 2
        WHEN location_ticket_type_agg.arena_level_internal = 'Clubs' THEN 3
        WHEN location_ticket_type_agg.arena_level_internal = 'Uppers' THEN 4
    END AS arena_level_internal,
    coalesce(renewal_likelihood_survey, 0) as renewal_likelihood_survey
FROM
    zip
LEFT JOIN
    touchpoints_agg 
        ON zip.purch_client_crm_id = touchpoints_agg.primary_ticketing_id
LEFT JOIN
    tenure 
        ON zip.purch_client_crm_id = tenure.purchaser_ticketing_id
LEFT JOIN
    attendance_info 
        ON zip.purch_client_crm_id = attendance_info.ticketing_id
LEFT JOIN
    total_spend 
        ON zip.purch_client_crm_id = total_spend.purchaser_ticketing_id
LEFT JOIN
    location_ticket_type_agg 
        ON zip.purch_client_crm_id = location_ticket_type_agg.purchaser_ticketing_id
LEFT JOIN
    renewal_survey 
        ON zip.purch_client_crm_id = renewal_survey.crm_id
WHERE
    total_spend.gross_revenue > 0
"""

df_premier_2425 = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [4]:
# get 24-25 Full data

q = """
WITH zip AS (
    SELECT
        purch_client_crm_id,
        seatgeek_v_clients.email,
        bool_or(is_local) AS is_local
    FROM
        custom.cth_v_ticket_subscription_2425
    LEFT JOIN
        custom.seatgeek_v_clients ON cth_v_ticket_subscription_2425.purch_client_crm_id = seatgeek_v_clients.crm_id
    LEFT JOIN
        custom.golden_record_v_addresses ON seatgeek_v_clients.email = golden_record_v_addresses.email
    WHERE
        ticket_type IN ('Full')
        and price_type not ilike '%comp%'
    GROUP BY
        purch_client_crm_id,
        seatgeek_v_clients.email
),
touchpoints AS (
    SELECT DISTINCT
        a.sf_contactid
    FROM
        custom.korepss_contacts a
    WHERE
        a.contacttype = 'Member'
),
touchpoints_agg AS (
    SELECT
        a.primary_ticketing_id,
        d.last_activity_date_new__c,
        COUNT(DISTINCT act.activityid) AS touchpoints
    FROM
        custom.korepss_v_contacts a
        LEFT JOIN custom.korepss_contacts b ON a.sf_contactid = b.sf_contactid
        LEFT JOIN custom.korepss_accounts_extension d ON a.sf_accountid = d.sf_accountid
        LEFT JOIN custom.korepss_v_users c ON b.ticketing_service_rep = c.sf_userid
        LEFT JOIN custom.korepss_v_activities act ON a.sf_contactid = act.contactid AND b.ticketing_service_rep = act.ownerid
    WHERE
        a.sf_contactid IN (SELECT sf_contactid FROM touchpoints)
        AND act.completed_on_coalesce >= '7/1/2024'
    GROUP BY
        a.primary_ticketing_id,
        d.last_activity_date_new__c
),
tenure AS (
    SELECT
        purchaser_ticketing_id,
        LISTAGG(DISTINCT season),
        CASE
            WHEN LISTAGG(DISTINCT season) LIKE '2021-222022-232023-242024-25' THEN 4
            WHEN LISTAGG(DISTINCT season) LIKE '2022-232023-242024-25' THEN 3
            WHEN LISTAGG(DISTINCT season) LIKE '2023-242024-25' THEN 2
            WHEN LISTAGG(DISTINCT season) LIKE '%2024-25%' THEN 1
            ELSE 0
        END AS tenure
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Full')
        AND section_name != 'Panther Bar'
    GROUP BY
        purchaser_ticketing_id
),
attendance_info AS (
    SELECT
        ticketing_id,
        num_games_attended,
        self_show_rate,
        seat_show_rate,
        real_show_rate
    FROM
        custom.cth_v_show_rates
    WHERE
        season = '2024-25'
),
total_spend AS (
    SELECT
        purchaser_ticketing_id,
        SUM(gross_revenue) AS gross_revenue
    FROM
        custom.cth_v_historical_ticket
    WHERE
        season = '2024-25'
    GROUP BY
        purchaser_ticketing_id
),
location_ticket_type AS (
    SELECT
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal,
        COUNT(*) AS num_tickets
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Full')
        AND season = '2024-25'
    GROUP BY
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal
),
location_ticket_type_agg AS (
    SELECT
        purchaser_ticketing_id,
        arena_level_internal
    FROM
        (SELECT
             purchaser_ticketing_id,
             arena_level_internal,
             ROW_NUMBER() OVER (PARTITION BY purchaser_ticketing_id ORDER BY num_tickets DESC,
                 CASE arena_level_internal
                     WHEN 'Premium' THEN 1
                     WHEN 'Lower' THEN 2
                     WHEN 'Club' THEN 3
                     ELSE 4
                 END) AS rn
         FROM
             location_ticket_type)
    WHERE rn = 1
),
renewal_survey AS (
    SELECT
        formstack_v_stm_renewal_survey_2526.email,
        crm_id,
        MAX(CASE how_likely_are_you_to_renew_your_territory_membership_for_next_season
            WHEN 'Extremely Likely' THEN 3
            WHEN 'Likely' THEN 2
            WHEN 'Neutral' THEN 1
            WHEN 'Unlikely' THEN -1
            WHEN 'Extremely Unlikely' THEN -2
            ELSE 0
        END) AS renewal_likelihood_survey
    FROM
        custom.formstack_v_stm_renewal_survey_2526
    LEFT JOIN
        custom.seatgeek_v_clients on formstack_v_stm_renewal_survey_2526.email = seatgeek_v_clients.email
    GROUP BY
        formstack_v_stm_renewal_survey_2526.email,
        crm_id
)
SELECT DISTINCT
    zip.purch_client_crm_id,
    zip.email,
    CASE
        WHEN zip.is_local = TRUE THEN 1
        ELSE 0
    END AS is_local,
    COALESCE(touchpoints_agg.touchpoints, 0) AS touchpoints,
    tenure.tenure,
    attendance_info.real_show_rate,
    attendance_info.seat_show_rate,
    attendance_info.self_show_rate,
    attendance_info.num_games_attended,
    total_spend.gross_revenue,
    CASE
        WHEN location_ticket_type_agg.arena_level_internal = 'Premium' THEN 1
        WHEN location_ticket_type_agg.arena_level_internal = 'Lowers' THEN 2
        WHEN location_ticket_type_agg.arena_level_internal = 'Clubs' THEN 3
        WHEN location_ticket_type_agg.arena_level_internal = 'Uppers' THEN 4
    END AS arena_level_internal,
    coalesce(renewal_likelihood_survey, 0) as renewal_likelihood_survey
FROM
    zip
LEFT JOIN
    touchpoints_agg 
        ON zip.purch_client_crm_id = touchpoints_agg.primary_ticketing_id
LEFT JOIN
    tenure 
        ON zip.purch_client_crm_id = tenure.purchaser_ticketing_id
LEFT JOIN
    attendance_info 
        ON zip.purch_client_crm_id = attendance_info.ticketing_id
LEFT JOIN
    total_spend 
        ON zip.purch_client_crm_id = total_spend.purchaser_ticketing_id
LEFT JOIN
    location_ticket_type_agg 
        ON zip.purch_client_crm_id = location_ticket_type_agg.purchaser_ticketing_id
LEFT JOIN
    renewal_survey 
        ON zip.purch_client_crm_id = renewal_survey.crm_id
WHERE
    total_spend.gross_revenue > 0
"""

df_full_2425 = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [5]:
# get 23-24 Full data

q = """
WITH zip AS (
    SELECT
        purch_client_crm_id,
        seatgeek_v_clients.email,
        bool_or(is_local) AS is_local
    FROM
        custom.cth_v_ticket_subscription_2324
    LEFT JOIN
        custom.seatgeek_v_clients ON cth_v_ticket_subscription_2324.purch_client_crm_id = seatgeek_v_clients.crm_id
    LEFT JOIN
        custom.golden_record_v_addresses ON seatgeek_v_clients.email = golden_record_v_addresses.email
    WHERE
        ticket_type IN ('Full')
        and price_type not ilike '%comp%'
    GROUP BY
        purch_client_crm_id,
        seatgeek_v_clients.email
),
touchpoints AS (
    SELECT DISTINCT
        a.sf_contactid
    FROM
        custom.korepss_contacts a
    WHERE
        a.contacttype = 'Member'
),
touchpoints_agg AS (
    SELECT
        a.primary_ticketing_id,
        d.last_activity_date_new__c,
        COUNT(DISTINCT act.activityid) AS touchpoints
    FROM
        custom.korepss_v_contacts a
        LEFT JOIN custom.korepss_contacts b ON a.sf_contactid = b.sf_contactid
        LEFT JOIN custom.korepss_accounts_extension d ON a.sf_accountid = d.sf_accountid
        LEFT JOIN custom.korepss_v_users c ON b.ticketing_service_rep = c.sf_userid
        LEFT JOIN custom.korepss_v_activities act ON a.sf_contactid = act.contactid AND b.ticketing_service_rep = act.ownerid
    WHERE
        a.sf_contactid IN (SELECT sf_contactid FROM touchpoints)
        AND act.completed_on_coalesce >= '7/1/2023'
        AND act.completed_on_coalesce < '7/1/2024'
    GROUP BY
        a.primary_ticketing_id,
        d.last_activity_date_new__c
),
tenure AS (
    SELECT
        purchaser_ticketing_id,
        LISTAGG(DISTINCT season),
        CASE
            WHEN LISTAGG(DISTINCT season) LIKE '2021-222022-232023-24' THEN 3
            WHEN LISTAGG(DISTINCT season) LIKE '2022-232023-24' THEN 2
            WHEN LISTAGG(DISTINCT season) LIKE '%2023-24%' THEN 1
            ELSE 0
        END AS tenure
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Full')
        and section_name != 'Panther Bar'
    GROUP BY
        purchaser_ticketing_id
),
attendance_info AS (
    SELECT
        ticketing_id,
        num_games_attended,
        self_show_rate,
        seat_show_rate,
        real_show_rate
    FROM
        custom.cth_v_show_rates
    WHERE
        season = '2023-24'
),
total_spend AS (
    SELECT
        purchaser_ticketing_id,
        SUM(gross_revenue) AS gross_revenue
    FROM
        custom.cth_v_historical_ticket
    WHERE
        season = '2023-24'
    GROUP BY
        purchaser_ticketing_id
),
location_ticket_type AS (
    SELECT
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal,
        count(*) as num_tickets
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Full')
        AND season = '2023-24'
    GROUP BY
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal
),
location_ticket_type_agg as (
    SELECT
        purchaser_ticketing_id,
        arena_level_internal
    FROM
        (SELECT
             purchaser_ticketing_id,
             arena_level_internal,
             ROW_NUMBER() OVER (PARTITION BY purchaser_ticketing_id ORDER BY num_tickets DESC,
                 CASE arena_level_internal WHEN 'Premium' THEN 1 WHEN 'Lower' THEN 2 WHEN 'Club' THEN 3 ELSE 4 END) AS rn
         FROM
             location_ticket_type)
    WHERE rn = 1
),
did_renew AS (
    SELECT
        subs_2324.purch_client_crm_id,
        CASE
            WHEN subs_2425.purch_client_crm_id IS NULL THEN 0
            ELSE 1
        END AS did_renew
    FROM
        custom.cth_v_ticket_subscription_2324 subs_2324
    LEFT JOIN
        custom.cth_v_ticket_subscription_2425 subs_2425 ON subs_2324.purch_client_crm_id = subs_2425.purch_client_crm_id
    GROUP BY
        subs_2425.purch_client_crm_id,
        subs_2324.purch_client_crm_id
),
renewal_survey AS (
    SELECT
        formstack_v_stm_renewal_survey_2425.email,
        crm_id,
        MAX(CASE how_likely_are_you_to_renew_your_territory_membership_for_next_season
            WHEN 'Extremely Likely' THEN 3
            WHEN 'Likely' THEN 2
            WHEN 'Neutral' THEN 1
            WHEN 'Unlikely' THEN -1
            WHEN 'Extremely Unlikely' THEN -2
            ELSE 0
        END) AS renewal_likelihood_survey
    FROM
        custom.formstack_v_stm_renewal_survey_2425
    LEFT JOIN
        custom.seatgeek_v_clients on formstack_v_stm_renewal_survey_2425.email = seatgeek_v_clients.email
    GROUP BY
        formstack_v_stm_renewal_survey_2425.email,
        crm_id
)
SELECT DISTINCT
    zip.purch_client_crm_id,
    zip.email,
    CASE
        WHEN zip.is_local = TRUE THEN 1
        ELSE 0
    END AS is_local,
    COALESCE(touchpoints_agg.touchpoints,0) AS touchpoints,
    tenure.tenure,
    attendance_info.real_show_rate,
    attendance_info.seat_show_rate,
    attendance_info.self_show_rate,
    attendance_info.num_games_attended,
    total_spend.gross_revenue,
    CASE
        WHEN location_ticket_type_agg.arena_level_internal = 'Premium' THEN 1
        WHEN location_ticket_type_agg.arena_level_internal = 'Lowers' THEN 2
        WHEN location_ticket_type_agg.arena_level_internal = 'Clubs' THEN 3
        WHEN location_ticket_type_agg.arena_level_internal = 'Uppers' THEN 4
    END AS arena_level_internal,
    did_renew.did_renew,
    coalesce(renewal_likelihood_survey, 0) as renewal_likelihood_survey
FROM
    zip
LEFT JOIN
    touchpoints_agg 
        ON zip.purch_client_crm_id = touchpoints_agg.primary_ticketing_id
LEFT JOIN
    tenure 
        ON zip.purch_client_crm_id = tenure.purchaser_ticketing_id
LEFT JOIN
    attendance_info 
        ON zip.purch_client_crm_id = attendance_info.ticketing_id
LEFT JOIN
    total_spend 
        ON zip.purch_client_crm_id = total_spend.purchaser_ticketing_id
LEFT JOIN
    location_ticket_type_agg 
        ON zip.purch_client_crm_id = location_ticket_type_agg.purchaser_ticketing_id
LEFT JOIN
    did_renew 
        ON zip.purch_client_crm_id = did_renew.purch_client_crm_id
LEFT JOIN
    renewal_survey 
        ON zip.purch_client_crm_id = renewal_survey.crm_id
WHERE
    total_spend.gross_revenue > 0
"""

df_full_2324 = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [6]:
# get 23-24 Premier data

Q = """
WITH zip AS (
    SELECT
        purch_client_crm_id,
        seatgeek_v_clients.email,
        bool_or(is_local) AS is_local
    FROM
        custom.cth_v_ticket_subscription_2324
    LEFT JOIN
        custom.seatgeek_v_clients ON cth_v_ticket_subscription_2324.purch_client_crm_id = seatgeek_v_clients.crm_id
    LEFT JOIN
        custom.golden_record_v_addresses ON seatgeek_v_clients.email = golden_record_v_addresses.email
    WHERE
        ticket_type IN ('Premier')
        and price_type not ilike '%comp%'
    GROUP BY
        purch_client_crm_id,
        seatgeek_v_clients.email
),
touchpoints AS (
    SELECT
        DISTINCT a.sf_contactid
    FROM
        custom.korepss_contacts a
    WHERE
        a.contacttype = 'Member'
),
touchpoints_agg AS (
    SELECT
        a.primary_ticketing_id,
        d.last_activity_date_new__c,
        COUNT(DISTINCT act.activityid) AS touchpoints
    FROM
        custom.korepss_v_contacts a
        LEFT JOIN custom.korepss_contacts b ON a.sf_contactid = b.sf_contactid
        LEFT JOIN custom.korepss_accounts_extension d ON a.sf_accountid = d.sf_accountid
        LEFT JOIN custom.korepss_v_users c ON b.ticketing_service_rep = c.sf_userid
        LEFT JOIN custom.korepss_v_activities act ON a.sf_contactid = act.contactid AND b.ticketing_service_rep = act.ownerid
    WHERE
        a.sf_contactid IN (SELECT sf_contactid FROM touchpoints)
        AND act.completed_on_coalesce >= '7/1/2023'
        AND act.completed_on_coalesce < '7/1/2024'
    GROUP BY
        a.primary_ticketing_id,
        d.last_activity_date_new__c
),
tenure AS (
    SELECT
        purchaser_ticketing_id,
        LISTAGG(DISTINCT season),
        CASE
            WHEN LISTAGG(DISTINCT season) LIKE '2021-222022-232023-24' THEN 3
            WHEN LISTAGG(DISTINCT season) LIKE '2022-232023-24' THEN 2
            WHEN LISTAGG(DISTINCT season) LIKE '%2023-24%' THEN 1
            ELSE 0
        END AS tenure
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Premier')
        AND section_name != 'Panther Bar'
    GROUP BY
        purchaser_ticketing_id
),
attendance_info AS (
    SELECT
        ticketing_id,
        num_games_attended,
        self_show_rate,
        seat_show_rate,
        real_show_rate
    FROM
        custom.cth_v_show_rates
    WHERE
        season = '2023-24'
),
total_spend AS (
    SELECT
        purchaser_ticketing_id,
        SUM(gross_revenue) AS gross_revenue
    FROM
        custom.cth_v_historical_ticket
    WHERE
        season = '2023-24'
    GROUP BY
        purchaser_ticketing_id
),
location_ticket_type AS (
    SELECT
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal,
        count(*) as num_tickets
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Premier')
        AND season = '2023-24'
    GROUP BY
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal
),
location_ticket_type_agg as (
    SELECT
        purchaser_ticketing_id,
        arena_level_internal
    FROM
        (SELECT
             purchaser_ticketing_id,
             arena_level_internal,
             ROW_NUMBER() OVER (PARTITION BY purchaser_ticketing_id ORDER BY num_tickets DESC,
                 CASE arena_level_internal WHEN 'Premium' THEN 1 WHEN 'Lower' THEN 2 WHEN 'Club' THEN 3 ELSE 4 END) AS rn
         FROM
             location_ticket_type)
    WHERE rn = 1
),
did_renew AS (
    SELECT
        subs_2324.purch_client_crm_id,
        CASE
            WHEN subs_2425.purch_client_crm_id IS NULL THEN 0
            ELSE 1
        END AS did_renew
    FROM
        custom.cth_v_ticket_subscription_2324 subs_2324
    LEFT JOIN
        custom.cth_v_ticket_subscription_2425 subs_2425 ON subs_2324.purch_client_crm_id = subs_2425.purch_client_crm_id
    GROUP BY
        subs_2425.purch_client_crm_id,
        subs_2324.purch_client_crm_id
),
renewal_survey AS (
    SELECT
        formstack_v_stm_renewal_survey_2425.email,
        crm_id,
        MAX(CASE how_likely_are_you_to_renew_your_territory_membership_for_next_season
            WHEN 'Extremely Likely' THEN 3
            WHEN 'Likely' THEN 2
            WHEN 'Neutral' THEN 1
            WHEN 'Unlikely' THEN -1
            WHEN 'Extremely Unlikely' THEN -2
            ELSE 0
        END) AS renewal_likelihood_survey
    FROM
        custom.formstack_v_stm_renewal_survey_2425
    LEFT JOIN
        custom.seatgeek_v_clients on formstack_v_stm_renewal_survey_2425.email = seatgeek_v_clients.email
    GROUP BY
        formstack_v_stm_renewal_survey_2425.email,
        crm_id
)
SELECT DISTINCT
    zip.purch_client_crm_id,
    zip.email,
    CASE
        WHEN zip.is_local = TRUE THEN 1
        ELSE 0
    END AS is_local,
    COALESCE(touchpoints_agg.touchpoints,0) AS touchpoints,
    tenure.tenure,
    attendance_info.real_show_rate,
    attendance_info.seat_show_rate,
    attendance_info.self_show_rate,
    attendance_info.num_games_attended,
    total_spend.gross_revenue,
    CASE
        WHEN location_ticket_type_agg.arena_level_internal = 'Premium' THEN 1
        WHEN location_ticket_type_agg.arena_level_internal = 'Lowers' THEN 2
        WHEN location_ticket_type_agg.arena_level_internal = 'Clubs' THEN 3
        WHEN location_ticket_type_agg.arena_level_internal = 'Uppers' THEN 4
    END AS arena_level_internal,
    did_renew.did_renew,
    coalesce(renewal_likelihood_survey, 0) as renewal_likelihood_survey
FROM
    zip
LEFT JOIN
    touchpoints_agg
        ON zip.purch_client_crm_id = touchpoints_agg.primary_ticketing_id
LEFT JOIN
    tenure
        ON zip.purch_client_crm_id = tenure.purchaser_ticketing_id
LEFT JOIN
    attendance_info
        ON zip.purch_client_crm_id = attendance_info.ticketing_id
LEFT JOIN
    total_spend
        ON zip.purch_client_crm_id = total_spend.purchaser_ticketing_id
LEFT JOIN
    location_ticket_type_agg
        ON zip.purch_client_crm_id = location_ticket_type_agg.purchaser_ticketing_id
LEFT JOIN
    did_renew
        ON zip.purch_client_crm_id = did_renew.purch_client_crm_id
LEFT JOIN
    renewal_survey
        ON zip.purch_client_crm_id = renewal_survey.crm_id
WHERE
    total_spend.gross_revenue > 0
"""

df_premier_2324 = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [7]:
# segment touchpoints 

df_premier_2324['touchpoints_agg'] = pd.cut(df_premier_2324['touchpoints'], bins = [-np.inf,0,3,7,15,np.inf], labels = [0,1,2,3,4])

df_full_2324['touchpoints_agg'] = pd.cut(df_full_2324['touchpoints'], bins = [-np.inf,0,3,7,15,np.inf], labels = [0,1,2,3,4])

df_premier_2425['touchpoints_agg'] = pd.cut(df_premier_2425['touchpoints'], bins = [-np.inf,0,3,7,15,np.inf], labels = [0,1,2,3,4])

df_full_2425['touchpoints_agg'] = pd.cut(df_full_2425['touchpoints'], bins = [-np.inf,0,3,7,15,np.inf], labels = [0,1,2,3,4])

In [8]:
# segment gross_revenue

df_premier_2324['gross_revenue_agg'] = pd.cut(df_premier_2324['gross_revenue'], bins = [-np.inf,0,2500,5000,10000,20000,np.inf], labels = [0,1,2,3,4,5])

df_full_2324['gross_revenue_agg'] = pd.cut(df_full_2324['gross_revenue'], bins = [-np.inf,0,5000,10000,20000,40000,np.inf], labels = [0,1,2,3,4,5])

df_premier_2425['gross_revenue_agg'] = pd.cut(df_premier_2425['gross_revenue'], bins = [-np.inf,0,2500,5000,10000,20000,np.inf], labels = [0,1,2,3,4,5])

df_full_2425['gross_revenue_agg'] = pd.cut(df_full_2425['gross_revenue'], bins = [-np.inf,0,5000,10000,20000,40000,np.inf], labels = [0,1,2,3,4,5])

In [9]:
# break into x and y data

x_premier_train = df_premier_2324[['is_local','tenure','real_show_rate','seat_show_rate','self_show_rate','num_games_attended','gross_revenue_agg', 'arena_level_internal', 'renewal_likelihood_survey']]
y_premier = df_premier_2324[['did_renew']]

x_premier_test = df_premier_2425[['is_local','tenure','real_show_rate','seat_show_rate','self_show_rate','num_games_attended','gross_revenue_agg', 'arena_level_internal', 'renewal_likelihood_survey']]
premier_acct_ids = df_premier_2425[['purch_client_crm_id', 'email', 'gross_revenue', 'touchpoints']]

x_full_train = df_full_2324[['is_local','tenure','real_show_rate','seat_show_rate','self_show_rate','num_games_attended','gross_revenue_agg', 'arena_level_internal', 'renewal_likelihood_survey']]
y_full = df_full_2324[['did_renew']]

x_full_test = df_full_2425[['is_local', 'tenure','real_show_rate','seat_show_rate','self_show_rate','num_games_attended','gross_revenue_agg', 'arena_level_internal', 'renewal_likelihood_survey']]
full_acct_ids = df_full_2425[['purch_client_crm_id', 'email', 'gross_revenue', 'touchpoints']]

In [10]:
# create monte carlo simulation for model

def run_log_reg(ticket_type, x_train, x_test, y, acct_ids, num_simulations):

    ss = StandardScaler()
    x_train_scaler = ss.fit_transform(x_train)
    x_test_scaler = ss.fit_transform(x_test)

    if ticket_type == 'Full':
        weights = [1,1,2,2,1,1,1,1,2]
    else:
        weights = [1,1,1.5,1.5,1,1,1,1,2]
    x_train_scaler = x_train_scaler*weights
    x_test_scaler = x_test_scaler*weights

    predicted_probs = np.zeros((x_test_scaler.shape[0], num_simulations))

    for i in range(num_simulations):

        logi = LogisticRegression().fit(x_train_scaler, np.array(y))
        predicted_probs[:,i] = logi.predict_proba(x_test_scaler)[:,1]
    
    avg_predicted_probs = np.mean(predicted_probs, axis = 1)

    result_df = x_test
    result_df['avg_predicted_prob'] = avg_predicted_probs
    result_df = pd.concat([acct_ids, result_df], axis = 1)

    return result_df

In [11]:
# run Full model

full_final_df = run_log_reg('Full', x_full_train, x_full_test, y_full, full_acct_ids, 1000)

subtract = [random.uniform(.09, .11) for i in range(len(full_final_df))]

full_final_df['avg_predicted_prob'] = [value - subtract_value if value >= 0.12 else value for value, subtract_value in zip(full_final_df['avg_predicted_prob'], subtract)]

full_final_df['ticket_type'] = 'Full'

full_final_df = full_final_df.drop(['gross_revenue_agg'], axis = 1)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

In [12]:
# remap arena locations to numerical values

survey_map = {
    1 : 'Premium',
    2 : 'Lowers',
    3 : 'Clubs',
    4 : 'Uppers'
}

full_final_df['arena_level_internal'] = full_final_df.apply(
    lambda row: survey_map.get(row['arena_level_internal'], 0), axis=1)

In [13]:
# run Premier model

premier_final_df = run_log_reg('Premier', x_premier_train, x_premier_test, y_premier, premier_acct_ids, 1000)

subtract = [random.uniform(.09, .11) for i in range(len(premier_final_df))]

premier_final_df['avg_predicted_prob'] = [value - subtract_value if value >= 0.12 else value for value, subtract_value in zip(premier_final_df['avg_predicted_prob'], subtract)]

premier_final_df['ticket_type'] = 'Premier'

premier_final_df = premier_final_df.drop(['gross_revenue_agg'], axis = 1)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

In [14]:
# remap arena locations to numerical values

survey_map = {
    1 : 'Premium',
    2 : 'Lowers',
    3 : 'Clubs',
    4 : 'Uppers'
}

premier_final_df['arena_level_internal'] = premier_final_df.apply(
    lambda row: survey_map.get(row['arena_level_internal'], 0), axis=1)

In [15]:
# concat fulls and premiers and write to the warehouse 

final_overall_df = pd.concat([full_final_df, premier_final_df])

FLA_Redshift(**rs_creds).write_to_warehouse(df = final_overall_df, table_name= "forecasting_renewal_scoring_2526")