In [1]:
import pandas as pd
import numpy as np
from catnip.fla_redshift import FLA_Redshift
from sqlalchemy import null
from datetime import datetime

from prefect.blocks.system import Secret
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [2]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

In [3]:
q = """
WITH zip AS (
    SELECT
        purch_client_crm_id,
        section,
        zip AS zip_code,
        counties
    FROM
        custom.cth_v_ticket_2425
    LEFT JOIN
        custom.seatgeek_v_clients ON cth_v_ticket_2425.purch_client_crm_id = seatgeek_v_clients.crm_id
    LEFT JOIN
        custom.golden_record_v_addresses ON seatgeek_v_clients.email = golden_record_v_addresses.email
    WHERE
        ticket_type IN ('Premier')
        AND purch_client_crm_id IN (SELECT DISTINCT purch_client_crm_id FROM custom.cth_v_ticket_subscription_2425)
    GROUP BY
        purch_client_crm_id,
        section,
        zip,
        counties
),
touchpoints AS (
    SELECT DISTINCT
        a.sf_contactid
    FROM
        custom.korepss_contacts a
    WHERE
        a.contacttype = 'Member'
),
touchpoints_agg AS (
    SELECT
        a.primary_ticketing_id,
        d.last_activity_date_new__c,
        COUNT(DISTINCT act.activityid) AS touchpoints
    FROM
        custom.korepss_v_contacts a
        LEFT JOIN custom.korepss_contacts b ON a.sf_contactid = b.sf_contactid
        LEFT JOIN custom.korepss_accounts_extension d ON a.sf_accountid = d.sf_accountid
        LEFT JOIN custom.korepss_v_users c ON b.ticketing_service_rep = c.sf_userid
        LEFT JOIN custom.korepss_v_activities act ON a.sf_contactid = act.contactid AND b.ticketing_service_rep = act.ownerid
    WHERE
        a.sf_contactid IN (SELECT sf_contactid FROM touchpoints)
        AND act.completed_on_coalesce >= '7/1/2024'
    GROUP BY
        a.primary_ticketing_id,
        d.last_activity_date_new__c
),
tenure AS (
    SELECT
        purchaser_ticketing_id,
        LISTAGG(DISTINCT season),
        CASE
            WHEN LISTAGG(DISTINCT season) LIKE '2021-222022-232023-242024-25' THEN 4
            WHEN LISTAGG(DISTINCT season) LIKE '2022-232023-242024-25' THEN 3
            WHEN LISTAGG(DISTINCT season) LIKE '2023-242024-25' THEN 2
            WHEN LISTAGG(DISTINCT season) LIKE '%2024-25%' THEN 1
            ELSE 0
        END AS tenure
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Premier')
        AND section_name != 'Panther Bar'
    GROUP BY
        purchaser_ticketing_id
),
attendance_info AS (
    SELECT
        ticketing_id,
        num_games_attended,
        self_show_rate,
        seat_show_rate,
        real_show_rate
    FROM
        custom.cth_v_show_rates
    WHERE
        season = '2024-25'
),
total_spend AS (
    SELECT
        purchaser_ticketing_id,
        SUM(gross_revenue) AS gross_revenue
    FROM
        custom.cth_v_historical_ticket
    WHERE
        season = '2024-25'
    GROUP BY
        purchaser_ticketing_id
),
location_ticket_type AS (
    SELECT
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal,
        COUNT(*) AS num_tickets
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Premier')
        AND season = '2024-25'
    GROUP BY
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal
),
location_ticket_type_agg AS (
    SELECT
        purchaser_ticketing_id,
        arena_level_internal
    FROM
        (SELECT
             purchaser_ticketing_id,
             arena_level_internal,
             ROW_NUMBER() OVER (PARTITION BY purchaser_ticketing_id ORDER BY num_tickets DESC,
                 CASE arena_level_internal
                     WHEN 'Premium' THEN 1
                     WHEN 'Lower' THEN 2
                     WHEN 'Club' THEN 3
                     ELSE 4
                 END) AS rn
         FROM
             location_ticket_type)
    WHERE rn = 1
)
SELECT
    zip.purch_client_crm_id,
    CASE
        WHEN counties LIKE '%Broward%'
        OR counties LIKE '%Miami-Dade%'
        OR counties LIKE '%Palm Beach%'
        THEN 1
        ELSE 0
    END AS is_local,
    COALESCE(touchpoints_agg.touchpoints, 0) AS touchpoints,
    tenure.tenure,
    attendance_info.real_show_rate,
    attendance_info.seat_show_rate,
    attendance_info.self_show_rate,
    attendance_info.num_games_attended,
    total_spend.gross_revenue,
    CASE
        WHEN location_ticket_type_agg.arena_level_internal = 'Premium' THEN 1
        WHEN location_ticket_type_agg.arena_level_internal = 'Lowers' THEN 2
        WHEN location_ticket_type_agg.arena_level_internal = 'Clubs' THEN 3
        WHEN location_ticket_type_agg.arena_level_internal = 'Uppers' THEN 4
    END AS arena_level_internal
FROM
    zip
LEFT JOIN
    touchpoints_agg ON zip.purch_client_crm_id = touchpoints_agg.primary_ticketing_id
LEFT JOIN
    tenure ON zip.purch_client_crm_id = tenure.purchaser_ticketing_id
LEFT JOIN
    attendance_info ON zip.purch_client_crm_id = attendance_info.ticketing_id
LEFT JOIN
    total_spend ON zip.purch_client_crm_id = total_spend.purchaser_ticketing_id
LEFT JOIN
    location_ticket_type_agg ON zip.purch_client_crm_id = location_ticket_type_agg.purchaser_ticketing_id
"""

df_premier_2425 = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [4]:
q = """
WITH zip AS (
    SELECT
        purch_client_crm_id,
        section,
        zip AS zip_code,
        counties
    FROM
        custom.cth_v_ticket_2425
    LEFT JOIN
        custom.seatgeek_v_clients ON cth_v_ticket_2425.purch_client_crm_id = seatgeek_v_clients.crm_id
    LEFT JOIN
        custom.golden_record_v_addresses ON seatgeek_v_clients.email = golden_record_v_addresses.email
    WHERE
        ticket_type IN ('Full')
        AND purch_client_crm_id IN (SELECT DISTINCT purch_client_crm_id FROM custom.cth_v_ticket_subscription_2425)
    GROUP BY
        purch_client_crm_id,
        section,
        zip,
        counties
),
touchpoints AS (
    SELECT DISTINCT
        a.sf_contactid
    FROM
        custom.korepss_contacts a
    WHERE
        a.contacttype = 'Member'
),
touchpoints_agg AS (
    SELECT
        a.primary_ticketing_id,
        d.last_activity_date_new__c,
        COUNT(DISTINCT act.activityid) AS touchpoints
    FROM
        custom.korepss_v_contacts a
        LEFT JOIN custom.korepss_contacts b ON a.sf_contactid = b.sf_contactid
        LEFT JOIN custom.korepss_accounts_extension d ON a.sf_accountid = d.sf_accountid
        LEFT JOIN custom.korepss_v_users c ON b.ticketing_service_rep = c.sf_userid
        LEFT JOIN custom.korepss_v_activities act ON a.sf_contactid = act.contactid AND b.ticketing_service_rep = act.ownerid
    WHERE
        a.sf_contactid IN (SELECT sf_contactid FROM touchpoints)
        AND act.completed_on_coalesce >= '7/1/2024'
    GROUP BY
        a.primary_ticketing_id,
        d.last_activity_date_new__c
),
tenure AS (
    SELECT
        purchaser_ticketing_id,
        LISTAGG(DISTINCT season),
        CASE
            WHEN LISTAGG(DISTINCT season) LIKE '2021-222022-232023-242024-25' THEN 4
            WHEN LISTAGG(DISTINCT season) LIKE '2022-232023-242024-25' THEN 3
            WHEN LISTAGG(DISTINCT season) LIKE '2023-242024-25' THEN 2
            WHEN LISTAGG(DISTINCT season) LIKE '%2024-25%' THEN 1
            ELSE 0
        END AS tenure
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Full')
        AND section_name != 'Panther Bar'
    GROUP BY
        purchaser_ticketing_id
),
attendance_info AS (
    SELECT
        ticketing_id,
        num_games_attended,
        self_show_rate,
        seat_show_rate,
        real_show_rate
    FROM
        custom.cth_v_show_rates
    WHERE
        season = '2024-25'
),
total_spend AS (
    SELECT
        purchaser_ticketing_id,
        SUM(gross_revenue) AS gross_revenue
    FROM
        custom.cth_v_historical_ticket
    WHERE
        season = '2024-25'
    GROUP BY
        purchaser_ticketing_id
),
location_ticket_type AS (
    SELECT
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal,
        COUNT(*) AS num_tickets
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Full')
        AND season = '2024-25'
    GROUP BY
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal
),
location_ticket_type_agg AS (
    SELECT
        purchaser_ticketing_id,
        arena_level_internal
    FROM
        (SELECT
             purchaser_ticketing_id,
             arena_level_internal,
             ROW_NUMBER() OVER (PARTITION BY purchaser_ticketing_id ORDER BY num_tickets DESC,
                 CASE arena_level_internal
                     WHEN 'Premium' THEN 1
                     WHEN 'Lower' THEN 2
                     WHEN 'Club' THEN 3
                     ELSE 4
                 END) AS rn
         FROM
             location_ticket_type)
    WHERE rn = 1
)
SELECT
    zip.purch_client_crm_id,
    CASE
        WHEN counties LIKE '%Broward%'
        OR counties LIKE '%Miami-Dade%'
        OR counties LIKE '%Palm Beach%'
        THEN 1
        ELSE 0
    END AS is_local,
    COALESCE(touchpoints_agg.touchpoints, 0) AS touchpoints,
    tenure.tenure,
    attendance_info.real_show_rate,
    attendance_info.seat_show_rate,
    attendance_info.self_show_rate,
    attendance_info.num_games_attended,
    total_spend.gross_revenue,
    CASE
        WHEN location_ticket_type_agg.arena_level_internal = 'Premium' THEN 1
        WHEN location_ticket_type_agg.arena_level_internal = 'Lowers' THEN 2
        WHEN location_ticket_type_agg.arena_level_internal = 'Clubs' THEN 3
        WHEN location_ticket_type_agg.arena_level_internal = 'Uppers' THEN 4
    END AS arena_level_internal
FROM
    zip
LEFT JOIN
    touchpoints_agg ON zip.purch_client_crm_id = touchpoints_agg.primary_ticketing_id
LEFT JOIN
    tenure ON zip.purch_client_crm_id = tenure.purchaser_ticketing_id
LEFT JOIN
    attendance_info ON zip.purch_client_crm_id = attendance_info.ticketing_id
LEFT JOIN
    total_spend ON zip.purch_client_crm_id = total_spend.purchaser_ticketing_id
LEFT JOIN
    location_ticket_type_agg ON zip.purch_client_crm_id = location_ticket_type_agg.purchaser_ticketing_id
"""

df_full_2425 = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [5]:
q = """
WITH zip AS (
    SELECT
        purch_client_crm_id,
        section,
        zip AS zip_code,
        counties
    FROM
        custom.cth_v_ticket_2324
    LEFT JOIN
        custom.seatgeek_v_clients ON cth_v_ticket_2324.purch_client_crm_id = seatgeek_v_clients.crm_id
    LEFT JOIN
        custom.golden_record_v_addresses ON seatgeek_v_clients.email = golden_record_v_addresses.email
    WHERE
        ticket_type IN ('Full')
        AND purch_client_crm_id IN (SELECT DISTINCT purch_client_crm_id FROM custom.cth_v_ticket_subscription_2324)
    GROUP BY
        purch_client_crm_id,
        section,
        zip,
        counties
),
touchpoints AS (
    SELECT DISTINCT
        a.sf_contactid
    FROM
        custom.korepss_contacts a
    WHERE
        a.contacttype = 'Member'
),
touchpoints_agg AS (
    SELECT
        a.primary_ticketing_id,
        d.last_activity_date_new__c,
        COUNT(DISTINCT act.activityid) AS touchpoints
    FROM
        custom.korepss_v_contacts a
        LEFT JOIN custom.korepss_contacts b ON a.sf_contactid = b.sf_contactid
        LEFT JOIN custom.korepss_accounts_extension d ON a.sf_accountid = d.sf_accountid
        LEFT JOIN custom.korepss_v_users c ON b.ticketing_service_rep = c.sf_userid
        LEFT JOIN custom.korepss_v_activities act ON a.sf_contactid = act.contactid AND b.ticketing_service_rep = act.ownerid
    WHERE
        a.sf_contactid IN (SELECT sf_contactid FROM touchpoints)
        AND act.completed_on_coalesce >= '7/1/2023'
        AND act.completed_on_coalesce < '7/1/2024'
    GROUP BY
        a.primary_ticketing_id,
        d.last_activity_date_new__c
),
tenure AS (
    SELECT
        purchaser_ticketing_id,
        LISTAGG(DISTINCT season),
        CASE
            WHEN LISTAGG(DISTINCT season) LIKE '2021-222022-232023-24' THEN 3
            WHEN LISTAGG(DISTINCT season) LIKE '2022-232023-24' THEN 2
            WHEN LISTAGG(DISTINCT season) LIKE '%2023-24%' THEN 1
            ELSE 0
        END AS tenure
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Full')
        and section_name != 'Panther Bar'
    GROUP BY
        purchaser_ticketing_id
),
attendance_info AS (
    SELECT
        ticketing_id,
        num_games_attended,
        self_show_rate,
        seat_show_rate,
        real_show_rate
    FROM
        custom.cth_v_show_rates
    WHERE
        season = '2023-24'
),
total_spend AS (
    SELECT
        purchaser_ticketing_id,
        SUM(gross_revenue) AS gross_revenue
    FROM
        custom.cth_v_historical_ticket
    WHERE
        season = '2023-24'
    GROUP BY
        purchaser_ticketing_id
),
location_ticket_type AS (
    SELECT
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal,
        count(*) as num_tickets
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Full')
        AND season = '2023-24'
    GROUP BY
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal
),
location_ticket_type_agg as (
    SELECT
        purchaser_ticketing_id,
        arena_level_internal
    FROM
        (SELECT
             purchaser_ticketing_id,
             arena_level_internal,
             ROW_NUMBER() OVER (PARTITION BY purchaser_ticketing_id ORDER BY num_tickets DESC,
                 CASE arena_level_internal WHEN 'Premium' THEN 1 WHEN 'Lower' THEN 2 WHEN 'Club' THEN 3 ELSE 4 END) AS rn
         FROM
             location_ticket_type)
    WHERE rn = 1
),
did_renew AS (
    SELECT
        subs_2324.purch_client_crm_id,
        CASE
            WHEN subs_2425.purch_client_crm_id IS NULL THEN 0
            ELSE 1
        END AS did_renew
    FROM
        custom.cth_v_ticket_subscription_2324 subs_2324
    LEFT JOIN
        custom.cth_v_ticket_subscription_2425 subs_2425 ON subs_2324.purch_client_crm_id = subs_2425.purch_client_crm_id
    GROUP BY
        subs_2425.purch_client_crm_id,
        subs_2324.purch_client_crm_id
)
SELECT
    zip.purch_client_crm_id,
    CASE
        WHEN counties LIKE '%Broward%'
        OR counties LIKE '%Miami-Dade%'
        OR counties LIKE '%Palm Beach%'
        THEN 1
        ELSE 0
    END AS is_local,
    COALESCE(touchpoints_agg.touchpoints,0) AS touchpoints,
    tenure.tenure,
    attendance_info.real_show_rate,
    attendance_info.seat_show_rate,
    attendance_info.self_show_rate,
    attendance_info.num_games_attended,
    total_spend.gross_revenue,
    CASE
        WHEN location_ticket_type_agg.arena_level_internal = 'Premium' THEN 1
        WHEN location_ticket_type_agg.arena_level_internal = 'Lowers' THEN 2
        WHEN location_ticket_type_agg.arena_level_internal = 'Clubs' THEN 3
        WHEN location_ticket_type_agg.arena_level_internal = 'Uppers' THEN 4
    END AS arena_level_internal,
    did_renew.did_renew
FROM
    zip
LEFT JOIN
    touchpoints_agg ON zip.purch_client_crm_id = touchpoints_agg.primary_ticketing_id
LEFT JOIN
    tenure ON zip.purch_client_crm_id = tenure.purchaser_ticketing_id
LEFT JOIN
    attendance_info ON zip.purch_client_crm_id = attendance_info.ticketing_id
LEFT JOIN
    total_spend ON zip.purch_client_crm_id = total_spend.purchaser_ticketing_id
LEFT JOIN
    location_ticket_type_agg ON zip.purch_client_crm_id = location_ticket_type_agg.purchaser_ticketing_id
LEFT JOIN
    did_renew ON zip.purch_client_crm_id = did_renew.purch_client_crm_id
"""

df_full_2324 = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [6]:
Q = """
WITH zip AS (
    SELECT
        purch_client_crm_id,
        section,
        zip AS zip_code,
        counties
    FROM
        custom.cth_v_ticket_2324
    LEFT JOIN
        custom.seatgeek_v_clients ON cth_v_ticket_2324.purch_client_crm_id = seatgeek_v_clients.crm_id
    LEFT JOIN
        custom.golden_record_v_addresses ON seatgeek_v_clients.email = golden_record_v_addresses.email
    WHERE
        ticket_type IN ('Premier')
        AND purch_client_crm_id IN (SELECT DISTINCT purch_client_crm_id FROM custom.cth_v_ticket_subscription_2324)
    GROUP BY
        purch_client_crm_id,
        section,
        zip,
        counties
),
touchpoints AS (
    SELECT 
        DISTINCT a.sf_contactid
    FROM
        custom.korepss_contacts a
    WHERE
        a.contacttype = 'Member'
),
touchpoints_agg AS (
    SELECT
        a.primary_ticketing_id,
        d.last_activity_date_new__c,
        COUNT(DISTINCT act.activityid) AS touchpoints
    FROM
        custom.korepss_v_contacts a
        LEFT JOIN custom.korepss_contacts b ON a.sf_contactid = b.sf_contactid
        LEFT JOIN custom.korepss_accounts_extension d ON a.sf_accountid = d.sf_accountid
        LEFT JOIN custom.korepss_v_users c ON b.ticketing_service_rep = c.sf_userid
        LEFT JOIN custom.korepss_v_activities act ON a.sf_contactid = act.contactid AND b.ticketing_service_rep = act.ownerid
    WHERE
        a.sf_contactid IN (SELECT sf_contactid FROM touchpoints)
        AND act.completed_on_coalesce >= '7/1/2023'
        AND act.completed_on_coalesce < '7/1/2024'
    GROUP BY
        a.primary_ticketing_id,
        d.last_activity_date_new__c
),
tenure AS (
    SELECT
        purchaser_ticketing_id,
        LISTAGG(DISTINCT season),
        CASE
            WHEN LISTAGG(DISTINCT season) LIKE '2021-222022-232023-24' THEN 3
            WHEN LISTAGG(DISTINCT season) LIKE '2022-232023-24' THEN 2
            WHEN LISTAGG(DISTINCT season) LIKE '%2023-24%' THEN 1
            ELSE 0
        END AS tenure
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Premier')
        AND section_name != 'Panther Bar'
    GROUP BY
        purchaser_ticketing_id
),
attendance_info AS (
    SELECT
        ticketing_id,
        num_games_attended,
        self_show_rate,
        seat_show_rate,
        real_show_rate
    FROM
        custom.cth_v_show_rates
    WHERE
        season = '2023-24'
),
total_spend AS (
    SELECT
        purchaser_ticketing_id,
        SUM(gross_revenue) AS gross_revenue
    FROM
        custom.cth_v_historical_ticket
    WHERE
        season = '2023-24'
    GROUP BY
        purchaser_ticketing_id
),
location_ticket_type AS (
    SELECT
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal,
        count(*) as num_tickets
    FROM
        custom.cth_v_historical_ticket
    WHERE
        ticket_type IN ('Premier')
        AND season = '2023-24'
    GROUP BY
        purchaser_ticketing_id,
        ticket_type,
        arena_level_internal
),
location_ticket_type_agg as (
    SELECT
        purchaser_ticketing_id,
        arena_level_internal
    FROM
        (SELECT
             purchaser_ticketing_id,
             arena_level_internal,
             ROW_NUMBER() OVER (PARTITION BY purchaser_ticketing_id ORDER BY num_tickets DESC,
                 CASE arena_level_internal WHEN 'Premium' THEN 1 WHEN 'Lower' THEN 2 WHEN 'Club' THEN 3 ELSE 4 END) AS rn
         FROM
             location_ticket_type)
    WHERE rn = 1
),
did_renew AS (
    SELECT
        subs_2324.purch_client_crm_id,
        CASE
            WHEN subs_2425.purch_client_crm_id IS NULL THEN 0
            ELSE 1
        END AS did_renew
    FROM
        custom.cth_v_ticket_subscription_2324 subs_2324
    LEFT JOIN
        custom.cth_v_ticket_subscription_2425 subs_2425 ON subs_2324.purch_client_crm_id = subs_2425.purch_client_crm_id
    GROUP BY
        subs_2425.purch_client_crm_id,
        subs_2324.purch_client_crm_id
)
SELECT
    zip.purch_client_crm_id,
    CASE
        WHEN counties LIKE '%Broward%'
        OR counties LIKE '%Miami-Dade%'
        OR counties LIKE '%Palm Beach%'
        THEN 1
        ELSE 0
    END AS is_local,
    COALESCE(touchpoints_agg.touchpoints,0) AS touchpoints,
    tenure.tenure,
    attendance_info.real_show_rate,
    attendance_info.seat_show_rate,
    attendance_info.self_show_rate,
    attendance_info.num_games_attended,
    total_spend.gross_revenue,
    CASE
        WHEN location_ticket_type_agg.arena_level_internal = 'Premium' THEN 1
        WHEN location_ticket_type_agg.arena_level_internal = 'Lowers' THEN 2
        WHEN location_ticket_type_agg.arena_level_internal = 'Clubs' THEN 3
        WHEN location_ticket_type_agg.arena_level_internal = 'Uppers' THEN 4
    END AS arena_level_internal,
    did_renew.did_renew
FROM
    zip
LEFT JOIN
    touchpoints_agg ON zip.purch_client_crm_id = touchpoints_agg.primary_ticketing_id
LEFT JOIN
    tenure ON zip.purch_client_crm_id = tenure.purchaser_ticketing_id
LEFT JOIN
    attendance_info ON zip.purch_client_crm_id = attendance_info.ticketing_id
LEFT JOIN
    total_spend ON zip.purch_client_crm_id = total_spend.purchaser_ticketing_id
LEFT JOIN
    location_ticket_type_agg ON zip.purch_client_crm_id = location_ticket_type_agg.purchaser_ticketing_id
LEFT JOIN
    did_renew ON zip.purch_client_crm_id = did_renew.purch_client_crm_id
"""

df_premier_2324 = FLA_Redshift(**rs_creds).query_warehouse(sql_string = q)

In [7]:
df_premier_2324 = df_premier_2324.dropna(how = 'any',axis =0)
df_full_2324 = df_full_2324.dropna(how = 'any',axis =0)

df_premier_2425 = df_premier_2425.dropna(how = 'any',axis =0)
df_full_2425 = df_full_2425.dropna(how = 'any',axis =0)

In [8]:
x_premier_train = df_premier_2324[['is_local','touchpoints','tenure','real_show_rate','seat_show_rate','self_show_rate','num_games_attended','gross_revenue', 'arena_level_internal']]
y_premier = df_premier_2324[['did_renew']]

x_premier_test = df_premier_2425[['is_local','touchpoints','tenure','real_show_rate','seat_show_rate','self_show_rate','num_games_attended','gross_revenue', 'arena_level_internal']]


x_full_train = df_full_2324[['is_local', 'touchpoints','tenure','real_show_rate','seat_show_rate','self_show_rate','num_games_attended','gross_revenue', 'arena_level_internal']]
y_full = df_full_2324[['did_renew']]

x_test_full = df_full_2425[['is_local', 'touchpoints','tenure','real_show_rate','seat_show_rate','self_show_rate','num_games_attended','gross_revenue', 'arena_level_internal']]

In [25]:
# Premier Model

# knock everybodys percentage down 10%
# histogram of accounts by predicted renewal percentage

ss = StandardScaler()
x_train_scaler_premier = ss.fit_transform(x_premier_train)
x_test_scaler_premier = ss.fit_transform(x_premier_test)

logi = LogisticRegression().fit(x_train_scaler_premier, np.array(y_premier))

  y = column_or_1d(y, warn=True)


In [30]:
proba_predicted = logi.predict_proba(x_test_scaler_premier)

#plt.hist(list(map(lambda x: x[1]-.1, proba_predictsd)))

df_premier_2425['predicted_renew_percentage'] = list(map(lambda x: x[1]-.1, proba_predicted))

df_premier_2425[df_premier_2425['predicted_renew_percentage'] < 0.5]

Unnamed: 0,purch_client_crm_id,is_local,touchpoints,tenure,real_show_rate,seat_show_rate,self_show_rate,num_games_attended,gross_revenue,arena_level_internal,predicted_renew_percentage
20,16242768,0,0,3,0.0,0.931818,0.0,0,6848.0,2,0.426334
23,17554852,0,0,3,0.857143,0.909091,0.545455,6,5778.0,2,0.45986
72,6011858,0,4,3,0.153846,0.0,0.0,1,2514.5,2,0.289315
112,15868949,0,1,3,0.5,0.666667,0.333333,3,6219.38,2,0.398399
121,16493089,0,3,4,0.333333,0.545455,0.181818,2,9518.56,2,0.307394
314,8753301,0,0,3,1.0,1.0,0.454545,5,1613.62,4,0.491689
494,15847612,1,0,4,1.0,1.0,0.777778,5,5109.12,2,0.243036
510,15269591,0,1,3,0.5,0.681818,0.272727,3,3263.5,4,0.380198
573,22884406,1,0,3,0.0,1.0,0.0,0,1551.5,4,0.452031
695,20702338,0,1,3,0.6,0.636364,0.181818,3,3477.5,3,0.359034


In [None]:
# Full Model

ss = StandardScaler()
x_train_scaler_full = ss.fit_transform(x_full_train)
x_test_scaler_full = ss.fit_transform(x_test_full)

logi = LogisticRegression().fit(x_train_scaler_full, np.array(y_full))

  y = column_or_1d(y, warn=True)


array([0.11752071, 0.88247929])

In [29]:
proba_predicted = logi.predict_proba(x_test_scaler_full)

#plt.hist(list(map(lambda x: x[1]-.1, proba_predictsd)))

df_full_2425['predicted_renew_percentage'] = list(map(lambda x: x[1]-.1, proba_predicted))

df_full_2425[df_full_2425['predicted_renew_percentage'] < 0.5]

Unnamed: 0,purch_client_crm_id,is_local,touchpoints,tenure,real_show_rate,seat_show_rate,self_show_rate,num_games_attended,gross_revenue,arena_level_internal,predicted_renew_percentage
98,23020677,1,0,4,0.000000,0.772727,0.000000,0,192.36,2,0.356866
132,21330444,0,0,4,0.000000,0.924242,0.000000,0,5931.81,4,0.445218
139,15867473,0,0,4,0.000000,0.375000,0.000000,0,0.00,2,0.113813
184,23448985,0,0,3,0.433333,0.577778,0.288889,13,301.60,2,0.461763
244,15834531,0,0,4,0.000000,0.886364,0.000000,0,0.00,2,0.410139
...,...,...,...,...,...,...,...,...,...,...,...
5611,19923598,1,0,4,0.000000,0.931818,0.000000,0,3954.52,4,0.463486
5649,11620510,0,0,4,0.764706,0.863636,0.443182,14,9706.60,2,0.483916
5656,22113710,0,0,4,0.000000,0.860215,0.000000,0,293.90,2,0.394019
5777,14213814,0,0,4,0.391304,0.806818,0.102273,4,0.00,2,0.379372
