In [10]:
from datetime import datetime, timedelta
import pandas as pd
import seaborn as sns
import pandas_gbq
import pydata_google_auth
from concurrent.futures import ThreadPoolExecutor, as_completed
from google.cloud import bigquery
from tqdm.notebook import tqdm
from dateutil import tz

# OAuth scopes requested for the interactive user login.
SCOPES = [
    'https://www.googleapis.com/auth/cloud-platform',
    'https://www.googleapis.com/auth/drive',
]
# NOTE(review): `credentials` is not passed to any BigQuery / pandas_gbq call in
# the cells visible here -- confirm whether it is still needed.
credentials = pydata_google_auth.get_user_credentials(
    SCOPES,
    # Set auth_local_webserver to True to have a slightly more convenient
    # authorization flow. Note, this doesn't work if you're running from a
    # notebook on a remote server, such as over SSH or with Google Colab.
    auth_local_webserver=True,
)

# Load the BigQuery IPython extension and set the GCLOUD_PROJECT environment
# variable for this kernel session.
%load_ext google.cloud.bigquery
%env GCLOUD_PROJECT=nbcu-ds-sandbox-a-001

The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery
env: GCLOUD_PROJECT=nbcu-ds-sandbox-a-001


In [11]:
def build(query, name, dates):
    """Start the BigQuery job for one report period without waiting for it.

    Args:
        query: SQL template; ``{report}`` is filled in with ``name`` via
            ``str.format``.
        name: Report identifier (e.g. a month or quarter label).
        dates: Mapping with ``'report_start_date'`` and ``'report_end_date'``
            entries, bound as STRING query parameters of the same names.

    Returns:
        ``(name, job_id)`` for the submitted job. The job is not awaited here.
    """
    rendered_sql = query.format(report=name)
    with bigquery.Client() as client:
        date_params = [
            bigquery.ScalarQueryParameter(key, "STRING", dates[key])
            for key in ("report_start_date", "report_end_date")
        ]
        settings = bigquery.QueryJobConfig(query_parameters=date_params)
        submitted = client.query(rendered_sql, job_config=settings)
    return name, submitted.job_id

In [12]:
def monitor(name, job_id):
    """Print a one-line status summary for a previously submitted job.

    Looks the job up by ``job_id`` and prints the report ``name``, the job id,
    the creation time converted to the local time zone, and either the job's
    error message (when ``error_result`` is set) or its current state.
    """
    local_zone = tz.tzlocal()
    with bigquery.Client() as client:
        job = client.get_job(job_id)
        created_at = job.created.astimezone(local_zone).strftime('%b %d, %Y %I:%M %p %Z')
        if job.error_result:
            outcome = f"Error: {job.error_result['message']}"
        else:
            outcome = f"State: {job.state}"
        print(f"{name} \t | \t {job.job_id} \t | \t Created: {created_at} \t | \t {outcome}")

def cancel_jobs(jobs):
    """Request cancellation of every job id in ``jobs`` and print a line per job.

    Args:
        jobs: Mapping whose values are BigQuery job ids (keys are ignored).
    """
    with bigquery.Client() as client:
        # map() is lazy, so each cancel request is issued right before its line is printed.
        for cancelled in map(client.cancel_job, jobs.values()):
            print(f"{cancelled.location}:{cancelled.job_id} cancelled")

# Email Channel

In [13]:
# Reporting window handed to the monthly and quarterly period generators,
# as 'YYYY-MM-DD' strings.
start_date = "2022-01-01"
end_date = "2023-05-31"

## Monthly

In [18]:
def generate_monthly_report(start_date, end_date):
    """Build the calendar-month reporting periods that cover a date range.

    Every month from the one containing ``start_date`` through the one
    containing ``end_date`` gets an entry, each spanning the full month.

    Args:
        start_date: First day of the range, formatted ``YYYY-MM-DD``.
        end_date: Last day of the range, formatted ``YYYY-MM-DD``.

    Returns:
        Dict keyed by ``'<Mon>_<YYYY>'`` (e.g. ``'Jan_2022'``), in chronological
        order, whose values are dicts with ``'report_start_date'`` and
        ``'report_end_date'`` as ``YYYY-MM-DD`` strings. Empty when
        ``start_date`` is after ``end_date``.

    Raises:
        ValueError: If either argument does not match ``YYYY-MM-DD``.
    """
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")
    monthly_reports = {}
    if start > end:
        return monthly_reports

    # Walk from first-of-month to first-of-month. Advancing to "day 28 + 4 days"
    # alone lands on day 1-4 of the next month, which used to skip the final
    # month whenever end_date fell before that day.
    month_start = start.replace(day=1)
    while month_start <= end:
        # Day 28 exists in every month, and adding 4 days always crosses into
        # the following month without skipping one.
        next_month_start = (month_start.replace(day=28) + timedelta(days=4)).replace(day=1)
        month_end = next_month_start - timedelta(days=1)
        monthly_reports[month_start.strftime("%b_%Y")] = {
            "report_start_date": month_start.strftime("%Y-%m-%d"),
            "report_end_date": month_end.strftime("%Y-%m-%d"),
        }
        month_start = next_month_start

    return monthly_reports

In [19]:
# Monthly reporting periods for the configured window; shown for a quick visual check.
months = generate_monthly_report(start_date=start_date, end_date=end_date)
months

{'Jan_2022': {'report_start_date': '2022-01-01',
  'report_end_date': '2022-01-31'},
 'Feb_2022': {'report_start_date': '2022-02-01',
  'report_end_date': '2022-02-28'},
 'Mar_2022': {'report_start_date': '2022-03-01',
  'report_end_date': '2022-03-31'},
 'Apr_2022': {'report_start_date': '2022-04-01',
  'report_end_date': '2022-04-30'},
 'May_2022': {'report_start_date': '2022-05-01',
  'report_end_date': '2022-05-31'},
 'Jun_2022': {'report_start_date': '2022-06-01',
  'report_end_date': '2022-06-30'},
 'Jul_2022': {'report_start_date': '2022-07-01',
  'report_end_date': '2022-07-31'},
 'Aug_2022': {'report_start_date': '2022-08-01',
  'report_end_date': '2022-08-31'},
 'Sep_2022': {'report_start_date': '2022-09-01',
  'report_end_date': '2022-09-30'},
 'Oct_2022': {'report_start_date': '2022-10-01',
  'report_end_date': '2022-10-31'},
 'Nov_2022': {'report_start_date': '2022-11-01',
  'report_end_date': '2022-11-30'},
 'Dec_2022': {'report_start_date': '2022-12-01',
  'report_end_da

In [7]:
# Submit one BigQuery job per month from the shared SQL template and record
# each job id under its report name.
threads = []
results_monthly = {}

# The template is read in full here, so the file is closed before any job starts.
with open('Silver_Email_Monthly.sql', 'r') as file:
    sql_file = file.read()

with ThreadPoolExecutor(len(months)) as executor:
    for report, dates in months.items():
        threads.append(executor.submit(build, sql_file, report, dates))
    for future in as_completed(threads):
        # Named `job_id` rather than `id` so the builtin `id` is not shadowed
        # for the rest of the kernel session.
        name, job_id = future.result()
        results_monthly[name] = job_id

In [10]:
# Print the current status of every submitted monthly job.
# `submit` must receive the callable and its arguments separately: writing
# `executor.submit(monitor(name, id))` runs `monitor` on the main thread and
# hands its return value (None) to the pool as the "callable".
with ThreadPoolExecutor(len(results_monthly)) as executor:
    status_checks = [
        executor.submit(monitor, name, job_id)
        for name, job_id in results_monthly.items()
    ]
    for check in as_completed(status_checks):
        check.result()  # re-raise any exception hit while polling a job

Jul_2022 	 | 	 20443d46-aea5-4ac0-ab39-5810d76f51e3 	 | 	 Created: Jun 15, 2023 04:01 PM Pacific Daylight Time 	 | 	 State: RUNNING
Jan_2023 	 | 	 53b9f637-5868-48a6-b42d-f628a6305678 	 | 	 Created: Jun 15, 2023 04:01 PM Pacific Daylight Time 	 | 	 State: RUNNING
May_2022 	 | 	 dfb975e6-b055-4339-b666-bc945e924993 	 | 	 Created: Jun 15, 2023 04:01 PM Pacific Daylight Time 	 | 	 State: RUNNING
Apr_2023 	 | 	 54d2130a-641d-46a1-bcb2-928db63fc6c8 	 | 	 Created: Jun 15, 2023 04:01 PM Pacific Daylight Time 	 | 	 State: RUNNING
Aug_2022 	 | 	 1a75e567-45c4-44f4-b494-f235e6d81292 	 | 	 Created: Jun 15, 2023 04:01 PM Pacific Daylight Time 	 | 	 State: RUNNING
Mar_2023 	 | 	 826146cc-c94b-4060-9b77-f98dd13d0f25 	 | 	 Created: Jun 15, 2023 04:01 PM Pacific Daylight Time 	 | 	 State: RUNNING
Nov_2022 	 | 	 767668ec-cdb1-462d-9887-69088a9ee82c 	 | 	 Created: Jun 15, 2023 04:01 PM Pacific Daylight Time 	 | 	 State: RUNNING
Feb_2023 	 | 	 1e2b29cd-a610-43ad-bb60-e5c5df3f8553 	 | 	 Created: Jun 15, 2

In [42]:
# Union every per-month base table into a single monthly performance table
# (created or replaced), then wait for the statement to finish.
monthly_selects = ' UNION ALL '.join(
    f'(select * from `nbcu-ds-sandbox-a-001.SLi_sandbox.Email_Channel_Base_{report}`)'
    for report in months
)
query = f"""
        CREATE or replace TABLE `nbcu-ds-sandbox-a-001.SLi_sandbox.SILVER_EMAIL_CHANNEL_PERFORMANCE_MONTHLY` AS 
        
        SELECT *
        FROM ( 
        {monthly_selects}
        )
        """

with bigquery.Client() as client:
    merge_job = client.query(query)
    merge_job.result()

## Quarterly

Run the quarterly section after the monthly section when updating churn.

In [14]:
def get_quarter(p_date) -> int:
    """Return the calendar quarter (1-4) that ``p_date.month`` falls in."""
    zero_based_quarter, _ = divmod(p_date.month - 1, 3)
    return zero_based_quarter + 1

def get_quarter_dates(start_date_str, end_date_str):
    """Build the calendar-quarter reporting periods that cover a date range.

    Args:
        start_date_str: First day of the range, formatted ``YYYY-MM-DD``.
        end_date_str: Last day of the range, formatted ``YYYY-MM-DD``.

    Returns:
        Dict keyed by ``'Q<n>_<YYYY>'`` whose values hold the quarter's first
        and last day under ``'report_start_date'`` / ``'report_end_date'``
        (``YYYY-MM-DD`` strings). The quarter containing the end date is
        included in full.
    """
    cursor = datetime.strptime(start_date_str, '%Y-%m-%d')
    range_end = datetime.strptime(end_date_str, '%Y-%m-%d')

    periods = {}
    while cursor <= range_end:
        quarter = get_quarter(cursor)
        first_month = 3 * (quarter - 1) + 1  # 1, 4, 7 or 10
        quarter_start = datetime(cursor.year, first_month, 1)

        # First day of the following quarter; Q4 rolls over into January.
        if quarter == 4:
            next_quarter_start = datetime(cursor.year + 1, 1, 1)
        else:
            next_quarter_start = datetime(cursor.year, first_month + 3, 1)
        quarter_end = next_quarter_start - timedelta(days=1)

        periods[f'Q{quarter}_{cursor.year}'] = {
            'report_start_date': quarter_start.strftime('%Y-%m-%d'),
            'report_end_date': quarter_end.strftime('%Y-%m-%d'),
        }
        cursor = next_quarter_start

    return periods


In [15]:
# Quarterly reporting periods for the configured window; shown for a quick visual check.
quarters = get_quarter_dates(start_date_str=start_date, end_date_str=end_date)
quarters

{'Q1_2022': {'report_start_date': '2022-01-01',
  'report_end_date': '2022-03-31'},
 'Q2_2022': {'report_start_date': '2022-04-01',
  'report_end_date': '2022-06-30'},
 'Q3_2022': {'report_start_date': '2022-07-01',
  'report_end_date': '2022-09-30'},
 'Q4_2022': {'report_start_date': '2022-10-01',
  'report_end_date': '2022-12-31'},
 'Q1_2023': {'report_start_date': '2023-01-01',
  'report_end_date': '2023-03-31'},
 'Q2_2023': {'report_start_date': '2023-04-01',
  'report_end_date': '2023-06-30'}}

In [12]:
# Submit one BigQuery job per quarter from the shared SQL template and record
# each job id under its report name.
threads = []
results_quarter = {}

# The template is read in full here, so the file is closed before any job starts.
with open('Silver_Email_Quarterly.sql', 'r') as file:
    sql_file = file.read()

with ThreadPoolExecutor(len(quarters)) as executor:
    for report, dates in quarters.items():
        threads.append(executor.submit(build, sql_file, report, dates))
    for future in as_completed(threads):
        # Named `job_id` rather than `id` so the builtin `id` is not shadowed
        # for the rest of the kernel session.
        name, job_id = future.result()
        results_quarter[name] = job_id

In [13]:
# Print the current status of every submitted quarterly job.
# `submit` must receive the callable and its arguments separately: writing
# `executor.submit(monitor(name, id))` runs `monitor` on the main thread and
# hands its return value (None) to the pool as the "callable".
with ThreadPoolExecutor(len(results_quarter)) as executor:
    status_checks = [
        executor.submit(monitor, name, job_id)
        for name, job_id in results_quarter.items()
    ]
    for check in as_completed(status_checks):
        check.result()  # re-raise any exception hit while polling a job

Q4_2022 	 | 	 9d3c18df-5f6c-4219-bd01-3743dc0b8533 	 | 	 Created: Jun 15, 2023 02:54 PM Pacific Daylight Time 	 | 	 State: RUNNING
Q1_2023 	 | 	 53c3a264-a066-4a10-8749-9c68a3b9e71f 	 | 	 Created: Jun 15, 2023 02:54 PM Pacific Daylight Time 	 | 	 State: RUNNING
Q2_2022 	 | 	 71b279e3-cd5a-4bfc-a7be-8730d51ae8da 	 | 	 Created: Jun 15, 2023 02:54 PM Pacific Daylight Time 	 | 	 State: RUNNING
Q3_2022 	 | 	 05709364-65fb-48eb-b8a7-4a715984ed6a 	 | 	 Created: Jun 15, 2023 02:54 PM Pacific Daylight Time 	 | 	 State: RUNNING
Q1_2022 	 | 	 9a72111e-06f2-42b3-b5e2-92b57475d42f 	 | 	 Created: Jun 15, 2023 02:54 PM Pacific Daylight Time 	 | 	 State: RUNNING
Q2_2023 	 | 	 8c09c960-4a2f-4aa3-be9a-7340b577e70b 	 | 	 Created: Jun 15, 2023 02:54 PM Pacific Daylight Time 	 | 	 State: RUNNING


In [7]:
# Union every per-quarter base table into a single quarterly performance table
# (created or replaced), then wait for the statement to finish.
quarterly_selects = ' UNION ALL '.join(
    f'(select * from `nbcu-ds-sandbox-a-001.SLi_sandbox.Email_Channel_Base_{report}`)'
    for report in quarters
)
query = f"""
        CREATE or replace TABLE `nbcu-ds-sandbox-a-001.SLi_sandbox.SILVER_EMAIL_CHANNEL_PERFORMANCE_QUARTERLY` AS 
        
        SELECT *
        FROM ( 
        {quarterly_selects}
        )
        """

with bigquery.Client() as client:
    merge_job = client.query(query)
    merge_job.result()

# Build Gold

In [9]:
# Run the SQL stored in Gold_Email_Performance.sql and wait for it to finish.
with open('Gold_Email_Performance.sql', 'r') as file:
    query = file.read()

with bigquery.Client() as client:
    gold_job = client.query(query)
    gold_job.result()

# Analysis

In [1]:
def gen_analysis_query_func(total=False):
    """Return a function that renders the Targeted-vs-Holdout analysis SQL.

    Args:
        total: When True, a ``--`` is inserted in front of every
            ``, Account_Type`` / ``, 2`` fragment so those parts of the SQL
            are commented out and the query aggregates across account types.
            When False, the query is additionally grouped and ordered by
            ``Account_Type``.

    Returns:
        A one-argument function ``gen_analysis_query(report)`` that returns
        the SQL text.
    """
    def gen_analysis_query(report): 
        """Render the analysis SQL for table ``Email_Channel_Base_<report>``.

        The SQL references the named query parameter ``@report_start_date``
        (emitted as ``Report_Month``), which must be supplied when the query
        is run. CTE_1 sums the per-cohort numerators and denominators, CTE_2
        turns them into rates with ``safe_divide``, and the final SELECT
        derives lift points, lift index and incrementals from those rates.
        """
        return f"""
                WITH CTE_1 AS
                (
                    SELECT  @report_start_date                                                     AS Report_Month 
                        {'--' if total else ''}, Account_Type
                        ,COUNT(distinct CASE WHEN cohort = 'Targeted' THEN aid END )            AS Distinct_Cohort_Size_Targeted
                        ,COUNT(distinct CASE WHEN cohort = 'Holdout' THEN aid END)              AS Distinct_Cohort_Size_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN viewer END )                    AS Total_Returns_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN viewer END)                      AS Total_Returns_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Viewing_Time END )              AS Total_Usage_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Viewing_Time END)                AS Total_Usage_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Repertoire_Pavo_Method END )    AS Total_Repertoire_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Repertoire_Pavo_Method END)      AS Total_Repertoire_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Distinct_Viewing_Sessions END ) AS Total_Viewing_Sessions_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Distinct_Viewing_Sessions END)   AS Total_Viewing_Sessions_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Lapsed_Save_Denom END )         AS Lapsed_Save_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Lapsed_Save_Num END)            AS Lapsed_Save_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Lapsed_Save_Denom END )          AS Lapsed_Save_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Lapsed_Save_Num END)             AS Lapsed_Save_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Lapsing_Save_Denom END )        AS Lapsing_Save_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Lapsing_Save_Num END)           AS Lapsing_Save_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Lapsing_Save_Denom END )         AS Lapsing_Save_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Lapsing_Save_Num END)            AS Lapsing_Save_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Free_To_Paid_Denom END )             AS Free_To_Paid_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Free_To_Paid_Num END)                AS Free_To_Paid_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Free_To_Paid_Denom END )              AS Free_To_Paid_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Free_To_Paid_Num END)                 AS Free_To_Paid_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Net_New_Upgrade_Denom END )     AS Total_Net_New_Upgrade_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Net_New_Upgrade_Num END)        AS Total_Net_New_Upgrade_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Net_New_Upgrade_Denom END )      AS Total_Net_New_Upgrade_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Net_New_Upgrade_Num END)         AS Total_Net_New_Upgrade_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Paid_Winbacks_Denom END )       AS Total_Paid_Winbacks_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Paid_Winbacks_Num END)          AS Total_Paid_Winbacks_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Paid_Winbacks_Denom END )        AS Total_Paid_Winbacks_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Paid_Winbacks_Num END)           AS Total_Paid_Winbacks_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Cancel_Save_Denom END )         AS Total_Cancel_Save_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Cancel_Save_Num END)            AS Total_Cancel_Save_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Cancel_Save_Denom END )          AS Total_Cancel_Save_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Cancel_Save_Num END)             AS Total_Cancel_Save_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN EOM_Paid_Churn_Denom END )      AS EOM_Paid_Churn_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN EOM_Paid_Churn_Num END)         AS EOM_Paid_Churn_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN EOM_Paid_Churn_Denom END )       AS EOM_Paid_Churn_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN EOM_Paid_Churn_Num END)          AS EOM_Paid_Churn_Num_Holdout
                    FROM `nbcu-ds-sandbox-a-001.SLi_sandbox.Email_Channel_Base_{report}`
                    GROUP BY  1 {'--' if total else ''}, 2
                ), CTE_2 AS
                (
                    SELECT  Report_Month 
                        {'--' if total else ''}, Account_Type
                        ,Distinct_Cohort_Size_Targeted                                                        AS Total_Targeted
                        ,Distinct_Cohort_Size_Holdout                                                         AS Total_Holdout

                        ,safe_divide(Total_Returns_Targeted,Distinct_Cohort_Size_Targeted)                    AS Return_Rate_Engagers
                        ,safe_divide(Total_Returns_Holdout,Distinct_Cohort_Size_Holdout)                      AS Return_Rate_Holdout

                        ,safe_divide(Total_Usage_Targeted,Distinct_Cohort_Size_Targeted)                      AS Usage_Engagers
                        ,safe_divide(Total_Usage_Holdout,Distinct_Cohort_Size_Holdout)                        AS Usage_Holdout

                        ,safe_divide(Total_Repertoire_Targeted,Distinct_Cohort_Size_Targeted)                 AS Repertoire_Engagers
                        ,safe_divide(Total_Repertoire_Holdout,Distinct_Cohort_Size_Holdout)                   AS Repertoire_Holdout

                        ,safe_divide(Total_Viewing_Sessions_Targeted,Distinct_Cohort_Size_Targeted)           AS Sessions_Engagers
                        ,safe_divide(Total_Viewing_Sessions_Holdout,Distinct_Cohort_Size_Holdout)             AS Sessions_Holdout

                        ,Lapsed_Save_Denom_Targeted
                        ,safe_divide(Lapsed_Save_Num_Targeted,Lapsed_Save_Denom_Targeted)                     AS Lapsed_Save_Rate_Engagers
                        ,safe_divide(Lapsed_Save_Num_Holdout,Lapsed_Save_Denom_Holdout)                       AS Lapsed_Save_Rate_Holdout

                        ,Lapsing_Save_Denom_Targeted
                        ,safe_divide(Lapsing_Save_Num_Targeted,Lapsing_Save_Denom_Targeted)                   AS Lapsing_Save_Rate_Engagers
                        ,safe_divide(Lapsing_Save_Num_Holdout,Lapsing_Save_Denom_Holdout)                     AS Lapsing_Save_Rate_Holdout

                        ,Free_To_Paid_Denom_Targeted
                        ,safe_divide(Free_To_Paid_Num_Targeted,Free_To_Paid_Denom_Targeted)                           AS Free_To_Paid_Rate_Engagers
                        ,safe_divide(Free_To_Paid_Num_Holdout,Free_To_Paid_Denom_Holdout)                             AS Free_To_Paid_Rate_Holdout

                        ,Total_Net_New_Upgrade_Denom_Targeted
                        ,safe_divide(Total_Net_New_Upgrade_Num_Targeted,Total_Net_New_Upgrade_Denom_Targeted) AS Net_New_Upgrade_Rate_Engagers
                        ,safe_divide(Total_Net_New_Upgrade_Num_Holdout,Total_Net_New_Upgrade_Denom_Holdout)   AS Net_New_Upgrade_Rate_Holdout

                        ,Total_Paid_Winbacks_Denom_Targeted
                        ,safe_divide(Total_Paid_Winbacks_Num_Targeted,Total_Paid_Winbacks_Denom_Targeted)     AS Paid_Winback_Rate_Engagers
                        ,safe_divide(Total_Paid_Winbacks_Num_Holdout,Total_Paid_Winbacks_Denom_Holdout)       AS Paid_Winback_Rate_Holdout

                        ,Total_Cancel_Save_Denom_Targeted
                        ,safe_divide(Total_Cancel_Save_Num_Targeted,Total_Cancel_Save_Denom_Targeted)         AS Cancel_Save_Rate_Engagers
                        ,safe_divide(Total_Cancel_Save_Num_Holdout,Total_Cancel_Save_Denom_Holdout)           AS Cancel_Save_Rate_Holdout

                        ,EOM_Paid_Churn_Denom_Targeted
                        ,safe_divide(EOM_Paid_Churn_Num_Targeted,EOM_Paid_Churn_Denom_Targeted)               AS Paid_Churn_Rate_Engagers
                        ,safe_divide(EOM_Paid_Churn_Num_Holdout,EOM_Paid_Churn_Denom_Holdout)                 AS Paid_Churn_Rate_Holdout
                    FROM CTE_1
                )
                SELECT  Report_Month
                    {'--' if total else ''}, Account_Type
                    ,Total_Targeted
                    ,Total_Holdout
                    
                    ,Return_Rate_Engagers                                                                                  AS Return_Rate_Engagers
                    ,Return_Rate_Holdout                                                                                   AS Return_Rate_Holdout
                    ,Return_Rate_Engagers - Return_Rate_Holdout                                                            AS Return_Rate_Lift_PTS
                    ,safe_divide(Return_Rate_Engagers,Return_Rate_Holdout) *100                                            AS Return_Rate_Lift_Index
                    ,(Return_Rate_Engagers - Return_Rate_Holdout) * Total_Targeted                                         AS Returns_Incrementals

                    ,Usage_Engagers                                                                                        AS Usage_Engagers
                    ,Usage_Holdout                                                                                         AS Usage_Holdout
                    ,Usage_Engagers - Usage_Holdout                                                                        AS Usage_Lift_PTS
                    ,safe_divide(Usage_Engagers,Usage_Holdout) *100                                                        AS Usage_Lift_Index
                    ,(Usage_Engagers - Usage_Holdout) * Total_Targeted                                                     AS Usage_Incrementals

                    ,Repertoire_Engagers                                                                                   AS Repertoire_Engagers
                    ,Repertoire_Holdout                                                                                    AS Repertoire_Holdout
                    ,Repertoire_Engagers - Repertoire_Holdout                                                              AS Repertoire_Lift_PTS
                    ,safe_divide(Repertoire_Engagers,Repertoire_Holdout) *100                                              AS Repertoire_Lift_Index
                    ,(Repertoire_Engagers - Repertoire_Holdout) * Total_Targeted                                           AS Repertoire_Incrementals

                    ,Sessions_Engagers                                                                                     AS Sessions_Engagers
                    ,Sessions_Holdout                                                                                      AS Sessions_Holdout
                    ,Sessions_Engagers - Sessions_Holdout                                                                  AS Sessions_Lift_PTS
                    ,safe_divide(Sessions_Engagers,Sessions_Holdout) *100                                                  AS Sessions_Lift_Index
                    ,(Sessions_Engagers - Sessions_Holdout) * Total_Targeted                                               AS Sessions_Incrementals

                    ,Lapsed_Save_Rate_Engagers                                                                             AS Lapsed_Save_Rate_Engagers
                    ,Lapsed_Save_Rate_Holdout                                                                              AS Lapsed_Save_Rate_Holdout
                    ,Lapsed_Save_Rate_Engagers - Lapsed_Save_Rate_Holdout                                                  AS Lapsed_Save_Rate_Lift_PTS
                    ,safe_divide(Lapsed_Save_Rate_Engagers,Lapsed_Save_Rate_Holdout) *100                                  AS Lapsed_Save_Rate_Lift_Index
                    ,(Lapsed_Save_Rate_Engagers - Lapsed_Save_Rate_Holdout) * Lapsed_Save_Denom_Targeted                   AS Lapsed_Save_Incrementals

                    ,Lapsing_Save_Rate_Engagers                                                                            AS Lapsing_Save_Rate_Engagers
                    ,Lapsing_Save_Rate_Holdout                                                                             AS Lapsing_Save_Rate_Holdout
                    ,Lapsing_Save_Rate_Engagers - Lapsing_Save_Rate_Holdout                                                AS Lapsing_Save_Rate_Lift_PTS
                    ,safe_divide(Lapsing_Save_Rate_Engagers,Lapsing_Save_Rate_Holdout) *100                                AS Lapsing_Save_Rate_Lift_Index
                    ,(Lapsing_Save_Rate_Engagers - Lapsing_Save_Rate_Holdout) * Lapsing_Save_Denom_Targeted                AS Lapsing_Save_Rate_Lift_Incrementals

                    ,Free_To_Paid_Rate_Engagers                                                                                 AS Free_To_Paid_Rate_Engagers
                    ,Free_To_Paid_Rate_Holdout                                                                                  AS Free_To_Paid_Rate_Holdout
                    ,Free_To_Paid_Rate_Engagers - Free_To_Paid_Rate_Holdout                                                          AS Free_To_Paid_Rate_Lift_PTS
                    ,safe_divide(Free_To_Paid_Rate_Engagers,Free_To_Paid_Rate_Holdout) *100                                          AS Free_To_Paid_Rate_Lift_Index
                    ,(Free_To_Paid_Rate_Engagers - Free_To_Paid_Rate_Holdout) * Free_To_Paid_Denom_Targeted                              AS Free_To_Paid_Incrementals

                    ,Net_New_Upgrade_Rate_Engagers                                                                         AS Net_New_Upgrade_Rate_Engagers
                    ,Net_New_Upgrade_Rate_Holdout                                                                          AS Net_New_Upgrade_Rate_Holdout
                    ,Net_New_Upgrade_Rate_Engagers - Net_New_Upgrade_Rate_Holdout                                          AS Net_New_Upgrade_Rate_Lift_PTS
                    ,safe_divide(Net_New_Upgrade_Rate_Engagers,Net_New_Upgrade_Rate_Holdout) *100                          AS Net_New_Upgrade_Rate_Lift_Index
                    ,(Net_New_Upgrade_Rate_Engagers - Net_New_Upgrade_Rate_Holdout) * Total_Net_New_Upgrade_Denom_Targeted AS Net_New_Upgrade_Incrementals

                    ,Paid_Winback_Rate_Engagers                                                                            AS Paid_Winback_Rate_Engagers
                    ,Paid_Winback_Rate_Holdout                                                                             AS Paid_Winback_Rate_Holdout
                    ,Paid_Winback_Rate_Engagers - Paid_Winback_Rate_Holdout                                                AS Paid_Winback_Rate_Lift_PTS
                    ,safe_divide(Paid_Winback_Rate_Engagers,Paid_Winback_Rate_Holdout) *100                                AS Paid_Winback_Rate_Lift_Index
                    ,(Paid_Winback_Rate_Engagers - Paid_Winback_Rate_Holdout) * Total_Paid_Winbacks_Denom_Targeted         AS Paid_Winback_Rate_Lift_Incrementals

                    ,Cancel_Save_Rate_Engagers                                                                             AS Cancel_Save_Rate_Engagers
                    ,Cancel_Save_Rate_Holdout                                                                              AS Cancel_Save_Rate_Holdout
                    ,Cancel_Save_Rate_Engagers - Cancel_Save_Rate_Holdout                                                  AS Cancel_Save_Rate_Lift_PTS
                    ,safe_divide(Cancel_Save_Rate_Engagers,Cancel_Save_Rate_Holdout) *100                                  AS Cancel_Save_Rate_Lift_Index
                    ,(Cancel_Save_Rate_Engagers - Cancel_Save_Rate_Holdout) * Total_Cancel_Save_Denom_Targeted             AS Cancel_Save_Rate_Incrementals

                    ,Paid_Churn_Rate_Engagers                                                                              AS EOM_Paid_Churn_Rate_Engagers
                    ,Paid_Churn_Rate_Holdout                                                                               AS EOM_Paid_Churn_Rate_Holdout
                    ,Paid_Churn_Rate_Engagers - Paid_Churn_Rate_Holdout                                                    AS EOM_Paid_Churn_Rate_Lift_PTS
                    ,safe_divide(Paid_Churn_Rate_Engagers,Paid_Churn_Rate_Holdout) *100                                    AS EOM_Paid_Churn_Rate_Lift_Index
                    ,(Paid_Churn_Rate_Engagers - Paid_Churn_Rate_Holdout) * EOM_Paid_Churn_Denom_Targeted                  AS EOM_Paid_Churn_Rate_Incrementals
                FROM CTE_2
                ORDER BY 1 {'--' if total else ''},2
                """
    return gen_analysis_query

In [2]:
def gen_analysis_query_churn(report): 
	"""Render the paid-churn SQL for table ``Email_Channel_Base_<report>``.

	Results are grouped and ordered by report month and ``paid_tenure``. The
	cohort-size columns count only rows whose ``Account_Type`` is
	``'Paying SVOD'``. The SQL references the named query parameter
	``@report_start_date``, which must be supplied when the query is run.
	"""
	return f"""
			WITH CTE_1 AS
			(
				SELECT  @report_start_date                                                                                     AS Report_Month
					--, Account_Type
					,paid_tenure
					,SUM(CASE WHEN (cohort = 'Targeted') AND (Account_Type = 'Paying SVOD') THEN 1 END )  AS Distinct_Cohort_Size_Targeted
					,SUM(CASE WHEN (cohort = 'Holdout') AND (Account_Type = 'Paying SVOD') THEN 1 END)         AS Distinct_Cohort_Size_Holdout
					,SUM(CASE WHEN cohort = 'Targeted' THEN EOM_Paid_Churn_Denom END)                                AS EOM_Paid_Churn_Denom_Targeted
					,SUM(CASE WHEN cohort = 'Targeted' THEN EOM_Paid_Churn_Num END)                                   AS EOM_Paid_Churn_Num_Targeted
					,SUM(CASE WHEN cohort = 'Holdout' THEN EOM_Paid_Churn_Denom END)                                       AS EOM_Paid_Churn_Denom_Holdout
					,SUM(CASE WHEN cohort = 'Holdout' THEN EOM_Paid_Churn_Num END)                                          AS EOM_Paid_Churn_Num_Holdout
				FROM `nbcu-ds-sandbox-a-001.SLi_sandbox.Email_Channel_Base_{report}`
				GROUP BY  1,2
			), CTE_2 AS
			(
				SELECT  Report_Month
					--, Account_Type
					,paid_tenure
					,Distinct_Cohort_Size_Targeted                                          AS Targeted
					,Distinct_Cohort_Size_Holdout                                           AS Holdout
					,EOM_Paid_Churn_Denom_Targeted
					,safe_divide(EOM_Paid_Churn_Num_Targeted,EOM_Paid_Churn_Denom_Targeted) AS Paid_Churn_Rate_Engagers
					,safe_divide(EOM_Paid_Churn_Num_Holdout,EOM_Paid_Churn_Denom_Holdout)   AS Paid_Churn_Rate_Holdout
				FROM CTE_1
			)
			SELECT  Report_Month
				--, Account_Type
				,paid_tenure
				,Targeted
				,Holdout
				,Paid_Churn_Rate_Engagers                                                             AS EOM_Paid_Churn_Rate_Engagers
				,Paid_Churn_Rate_Holdout                                                              AS EOM_Paid_Churn_Rate_Holdout
				,Paid_Churn_Rate_Engagers - Paid_Churn_Rate_Holdout                                   AS EOM_Paid_Churn_Rate_Lift_PTS
				,safe_divide(Paid_Churn_Rate_Engagers,Paid_Churn_Rate_Holdout) *100                   AS EOM_Paid_Churn_Rate_Lift_Index
				,(Paid_Churn_Rate_Engagers - Paid_Churn_Rate_Holdout) * EOM_Paid_Churn_Denom_Targeted AS EOM_Paid_Churn_Rate_Incrementals
			FROM CTE_2
			ORDER BY 1, 2
			"""

In [3]:
# Query builders run for every report period. The order matters: the export
# cells read position 0 as the all-accounts totals and position 1 as the
# per-Account_Type breakdown. Position 2 is the churn-by-tenure query.
analyze_queries = [
    gen_analysis_query_func(total=True),
    gen_analysis_query_func(total=False),
    gen_analysis_query_churn
]

In [4]:
def build_report(report, dates, queries):
    """Run every analysis query for one report period and collect the results.

    Args:
        report: Report identifier passed to each query builder.
        dates: Mapping with ``'report_start_date'`` and ``'report_end_date'``
            entries, bound as NAMED STRING query parameters of the same names.
        queries: Iterable of callables; each takes ``report`` and returns SQL.

    Returns:
        List with one ``pandas_gbq.read_gbq`` result per query, in the same
        order as ``queries``.
    """
    def string_param(key):
        # One named STRING parameter whose value is looked up in `dates`.
        return {
            'name': key,
            'parameterType': {'type': 'STRING'},
            'parameterValue': {'value': dates[key]},
        }

    # Render all SQL first so a failing builder aborts before any query runs.
    rendered = [make_sql(report) for make_sql in queries]

    frames = []
    for sql in rendered:
        # A fresh configuration dict is built for every call.
        settings = {
            'query': {
                'parameterMode': 'NAMED',
                'queryParameters': [
                    string_param('report_start_date'),
                    string_param('report_end_date'),
                ],
            }
        }
        frames.append(pandas_gbq.read_gbq(sql, configuration=settings))
    return frames

## Monthly

In [20]:
# Run all analysis queries for every month in parallel, one worker per month.
# `results` is filled in completion order, not calendar order.
results = []

with ThreadPoolExecutor(len(months)) as executor:
    threads = [
        executor.submit(build_report, report, dates, analyze_queries)
        for report, dates in months.items()
    ]
    for future in as_completed(threads):
        results.append(future.result())

Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading:   0%|[32m          [0m|
[A

[A[A


[A[A[A



[A[A[A[A




[A[A[A[A[A




Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|



Downloading: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|

Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|


Downloading: 100%|[32m██████████[0m|
Downloading:   0%|[32m          [0m|
[A

[A[A


[A[A[A

Downloading: 100%|[32m██████████[0m|



Downloading: 100%|[32m██████████[0m|


Downloading: 100%|[32m██████████[0m|



[A[A[A

Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|



Downloading: 100%|[32m██████████[0m|

Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|

Downloading: 100%|

In [21]:
# Stack the per-period frames (position 0 = totals, position 1 = per account
# type) and write them to one workbook. Position 2 of each result is not
# exported here.
aggregate_reports = pd.concat(
    [period_frames[0] for period_frames in results]
).sort_values(by='Report_Month')
acc_type_reports = pd.concat(
    [period_frames[1] for period_frames in results]
).sort_values(by=['Report_Month', 'Account_Type'])

with pd.ExcelWriter('email_channel_report_monthly.xlsx') as writer:
    aggregate_reports.to_excel(writer, sheet_name='total')
    acc_type_reports.to_excel(writer, sheet_name='acc_type')

## Quarterly

In [16]:
# Run all analysis queries for every quarter in parallel, one worker per quarter.
# `results` is filled in completion order, not calendar order.
results = []

with ThreadPoolExecutor(len(quarters)) as executor:
    threads = [
        executor.submit(build_report, report, dates, analyze_queries)
        for report, dates in quarters.items()
    ]
    for future in as_completed(threads):
        results.append(future.result())

Downloading: |[32m          [0m|
Downloading: |[32m          [0m|
Downloading: |[32m          [0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading:   0%|[32m          [0m|
Downloading: 100%|[32m██████████[0m|

Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|


In [17]:
# Stack the per-period frames (position 0 = totals, position 1 = per account
# type) and write them to one workbook. Position 2 of each result is not
# exported here.
aggregate_reports = pd.concat(
    [period_frames[0] for period_frames in results]
).sort_values(by='Report_Month')
acc_type_reports = pd.concat(
    [period_frames[1] for period_frames in results]
).sort_values(by=['Report_Month', 'Account_Type'])

with pd.ExcelWriter('email_channel_report_quarterly.xlsx') as writer:
    aggregate_reports.to_excel(writer, sheet_name='total')
    acc_type_reports.to_excel(writer, sheet_name='acc_type')