In [3]:
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pandas_gbq
import pydata_google_auth
from concurrent.futures import ThreadPoolExecutor, as_completed
from google.cloud import bigquery
from tqdm.notebook import tqdm
from dateutil import tz
from BigQueryTools import QueryTool
import re

# OAuth scopes requested for the interactive user credentials:
# Google Cloud Platform and Google Drive.
SCOPES = [
    'https://www.googleapis.com/auth/cloud-platform',
    'https://www.googleapis.com/auth/drive',
]
# Obtain user credentials for the scopes above.
credentials = pydata_google_auth.get_user_credentials(
    SCOPES,
    # Set auth_local_webserver to True to have a slightly more convenient
    # authorization flow. Note, this doesn't work if you're running from a
    # notebook on a remote server, such as over SSH or with Google Colab.
    auth_local_webserver=True,
)

# Load the BigQuery IPython extension; %%bigquery cells are used further down.
%load_ext google.cloud.bigquery
# Export the project id as an environment variable for this kernel.
# NOTE(review): presumably this is what the bare bigquery.Client() calls pick up
# as their default project -- confirm.
%env GCLOUD_PROJECT=nbcu-ds-sandbox-a-001

The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery
env: GCLOUD_PROJECT=nbcu-ds-sandbox-a-001


# Email Channel

In [4]:
# Inclusive reporting window as 'YYYY-MM-DD' strings; the monthly and quarterly
# report periods below are derived from these two values.
start_date = '2022-01-01'
end_date = '2023-12-31'

In [5]:
def get_monthly_report_dates(start_date, end_date):
    """Map every calendar month touched by a date range to its first/last day.

    Args:
        start_date: Range start as a 'YYYY-MM-DD' string.
        end_date: Range end as a 'YYYY-MM-DD' string (inclusive).

    Returns:
        A dict keyed by '<Mon>_<YYYY>' (``strftime('%b_%Y')``, e.g. 'Jan_2022'),
        in chronological order. Each value is a dict with 'report_start_date'
        and 'report_end_date' as 'YYYY-MM-DD' strings. The dict is empty when
        ``start_date`` is later than ``end_date``.

    Raises:
        ValueError: If either argument does not match '%Y-%m-%d'.
    """
    cursor = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")
    monthly_reports = {}

    while cursor <= end:
        report_start_date = cursor.replace(day=1)
        # Day 28 exists in every month and 28 + 4 days always lands in the
        # following month; snapping to day 1 gives the first day of that month.
        next_month = (cursor.replace(day=28) + timedelta(days=4)).replace(day=1)
        report_end_date = next_month - timedelta(days=1)
        monthly_reports[cursor.strftime("%b_%Y")] = {
            "report_start_date": report_start_date.strftime("%Y-%m-%d"),
            "report_end_date": report_end_date.strftime("%Y-%m-%d")
        }
        # Advance to the FIRST of the next month. Advancing to the un-snapped
        # date (day 2-4 after 30-day months and February) made the loop test
        # skip a month whenever end_date fell on that month's first days.
        cursor = next_month

    return monthly_reports

def get_quarter(p_date) -> int:
    """Return the calendar quarter (1 through 4) for ``p_date``.

    Only the ``month`` attribute of ``p_date`` is read.
    """
    zero_based_quarter, _ = divmod(p_date.month - 1, 3)
    return zero_based_quarter + 1

def get_quarterly_report_dates(start_date_str, end_date_str):
    """Map every calendar quarter touched by a date range to its first/last day.

    Args:
        start_date_str: Range start as a 'YYYY-MM-DD' string.
        end_date_str: Range end as a 'YYYY-MM-DD' string (inclusive).

    Returns:
        A dict keyed by 'Q<n>_<YYYY>' (e.g. 'Q1_2022'). Each value is a dict
        with 'report_start_date' and 'report_end_date' as 'YYYY-MM-DD' strings.
    """
    cursor = datetime.strptime(start_date_str, '%Y-%m-%d')
    range_end = datetime.strptime(end_date_str, '%Y-%m-%d')

    periods = {}

    while cursor <= range_end:
        quarter = get_quarter(cursor)
        first_month = 3 * (quarter - 1) + 1
        period_start = datetime(cursor.year, first_month, 1)
        # First day of the following quarter; Q4 rolls over into January of
        # the next year.
        if quarter == 4:
            following_start = datetime(cursor.year + 1, 1, 1)
        else:
            following_start = datetime(cursor.year, first_month + 3, 1)
        period_end = following_start - timedelta(days=1)

        label = 'Q{}_{}'.format(quarter, cursor.year)
        periods[label] = {
            'report_start_date': period_start.strftime('%Y-%m-%d'),
            'report_end_date': period_end.strftime('%Y-%m-%d')
        }
        cursor = following_start

    return periods

def generate_report_variants(period_dict, suffix="report"):
    """Wrap each report period in the variant structure handed to ``bq.add``.

    Args:
        period_dict: Mapping of period name (e.g. 'Jan_2022') to its parameters.
        suffix: Text appended to the period name, joined with '_', to form the
            variant key.

    Returns:
        A dict keyed by '<period>_<suffix>'. Each value holds 'string_format'
        (``{'report': period}``) and 'params' (the original parameters object).
    """
    variants = {}
    for period, params in period_dict.items():
        variant_key = '_'.join((period, suffix))
        variants[variant_key] = {
            'string_format': {'report': period},
            'params': params,
        }
    return variants

In [7]:
# Report periods covering [start_date, end_date], keyed by period name.
months = get_monthly_report_dates(start_date, end_date)
quarters = get_quarterly_report_dates(start_date, end_date)
# Variant dicts keyed '<period>_viewing' ...
monthly_viewing_tables = generate_report_variants(months, 'viewing')
quarterly_viewing_tables = generate_report_variants(quarters, 'viewing')
# ... and '<period>_report' (the default suffix).
month_tables = generate_report_variants(months)
quarter_tables = generate_report_variants(quarters)
# QueryTool comes from the project-local BigQueryTools module; the cells below
# call its add / start / update methods.
bq = QueryTool()

## Monthly

In [8]:
# Read the monthly Silver SQL once, then register it with the query tool
# together with one variant per month.
with open('Silver_Email_Monthly.sql') as sql_handle:
    sql_file = sql_handle.read()
bq.add(sql_file, month_tables)

Added: Jan_2022_report	 | 	Awaiting start
Added: Feb_2022_report	 | 	Awaiting start
Added: Mar_2022_report	 | 	Awaiting start
Added: Apr_2022_report	 | 	Awaiting start
Added: May_2022_report	 | 	Awaiting start
Added: Jun_2022_report	 | 	Awaiting start
Added: Jul_2022_report	 | 	Awaiting start
Added: Aug_2022_report	 | 	Awaiting start
Added: Sep_2022_report	 | 	Awaiting start
Added: Oct_2022_report	 | 	Awaiting start
Added: Nov_2022_report	 | 	Awaiting start
Added: Dec_2022_report	 | 	Awaiting start
Added: Jan_2023_report	 | 	Awaiting start
Added: Feb_2023_report	 | 	Awaiting start
Added: Mar_2023_report	 | 	Awaiting start
Added: Apr_2023_report	 | 	Awaiting start
Added: May_2023_report	 | 	Awaiting start
Added: Jun_2023_report	 | 	Awaiting start
Added: Jul_2023_report	 | 	Awaiting start
Added: Aug_2023_report	 | 	Awaiting start
Added: Sep_2023_report	 | 	Awaiting start
Added: Oct_2023_report	 | 	Awaiting start
Added: Nov_2023_report	 | 	Awaiting start
Added: Dec_2023_report	 | 	Awaitin

## Quarterly

In [9]:
# Read the quarterly Silver SQL once, then register it with the query tool
# together with one variant per quarter.
# NOTE(review): the saved output for this cell is a FileNotFoundError for
# 'Silver_Email_Quarterly.sql' -- confirm the file name / working directory.
with open('Silver_Email_Quarterly.sql') as sql_handle:
    sql_file = sql_handle.read()
bq.add(sql_file, quarter_tables)

FileNotFoundError: [Errno 2] No such file or directory: 'Silver_Email_Quarterly.sql'

In [10]:
# Start the jobs registered via bq.add above; the saved output prints one
# "start <id>" line per job. (QueryTool is project-local -- behaviour not
# visible in this notebook.)
bq.start()

start 983f07bc-3107-4e02-a2b2-0653dc258e9a
start fccfe40a-e385-4886-9fbf-5ecc14ea6b07
start 9c90309b-230d-4cfa-ac53-e59cbd544373
start bcb4d8a4-57f5-4622-b4a2-744a246a01bd
start d182d547-c521-49e1-9044-2086608f558a
start 7e3addfb-53d8-4605-b751-87037f02b1ba
start c7dc50ac-39c3-421f-befd-52f3f397afab
start d9696e2a-24ed-46a6-b262-429cff681e25
start 4b3e07bb-f7f4-460f-a9ac-56a68fe8fe7e
start 0dbec751-9781-4528-927b-462be987e3e7
start 6d79cbb6-c2f7-43e4-81cb-3659b1f038ba
start 4d7459c4-94d0-4522-9acb-21fab20eda20
start 5e1660c9-ad8f-4f03-a4c1-85b0c869bc8f
start c2fd2de8-2efd-4562-a2e0-f6b558358943
start 02ecb12d-c746-4202-b6ef-e6a7838d8e3d
start 5678204a-0cea-4d44-987b-565993b7fe61
start 79c6b855-de78-4fc6-9344-f3a7a6b6d882
start bd439467-196b-41d6-8c82-9fd909211272
start 0e80b626-6f0a-4b30-a611-0b42208a52f4
start 52933eed-dc3a-4afa-bfe1-c72561ecd898
start 12404e84-322e-41a3-aafb-5b15dfe8ee64
start c0319ffa-a89f-47b8-ba5e-97ebb586b237
start 7c8cd04e-7849-48c7-a062-185ec763ab39
start c7063

In [13]:
# Refresh and print job status; the saved output lists "Active jobs" and
# "Finished jobs" with a state per report. Re-run until nothing is active
# before compiling results.
bq.update()

Active jobs: 


Finished jobs: 
Jan_2022_report	 | 	bcb4d8a4-57f5-4622-b4a2-744a246a01bd	 | 	Created: Jan 08, 2024 05:00 PM	 | 	Last Checked: Jan 09, 2024 09:17 AM	 | 	DONE
Feb_2022_report	 | 	4b3e07bb-f7f4-460f-a9ac-56a68fe8fe7e	 | 	Created: Jan 08, 2024 05:00 PM	 | 	Last Checked: Jan 09, 2024 09:17 AM	 | 	DONE
Mar_2022_report	 | 	d9696e2a-24ed-46a6-b262-429cff681e25	 | 	Created: Jan 08, 2024 05:00 PM	 | 	Last Checked: Jan 09, 2024 09:17 AM	 | 	DONE
Apr_2022_report	 | 	983f07bc-3107-4e02-a2b2-0653dc258e9a	 | 	Created: Jan 08, 2024 05:00 PM	 | 	Last Checked: Jan 09, 2024 09:17 AM	 | 	DONE
May_2022_report	 | 	9c90309b-230d-4cfa-ac53-e59cbd544373	 | 	Created: Jan 08, 2024 05:00 PM	 | 	Last Checked: Jan 09, 2024 09:17 AM	 | 	DONE
Jun_2022_report	 | 	7e3addfb-53d8-4605-b751-87037f02b1ba	 | 	Created: Jan 08, 2024 05:00 PM	 | 	Last Checked: Jan 09, 2024 09:17 AM	 | 	DONE
Jul_2022_report	 | 	d182d547-c521-49e1-9044-2086608f558a	 | 	Created: Jan 08, 2024 05:00 PM	 | 	Last Checked: Jan 09, 2024

# Compile Results

In [12]:
# Stack every monthly base table (Email_Channel_Base_<Mon>_<YYYY>) into one
# Silver table with UNION ALL.
# NOTE(review): every monthly base table must already exist; the saved output
# shows a 404 for Email_Channel_Base_Dec_2023.
monthly_selects = ' UNION ALL '.join(
    f'(select * from `nbcu-ds-sandbox-a-001.SLi_sandbox.Email_Channel_Base_{report}`)'
    for report in months
)
query = (
    """
        CREATE or replace TABLE `nbcu-ds-sandbox-a-001.SLi_sandbox.SILVER_EMAIL_CHANNEL_PERFORMANCE_MONTHLY` AS 
        
        SELECT *
        FROM ( 
        """
    + monthly_selects
    + """
        )
        """
)

# Run the statement and block until the job has finished.
with bigquery.Client() as client:
    client.query(query).result()

NotFound: 404 Not found: Table nbcu-ds-sandbox-a-001:SLi_sandbox.Email_Channel_Base_Dec_2023 was not found in location US

Location: US
Job ID: 7d5f829f-d7c5-44e1-b64b-bcc6a8200f3d


In [54]:
# Stack every quarterly base table (Email_Channel_Base_Q<n>_<YYYY>) into one
# Silver table with UNION ALL.
quarterly_selects = ' UNION ALL '.join(
    f'(select * from `nbcu-ds-sandbox-a-001.SLi_sandbox.Email_Channel_Base_{report}`)'
    for report in quarters
)
query = (
    """
        CREATE or replace TABLE `nbcu-ds-sandbox-a-001.SLi_sandbox.SILVER_EMAIL_CHANNEL_PERFORMANCE_QUARTERLY` AS 
        
        SELECT *
        FROM ( 
        """
    + quarterly_selects
    + """
        )
        """
)

# Run the statement and block until the job has finished.
with bigquery.Client() as client:
    client.query(query).result()

# Build Gold

In [55]:
# Build the Gold table: load the SQL text from Gold_Email_Performance.sql and
# run it, blocking until the job has finished.

with open('Gold_Email_Performance.sql') as sql_handle:
    query = sql_handle.read()

with bigquery.Client() as client:
    client.query(query).result()

# Push to Prod

In [55]:
%%bigquery

-- Overwrite GOLD_EMAIL_CHANNEL_PERFORMANCE with the full contents of the
-- GOLD_EMAIL_CHANNEL_PERFORMANCE_TEST table.
CREATE OR REPLACE TABLE `nbcu-ds-sandbox-a-001.SLi_sandbox.GOLD_EMAIL_CHANNEL_PERFORMANCE`
AS
SELECT *
FROM `nbcu-ds-sandbox-a-001.SLi_sandbox.GOLD_EMAIL_CHANNEL_PERFORMANCE_TEST`

Query is running:   0%|          |

In [14]:
%%bigquery df

-- Pull ten rows of the Gold test table into the DataFrame `df` for inspection.
SELECT *
FROM `nbcu-ds-sandbox-a-001.SLi_sandbox.GOLD_EMAIL_CHANNEL_PERFORMANCE_TEST`
LIMIT 10

Query is running:   0%|          |

Downloading:   0%|          |

In [15]:
# Show the column names of the Gold test preview loaded into `df` above.
df.columns

Index(['Result_Type', 'Report_Month', 'Cohort', 'Account_Type',
       'Active_Viewer', 'Primary_Device', 'Account_Tenure', 'Paid_Tenure',
       'Billing_Platform', 'Bundling_Partner', 'Billing_Cycle_Category',
       'Offer', 'Churn_Frequency', 'Previously_Bundled', 'Prev_30d_Viewer',
       'Prev_Paying_Account_Flag', 'Intender_Audience', 'Genre', 'Network',
       'Users', 'Viewers', 'Viewing_Time', 'Repertoire', 'Viewing_Sessions',
       'Lapsed_Save_Denom', 'Lapsed_Save_Num', 'Lapsing_Save_Denom',
       'Lapsing_Save_Num', 'Free_To_Paid_Denom', 'Free_To_Paid_Num',
       'Net_New_Upgrade_Denom', 'Net_New_Upgrade_Num', 'Paid_Winbacks_Denom',
       'Paid_Winbacks_Num', 'Cancel_Save_Denom', 'Cancel_Save_Num',
       'EOM_Paid_Churn_Denom', 'EOM_Paid_Churn_Num'],
      dtype='object')

# Analysis

In [22]:
def gen_analysis_query_func(total: bool = False):
    """Create a SQL generator for the Targeted-vs-Holdout lift analysis.

    Args:
        total: When True, every ``, Account_Type`` fragment (in the select
            lists and as ordinal 2 in GROUP BY / ORDER BY) is prefixed with
            ``--`` so that it becomes a SQL line comment; the result is then
            grouped by Report_Month only. When False the fragments are emitted
            unchanged and the result is also split by Account_Type.

    Returns:
        A function taking a report name and returning the query text.
    """
    def gen_analysis_query(report: str) -> str:
        """Return the lift-analysis SQL for ``Email_Channel_Base_<report>``.

        The query text references the named parameter ``@report_start_date``
        (emitted as Report_Month), which must be supplied when the query runs.

        Structure of the generated SQL:
          * CTE_1 -- per-cohort ('Targeted' / 'Holdout') distinct ``aid``
            counts and metric sums over rows with
            ``paying_account_flag = 'Paying'``.
          * CTE_2 -- turns those sums into per-cohort rates with safe_divide.
          * final SELECT -- for each metric: both rates, their difference
            (Lift_PTS), their ratio * 100 (Lift_Index) and the difference
            multiplied by the Targeted size/denominator (Incrementals).
        """
        return f"""
                WITH CTE_1 AS
                (
                    SELECT  @report_start_date                                                     AS Report_Month 
                        {'--' if total else ''}, Account_Type
                        ,COUNT(distinct CASE WHEN cohort = 'Targeted' THEN aid END )            AS Distinct_Cohort_Size_Targeted
                        ,COUNT(distinct CASE WHEN cohort = 'Holdout' THEN aid END)              AS Distinct_Cohort_Size_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN viewer END )                    AS Total_Returns_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN viewer END)                      AS Total_Returns_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Viewing_Time END )              AS Total_Usage_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Viewing_Time END)                AS Total_Usage_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Repertoire_Pavo_Method END )    AS Total_Repertoire_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Repertoire_Pavo_Method END)      AS Total_Repertoire_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Distinct_Viewing_Sessions END ) AS Total_Viewing_Sessions_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Distinct_Viewing_Sessions END)   AS Total_Viewing_Sessions_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Lapsed_Save_Denom END )         AS Lapsed_Save_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Lapsed_Save_Num END)            AS Lapsed_Save_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Lapsed_Save_Denom END )          AS Lapsed_Save_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Lapsed_Save_Num END)             AS Lapsed_Save_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Lapsing_Save_Denom END )        AS Lapsing_Save_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Lapsing_Save_Num END)           AS Lapsing_Save_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Lapsing_Save_Denom END )         AS Lapsing_Save_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Lapsing_Save_Num END)            AS Lapsing_Save_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Free_To_Paid_Denom END )             AS Free_To_Paid_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Free_To_Paid_Num END)                AS Free_To_Paid_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Free_To_Paid_Denom END )              AS Free_To_Paid_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Free_To_Paid_Num END)                 AS Free_To_Paid_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Net_New_Upgrade_Denom END )     AS Total_Net_New_Upgrade_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Net_New_Upgrade_Num END)        AS Total_Net_New_Upgrade_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Net_New_Upgrade_Denom END )      AS Total_Net_New_Upgrade_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Net_New_Upgrade_Num END)         AS Total_Net_New_Upgrade_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Paid_Winbacks_Denom END )       AS Total_Paid_Winbacks_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Paid_Winbacks_Num END)          AS Total_Paid_Winbacks_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Paid_Winbacks_Denom END )        AS Total_Paid_Winbacks_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Paid_Winbacks_Num END)           AS Total_Paid_Winbacks_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Cancel_Save_Denom END )         AS Total_Cancel_Save_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN Cancel_Save_Num END)            AS Total_Cancel_Save_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Cancel_Save_Denom END )          AS Total_Cancel_Save_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN Cancel_Save_Num END)             AS Total_Cancel_Save_Num_Holdout

                        ,SUM(CASE WHEN cohort = 'Targeted' THEN EOM_Paid_Churn_Denom END )      AS EOM_Paid_Churn_Denom_Targeted
                        ,SUM(CASE WHEN cohort = 'Targeted' THEN EOM_Paid_Churn_Num END)         AS EOM_Paid_Churn_Num_Targeted
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN EOM_Paid_Churn_Denom END )       AS EOM_Paid_Churn_Denom_Holdout
                        ,SUM(CASE WHEN cohort = 'Holdout' THEN EOM_Paid_Churn_Num END)          AS EOM_Paid_Churn_Num_Holdout
                    FROM `nbcu-ds-sandbox-a-001.SLi_sandbox.Email_Channel_Base_{report}`
                    WHERE paying_account_flag = 'Paying'
                    GROUP BY  1 {'--' if total else ''}, 2
                ), CTE_2 AS
                (
                    SELECT  Report_Month 
                        {'--' if total else ''}, Account_Type
                        ,Distinct_Cohort_Size_Targeted                                                        AS Total_Targeted
                        ,Distinct_Cohort_Size_Holdout                                                         AS Total_Holdout

                        ,safe_divide(Total_Returns_Targeted,Distinct_Cohort_Size_Targeted)                    AS Return_Rate_Engagers
                        ,safe_divide(Total_Returns_Holdout,Distinct_Cohort_Size_Holdout)                      AS Return_Rate_Holdout

                        ,safe_divide(Total_Usage_Targeted,Distinct_Cohort_Size_Targeted)                      AS Usage_Engagers
                        ,safe_divide(Total_Usage_Holdout,Distinct_Cohort_Size_Holdout)                        AS Usage_Holdout

                        ,safe_divide(Total_Repertoire_Targeted,Distinct_Cohort_Size_Targeted)                 AS Repertoire_Engagers
                        ,safe_divide(Total_Repertoire_Holdout,Distinct_Cohort_Size_Holdout)                   AS Repertoire_Holdout

                        ,safe_divide(Total_Viewing_Sessions_Targeted,Distinct_Cohort_Size_Targeted)           AS Sessions_Engagers
                        ,safe_divide(Total_Viewing_Sessions_Holdout,Distinct_Cohort_Size_Holdout)             AS Sessions_Holdout

                        ,Lapsed_Save_Denom_Targeted
                        ,safe_divide(Lapsed_Save_Num_Targeted,Lapsed_Save_Denom_Targeted)                     AS Lapsed_Save_Rate_Engagers
                        ,safe_divide(Lapsed_Save_Num_Holdout,Lapsed_Save_Denom_Holdout)                       AS Lapsed_Save_Rate_Holdout

                        ,Lapsing_Save_Denom_Targeted
                        ,safe_divide(Lapsing_Save_Num_Targeted,Lapsing_Save_Denom_Targeted)                   AS Lapsing_Save_Rate_Engagers
                        ,safe_divide(Lapsing_Save_Num_Holdout,Lapsing_Save_Denom_Holdout)                     AS Lapsing_Save_Rate_Holdout

                        ,Free_To_Paid_Denom_Targeted
                        ,safe_divide(Free_To_Paid_Num_Targeted,Free_To_Paid_Denom_Targeted)                           AS Free_To_Paid_Rate_Engagers
                        ,safe_divide(Free_To_Paid_Num_Holdout,Free_To_Paid_Denom_Holdout)                             AS Free_To_Paid_Rate_Holdout

                        ,Total_Net_New_Upgrade_Denom_Targeted
                        ,safe_divide(Total_Net_New_Upgrade_Num_Targeted,Total_Net_New_Upgrade_Denom_Targeted) AS Net_New_Upgrade_Rate_Engagers
                        ,safe_divide(Total_Net_New_Upgrade_Num_Holdout,Total_Net_New_Upgrade_Denom_Holdout)   AS Net_New_Upgrade_Rate_Holdout

                        ,Total_Paid_Winbacks_Denom_Targeted
                        ,safe_divide(Total_Paid_Winbacks_Num_Targeted,Total_Paid_Winbacks_Denom_Targeted)     AS Paid_Winback_Rate_Engagers
                        ,safe_divide(Total_Paid_Winbacks_Num_Holdout,Total_Paid_Winbacks_Denom_Holdout)       AS Paid_Winback_Rate_Holdout

                        ,Total_Cancel_Save_Denom_Targeted
                        ,safe_divide(Total_Cancel_Save_Num_Targeted,Total_Cancel_Save_Denom_Targeted)         AS Cancel_Save_Rate_Engagers
                        ,safe_divide(Total_Cancel_Save_Num_Holdout,Total_Cancel_Save_Denom_Holdout)           AS Cancel_Save_Rate_Holdout

                        ,EOM_Paid_Churn_Denom_Targeted
                        ,safe_divide(EOM_Paid_Churn_Num_Targeted,EOM_Paid_Churn_Denom_Targeted)               AS Paid_Churn_Rate_Engagers
                        ,safe_divide(EOM_Paid_Churn_Num_Holdout,EOM_Paid_Churn_Denom_Holdout)                 AS Paid_Churn_Rate_Holdout
                    FROM CTE_1
                )
                SELECT  Report_Month
                    {'--' if total else ''}, Account_Type
                    ,Total_Targeted
                    ,Total_Holdout
                    
                    ,Return_Rate_Engagers                                                                                  AS Return_Rate_Engagers
                    ,Return_Rate_Holdout                                                                                   AS Return_Rate_Holdout
                    ,Return_Rate_Engagers - Return_Rate_Holdout                                                            AS Return_Rate_Lift_PTS
                    ,safe_divide(Return_Rate_Engagers,Return_Rate_Holdout) *100                                            AS Return_Rate_Lift_Index
                    ,(Return_Rate_Engagers - Return_Rate_Holdout) * Total_Targeted                                         AS Returns_Incrementals

                    ,Usage_Engagers                                                                                        AS Usage_Engagers
                    ,Usage_Holdout                                                                                         AS Usage_Holdout
                    ,Usage_Engagers - Usage_Holdout                                                                        AS Usage_Lift_PTS
                    ,safe_divide(Usage_Engagers,Usage_Holdout) *100                                                        AS Usage_Lift_Index
                    ,(Usage_Engagers - Usage_Holdout) * Total_Targeted                                                     AS Usage_Incrementals

                    ,Repertoire_Engagers                                                                                   AS Repertoire_Engagers
                    ,Repertoire_Holdout                                                                                    AS Repertoire_Holdout
                    ,Repertoire_Engagers - Repertoire_Holdout                                                              AS Repertoire_Lift_PTS
                    ,safe_divide(Repertoire_Engagers,Repertoire_Holdout) *100                                              AS Repertoire_Lift_Index
                    ,(Repertoire_Engagers - Repertoire_Holdout) * Total_Targeted                                           AS Repertoire_Incrementals

                    ,Sessions_Engagers                                                                                     AS Sessions_Engagers
                    ,Sessions_Holdout                                                                                      AS Sessions_Holdout
                    ,Sessions_Engagers - Sessions_Holdout                                                                  AS Sessions_Lift_PTS
                    ,safe_divide(Sessions_Engagers,Sessions_Holdout) *100                                                  AS Sessions_Lift_Index
                    ,(Sessions_Engagers - Sessions_Holdout) * Total_Targeted                                               AS Sessions_Incrementals

                    ,Lapsed_Save_Rate_Engagers                                                                             AS Lapsed_Save_Rate_Engagers
                    ,Lapsed_Save_Rate_Holdout                                                                              AS Lapsed_Save_Rate_Holdout
                    ,Lapsed_Save_Rate_Engagers - Lapsed_Save_Rate_Holdout                                                  AS Lapsed_Save_Rate_Lift_PTS
                    ,safe_divide(Lapsed_Save_Rate_Engagers,Lapsed_Save_Rate_Holdout) *100                                  AS Lapsed_Save_Rate_Lift_Index
                    ,(Lapsed_Save_Rate_Engagers - Lapsed_Save_Rate_Holdout) * Lapsed_Save_Denom_Targeted                   AS Lapsed_Save_Incrementals

                    ,Lapsing_Save_Rate_Engagers                                                                            AS Lapsing_Save_Rate_Engagers
                    ,Lapsing_Save_Rate_Holdout                                                                             AS Lapsing_Save_Rate_Holdout
                    ,Lapsing_Save_Rate_Engagers - Lapsing_Save_Rate_Holdout                                                AS Lapsing_Save_Rate_Lift_PTS
                    ,safe_divide(Lapsing_Save_Rate_Engagers,Lapsing_Save_Rate_Holdout) *100                                AS Lapsing_Save_Rate_Lift_Index
                    ,(Lapsing_Save_Rate_Engagers - Lapsing_Save_Rate_Holdout) * Lapsing_Save_Denom_Targeted                AS Lapsing_Save_Rate_Lift_Incrementals

                    ,Free_To_Paid_Rate_Engagers                                                                                 AS Free_To_Paid_Rate_Engagers
                    ,Free_To_Paid_Rate_Holdout                                                                                  AS Free_To_Paid_Rate_Holdout
                    ,Free_To_Paid_Rate_Engagers - Free_To_Paid_Rate_Holdout                                                          AS Free_To_Paid_Rate_Lift_PTS
                    ,safe_divide(Free_To_Paid_Rate_Engagers,Free_To_Paid_Rate_Holdout) *100                                          AS Free_To_Paid_Rate_Lift_Index
                    ,(Free_To_Paid_Rate_Engagers - Free_To_Paid_Rate_Holdout) * Free_To_Paid_Denom_Targeted                              AS Free_To_Paid_Incrementals

                    ,Net_New_Upgrade_Rate_Engagers                                                                         AS Net_New_Upgrade_Rate_Engagers
                    ,Net_New_Upgrade_Rate_Holdout                                                                          AS Net_New_Upgrade_Rate_Holdout
                    ,Net_New_Upgrade_Rate_Engagers - Net_New_Upgrade_Rate_Holdout                                          AS Net_New_Upgrade_Rate_Lift_PTS
                    ,safe_divide(Net_New_Upgrade_Rate_Engagers,Net_New_Upgrade_Rate_Holdout) *100                          AS Net_New_Upgrade_Rate_Lift_Index
                    ,(Net_New_Upgrade_Rate_Engagers - Net_New_Upgrade_Rate_Holdout) * Total_Net_New_Upgrade_Denom_Targeted AS Net_New_Upgrade_Incrementals

                    ,Paid_Winback_Rate_Engagers                                                                            AS Paid_Winback_Rate_Engagers
                    ,Paid_Winback_Rate_Holdout                                                                             AS Paid_Winback_Rate_Holdout
                    ,Paid_Winback_Rate_Engagers - Paid_Winback_Rate_Holdout                                                AS Paid_Winback_Rate_Lift_PTS
                    ,safe_divide(Paid_Winback_Rate_Engagers,Paid_Winback_Rate_Holdout) *100                                AS Paid_Winback_Rate_Lift_Index
                    ,(Paid_Winback_Rate_Engagers - Paid_Winback_Rate_Holdout) * Total_Paid_Winbacks_Denom_Targeted         AS Paid_Winback_Rate_Lift_Incrementals

                    ,Cancel_Save_Rate_Engagers                                                                             AS Cancel_Save_Rate_Engagers
                    ,Cancel_Save_Rate_Holdout                                                                              AS Cancel_Save_Rate_Holdout
                    ,Cancel_Save_Rate_Engagers - Cancel_Save_Rate_Holdout                                                  AS Cancel_Save_Rate_Lift_PTS
                    ,safe_divide(Cancel_Save_Rate_Engagers,Cancel_Save_Rate_Holdout) *100                                  AS Cancel_Save_Rate_Lift_Index
                    ,(Cancel_Save_Rate_Engagers - Cancel_Save_Rate_Holdout) * Total_Cancel_Save_Denom_Targeted             AS Cancel_Save_Rate_Incrementals

                    ,Paid_Churn_Rate_Engagers                                                                              AS EOM_Paid_Churn_Rate_Engagers
                    ,Paid_Churn_Rate_Holdout                                                                               AS EOM_Paid_Churn_Rate_Holdout
                    ,Paid_Churn_Rate_Engagers - Paid_Churn_Rate_Holdout                                                    AS EOM_Paid_Churn_Rate_Lift_PTS
                    ,safe_divide(Paid_Churn_Rate_Engagers,Paid_Churn_Rate_Holdout) *100                                    AS EOM_Paid_Churn_Rate_Lift_Index
                    ,(Paid_Churn_Rate_Engagers - Paid_Churn_Rate_Holdout) * EOM_Paid_Churn_Denom_Targeted                  AS EOM_Paid_Churn_Rate_Incrementals
                FROM CTE_2
                ORDER BY 1 {'--' if total else ''},2
                """
    return gen_analysis_query

In [23]:
def gen_analysis_query_churn(report: str) -> str:
	"""Return the EOM paid-churn lift SQL for ``Email_Channel_Base_<report>``.

	The query references the named parameter ``@report_start_date`` (emitted
	as Report_Month) and groups by Report_Month and paid_tenure; the
	Account_Type column is commented out in the SQL itself.

	Cohort sizes count rows with ``Account_Type = 'Paying SVOD'`` per cohort;
	churn rates are safe_divide(Num, Denom) per cohort. The final SELECT adds
	the rate difference (Lift_PTS), the ratio * 100 (Lift_Index) and the
	difference multiplied by the Targeted churn denominator (Incrementals).
	"""
	return f"""
			WITH CTE_1 AS
			(
				SELECT  @report_start_date                                                                                     AS Report_Month
					--, Account_Type
					,paid_tenure
					,SUM(CASE WHEN (cohort = 'Targeted') AND (Account_Type = 'Paying SVOD') THEN 1 END )  AS Distinct_Cohort_Size_Targeted
					,SUM(CASE WHEN (cohort = 'Holdout') AND (Account_Type = 'Paying SVOD') THEN 1 END)         AS Distinct_Cohort_Size_Holdout
					,SUM(CASE WHEN cohort = 'Targeted' THEN EOM_Paid_Churn_Denom END)                                AS EOM_Paid_Churn_Denom_Targeted
					,SUM(CASE WHEN cohort = 'Targeted' THEN EOM_Paid_Churn_Num END)                                   AS EOM_Paid_Churn_Num_Targeted
					,SUM(CASE WHEN cohort = 'Holdout' THEN EOM_Paid_Churn_Denom END)                                       AS EOM_Paid_Churn_Denom_Holdout
					,SUM(CASE WHEN cohort = 'Holdout' THEN EOM_Paid_Churn_Num END)                                          AS EOM_Paid_Churn_Num_Holdout
				FROM `nbcu-ds-sandbox-a-001.SLi_sandbox.Email_Channel_Base_{report}`
				GROUP BY  1,2
			), CTE_2 AS
			(
				SELECT  Report_Month
					--, Account_Type
					,paid_tenure
					,Distinct_Cohort_Size_Targeted                                          AS Targeted
					,Distinct_Cohort_Size_Holdout                                           AS Holdout
					,EOM_Paid_Churn_Denom_Targeted
					,safe_divide(EOM_Paid_Churn_Num_Targeted,EOM_Paid_Churn_Denom_Targeted) AS Paid_Churn_Rate_Engagers
					,safe_divide(EOM_Paid_Churn_Num_Holdout,EOM_Paid_Churn_Denom_Holdout)   AS Paid_Churn_Rate_Holdout
				FROM CTE_1
			)
			SELECT  Report_Month
				--, Account_Type
				,paid_tenure
				,Targeted
				,Holdout
				,Paid_Churn_Rate_Engagers                                                             AS EOM_Paid_Churn_Rate_Engagers
				,Paid_Churn_Rate_Holdout                                                              AS EOM_Paid_Churn_Rate_Holdout
				,Paid_Churn_Rate_Engagers - Paid_Churn_Rate_Holdout                                   AS EOM_Paid_Churn_Rate_Lift_PTS
				,safe_divide(Paid_Churn_Rate_Engagers,Paid_Churn_Rate_Holdout) *100                   AS EOM_Paid_Churn_Rate_Lift_Index
				,(Paid_Churn_Rate_Engagers - Paid_Churn_Rate_Holdout) * EOM_Paid_Churn_Denom_Targeted AS EOM_Paid_Churn_Rate_Incrementals
			FROM CTE_2
			ORDER BY 1, 2
			"""

In [24]:
# SQL generators run for every report, in this order: overall totals, split by
# Account_Type, then paid churn by paid_tenure.
analyze_queries = [
    gen_analysis_query_func(total=totals_only) for totals_only in (True, False)
]
analyze_queries.append(gen_analysis_query_churn)

In [25]:
def build_report(report, dates, queries):
    """Run every analysis query for one report and collect the results.

    Args:
        report: Report name passed to each query builder.
        dates: Mapping with 'report_start_date' and 'report_end_date'
            entries; both are sent to BigQuery as named STRING parameters.
        queries: Iterable of callables. Each is called with ``report`` and
            returns the query to execute.

    Returns:
        A list holding one ``pandas_gbq.read_gbq`` result per query, in the
        same order as ``queries``.
    """
    def string_param(name):
        # Named STRING query parameter whose value is looked up in ``dates``.
        return {
            'name': name,
            'parameterType': {'type': 'STRING'},
            'parameterValue': {'value': dates[name]}
        }

    def run(statement):
        # A fresh configuration dict is built for every call.
        job_config = {
            'query': {
                'parameterMode': 'NAMED',
                'queryParameters': [
                    string_param('report_start_date'),
                    string_param('report_end_date'),
                ]
            }
        }
        return pandas_gbq.read_gbq(statement, configuration=job_config)

    # Render every query text up front, before the first one is executed.
    statements = [make_query(report) for make_query in queries]
    return [run(statement) for statement in statements]

## Monthly

In [32]:
# Fan out one build_report job per month and gather the outputs as the jobs
# finish, so `results` is in completion order rather than calendar order.
threads = []
results = []

with ThreadPoolExecutor(len(months)) as pool:
    # `threads` collects the Future returned by each submit call.
    threads.extend(
        pool.submit(build_report, month_label, month_dates, analyze_queries)
        for month_label, month_dates in months.items()
    )
    results.extend(finished.result() for finished in as_completed(threads))

Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading:   0%|[32m          [0m|
Downloading: 100%|[32m██████████[0m|


Downloading: 100%|[32m██████████[0m|

Downloading: 100%|[32m██████████[0m|


Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[

In [33]:
# Each entry of `results` is the list build_report returned for one month:
# position 0 feeds the 'total' sheet and position 1 the 'acc_type' sheet.
# ignore_index=True renumbers the rows after sorting, so the index column
# written to Excel runs 0..n-1 instead of repeating the labels left by concat.
# NOTE(review): analyze_queries also yields a third (churn) frame per report
# that is not written to the workbook - confirm this is intended.
aggregate_reports = pd.concat(
    [report_frames[0] for report_frames in results]
).sort_values(by='Report_Month', ignore_index=True)
acc_type_reports = pd.concat(
    [report_frames[1] for report_frames in results]
).sort_values(by=['Report_Month', 'Account_Type'], ignore_index=True)

with pd.ExcelWriter('email_channel_report_monthly.xlsx') as writer:
    aggregate_reports.to_excel(writer, sheet_name='total')
    acc_type_reports.to_excel(writer, sheet_name='acc_type')

## Quarterly

In [27]:
# Fan out one build_report job per quarter and gather the outputs as the jobs
# finish, so `results` is in completion order rather than calendar order.
threads = []
results = []

with ThreadPoolExecutor(len(quarters)) as pool:
    # `threads` collects the Future returned by each submit call.
    threads.extend(
        pool.submit(build_report, quarter_label, quarter_dates, analyze_queries)
        for quarter_label, quarter_dates in quarters.items()
    )
    results.extend(finished.result() for finished in as_completed(threads))

Downloading: |[32m          [0m|
Downloading: |[32m          [0m|
Downloading: |[32m          [0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|


In [28]:
# Each entry of `results` is the list build_report returned for one quarter:
# position 0 feeds the 'total' sheet and position 1 the 'acc_type' sheet.
# ignore_index=True renumbers the rows after sorting, so the index column
# written to Excel runs 0..n-1 instead of repeating the labels left by concat.
# NOTE(review): analyze_queries also yields a third (churn) frame per report
# that is not written to the workbook - confirm this is intended.
aggregate_reports = pd.concat(
    [report_frames[0] for report_frames in results]
).sort_values(by='Report_Month', ignore_index=True)
acc_type_reports = pd.concat(
    [report_frames[1] for report_frames in results]
).sort_values(by=['Report_Month', 'Account_Type'], ignore_index=True)

with pd.ExcelWriter('email_channel_report_quarterly.xlsx') as writer:
    aggregate_reports.to_excel(writer, sheet_name='total')
    acc_type_reports.to_excel(writer, sheet_name='acc_type')

## Upgrades

In [29]:
%%bigquery ftp

-- Free-to-paid rates per report month, comparing the Targeted cohort with the
-- Holdout cohort. The result is stored in the `ftp` DataFrame.
WITH cohort_totals AS
(
    -- One row per month: distinct cohort sizes plus numerator/denominator sums.
    SELECT  Report_Month
           ,COUNT(DISTINCT IF(cohort = 'Targeted', aid, NULL))       AS Distinct_Cohort_Size_Targeted
           ,COUNT(DISTINCT IF(cohort = 'Holdout', aid, NULL))        AS Distinct_Cohort_Size_Holdout
           ,SUM(IF(cohort = 'Targeted', Free_To_Paid_Denom, NULL))   AS Free_To_Paid_Denom_Targeted
           ,SUM(IF(cohort = 'Targeted', Free_To_Paid_Num, NULL))     AS Free_To_Paid_Num_Targeted
           ,SUM(IF(cohort = 'Holdout', Free_To_Paid_Denom, NULL))    AS Free_To_Paid_Denom_Holdout
           ,SUM(IF(cohort = 'Holdout', Free_To_Paid_Num, NULL))      AS Free_To_Paid_Num_Holdout
    FROM `nbcu-ds-sandbox-a-001.SLi_sandbox.SILVER_EMAIL_CHANNEL_PERFORMANCE_MONTHLY`
    GROUP BY Report_Month
), cohort_rates AS
(
    -- Numerator / denominator per cohort; SAFE_DIVIDE yields NULL on a zero denominator.
    SELECT  Report_Month
           ,Distinct_Cohort_Size_Targeted                                        AS Total_Targeted
           ,Distinct_Cohort_Size_Holdout                                         AS Total_Holdout
           ,Free_To_Paid_Denom_Targeted
           ,SAFE_DIVIDE(Free_To_Paid_Num_Targeted, Free_To_Paid_Denom_Targeted)  AS Free_To_Paid_Rate_Engagers
           ,SAFE_DIVIDE(Free_To_Paid_Num_Holdout, Free_To_Paid_Denom_Holdout)    AS Free_To_Paid_Rate_Holdout
    FROM cohort_totals
)
-- Lift in points (difference), lift index (ratio * 100) and incrementals
-- (rate difference scaled by the Targeted denominator).
SELECT  Report_Month
       ,Total_Targeted
       ,Total_Holdout
       ,Free_To_Paid_Rate_Engagers
       ,Free_To_Paid_Rate_Holdout
       ,Free_To_Paid_Rate_Engagers - Free_To_Paid_Rate_Holdout                                 AS Free_To_Paid_Rate_Lift_PTS
       ,SAFE_DIVIDE(Free_To_Paid_Rate_Engagers, Free_To_Paid_Rate_Holdout) * 100               AS Free_To_Paid_Rate_Lift_Index
       ,(Free_To_Paid_Rate_Engagers - Free_To_Paid_Rate_Holdout) * Free_To_Paid_Denom_Targeted AS Free_To_Paid_Incrementals
FROM cohort_rates
ORDER BY Report_Month

Query is running:   0%|          |

Downloading:   0%|          |

In [30]:
# to_excel does not create missing parent folders, so make sure ./ftp exists;
# otherwise this cell fails on a fresh checkout.
import os

os.makedirs('./ftp', exist_ok=True)
ftp.to_excel('./ftp/ftp.xlsx')

In [None]:
%%bigquery

-- Ad-hoc check of end-of-month paid churn, Targeted vs. Holdout, restricted to
-- the 'Monthly' rows for Report_Month 2023-07-01 of the GOLD test table.
-- The %%bigquery cell magic is required: without it the Python kernel tries
-- to parse this SQL as Python and the cell fails with a SyntaxError.
WITH a AS
(
	-- Churn numerator/denominator sums for each cohort.
	SELECT  SUM(CASE WHEN cohort = 'Targeted' THEN EOM_Paid_Churn_Denom END ) AS EOM_Paid_Churn_Denom_Targeted
	       ,SUM(CASE WHEN cohort = 'Targeted' THEN EOM_Paid_Churn_Num END)    AS EOM_Paid_Churn_Num_Targeted
	       ,SUM(CASE WHEN cohort = 'Holdout' THEN EOM_Paid_Churn_Denom END )  AS EOM_Paid_Churn_Denom_Holdout
	       ,SUM(CASE WHEN cohort = 'Holdout' THEN EOM_Paid_Churn_Num END)     AS EOM_Paid_Churn_Num_Holdout
	FROM `nbcu-ds-sandbox-a-001.SLi_sandbox.GOLD_EMAIL_CHANNEL_PERFORMANCE_TEST`
	WHERE Report_Month = '2023-07-01'
	AND Result_Type = 'Monthly'
), b AS
(
	-- Churn rate per cohort; safe_divide yields NULL on a zero denominator.
	SELECT  EOM_Paid_Churn_Denom_Targeted
	       ,safe_divide(EOM_Paid_Churn_Num_Targeted,EOM_Paid_Churn_Denom_Targeted) AS Paid_Churn_Rate_Engagers
	       ,safe_divide(EOM_Paid_Churn_Num_Holdout,EOM_Paid_Churn_Denom_Holdout)   AS Paid_Churn_Rate_Holdout
	FROM a
)
-- Lift in points (difference), lift index (ratio * 100) and incrementals
-- (rate difference scaled by the Targeted denominator).
SELECT  Paid_Churn_Rate_Engagers                                                             AS EOM_Paid_Churn_Rate_Engagers
       ,Paid_Churn_Rate_Holdout                                                              AS EOM_Paid_Churn_Rate_Holdout
       ,Paid_Churn_Rate_Engagers - Paid_Churn_Rate_Holdout                                   AS EOM_Paid_Churn_Rate_Lift_PTS
       ,safe_divide(Paid_Churn_Rate_Engagers,Paid_Churn_Rate_Holdout) *100                   AS EOM_Paid_Churn_Rate_Lift_Index
       ,(Paid_Churn_Rate_Engagers - Paid_Churn_Rate_Holdout) * EOM_Paid_Churn_Denom_Targeted AS EOM_Paid_Churn_Rate_Incrementals
FROM b