In [37]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# The project root is everything in the working-directory path that precedes
# the first occurrence of 'notebooks'.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package imported below resolves.
sys.path.insert(0, str(pth_project))
# Parse the project config inside a context manager so the file handle is
# closed as soon as parsing finishes instead of being left open.
with pth_creds.open() as f_config:
    d_project_config = safe_load(f_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path insert above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
# The project name is read from the 'gcp-project-name' key of the config file.
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when pandas objects are displayed.
pd.options.display.max_rows = 100

In [38]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return its result as a DataFrame.

    Args:
        bq_client: Object exposing ``query(sql)`` whose result has a
            ``to_dataframe()`` method.
        sql: Query text. Used as-is when given; takes precedence over
            ``pth_query``.
        pth_query: Object exposing ``read_text()`` that returns the query
            text. Only consulted when ``sql`` is None.

    Returns:
        Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises:
        ValueError: If both ``sql`` and ``pth_query`` are None.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # No inline SQL supplied: load the query text from the given path.
        sql = pth_query.read_text()
    return bq_client.query(sql).to_dataframe()

In [39]:
# BigQuery script text for the campaign before/after comparison.
#
# The script declares two hard-coded dates (initial_date, final_date) and returns
# the BANs in Merge_data (rows of HBUR_Campaign_data_SAS with CAMP_TEST='R' labelled
# 'Treatment', the rows of HBUR_Control_data, and all rows of Santa_Campaign),
# left-joined to:
#   * active SMHM snapshot rows (consldt_cust_typ_cd = 'R') on initial_date and on
#     final_date: a constant 1 flag plus the latest contract end date per BAN;
#   * billing sums for the 30 days before each date, grouped by BAN and bill month;
#   * the average login_consistency and derived segment for month_snapshot
#     '2023-01-01';
#   * the distinct product-type count per BAN on each date.
#
# NOTE(review): the two ARPU CTEs are grouped by (ban, bill_year_month) but joined
#   on BAN only, so a BAN with bills in two calendar months inside a 30-day window
#   produces more than one output row - confirm whether that is intended.
# NOTE(review): `final_snapshot_end` is final_date minus 30 days and is used as the
#   lower bound of the final billing window, despite the "_end" in its name.
# NOTE(review): Merge_data combines its inputs with `select *` + UNION ALL, which
#   matches columns by position; this assumes Santa_Campaign has the same three
#   columns in the same order - TODO confirm.
Query_Snapshot='''


DECLARE initial_date DATE DEFAULT "2022-11-30"; --Campaign Date
DECLARE final_date DATE DEFAULT "2023-03-31"; -- attribution window 
--DECLARE target_date_end DATE DEFAULT "2022-12-31";
--DECLARE target_date_start_next_month DATE DEFAULT "2023-01-01";

WITH date_sql as (

SELECT 


date_sub(initial_date, interval 30 day) AS intial_snapshot_start,
date_sub(final_date, interval 30 day) AS final_snapshot_end

)


,Camp_V1_BAN as (

select distinct BACCT_NUM as BAN,CAMP_CREATIVE, 'Treatment' as Experiment
from `divgpras-pr-579355.SHS.HBUR_Campaign_data_SAS`
where CAMP_TEST='R'

)

,Camp_Control as (

select distinct BAN,CAMP_CREATIVE, Experiment
from `divgpras-pr-579355.SHS.HBUR_Control_data`

)

, prod_snpsht_SMHM_initial as (


select distinct bacct_bus_bacct_num as BAN,1 as count_of_BAN_intial,date(max(pi_cntrct_end_ts)) as contract_end_date_intial
FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
where pi_prod_instnc_typ_cd='SMHM' 
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
and DATE(prod_instnc_ts)=initial_date
group by BAN


)

, prod_snpsht_SMHM_final as (


select distinct bacct_bus_bacct_num as BAN,1 as count_of_BAN_final,date(max(pi_cntrct_end_ts)) as contract_end_date_final
FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
where pi_prod_instnc_typ_cd='SMHM' 
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
and DATE(prod_instnc_ts)=final_date
group by BAN


)
, ARPU_Calculation_intial as(
SELECT
  distinct billg_acct_num AS ban,
  EXTRACT(YEAR
  FROM
    bill.bill_dt)||'-'||LPAD(CAST(EXTRACT(MONTH
      FROM
        bill.bill_dt) AS STRING), 2, '0') AS bill_year_month,
  sum(bill.rid_unit_chrg_amt) as ARPU_SMHM,
  sum(tot_inv_amt) as ARPU_FFH
  FROM
  `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_inv_sum_view` bill
WHERE
  DATE(bill.bill_dt)>=  (select intial_snapshot_start from date_sql)
  and DATE(bill.bill_dt)< initial_date
  group by ban,bill_year_month
)


, ARPU_Calculation_final as(
SELECT
  distinct billg_acct_num AS ban,
  EXTRACT(YEAR
  FROM
    bill.bill_dt)||'-'||LPAD(CAST(EXTRACT(MONTH
      FROM
        bill.bill_dt) AS STRING), 2, '0') AS bill_year_month,
  sum(bill.rid_unit_chrg_amt) as ARPU_SMHM,
  sum(tot_inv_amt) as ARPU_FFH
  FROM
  `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_inv_sum_view` bill
WHERE
  DATE(bill.bill_dt)>=  (select final_snapshot_end from date_sql)
  and DATE(bill.bill_dt)< final_date
  group by ban,bill_year_month
)

,Merge_data as(

select * from Camp_V1_BAN
UNION ALL
select * from Camp_Control
UNION ALL
select * from `divgpras-pr-579355.SHS.Santa_Campaign`

)

, ADC_login_data_Jan as

(

select ban as BAN, avg(login_consistency) as Login_consistency,
   CASE
      WHEN avg(login_consistency) = 0 THEN  "NO_User"
      WHEN avg(login_consistency)>=30 THEN 'Heavy_User'
      WHEN avg(login_consistency)<30 THEN "Low_User"
    ELSE
    'Un_assigned'
  END
    AS Login_camera_Segment 

from `bi-stg-divg-speech-pr-9d940b.adc_feature_store.ADC_Feature_data_store` 
where month_snapshot='2023-01-01' and dealer_name='TELUS Communications Inc.'
group by ban
)



, cte_product_mix_intial as (
  
SELECT distinct bacct_bus_bacct_num AS ban,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd IN ('SING', 'HSIC', 'TTV', 'SMHM', 'STV', 'DIIC','C') THEN ffh_prod.pi_prod_instnc_typ_cd ELSE NULL END) AS product_mix_all,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'HSIC' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS hsic_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'SING' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS sing_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'SMHM' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS shs_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'TTV'  THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS ttv_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'STV'  THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS stv_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'DIIC' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS diic_count   
         FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` AS ffh_prod
   
   WHERE DATE(ffh_prod.prod_instnc_ts) = initial_date
     AND ffh_prod.pi_prod_instnc_stat_cd = 'A' --Active Products
     AND ffh_prod.consldt_cust_typ_cd = 'R' --Regular (not Business)
     AND ffh_prod.pi_prod_instnc_typ_cd IN 
         (
           'DIIC', --Dialup
           'HSIC', --High Speed
           'SING', --Home Phone
           'SMHM', --Smart Home
           'STV',  --Satelite
           'TTV'   --TV
         )
GROUP BY ffh_prod.bacct_bus_bacct_num
--QUALIFY ROW_NUMBER() OVER (PARTITION BY ffh_prod.bacct_bus_bacct_num ORDER BY ffh_prod.prod_instnc_ts ) = 1
)

, cte_product_mix_final as (
  
SELECT distinct ffh_prod.bacct_bus_bacct_num  AS ban,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd IN ('SING', 'HSIC', 'TTV', 'SMHM', 'STV', 'DIIC','C') THEN ffh_prod.pi_prod_instnc_typ_cd ELSE NULL END) AS product_mix_all,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'HSIC' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS hsic_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'SING' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS sing_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'SMHM' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS shs_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'TTV'  THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS ttv_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'STV'  THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS stv_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'DIIC' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS diic_count   
         FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` AS ffh_prod
   
   WHERE DATE(ffh_prod.prod_instnc_ts) = final_date
     AND ffh_prod.pi_prod_instnc_stat_cd = 'A' --Active Products
     AND ffh_prod.consldt_cust_typ_cd = 'R' --Regular (not Business)
     AND ffh_prod.pi_prod_instnc_typ_cd IN 
         (
           'DIIC', --Dialup
           'HSIC', --High Speed
           'SING', --Home Phone
           'SMHM', --Smart Home
           'STV',  --Satelite
           'TTV'   --TV
         )
GROUP BY ffh_prod.bacct_bus_bacct_num
--QUALIFY ROW_NUMBER() OVER (PARTITION BY ffh_prod.bacct_bus_bacct_num ORDER BY ffh_prod.prod_instnc_ts ) = 1
)


select a.BAN,a.CAMP_CREATIVE,a.Experiment

,b.count_of_BAN_intial
,b.contract_end_date_intial
,d.count_of_BAN_final
,d.contract_end_date_final
,c.bill_year_month as bill_year_month_initial
,c.ARPU_SMHM as ARPU_SMHM_intial
,c.ARPU_FFH as ARPU_FFH_intial
,e.bill_year_month as bill_year_month_final
,e.ARPU_SMHM as ARPU_SMHM_final
,e.ARPU_FFH as ARPU_FFH_final
,f.Login_consistency,f.Login_camera_Segment
,g.product_mix_all as prod_mix_intial
,h.product_mix_all as prod_mix_final
 from Merge_data a
left join prod_snpsht_SMHM_initial b
on a.BAN=b.BAN
left join prod_snpsht_SMHM_final d
on a.BAN=d.BAN
left join ARPU_Calculation_intial c
on a.BAN=c.ban
left join ARPU_Calculation_final e
on a.BAN=e.ban
left join ADC_login_data_Jan f
on a.BAN=f.BAN
left join cte_product_mix_intial g
on a.BAN=g.ban
left join cte_product_mix_final h
on a.BAN=h.ban
'''

In [40]:
# Run the snapshot query against BigQuery and keep the result as a DataFrame.
HBUR_Campaign_Analysis_DF = extract_bq_data(bq_client=bq_client, sql=Query_Snapshot)

In [41]:
# Column overview of the query result: non-null counts and dtypes.
HBUR_Campaign_Analysis_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25397 entries, 0 to 25396
Data columns (total 17 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   BAN                       25397 non-null  Int64  
 1   CAMP_CREATIVE             25397 non-null  object 
 2   Experiment                25397 non-null  object 
 3   count_of_BAN_intial       24966 non-null  Int64  
 4   contract_end_date_intial  24966 non-null  object 
 5   count_of_BAN_final        23346 non-null  Int64  
 6   contract_end_date_final   23346 non-null  object 
 7   bill_year_month_initial   25390 non-null  object 
 8   ARPU_SMHM_intial          25379 non-null  object 
 9   ARPU_FFH_intial           25390 non-null  object 
 10  bill_year_month_final     25395 non-null  object 
 11  ARPU_SMHM_final           24932 non-null  object 
 12  ARPU_FFH_final            25395 non-null  object 
 13  Login_consistency         15784 non-null  float64
 14  Login_

In [42]:
# Rows with no match in the final SMHM snapshot come back as missing after the
# left join; count them as 0.
# The filled column is assigned back instead of calling fillna(inplace=True) on
# the selected column: that chained in-place call works on an intermediate
# object and is not guaranteed to update the DataFrame (pandas warns about it
# from 2.2 onwards, and with Copy-on-Write it has no effect at all).
HBUR_Campaign_Analysis_DF['count_of_BAN_final'] = (
    HBUR_Campaign_Analysis_DF['count_of_BAN_final'].fillna(0)
)

In [43]:
# def date_converter(x):
#     try:
#         return(pd.to_datetime(x, format='%Y-%b-%d'))
#     except:
#         return x

In [44]:
# Inspect the final-snapshot contract end dates returned by the query.
# NOTE(review): the displayed values include 9999-12-31 and None - presumably an
# open-ended sentinel and BANs without a final-snapshot match; confirm before
# doing any date arithmetic on this column.
HBUR_Campaign_Analysis_DF['contract_end_date_final']

0        2027-09-30
1        9999-12-31
2        2021-10-13
3        9999-12-31
4        2022-10-29
            ...    
25392    2026-07-27
25393    2023-09-06
25394    2027-01-04
25395    2027-01-16
25396          None
Name: contract_end_date_final, Length: 25397, dtype: object

In [45]:
# Row counts per login segment (value_counts leaves out missing values by default).
HBUR_Campaign_Analysis_DF['Login_camera_Segment'].value_counts()

Heavy_User     10123
Un_assigned     8167
Low_User        5661
Name: Login_camera_Segment, dtype: int64

In [46]:
# Churn flag: 1 when the row has no active SMHM product in the final snapshot
# (count is 0 or missing), otherwise 0.
# Missing counts are treated as 0 right here so the flag does not depend on an
# earlier cell having filled them, and the comparison is vectorised instead of
# a row-wise apply.
HBUR_Campaign_Analysis_DF['Churn'] = (
    HBUR_Campaign_Analysis_DF['count_of_BAN_final']
    .fillna(0)
    .eq(0)
    .astype('int64')
)

In [47]:
# Share of rows per Churn value, expressed in percent.
HBUR_Campaign_Analysis_DF['Churn'].value_counts(normalize=True)*100

0    91.924243
1     8.075757
Name: Churn, dtype: float64

In [48]:
# def renewal (row):
    
#     if row['Churn']==1:
#         return 'Customer_churned'
#     elif row['Churn']==0 and row['contract_end_date_final']=='9999-12-31' and row['contract_end_date_intial']=='9999-12-31':
#         return 'No_Renewal'
#     elif row['Churn']==0 and row['contract_end_date_final'] > row['contract_end_date_intial'] and row['contract_end_date_final']!='9999-12-31':
#         return 'Renewal'
#     elif row['Churn']==0 and row['contract_end_date_intial']=='9999-12-31' and row['contract_end_date_final']!='9999-12-31':
#         return 'Renewal'
#     else:
#         return 'None_of_Above'
    
    

In [49]:
# HBUR_Campaign_Analysis_DF['Renewal']=HBUR_Campaign_Analysis_DF.apply(renewal,axis=1)

In [50]:
# Summary per campaign creative and experiment arm: distinct BANs, summed
# initial/final SMHM flags, churn rate in percent, and rounded means of SMHM
# ARPU, login consistency and product mix for the initial and final snapshots.
(
    HBUR_Campaign_Analysis_DF
    .groupby(['CAMP_CREATIVE', 'Experiment'])
    .agg(
        Customer_count=('BAN', 'nunique'),
        Customer_count_intial=('count_of_BAN_intial', 'sum'),
        Customer_count_final=('count_of_BAN_final', 'sum'),
        # Mean of the 0/1 churn flag, scaled to a percentage.
        Churn_rate=('Churn', lambda flags: round(flags.mean() * 100, 2)),
        AVG_SMHM_ARPU_intial=('ARPU_SMHM_intial', lambda arpu: round(arpu.mean(), 0)),
        AVG_SMHM_ARPU_final=('ARPU_SMHM_final', lambda arpu: round(arpu.mean(), 0)),
        Login_consistency=('Login_consistency', lambda logins: round(logins.mean(), 0)),
        prod_mix_intial=('prod_mix_intial', lambda mix: round(mix.mean(), 1)),
        prod_mix_final=('prod_mix_final', lambda mix: round(mix.mean(), 1)),
    )
    .reset_index()
)

Unnamed: 0,CAMP_CREATIVE,Experiment,Customer_count,Customer_count_intial,Customer_count_final,Churn_rate,AVG_SMHM_ARPU_intial,AVG_SMHM_ARPU_final,Login_consistency,prod_mix_intial,prod_mix_final
0,ADCLOWUSEINCON11.30.,Control,3564,3490,3263,8.45,58.0,55.0,50.0,2.7,2.7
1,ADCLOWUSEINCON11.30.,Treatment,1625,1620,1541,5.17,54.0,52.0,48.0,3.0,3.0
2,ADCNOUSEINCON11.30.2,Control,5299,5190,4872,8.06,58.0,55.0,31.0,2.9,2.9
3,ADCNOUSEINCON11.30.2,Treatment,1855,1853,1754,5.44,58.0,56.0,33.0,3.1,3.1
4,ADCNOUSEOOC11.30.22,Control,5255,5066,4662,11.28,55.0,52.0,28.0,2.5,2.6
5,ADCNOUSEOOC11.30.22,Treatment,1190,1177,1069,10.17,58.0,54.0,31.0,3.3,3.3
6,Santa_Campaign,Control,3872,4604,4293,7.54,85.0,74.0,79.0,2.8,2.9
7,Santa_Campaign,Treatment,1966,1966,1892,3.76,86.0,77.0,79.0,3.0,3.0


In [51]:
# Write the enriched frame to a CSV in the current working directory, without the index.
HBUR_Campaign_Analysis_DF.to_csv(
    path_or_buf='HBUR_Campaign_Analysis_result.csv',
    index=False,
)