In [113]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# The project root is whatever precedes the first 'notebooks' segment of the
# working directory.
# NOTE(review): if the cwd does not contain 'notebooks', split() returns the
# whole cwd and pth_project silently becomes the current directory.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Make the project root importable so the `core` package below resolves.
sys.path.insert(0, str(pth_project))
# Read the YAML through read_text() so the file handle is closed immediately;
# passing an open() handle straight to safe_load left it open.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when a frame or series is displayed.
pd.options.display.max_rows = 100

In [114]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Args:
        bq_client: Client object exposing ``query(sql)`` whose result has a
            ``to_dataframe()`` method.
        sql: Query text. Takes precedence over ``pth_query`` when both are given.
        pth_query: Path-like object with ``read_text()``; used as the query
            source only when ``sql`` is None.

    Returns:
        Whatever ``bq_client.query(...).to_dataframe()`` returns.

    Raises:
        ValueError: If both ``sql`` and ``pth_query`` are None.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # No inline SQL supplied: fall back to the query stored on disk.
        sql = pth_query.read_text()
    return bq_client.query(sql).to_dataframe()

In [4]:
# SQL text: every column of SHS_ALR_Campaign_data_SAS for rows whose
# CAMP_CREATIVE contains 'ALR1_TP5' and whose Campaign_date falls between
# 2022-01-01 and 2022-06-30 (both bounds inclusive).
Campaign_Query='''


select *
from `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_SAS`
where REGEXP_CONTAINS (CAMP_CREATIVE, r'ALR1_TP5')
and date(Campaign_date)>='2022-01-01' and date(Campaign_date)<='2022-06-30'

'''

In [5]:
# Run Campaign_Query against BigQuery and keep the result as a DataFrame.
Camp_DF=extract_bq_data(bq_client, sql=Campaign_Query)

In [7]:
# Print column names, non-null counts and dtypes of the campaign extract.
Camp_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4943 entries, 0 to 4942
Data columns (total 52 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   TRACKING                4943 non-null   object        
 1   CAMP_INHOME             4943 non-null   object        
 2   CAMP_ID                 4943 non-null   object        
 3   DBM_VERSION             4943 non-null   object        
 4   DBM_VERSION_DESC        0 non-null      float64       
 5   CAMP_MODEL_DECILE       0 non-null      float64       
 6   MISC_INFO1              0 non-null      float64       
 7   MISC_INFO2              0 non-null      float64       
 8   MISC_INFO3              0 non-null      float64       
 9   MISC_INFO4              0 non-null      float64       
 10  CAMP_CREATIVE           4943 non-null   object        
 11  CAMP_CREATIVE_DESC      0 non-null      float64       
 12  CAMP_TEST               4943 non-null   object  

In [9]:
def Camp_creative_making(row):
    """Translate a row's ``CAMP_CREATIVE`` code into a version label.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by
            ``'CAMP_CREATIVE'``.

    Returns:
        ``'V1'`` .. ``'V4'`` when the code is one of the two known spellings
        for that version, otherwise the string ``"None"`` (not the ``None``
        object).
    """
    # Each version label paired with the exact creative codes that map to it.
    creative_groups = (
        ('V1', ('ALR1_TP5X_V1', 'ALR1_TP5_V1')),
        ('V2', ('ALR1_TP5X_V2', 'ALR1_TP5_V2')),
        ('V3', ('ALR1_TP5X_V3', 'ALR1_TP5_V3')),
        ('V4', ('ALR1_TP5X_V4', 'ALR1_TP5_V4')),
    )
    creative = row['CAMP_CREATIVE']
    for label, codes in creative_groups:
        if creative in codes:
            return label
    return "None"

In [10]:
# Number of campaign rows per Campaign_date, ordered by date.
Camp_DF['Campaign_date'].value_counts().sort_index()

2022-01-05      56
2022-01-11    1340
2022-03-02      12
2022-03-09      23
2022-03-16      62
2022-03-23      69
2022-03-30      84
2022-04-06     113
2022-04-13     184
2022-04-20     215
2022-04-27     212
2022-05-04     268
2022-05-11     433
2022-05-18     120
2022-05-25     395
2022-06-01     262
2022-06-08     373
2022-06-15     143
2022-06-22     302
2022-06-29     277
Name: Campaign_date, dtype: int64

In [11]:
# Add the version label column by applying Camp_creative_making to every row.
# Note that this mutates Camp_DF in place.
Camp_DF['CAMP_CREATIVE_New']=Camp_DF.apply(Camp_creative_making,axis=1)

In [12]:
# Row counts per derived creative label, ordered by label. A "None" entry here
# would mean some CAMP_CREATIVE code was not recognised by Camp_creative_making.
Camp_DF['CAMP_CREATIVE_New'].value_counts().sort_index()

V1     265
V2    2449
V3    1243
V4     986
Name: CAMP_CREATIVE_New, dtype: int64

In [14]:
# Per creative label and OPENED value: number of distinct BACCT_NUM values and
# the earliest / latest Campaign_date seen in the group.
(
    Camp_DF
    .groupby(['CAMP_CREATIVE_New', 'OPENED'])
    .agg(
        Customer_count=('BACCT_NUM', 'nunique'),
        campaign_date_min=('Campaign_date', 'min'),
        campaign_date_max=('Campaign_date', 'max'),
    )
    .reset_index()
)

Unnamed: 0,CAMP_CREATIVE_New,OPENED,Customer_count,campaign_date_min,campaign_date_max
0,V1,0.0,113,2022-01-11,2022-06-29
1,V1,1.0,89,2022-01-11,2022-06-29
2,V2,0.0,854,2022-01-05,2022-06-29
3,V2,1.0,1266,2022-01-05,2022-06-29
4,V3,0.0,490,2022-01-05,2022-06-29
5,V3,1.0,479,2022-01-05,2022-06-29
6,V4,0.0,324,2022-01-05,2022-06-29
7,V4,1.0,540,2022-01-05,2022-06-29


In [15]:
# Write the labelled campaign frame to BigQuery, replacing any rows already in
# the destination table (WRITE_TRUNCATE).
config = bigquery.LoadJobConfig()
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.SHS_ALR_Campaign_data_Jan_June_2022'

# load_table_from_dataframe only *starts* a load job and returns it.
bq_table_instance = bq_client.load_table_from_dataframe(Camp_DF, Table_BQ, job_config=config)
# Block until the job finishes so that load errors are raised in this cell and
# the queries below never read a missing or half-written table.
bq_table_instance.result()

In [93]:
# SQL text for the H2-2022 snapshot of campaign accounts.
#   * Camp_V1_BAN: one row per (BACCT_NUM, CAMP_CREATIVE_New) from the
#     Jan-June 2022 campaign table with sum(OPENED); the final select turns
#     that sum into a 0/1 Opened flag.
#   * prod_snpsht_SMHM_start_H2 / _end_H2: BANs having a product instance of
#     type 'SMHM', status 'A', customer type 'R' on target_date_start_H2
#     (2022-07-01) / target_date_end (2022-12-31); each carries a constant 1.
#   * ARPU_Calculation_*: per-BAN sums of rid_unit_chrg_amt and tot_inv_amt for
#     bills dated in 2021-12, 2022-06 and 2022-12 respectively.
#   * The final select left-joins everything onto Camp_V1_BAN by BAN.
# NOTE(review): target_date_start and target_date_start_next_month are declared
# but not referenced anywhere in the query text; the ARPU date windows are
# hard-coded literals instead.
# NOTE(review): because Camp_V1_BAN is keyed on (BAN, creative), a BAN that
# appears under more than one creative label produces one output row per label
# - confirm downstream counts expect that.
H2_2022_Snapshot='''




DECLARE target_date_start_H2 DATE DEFAULT "2022-07-01";
DECLARE target_date_start DATE DEFAULT "2022-12-01";
DECLARE target_date_end DATE DEFAULT "2022-12-31";
DECLARE target_date_start_next_month DATE DEFAULT "2023-01-01";



With Camp_V1_BAN as (

select distinct BACCT_NUM as BAN,CAMP_CREATIVE_New,sum(OPENED) as OPENED from `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_Jan_June_2022`
group by BACCT_NUM,CAMP_CREATIVE_New
)


, prod_snpsht_SMHM_start_H2 as (


select distinct bacct_bus_bacct_num as BAN,1 as count_of_BAN_FFH_start_H2
FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
where pi_prod_instnc_typ_cd='SMHM' 
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
and DATE(prod_instnc_ts)=target_date_start_H2


)

, prod_snpsht_SMHM_end_H2 as (


select distinct bacct_bus_bacct_num as BAN,1 as count_of_BAN_FFH_end_H2
FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
where pi_prod_instnc_typ_cd='SMHM' 
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
and DATE(prod_instnc_ts)=target_date_end


)
, ARPU_Calculation_start_H1 as(
SELECT
  distinct billg_acct_num AS ban,
  EXTRACT(YEAR
  FROM
    bill.bill_dt)||'-'||LPAD(CAST(EXTRACT(MONTH
      FROM
        bill.bill_dt) AS STRING), 2, '0') AS bill_year_month,
  sum(bill.rid_unit_chrg_amt) as ARPU_SMHM,
  sum(tot_inv_amt) as ARPU_FFH
  FROM
  `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_inv_sum_view` bill
WHERE
  DATE(bill.bill_dt)>=  '2021-12-01'
  and DATE(bill.bill_dt)< '2022-01-01'
  group by ban,bill_year_month
)


, ARPU_Calculation_start_H2 as(
SELECT
  distinct billg_acct_num AS ban,
  EXTRACT(YEAR
  FROM
    bill.bill_dt)||'-'||LPAD(CAST(EXTRACT(MONTH
      FROM
        bill.bill_dt) AS STRING), 2, '0') AS bill_year_month,
  sum(bill.rid_unit_chrg_amt) as ARPU_SMHM,
  sum(tot_inv_amt) as ARPU_FFH
  FROM
  `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_inv_sum_view` bill
WHERE
  DATE(bill.bill_dt)>=  '2022-06-01'
  and DATE(bill.bill_dt)< '2022-07-01'
  group by ban,bill_year_month
)

, ARPU_Calculation_end_H2 as(
SELECT
  distinct billg_acct_num AS ban,
  EXTRACT(YEAR
  FROM
    bill.bill_dt)||'-'||LPAD(CAST(EXTRACT(MONTH
      FROM
        bill.bill_dt) AS STRING), 2, '0') AS bill_year_month,
  sum(bill.rid_unit_chrg_amt) as ARPU_SMHM,
  sum(tot_inv_amt) as ARPU_FFH
  FROM
  `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_inv_sum_view` bill
WHERE
  DATE(bill.bill_dt)>=  '2022-12-01'
  and DATE(bill.bill_dt)< '2023-01-01'
  group by ban,bill_year_month
)

select a.BAN,a.CAMP_CREATIVE_New,
case when a.opened>0 then 1 else 0 end as Opened
,b.count_of_BAN_FFH_start_H2
,d.count_of_BAN_FFH_end_H2
,c.bill_year_month as bill_year_month_H1_2022_start
,c.ARPU_SMHM as ARPU_SMHM_H1_2022_start
,c.ARPU_FFH as ARPU_FFH_H1_2022_start
,e.bill_year_month as bill_year_month_H2_2022_start
,e.ARPU_SMHM as ARPU_SMHM_H2_2022_start
,e.ARPU_FFH as ARPU_FFH_H2_2022_start
,f.bill_year_month as bill_year_month_H2_2022_end
,f.ARPU_SMHM as ARPU_SMHM_H2_2022_end
,f.ARPU_FFH as ARPU_FFH_H2_2022_end

 from Camp_V1_BAN a
left join prod_snpsht_SMHM_start_H2 b
on a.BAN=b.BAN
left join prod_snpsht_SMHM_end_H2 d
on a.BAN=d.BAN
left join ARPU_Calculation_start_H1 c
on a.BAN=c.ban
left join ARPU_Calculation_start_H2 e
on a.BAN=e.ban
left join ARPU_Calculation_end_H2 f
on a.BAN=f.ban


'''

In [94]:
# Run the H2_2022_Snapshot query and keep the result as a DataFrame.
H2_2022_Snapshot_count=extract_bq_data(bq_client, sql=H2_2022_Snapshot)

In [95]:
# Print column names, non-null counts and dtypes of the snapshot result.
H2_2022_Snapshot_count.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3981 entries, 0 to 3980
Data columns (total 14 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   BAN                            3981 non-null   Int64 
 1   CAMP_CREATIVE_New              3981 non-null   object
 2   Opened                         3981 non-null   Int64 
 3   count_of_BAN_FFH_start_H2      3782 non-null   Int64 
 4   count_of_BAN_FFH_end_H2        3491 non-null   Int64 
 5   bill_year_month_H1_2022_start  3970 non-null   object
 6   ARPU_SMHM_H1_2022_start        3968 non-null   object
 7   ARPU_FFH_H1_2022_start         3970 non-null   object
 8   bill_year_month_H2_2022_start  3977 non-null   object
 9   ARPU_SMHM_H2_2022_start        3936 non-null   object
 10  ARPU_FFH_H2_2022_start         3977 non-null   object
 11  bill_year_month_H2_2022_end    3981 non-null   object
 12  ARPU_SMHM_H2_2022_end          3790 non-null   object
 13  ARP

In [96]:
# Preview the first five rows of the snapshot result.
H2_2022_Snapshot_count.head()

Unnamed: 0,BAN,CAMP_CREATIVE_New,Opened,count_of_BAN_FFH_start_H2,count_of_BAN_FFH_end_H2,bill_year_month_H1_2022_start,ARPU_SMHM_H1_2022_start,ARPU_FFH_H1_2022_start,bill_year_month_H2_2022_start,ARPU_SMHM_H2_2022_start,ARPU_FFH_H2_2022_start,bill_year_month_H2_2022_end,ARPU_SMHM_H2_2022_end,ARPU_FFH_H2_2022_end
0,601866088,V2,0,1,1,2021-12,48.0,127.21,2022-06,46.3,117.46,2022-12,78.6,143.69
1,601358262,V4,0,1,1,2021-12,70.0,205.67,2022-06,55.81,141.59,2022-12,54.0,230.52
2,208719387,V2,1,1,1,2021-12,53.0,155.89,2022-06,50.67,156.73,2022-12,365.0,500.43
3,602281312,V2,0,1,1,2021-12,38.0,199.45,2022-06,43.55,210.76,2022-12,50.0,182.9
4,601859562,V2,1,1,1,2021-12,43.39,211.0,2022-06,42.0,178.0,2022-12,42.0,174.0


In [98]:
# Per creative label and Opened flag: number of distinct BANs, and the sums of
# the start-of-H2 and end-of-H2 indicator columns.
(
    H2_2022_Snapshot_count
    .groupby(['CAMP_CREATIVE_New', 'Opened'])
    .agg(
        Customer_count=('BAN', 'nunique'),
        Customer_count_1=('count_of_BAN_FFH_start_H2', 'sum'),
        Customer_count_2=('count_of_BAN_FFH_end_H2', 'sum'),
    )
    .reset_index()
)

Unnamed: 0,CAMP_CREATIVE_New,Opened,Customer_count,Customer_count_1,Customer_count_2
0,V1,0,100,95,87
1,V1,1,89,84,76
2,V2,0,774,739,679
3,V2,1,1266,1205,1119
4,V3,0,446,415,384
5,V3,1,479,455,408
6,V4,0,287,272,258
7,V4,1,540,517,480


In [99]:
# Rows per BAN, most frequent first; any count above 1 is a BAN that occurs in
# more than one row of the snapshot.
H2_2022_Snapshot_count['BAN'].value_counts()

208624864    2
603583757    2
603514816    1
201363656    1
300023707    1
            ..
602998610    1
603913733    1
603588009    1
601818900    1
603678071    1
Name: BAN, Length: 3979, dtype: Int64

In [100]:
# Show every snapshot row for one specific BAN (208624864).
H2_2022_Snapshot_count[H2_2022_Snapshot_count['BAN']==208624864]

Unnamed: 0,BAN,CAMP_CREATIVE_New,Opened,count_of_BAN_FFH_start_H2,count_of_BAN_FFH_end_H2,bill_year_month_H1_2022_start,ARPU_SMHM_H1_2022_start,ARPU_FFH_H1_2022_start,bill_year_month_H2_2022_start,ARPU_SMHM_H2_2022_start,ARPU_FFH_H2_2022_start,bill_year_month_H2_2022_end,ARPU_SMHM_H2_2022_end,ARPU_FFH_H2_2022_end
785,208624864,V3,0,,,2021-12,53.0,79.88,2022-06,-51.29,-53.85,2022-12,,0.0
786,208624864,V2,0,,,2021-12,53.0,79.88,2022-06,-51.29,-53.85,2022-12,,0.0


In [101]:
# Export the snapshot to CSV without the index column. The path is relative,
# so the file is written to the current working directory.
H2_2022_Snapshot_count.to_csv('ALR_Campaign_2022H2.csv',index=False)

In [109]:
# SQL text for product-instance status / contract end date of campaign accounts
# at three snapshot dates.
#   * Intial_snapshot (2022-01-01): type 'SMHM' and status 'A' only.
#   * H2_start_snapshot (2022-07-01) and H2_end_snapshot (2022-12-31): type
#     'SMHM', any status.
#   Each CTE joins the Jan-June 2022 campaign table to the product-instance
#   snapshot on BAN and groups by creative label, BAN and status, returning
#   sum(Opened) and max(pi_cntrct_end_ts).
#   * The final select left-joins the two H2 CTEs onto Intial_snapshot by BAN
#     and creative label.
# NOTE(review): each CTE is written as a LEFT JOIN, but its WHERE clause
# filters on a.pi_prod_instnc_typ_cd, so campaign rows without a matching
# snapshot row are dropped - it behaves like an INNER JOIN. Confirm intended.
# NOTE(review): sum(b.Opened) is taken after the join; if a BAN matches several
# snapshot rows the campaign rows would be counted once per match - verify.
# NOTE(review): the H2 CTEs group by status, so a BAN/creative with more than
# one status would yield several rows and multiply rows in the final join -
# verify against the result's row count.
Contract_status_Query='''




with Intial_snapshot as (
        
     SELECT 
     distinct b.BACCT_NUM as ban_start_H1
    ,b.CAMP_CREATIVE_New as CAMP_CREATIVE_start_h1
    ,sum(b.Opened) as opened_start_h1
    ,a.pi_prod_instnc_stat_cd as status_intial_start_H1
    , max(pi_cntrct_end_ts) as latest_contract_end_dt_start_h1

    FROM `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_Jan_June_2022` b 
    LEFT JOIN`cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` a
    
    ON a.bacct_bus_bacct_num = b.BACCT_NUM 

    WHERE DATE(prod_instnc_ts) = '2022-01-01'
    and a.pi_prod_instnc_typ_cd='SMHM'
    and a.pi_prod_instnc_stat_cd='A'

    GROUP BY b.CAMP_CREATIVE_New
    , b.BACCT_NUM,a.pi_prod_instnc_stat_cd

)


, H2_start_snapshot as (
        
        SELECT
     distinct b.BACCT_NUM as ban_start_h2
    , b.CAMP_CREATIVE_New as CAMP_CREATIVE_start_h2
    ,sum(b.Opened) as Opened_start_h2
    ,a.pi_prod_instnc_stat_cd as status_start_h2
    , max(pi_cntrct_end_ts) as contract_end_dt_start_h2

    FROM `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_Jan_June_2022` b 
    LEFT JOIN`cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` a
    
    ON a.bacct_bus_bacct_num = b.BACCT_NUM 

    WHERE DATE(prod_instnc_ts) = '2022-07-01'
    and a.pi_prod_instnc_typ_cd='SMHM'

    GROUP BY b.CAMP_CREATIVE_New
    , b.BACCT_NUM,a.pi_prod_instnc_stat_cd

)

, H2_end_snapshot as (
        
        SELECT
     distinct b.BACCT_NUM as ban_end_h2
    , b.CAMP_CREATIVE_New as CAMP_CREATIVE_end_h2
    ,sum(b.Opened) as Opened_end_h2
    ,a.pi_prod_instnc_stat_cd as status_end_h2
    , max(pi_cntrct_end_ts) as contract_end_dt_end_h2

    FROM `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_Jan_June_2022` b 
    LEFT JOIN`cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` a
    
    ON a.bacct_bus_bacct_num = b.BACCT_NUM 

    WHERE DATE(prod_instnc_ts) = '2022-12-31'
    and a.pi_prod_instnc_typ_cd='SMHM'

    GROUP BY b.CAMP_CREATIVE_New
    , b.BACCT_NUM,a.pi_prod_instnc_stat_cd

)




select * from Intial_snapshot a
left JOIN  H2_start_snapshot b
on a.ban_start_H1=b.ban_start_h2 and a.CAMP_CREATIVE_start_h1=b.CAMP_CREATIVE_start_h2
left join H2_end_snapshot c
on a.ban_start_H1=c.ban_end_h2 and a.CAMP_CREATIVE_start_h1=c.CAMP_CREATIVE_end_h2


'''

In [110]:
# Run Contract_status_Query and keep the result as a DataFrame.
Contract_DF=extract_bq_data(bq_client, sql=Contract_status_Query)

In [111]:
# Print column names, non-null counts and dtypes of the contract-status result.
Contract_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4634 entries, 0 to 4633
Data columns (total 15 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   ban_start_H1                     4634 non-null   Int64  
 1   CAMP_CREATIVE_start_h1           4634 non-null   object 
 2   opened_start_h1                  4634 non-null   float64
 3   status_intial_start_H1           4634 non-null   object 
 4   latest_contract_end_dt_start_h1  4634 non-null   object 
 5   ban_start_h2                     4633 non-null   Int64  
 6   CAMP_CREATIVE_start_h2           4633 non-null   object 
 7   Opened_start_h2                  4633 non-null   float64
 8   status_start_h2                  4633 non-null   object 
 9   contract_end_dt_start_h2         4633 non-null   object 
 10  ban_end_h2                       4633 non-null   Int64  
 11  CAMP_CREATIVE_end_h2             4633 non-null   object 
 12  Opened_end_h2       

In [112]:
# Export the contract-status result to CSV without the index column. The path
# is relative, so the file is written to the current working directory.
Contract_DF.to_csv('ALR_Campaign_status.csv',index=False)