In [180]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root = the part of the working directory that precedes the first
# 'notebooks' substring; if the path has no such substring, str.split returns
# the whole working directory unchanged.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Make the project root importable so the `core` package resolves below.
sys.path.insert(0, str(pth_project))
# Read the project config inside a context manager so the file handle is
# closed as soon as the YAML has been parsed.
with pth_creds.open() as f_project_config:
    d_project_config = safe_load(f_project_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path.insert above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
# The config must provide the 'gcp-project-name' key. The returned object is
# used below through `.query(sql).to_dataframe()` -- presumably a
# google.cloud.bigquery.Client; confirm in core.utils.gcp.
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Let pandas display up to 100 rows before truncating.
pd.options.display.max_rows = 100

In [181]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return it as a DataFrame.

    Args:
        bq_client: object exposing ``query(sql)`` whose result exposes
            ``to_dataframe()``.
        sql: query text. Used whenever it is not ``None``, even if
            ``pth_query`` is also supplied.
        pth_query: object exposing ``read_text()`` (e.g. a ``pathlib.Path``);
            only read when ``sql`` is ``None``.

    Returns:
        Whatever ``bq_client.query(...).to_dataframe()`` returns.

    Raises:
        ValueError: when both ``sql`` and ``pth_query`` are ``None``.
    """
    # Guard clause: nothing to run.
    if sql is None and pth_query is None:
        raise ValueError('`sql` or `pth_query` should be set')

    # Inline SQL wins over the file; the file is only touched when needed.
    query_text = sql if sql is not None else pth_query.read_text()
    return bq_client.query(query_text).to_dataframe()

In [194]:
# BigQuery SQL: every column of the SAS campaign table for rows whose
# CAMP_CREATIVE contains 'ALR1_TP5' (the REGEXP_CONTAINS pattern is unanchored,
# so it is a substring match) and whose Campaign_date falls between 2022-05-01
# and 2022-08-31, both ends inclusive.
Campaign_Query='''


select *
from `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_SAS`
where REGEXP_CONTAINS (CAMP_CREATIVE, r'ALR1_TP5')
and date(Campaign_date)>='2022-05-01' and date(Campaign_date)<='2022-08-31'

'''

In [195]:
# Run Campaign_Query through the BigQuery client and keep the result as Camp_DF.
Camp_DF=extract_bq_data(bq_client, sql=Campaign_Query)

In [196]:
# Column names, non-null counts and dtypes of the campaign extract.
Camp_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5294 entries, 0 to 5293
Data columns (total 52 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   TRACKING                5294 non-null   object        
 1   CAMP_INHOME             5294 non-null   object        
 2   CAMP_ID                 5294 non-null   object        
 3   DBM_VERSION             5294 non-null   object        
 4   DBM_VERSION_DESC        0 non-null      float64       
 5   CAMP_MODEL_DECILE       0 non-null      float64       
 6   MISC_INFO1              0 non-null      float64       
 7   MISC_INFO2              0 non-null      float64       
 8   MISC_INFO3              0 non-null      float64       
 9   MISC_INFO4              0 non-null      float64       
 10  CAMP_CREATIVE           5294 non-null   object        
 11  CAMP_CREATIVE_DESC      0 non-null      float64       
 12  CAMP_TEST               5294 non-null   object  

In [197]:
def Camp_creative_making(row):
    """Return the creative version label for one campaign row.

    Args:
        row: mapping-like object (e.g. a DataFrame row) indexed by
            ``'CAMP_CREATIVE'``.

    Returns:
        ``'V1'`` .. ``'V4'`` when ``CAMP_CREATIVE`` equals ``ALR1_TP5X_<Vn>``
        or ``ALR1_TP5_<Vn>``; otherwise the string ``"None"`` (a string, not
        the ``None`` object).
    """
    creative = row['CAMP_CREATIVE']
    for version in ('V1', 'V2', 'V3', 'V4'):
        # Both the 'TP5X' and the 'TP5' spelling share one version label.
        if creative in ('ALR1_TP5X_' + version, 'ALR1_TP5_' + version):
            return version
    return "None"

In [198]:
# Row counts per raw CAMP_CREATIVE value, ordered by creative name.
Camp_DF['CAMP_CREATIVE'].value_counts().sort_index()

ALR1_TP5X_V1      69
ALR1_TP5X_V2     798
ALR1_TP5X_V3     316
ALR1_TP5X_V4     264
ALR1_TP5_V1      125
ALR1_TP5_V2     2333
ALR1_TP5_V3      618
ALR1_TP5_V4      771
Name: CAMP_CREATIVE, dtype: int64

In [199]:
# Row counts per Campaign_date, ordered by date.
Camp_DF['Campaign_date'].value_counts().sort_index()

2022-05-04    268
2022-05-11    433
2022-05-18    120
2022-05-25    395
2022-06-01    262
2022-06-08    373
2022-06-15    143
2022-06-22    302
2022-06-29    277
2022-07-06    551
2022-07-13    154
2022-07-20    388
2022-07-27    351
2022-08-03    508
2022-08-10    456
2022-08-17     38
2022-08-24     91
2022-08-31    184
Name: Campaign_date, dtype: int64

In [200]:
# Add the version label (V1-V4, or the string "None") as a new column.
# apply(axis=1) hands each full row to Camp_creative_making, which only reads
# the CAMP_CREATIVE field.
Camp_DF['CAMP_CREATIVE_New']=Camp_DF.apply(Camp_creative_making,axis=1)

In [201]:
# Row counts per derived version label, ordered by label.
Camp_DF['CAMP_CREATIVE_New'].value_counts().sort_index()

V1     194
V2    3131
V3     934
V4    1035
Name: CAMP_CREATIVE_New, dtype: int64

In [202]:
# Camp_DF_May_Sep22=Camp_DF[Camp_DF.Campaign_date<='2022-09-30' and Camp_DF.Campaign_date>='2022-05-01']

In [203]:
# Camp_DF['Campaign_date'].value_counts().sort_index()

In [204]:
# Camp_DF_Mar2022['CAMP_CREATIVE_New'].value_counts().sort_index()

In [205]:
# Engagement summary per creative version, split by the OPENED flag:
# distinct BACCT_NUM count, sums of the OPENED / CLICKTHROUGH / SOFTBOUNCE /
# HARDBOUNCE / UNSUBSCRIBE columns, and the first and last Campaign_date seen
# in each group.
(
    Camp_DF
    .groupby(['CAMP_CREATIVE_New', 'OPENED'])
    .agg(
        Customer_count=('BACCT_NUM', 'nunique'),
        Email_Opened_count=('OPENED', 'sum'),
        CT_Opened_count=('CLICKTHROUGH', 'sum'),
        softbounce_count=('SOFTBOUNCE', 'sum'),
        Hardbounce_count=('HARDBOUNCE', 'sum'),
        Unsub_count=('UNSUBSCRIBE', 'sum'),
        campaign_date_min=('Campaign_date', 'min'),
        campaign_date_max=('Campaign_date', 'max'),
    )
    .reset_index()
)

Unnamed: 0,CAMP_CREATIVE_New,OPENED,Customer_count,Email_Opened_count,CT_Opened_count,softbounce_count,Hardbounce_count,Unsub_count,campaign_date_min,campaign_date_max
0,V1,0.0,72,0.0,0.0,0.0,0.0,0.0,2022-05-04,2022-08-31
1,V1,1.0,67,71.0,5.0,0.0,0.0,0.0,2022-05-04,2022-08-31
2,V2,0.0,986,0.0,0.0,1.0,0.0,0.0,2022-05-04,2022-08-31
3,V2,1.0,1439,1454.0,87.0,0.0,0.0,1.0,2022-05-04,2022-08-31
4,V3,0.0,350,0.0,0.0,0.0,0.0,0.0,2022-05-04,2022-08-31
5,V3,1.0,313,325.0,19.0,0.0,0.0,0.0,2022-05-04,2022-08-31
6,V4,0.0,329,0.0,0.0,0.0,0.0,0.0,2022-05-04,2022-08-31
7,V4,1.0,481,488.0,53.0,0.0,0.0,0.0,2022-05-04,2022-08-31


In [None]:
# config= bigquery.job.LoadJobConfig()
# config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Table_BQ = 'SHS.SHS_ALR_Campaign_data_Jan_June_2022'

# bq_table_instance= bq_client.load_table_from_dataframe(Camp_DF, Table_BQ,job_config=config)

In [111]:
# BigQuery script: per CAMP_CREATIVE_New, how many campaign BANs have an
# active ('A') SMHM product instance with customer type 'R' in the product
# snapshot dated 2022-09-01 (target_date).
# - Camp_V1_BAN holds distinct (BAN, CAMP_CREATIVE_New) pairs, so a BAN listed
#   under several creatives is counted once per creative.
# - prod_snpsht_SMHM yields one row per qualifying BAN with a constant 1, so
#   the SUM in the final select is a count of matched BANs.
# NOTE(review): the source table SHS_ALR_Campaign_data_May_Aug_2022 is not
# created by any visible cell -- presumably an upload of Camp_DF; confirm.
Sep_Snapshot='''



DECLARE target_date DATE DEFAULT "2022-09-01";


With Camp_V1_BAN as (

select distinct BACCT_NUM as BAN,CAMP_CREATIVE_New from `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_May_Aug_2022`

)


, prod_snpsht_SMHM as (


select distinct bacct_bus_bacct_num as BAN,1 as count_of_BAN_FFH
FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
where pi_prod_instnc_typ_cd='SMHM' 
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
and DATE(prod_instnc_ts)=target_date


)

select a.CAMP_CREATIVE_New, sum(b.count_of_BAN_FFH) as Distinct_BAN_count
 from Camp_V1_BAN a
inner join prod_snpsht_SMHM b
on a.BAN=b.BAN
group by CAMP_CREATIVE_New
order by CAMP_CREATIVE_New


'''

In [112]:
# Run the query text currently bound to Sep_Snapshot. That name is assigned
# again further down the notebook with a different query, so this cell must be
# run right after the count query is defined.
Sep_Snapshot_count=extract_bq_data(bq_client, sql=Sep_Snapshot)

In [113]:
# Display the September 2022 active-BAN counts per creative version.
Sep_Snapshot_count

Unnamed: 0,CAMP_CREATIVE_New,Distinct_BAN_count
0,V1,117
1,V2,2172
2,V3,581
3,V4,720


In [114]:
# BigQuery script: per CAMP_CREATIVE_New, how many campaign BANs have an
# active ('A') SMHM product instance with customer type 'R' in the product
# snapshot dated 2023-02-01 (target_date).
# - Camp_V1_BAN holds distinct (BAN, CAMP_CREATIVE_New) pairs, so a BAN listed
#   under several creatives is counted once per creative.
# - prod_snpsht_SMHM yields one row per qualifying BAN with a constant 1, so
#   the SUM in the final select is a count of matched BANs.
# Apart from target_date, the query text matches the 2022-09-01 count query.
Feb2023_Snapshot='''



DECLARE target_date DATE DEFAULT "2023-02-01";


With Camp_V1_BAN as (

select distinct BACCT_NUM as BAN,CAMP_CREATIVE_New from `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_May_Aug_2022`

)


, prod_snpsht_SMHM as (


select distinct bacct_bus_bacct_num as BAN,1 as count_of_BAN_FFH
FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
where pi_prod_instnc_typ_cd='SMHM' 
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
and DATE(prod_instnc_ts)=target_date


)

select a.CAMP_CREATIVE_New, sum(b.count_of_BAN_FFH) as Distinct_BAN_count
 from Camp_V1_BAN a
inner join prod_snpsht_SMHM b
on a.BAN=b.BAN
group by CAMP_CREATIVE_New
order by CAMP_CREATIVE_New


'''

In [115]:
# Run the query text currently bound to Feb2023_Snapshot. That name is
# assigned again further down the notebook with a different query.
Feb2023_Snapshot_count=extract_bq_data(bq_client, sql=Feb2023_Snapshot)

In [116]:
# Display the February 2023 active-BAN counts per creative version.
Feb2023_Snapshot_count

Unnamed: 0,CAMP_CREATIVE_New,Distinct_BAN_count
0,V1,106
1,V2,2046
2,V3,529
3,V4,685


In [124]:
# BigQuery script returning one row per campaign BAN (and bill month) with
# billing sums attached:
# - Camp_V1_BAN: distinct (BAN, CAMP_CREATIVE_New) pairs from the campaign table.
# - prod_snpsht_SMHM: BANs with an active ('A') SMHM instance, customer type
#   'R', in the product snapshot dated target_date_end (2022-09-01).
# - ARPU_Calculation: per BAN and bill month, SUM(rid_unit_chrg_amt) as
#   ARPU_SMHM and SUM(tot_inv_amt) as ARPU_FFH for bills dated in
#   [target_date_start, target_date_end), i.e. August 2022. GROUP BY already
#   makes (ban, bill_year_month) unique, so the DISTINCT adds nothing.
# The final `select *` returns the BAN of all three inputs; they arrive in the
# DataFrame as BAN, BAN_1 and ban_2.
# NOTE(review): this rebinds the name Sep_Snapshot, which an earlier cell
# assigns to the count-only query -- results depend on execution order.
Sep_Snapshot='''

DECLARE target_date_start DATE DEFAULT "2022-08-01";
DECLARE target_date_end DATE DEFAULT "2022-09-01";



With Camp_V1_BAN as (

select distinct BACCT_NUM as BAN,CAMP_CREATIVE_New from `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_May_Aug_2022`

)


, prod_snpsht_SMHM as (


select distinct bacct_bus_bacct_num as BAN,1 as count_of_BAN_FFH
FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
where pi_prod_instnc_typ_cd='SMHM' 
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
and DATE(prod_instnc_ts)=target_date_end


)

, ARPU_Calculation as(
SELECT
  distinct billg_acct_num AS ban,
  EXTRACT(YEAR
  FROM
    bill.bill_dt)||'-'||LPAD(CAST(EXTRACT(MONTH
      FROM
        bill.bill_dt) AS STRING), 2, '0') AS bill_year_month,
  sum(bill.rid_unit_chrg_amt) as ARPU_SMHM,
  sum(tot_inv_amt) as ARPU_FFH
  FROM
  `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_inv_sum_view` bill
WHERE
  DATE(bill.bill_dt)>=  target_date_start
  and DATE(bill.bill_dt)< target_date_end
  group by ban,bill_year_month
)

select *
 from Camp_V1_BAN a
inner join prod_snpsht_SMHM b
on a.BAN=b.BAN
left join ARPU_Calculation ARPU_df
on a.BAN=ARPU_df.ban


'''

In [125]:
# Run the snapshot + billing query bound to Sep_Snapshot and keep the per-BAN rows.
Sep_Snapshot_DF=extract_bq_data(bq_client, sql=Sep_Snapshot)

In [126]:
# Column names, non-null counts and dtypes of the September snapshot extract.
Sep_Snapshot_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3590 entries, 0 to 3589
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   BAN                3590 non-null   Int64 
 1   CAMP_CREATIVE_New  3590 non-null   object
 2   BAN_1              3590 non-null   Int64 
 3   count_of_BAN_FFH   3590 non-null   Int64 
 4   ban_2              3590 non-null   object
 5   bill_year_month    3590 non-null   object
 6   ARPU_SMHM          3590 non-null   object
 7   ARPU_FFH           3590 non-null   object
dtypes: Int64(3), object(5)
memory usage: 235.0+ KB


In [127]:
# Per creative version: sum of the count_of_BAN_FFH flag (1 per returned row)
# and the mean of the ARPU_FFH / ARPU_SMHM billing sums.
(
    Sep_Snapshot_DF
    .groupby(['CAMP_CREATIVE_New'])
    .agg(
        Customer_count=('count_of_BAN_FFH', 'sum'),
        AVG_ARPU_FFH=('ARPU_FFH', 'mean'),
        AVG_ARPU_SMHM=('ARPU_SMHM', 'mean'),
    )
    .reset_index()
)

Unnamed: 0,CAMP_CREATIVE_New,Customer_count,AVG_ARPU_FFH,AVG_ARPU_SMHM
0,V1,117,53.540171,54.496325
1,V2,2172,218.55308,61.888881
2,V3,581,51.560964,54.767711
3,V4,720,222.733958,62.486833


In [128]:
# BigQuery script returning one row per campaign BAN (and bill month) with
# billing sums attached:
# - Camp_V1_BAN: distinct (BAN, CAMP_CREATIVE_New) pairs from the campaign table.
# - prod_snpsht_SMHM: BANs with an active ('A') SMHM instance, customer type
#   'R', in the product snapshot dated target_date_end (2023-02-01).
# - ARPU_Calculation: per BAN and bill month, SUM(rid_unit_chrg_amt) as
#   ARPU_SMHM and SUM(tot_inv_amt) as ARPU_FFH for bills dated in
#   [target_date_start, target_date_end), i.e. January 2023. GROUP BY already
#   makes (ban, bill_year_month) unique, so the DISTINCT adds nothing.
# The final `select *` returns the BAN column of all three inputs.
# NOTE(review): this rebinds the name Feb2023_Snapshot, which an earlier cell
# assigns to the count-only query -- results depend on execution order.
Feb2023_Snapshot='''

DECLARE target_date_start DATE DEFAULT "2023-01-01";
DECLARE target_date_end DATE DEFAULT "2023-02-01";



With Camp_V1_BAN as (

select distinct BACCT_NUM as BAN,CAMP_CREATIVE_New from `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_May_Aug_2022`

)


, prod_snpsht_SMHM as (


select distinct bacct_bus_bacct_num as BAN,1 as count_of_BAN_FFH
FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
where pi_prod_instnc_typ_cd='SMHM' 
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
and DATE(prod_instnc_ts)=target_date_end


)

, ARPU_Calculation as(
SELECT
  distinct billg_acct_num AS ban,
  EXTRACT(YEAR
  FROM
    bill.bill_dt)||'-'||LPAD(CAST(EXTRACT(MONTH
      FROM
        bill.bill_dt) AS STRING), 2, '0') AS bill_year_month,
  sum(bill.rid_unit_chrg_amt) as ARPU_SMHM,
  sum(tot_inv_amt) as ARPU_FFH
  FROM
  `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_inv_sum_view` bill
WHERE
  DATE(bill.bill_dt)>=  target_date_start
  and DATE(bill.bill_dt)< target_date_end
  group by ban,bill_year_month
)

select *
 from Camp_V1_BAN a
inner join prod_snpsht_SMHM b
on a.BAN=b.BAN
left join ARPU_Calculation ARPU_df
on a.BAN=ARPU_df.ban


'''

In [129]:
# Run the snapshot + billing query bound to Feb2023_Snapshot and keep the per-BAN rows.
Feb_Snapshot_DF=extract_bq_data(bq_client, sql=Feb2023_Snapshot)

In [130]:
# Per creative version: sum of the count_of_BAN_FFH flag (1 per returned row)
# and the mean of the ARPU_FFH / ARPU_SMHM billing sums.
(
    Feb_Snapshot_DF
    .groupby(['CAMP_CREATIVE_New'])
    .agg(
        Customer_count=('count_of_BAN_FFH', 'sum'),
        AVG_ARPU_FFH=('ARPU_FFH', 'mean'),
        AVG_ARPU_SMHM=('ARPU_SMHM', 'mean'),
    )
    .reset_index()
)

Unnamed: 0,CAMP_CREATIVE_New,Customer_count,AVG_ARPU_FFH,AVG_ARPU_SMHM
0,V1,106,62.576698,58.521509
1,V2,2046,212.117097,62.361982
2,V3,529,53.071304,56.490718
3,V4,685,215.570409,61.691212


In [131]:
# BigQuery script returning one row per campaign BAN (and bill month) with
# billing sums attached:
# - Camp_V1_BAN: distinct (BAN, CAMP_CREATIVE_New) pairs from the campaign table.
# - prod_snpsht_SMHM: BANs with an active ('A') SMHM instance, customer type
#   'R', in the product snapshot dated target_date_end (2022-05-01).
# - ARPU_Calculation: per BAN and bill month, SUM(rid_unit_chrg_amt) as
#   ARPU_SMHM and SUM(tot_inv_amt) as ARPU_FFH for bills dated in
#   [target_date_start, target_date_end), i.e. April 2022. GROUP BY already
#   makes (ban, bill_year_month) unique, so the DISTINCT adds nothing.
# The final `select *` returns the BAN column of all three inputs.
May2022_Snapshot='''

DECLARE target_date_start DATE DEFAULT "2022-04-01";
DECLARE target_date_end DATE DEFAULT "2022-05-01";



With Camp_V1_BAN as (

select distinct BACCT_NUM as BAN,CAMP_CREATIVE_New from `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_May_Aug_2022`

)


, prod_snpsht_SMHM as (


select distinct bacct_bus_bacct_num as BAN,1 as count_of_BAN_FFH
FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
where pi_prod_instnc_typ_cd='SMHM' 
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
and DATE(prod_instnc_ts)=target_date_end


)

, ARPU_Calculation as(
SELECT
  distinct billg_acct_num AS ban,
  EXTRACT(YEAR
  FROM
    bill.bill_dt)||'-'||LPAD(CAST(EXTRACT(MONTH
      FROM
        bill.bill_dt) AS STRING), 2, '0') AS bill_year_month,
  sum(bill.rid_unit_chrg_amt) as ARPU_SMHM,
  sum(tot_inv_amt) as ARPU_FFH
  FROM
  `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_inv_sum_view` bill
WHERE
  DATE(bill.bill_dt)>=  target_date_start
  and DATE(bill.bill_dt)< target_date_end
  group by ban,bill_year_month
)

select *
 from Camp_V1_BAN a
inner join prod_snpsht_SMHM b
on a.BAN=b.BAN
left join ARPU_Calculation ARPU_df
on a.BAN=ARPU_df.ban


'''

In [132]:
# Run the snapshot + billing query bound to May2022_Snapshot and keep the per-BAN rows.
May2022_Snapshot_DF=extract_bq_data(bq_client, sql=May2022_Snapshot)

In [133]:
# Per creative version: sum of the count_of_BAN_FFH flag (1 per returned row)
# and the mean of the ARPU_FFH / ARPU_SMHM billing sums.
(
    May2022_Snapshot_DF
    .groupby(['CAMP_CREATIVE_New'])
    .agg(
        Customer_count=('count_of_BAN_FFH', 'sum'),
        AVG_ARPU_FFH=('ARPU_FFH', 'mean'),
        AVG_ARPU_SMHM=('ARPU_SMHM', 'mean'),
    )
    .reset_index()
)

Unnamed: 0,CAMP_CREATIVE_New,Customer_count,AVG_ARPU_FFH,AVG_ARPU_SMHM
0,V1,127,46.820472,54.892913
1,V2,2272,213.789921,63.345152
2,V3,610,46.855213,56.993361
3,V4,760,217.489171,61.792421


In [147]:
# BigQuery SQL tracking SMHM status and latest contract end timestamp for
# campaign BANs at three snapshot dates:
# - Intial_snapshot: 2022-05-01, SMHM instances with status 'A' only.
# - Sep_snapshot:    2022-09-01, SMHM instances of any status.
# - Feb_snapshot:    2023-02-01, SMHM instances of any status.
# Each CTE returns one row per (BAN, CAMP_CREATIVE_New, status code) with
# MAX(pi_cntrct_end_ts); the final select left-joins Sep and Feb onto the
# initial rows by BAN and creative.
# NOTE(review): the WHERE clauses filter on columns of the right-hand table
# `a`, which drops unmatched rows, so each LEFT JOIN inside the CTEs behaves
# like an inner join.
# NOTE(review): because Sep/Feb are grouped by status code, a BAN with more
# than one SMHM status on a snapshot date yields several rows and multiplies
# the rows of the final join -- confirm this is intended before counting BANs.
# NOTE(review): unlike the other snapshot queries in this notebook, there is
# no consldt_cust_typ_cd = 'R' filter here -- confirm.
Contract_status_Query='''




with Intial_snapshot as (
        
        SELECT 
     distinct b.BACCT_NUM as ban
    ,b.CAMP_CREATIVE_New
    ,a.pi_prod_instnc_stat_cd as status_intial
    , max(pi_cntrct_end_ts) as latest_contract_end_dt

    FROM `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_May_Aug_2022` b 
    LEFT JOIN`cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` a
    
    ON a.bacct_bus_bacct_num = b.BACCT_NUM 

    WHERE DATE(prod_instnc_ts) = '2022-05-01'
    and a.pi_prod_instnc_typ_cd='SMHM'
    and a.pi_prod_instnc_stat_cd='A'

    GROUP BY b.CAMP_CREATIVE_New
    , b.BACCT_NUM,a.pi_prod_instnc_stat_cd

)


, Sep_snapshot as (
        
        SELECT
     distinct b.BACCT_NUM as ban
    , b.CAMP_CREATIVE_New as camp_creav_sep
    ,a.pi_prod_instnc_stat_cd as status_Sep
    , max(pi_cntrct_end_ts) as contract_end_dt_sep

    FROM `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_May_Aug_2022` b 
    LEFT JOIN`cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` a
    
    ON a.bacct_bus_bacct_num = b.BACCT_NUM 

    WHERE DATE(prod_instnc_ts) = '2022-09-01'
    and a.pi_prod_instnc_typ_cd='SMHM'

    GROUP BY b.CAMP_CREATIVE_New
    , b.BACCT_NUM,a.pi_prod_instnc_stat_cd

)

, Feb_snapshot as (
        
        SELECT
     distinct b.BACCT_NUM as ban
    , b.CAMP_CREATIVE_New as camp_creav_Feb
    ,a.pi_prod_instnc_stat_cd as status_Feb
    , max(pi_cntrct_end_ts) as contract_end_dt_Feb

    FROM `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_May_Aug_2022` b 
    LEFT JOIN`cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` a
    
    ON a.bacct_bus_bacct_num = b.BACCT_NUM 

    WHERE DATE(prod_instnc_ts) = '2023-02-01'
    and a.pi_prod_instnc_typ_cd='SMHM'

    GROUP BY b.CAMP_CREATIVE_New
    , b.BACCT_NUM,a.pi_prod_instnc_stat_cd

)




select * from Intial_snapshot a
left JOIN  Sep_snapshot b
on a.ban=b.ban and a.CAMP_CREATIVE_New=b.camp_creav_sep

left join Feb_snapshot c
on a.ban=c.ban and a.CAMP_CREATIVE_New=c.camp_creav_Feb



'''

In [148]:
# Run the contract-status query and keep the joined snapshot rows.
Contract_DF=extract_bq_data(bq_client, sql=Contract_status_Query)

In [149]:
# Column names, non-null counts and dtypes of the contract-status extract.
Contract_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4069 entries, 0 to 4068
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   ban                     4069 non-null   Int64 
 1   CAMP_CREATIVE_New       4069 non-null   object
 2   status_intial           4069 non-null   object
 3   latest_contract_end_dt  4069 non-null   object
 4   ban_1                   4068 non-null   Int64 
 5   camp_creav_sep          4068 non-null   object
 6   status_Sep              4068 non-null   object
 7   contract_end_dt_sep     4068 non-null   object
 8   ban_2                   4068 non-null   Int64 
 9   camp_creav_Feb          4068 non-null   object
 10  status_Feb              4068 non-null   object
 11  contract_end_dt_Feb     4068 non-null   object
dtypes: Int64(3), object(9)
memory usage: 393.5+ KB


In [151]:
# Export the contract-status rows without the index column. The path is
# relative, so the file is written to the current working directory.
Contract_DF.to_csv('Contract_Status.csv',index=False)

In [152]:
# BigQuery script: product mix of every campaign BAN on the snapshot date
# target_date + interval_days (2022-05-01 + 0 days).
# - cte_product_mix: per BAN, over active ('A') instances with customer type
#   'R' and one of the listed product types: product_mix_all counts distinct
#   product TYPE codes, each *_count column counts distinct product instance
#   ids of one type.
# - Camp_BAN: distinct (BAN, CAMP_CREATIVE_New) pairs from the campaign table.
# The final LEFT JOIN keeps campaign BANs with no qualifying product; their
# count columns come back NULL.
# NOTE(review): the variable name says 2021 but target_date is 2022-05-01.
# NOTE(review): product type 'C' carries no description in the query -- confirm
# what it stands for.
May_2021_prod_mix='''



DECLARE target_date DATE DEFAULT "2022-05-01";
DECLARE interval_days INT64 DEFAULT 0;


WITH cte_max_prod_instnc_date AS(
  SELECT DATE_ADD(target_date,INTERVAL interval_days DAY) AS max_date 
)

, cte_product_mix AS (
  SELECT ffh_prod.bacct_bus_bacct_num AS ban,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd IN ('SING', 'HSIC', 'TTV', 'SMHM', 'STV', 'DIIC','C') THEN ffh_prod.pi_prod_instnc_typ_cd ELSE NULL END) AS product_mix_all,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'HSIC' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS hsic_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'SING' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS sing_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'SMHM' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS shs_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'TTV'  THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS ttv_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'STV'  THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS stv_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'DIIC' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS diic_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'C' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS C_count
   FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` AS ffh_prod
   CROSS JOIN cte_max_prod_instnc_date AS dt
   WHERE DATE(ffh_prod.prod_instnc_ts) = dt.max_date
    --  AND ffh_prod.bacct_bus_bacct_num_src_id = 1001 --Wireline
     AND ffh_prod.pi_prod_instnc_stat_cd = 'A' --Active Products
     AND ffh_prod.consldt_cust_typ_cd = 'R' --Regular (not Business)
     AND ffh_prod.pi_prod_instnc_typ_cd IN 
         (
           'DIIC', --Dialup
           'HSIC', --High Speed
           'SING', --Home Phone
           'SMHM', --Smart Home
           'STV',  --Satelite
           'TTV'   --TV
            ,'C'    
         )
   GROUP BY ffh_prod.bacct_bus_bacct_num
)

, Camp_BAN as (

select distinct BACCT_NUM as BAN,CAMP_CREATIVE_New from `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_May_Aug_2022`

)

select * 
from Camp_BAN a
left join cte_product_mix as b
on a.BAN=b.ban
order by CAMP_CREATIVE_New


'''



In [154]:
# Run the 2022-05-01 product-mix query (held in the variable named May_2021_prod_mix).
May2022_Prod_Mix_DF=extract_bq_data(bq_client, sql=May_2021_prod_mix)

In [155]:
# Column names, non-null counts and dtypes of the May 2022 product-mix extract.
May2022_Prod_Mix_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3785 entries, 0 to 3784
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   BAN                3785 non-null   Int64 
 1   CAMP_CREATIVE_New  3785 non-null   object
 2   ban_1              3769 non-null   Int64 
 3   product_mix_all    3769 non-null   Int64 
 4   hsic_count         3769 non-null   Int64 
 5   sing_count         3769 non-null   Int64 
 6   shs_count          3769 non-null   Int64 
 7   ttv_count          3769 non-null   Int64 
 8   stv_count          3769 non-null   Int64 
 9   diic_count         3769 non-null   Int64 
 10  C_count            3769 non-null   Int64 
dtypes: Int64(10), object(1)
memory usage: 362.4+ KB


In [156]:
# Preview the first rows of the May 2022 product-mix extract.
May2022_Prod_Mix_DF.head()

Unnamed: 0,BAN,CAMP_CREATIVE_New,ban_1,product_mix_all,hsic_count,sing_count,shs_count,ttv_count,stv_count,diic_count,C_count
0,603561721,V1,603561721,1,0,0,1,0,0,0,0
1,604036634,V1,604036634,2,0,0,1,0,0,1,0
2,201753114,V1,201753114,1,0,0,1,0,0,0,0
3,603594665,V1,603594665,1,0,0,1,0,0,0,0
4,603585465,V1,603585465,1,0,0,1,0,0,0,0


In [157]:
# Export the May 2022 product mix without the index column. The path is
# relative, so the file is written to the current working directory.
May2022_Prod_Mix_DF.to_csv('May2022_Prod_Mix.csv',index=False)

In [158]:
# BigQuery script: product mix of every campaign BAN on the snapshot date
# target_date + interval_days (2022-09-01 + 0 days).
# - cte_product_mix: per BAN, over active ('A') instances with customer type
#   'R' and one of the listed product types: product_mix_all counts distinct
#   product TYPE codes, each *_count column counts distinct product instance
#   ids of one type.
# - Camp_BAN: distinct (BAN, CAMP_CREATIVE_New) pairs from the campaign table.
# The final LEFT JOIN keeps campaign BANs with no qualifying product; their
# count columns come back NULL.
# NOTE(review): product type 'C' carries no description in the query -- confirm
# what it stands for.
Sep_2022_prod_mix='''



DECLARE target_date DATE DEFAULT "2022-09-01";
DECLARE interval_days INT64 DEFAULT 0;


WITH cte_max_prod_instnc_date AS(
  SELECT DATE_ADD(target_date,INTERVAL interval_days DAY) AS max_date 
)

, cte_product_mix AS (
  SELECT ffh_prod.bacct_bus_bacct_num AS ban,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd IN ('SING', 'HSIC', 'TTV', 'SMHM', 'STV', 'DIIC','C') THEN ffh_prod.pi_prod_instnc_typ_cd ELSE NULL END) AS product_mix_all,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'HSIC' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS hsic_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'SING' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS sing_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'SMHM' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS shs_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'TTV'  THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS ttv_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'STV'  THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS stv_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'DIIC' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS diic_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'C' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS C_count
   FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` AS ffh_prod
   CROSS JOIN cte_max_prod_instnc_date AS dt
   WHERE DATE(ffh_prod.prod_instnc_ts) = dt.max_date
     AND ffh_prod.pi_prod_instnc_stat_cd = 'A' --Active Products
     AND ffh_prod.consldt_cust_typ_cd = 'R' --Regular (not Business)
     AND ffh_prod.pi_prod_instnc_typ_cd IN 
         (
           'DIIC', --Dialup
           'HSIC', --High Speed
           'SING', --Home Phone
           'SMHM', --Smart Home
           'STV',  --Satelite
           'TTV'   --TV
            ,'C'    
         )
   GROUP BY ffh_prod.bacct_bus_bacct_num
)

, Camp_BAN as (

select distinct BACCT_NUM as BAN,CAMP_CREATIVE_New from `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_May_Aug_2022`

)

select * 
from Camp_BAN a
left join cte_product_mix as b
on a.BAN=b.ban
order by CAMP_CREATIVE_New


'''

In [159]:
# Run the 2022-09-01 product-mix query.
Sep2022_Prod_Mix_DF=extract_bq_data(bq_client, sql=Sep_2022_prod_mix)

In [161]:
# Preview the first rows of the September 2022 product-mix extract.
Sep2022_Prod_Mix_DF.head()

Unnamed: 0,BAN,CAMP_CREATIVE_New,ban_1,product_mix_all,hsic_count,sing_count,shs_count,ttv_count,stv_count,diic_count,C_count
0,603581139,V1,603581139.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,603587895,V1,603587895.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,603237703,V1,,,,,,,,,
3,603582333,V1,603582333.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,603574455,V1,603574455.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [167]:
# Column names, non-null counts and dtypes of the September 2022 product-mix extract.
Sep2022_Prod_Mix_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3785 entries, 0 to 3784
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   BAN                3785 non-null   Int64 
 1   CAMP_CREATIVE_New  3785 non-null   object
 2   ban_1              3711 non-null   Int64 
 3   product_mix_all    3711 non-null   Int64 
 4   hsic_count         3711 non-null   Int64 
 5   sing_count         3711 non-null   Int64 
 6   shs_count          3711 non-null   Int64 
 7   ttv_count          3711 non-null   Int64 
 8   stv_count          3711 non-null   Int64 
 9   diic_count         3711 non-null   Int64 
 10  C_count            3711 non-null   Int64 
dtypes: Int64(10), object(1)
memory usage: 362.4+ KB


In [160]:
# Export the September 2022 product mix without the index column. The path is
# relative, so the file is written to the current working directory.
Sep2022_Prod_Mix_DF.to_csv('Sep2022_Prod_Mix.csv',index=False)

In [174]:
# BigQuery script: product mix of every campaign BAN on the snapshot date
# target_date + interval_days (2023-02-01 + 0 days).
# - cte_product_mix: per BAN, over active ('A') instances with customer type
#   'R' and one of the listed product types: product_mix_all counts distinct
#   product TYPE codes, each *_count column counts distinct product instance
#   ids of one type.
# - Camp_BAN: distinct (BAN, CAMP_CREATIVE_New) pairs from the campaign table.
# The final LEFT JOIN keeps campaign BANs with no qualifying product; their
# count columns come back NULL.
# NOTE(review): product type 'C' carries no description in the query -- confirm
# what it stands for.
Feb_2023_prod_mix='''



DECLARE target_date DATE DEFAULT "2023-02-01";
DECLARE interval_days INT64 DEFAULT 0;


WITH cte_max_prod_instnc_date AS(
  SELECT DATE_ADD(target_date,INTERVAL interval_days DAY) AS max_date 
)

, cte_product_mix AS (
  SELECT ffh_prod.bacct_bus_bacct_num AS ban,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd IN ('SING', 'HSIC', 'TTV', 'SMHM', 'STV', 'DIIC','C') THEN ffh_prod.pi_prod_instnc_typ_cd ELSE NULL END) AS product_mix_all,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'HSIC' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS hsic_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'SING' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS sing_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'SMHM' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS shs_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'TTV'  THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS ttv_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'STV'  THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS stv_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'DIIC' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS diic_count,
         COUNT(DISTINCT CASE WHEN ffh_prod.pi_prod_instnc_typ_cd = 'C' THEN ffh_prod.bus_prod_instnc_id ELSE NULL END) AS C_count
   FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` AS ffh_prod
   CROSS JOIN cte_max_prod_instnc_date AS dt
   WHERE DATE(ffh_prod.prod_instnc_ts) = dt.max_date
     AND ffh_prod.pi_prod_instnc_stat_cd = 'A' --Active Products
     AND ffh_prod.consldt_cust_typ_cd = 'R' --Regular (not Business)
     AND ffh_prod.pi_prod_instnc_typ_cd IN 
         (
           'DIIC', --Dialup
           'HSIC', --High Speed
           'SING', --Home Phone
           'SMHM', --Smart Home
           'STV',  --Satelite
           'TTV'   --TV
            ,'C'    
         )
   GROUP BY ffh_prod.bacct_bus_bacct_num
)

, Camp_BAN as (

select distinct BACCT_NUM as BAN,CAMP_CREATIVE_New from `divgpras-pr-579355.SHS.SHS_ALR_Campaign_data_May_Aug_2022`

)

select * 
from Camp_BAN a
left join cte_product_mix as b
on a.BAN=b.ban
order by CAMP_CREATIVE_New


'''

In [175]:
# Run the 2023-02-01 product-mix query.
Feb2023_Prod_Mix_DF=extract_bq_data(bq_client, sql=Feb_2023_prod_mix)

In [176]:
# Column names, non-null counts and dtypes of the February 2023 product-mix extract.
Feb2023_Prod_Mix_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3785 entries, 0 to 3784
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   BAN                3785 non-null   Int64 
 1   CAMP_CREATIVE_New  3785 non-null   object
 2   ban_1              3608 non-null   Int64 
 3   product_mix_all    3608 non-null   Int64 
 4   hsic_count         3608 non-null   Int64 
 5   sing_count         3608 non-null   Int64 
 6   shs_count          3608 non-null   Int64 
 7   ttv_count          3608 non-null   Int64 
 8   stv_count          3608 non-null   Int64 
 9   diic_count         3608 non-null   Int64 
 10  C_count            3608 non-null   Int64 
dtypes: Int64(10), object(1)
memory usage: 362.4+ KB


In [177]:
# Preview the first ten rows of the February 2023 product-mix extract.
Feb2023_Prod_Mix_DF.head(10)

Unnamed: 0,BAN,CAMP_CREATIVE_New,ban_1,product_mix_all,hsic_count,sing_count,shs_count,ttv_count,stv_count,diic_count,C_count
0,603577867,V1,603577867,1,0,0,1,0,0,0,0
1,600816709,V1,600816709,5,1,1,2,1,0,1,0
2,604015304,V1,604015304,5,1,1,1,1,0,1,0
3,603918623,V1,603918623,1,0,0,1,0,0,0,0
4,602249380,V1,602249380,1,0,0,1,0,0,0,0
5,211393709,V1,211393709,1,0,0,1,0,0,0,0
6,603589530,V1,603589530,2,0,0,1,0,0,1,0
7,603795769,V1,603795769,1,0,0,1,0,0,0,0
8,603576381,V1,603576381,1,0,0,1,0,0,0,0
9,603602535,V1,603602535,1,0,0,1,0,0,0,0


In [178]:
# Export the February 2023 product mix without the index column. The path is
# relative, so the file is written to the current working directory.
Feb2023_Prod_Mix_DF.to_csv('Feb2023_Prod_Mix.csv',index=False)