In [55]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root = the part of the current working directory that precedes the
# first occurrence of 'notebooks'.
# NOTE(review): if 'notebooks' does not appear in the CWD, the whole CWD is used
# as the root — confirm the notebook is always launched from a `notebooks` folder.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package below is importable.
sys.path.insert(0, str(pth_project))
# read_text() opens, reads and closes the config file in one call; passing an
# open file object straight to safe_load would leave the handle unclosed.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path tweak above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services using the project name stored in the local config
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when pandas objects are displayed
pd.options.display.max_rows = 100

In [56]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result of ``to_dataframe()``.

    Args:
        bq_client: Object exposing ``query(sql)`` whose result has ``to_dataframe()``.
        sql: Query text. Takes precedence over ``pth_query`` when both are given.
        pth_query: Object exposing ``read_text()`` that yields the query text;
            only consulted when ``sql`` is None.

    Returns:
        Whatever ``bq_client.query(...).to_dataframe()`` returns.

    Raises:
        ValueError: If neither ``sql`` nor ``pth_query`` is supplied.
    """
    # Guard clause: nothing to run.
    if sql is None and pth_query is None:
        raise ValueError('`sql` or `pth_query` should be set')

    # Inline SQL wins; otherwise load the statement from the query file.
    statement = pth_query.read_text() if sql is None else sql
    return bq_client.query(statement).to_dataframe()

In [63]:
# Sanity check of the '%d%b%Y:%H:%M:%S' format string on one sample value:
# parse it, then print it back as an ISO 'YYYY-MM-DD' date.
print(
    pd.to_datetime(
        '22SEP2021:00:00:00',
        format='%d%b%Y:%H:%M:%S',
    ).strftime('%Y-%m-%d')
)

2021-09-22


In [22]:
# Query='''



# With Telus_customers_prev as(

# select cust_bus_cust_id,pi_cntrct_end_ts as contract_end_date,pi_cntrct_start_ts as contract_start_date
# from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
# WHERE DATE(prod_instnc_ts) = '2022-0' #Snapshot of the last day of the month
# and pi_prod_instnc_typ_cd ='SMHM' #Service type
# and bus_prod_instnc_src_id = 1001 #BANs that are for home services
# and pi_prod_instnc_stat_cd in ('A')
# and  consldt_cust_typ_cd = 'R'
# QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY prod_instnc_ts DESC) = 1
# order by cust_bus_cust_id

# )



# '''

In [23]:
# SQL text for the campaign extract: every column of the bq_ffh_dbm table, limited to
# rows whose campaign code is 'ALR' and whose creative code contains 'ALR', a digit 1-9
# and an underscore (e.g. 'ALR1_').
Campaign_Query='''


select *
from `cio-datahub-enterprise-pr-183a.ent_cust_intractn.bq_ffh_dbm`
where REGEXP_CONTAINS (creative_cd, r'ALR[1-9]_')
and cmpgn_cd='ALR' 

'''

In [24]:
# Execute the campaign query against BigQuery and keep the result as a DataFrame.
Camp_DF = extract_bq_data(
    bq_client=bq_client,
    sql=Campaign_Query,
)

In [25]:
# Overview of the campaign extract: column names, non-null counts and dtypes.
Camp_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107512 entries, 0 to 107511
Data columns (total 81 columns):
 #   Column                   Non-Null Count   Dtype              
---  ------                   --------------   -----              
 0   seg_id                   107512 non-null  object             
 1   cmpgn_id                 107512 non-null  object             
 2   lang_cd                  107512 non-null  object             
 3   seg_desc                 107512 non-null  object             
 4   vndr_cd                  107512 non-null  object             
 5   vndr_cd_desc             107512 non-null  object             
 6   cntct_med                107512 non-null  object             
 7   cntct_med_desc           107512 non-null  object             
 8   ctrl_flg                 107512 non-null  object             
 9   creative_cd              107512 non-null  object             
 10  creative_grp             0 non-null       object             
 11  creative_desc

In [26]:
# Number of rows per in_hm_dt value (the column used as the campaign date further down),
# most frequent first.
Camp_DF['in_hm_dt'].value_counts()

2022-10-26 00:00:00+00:00    4862
2022-05-11 00:00:00+00:00    4356
2022-11-02 00:00:00+00:00    4317
2022-07-06 00:00:00+00:00    4310
2022-11-09 00:00:00+00:00    3769
2022-05-04 00:00:00+00:00    3408
2022-08-03 00:00:00+00:00    3394
2022-11-16 00:00:00+00:00    3266
2022-08-10 00:00:00+00:00    3201
2022-05-25 00:00:00+00:00    2857
2022-07-20 00:00:00+00:00    2791
2022-04-13 00:00:00+00:00    2663
2022-11-23 00:00:00+00:00    2636
2022-04-20 00:00:00+00:00    2542
2022-07-27 00:00:00+00:00    2436
2022-03-30 00:00:00+00:00    2404
2022-06-01 00:00:00+00:00    2370
2022-03-16 00:00:00+00:00    2331
2022-06-08 00:00:00+00:00    2245
2022-04-27 00:00:00+00:00    2215
2022-06-22 00:00:00+00:00    2182
2022-03-23 00:00:00+00:00    2083
2022-11-30 00:00:00+00:00    2075
2022-06-29 00:00:00+00:00    1770
2022-10-12 00:00:00+00:00    1745
2021-10-13 00:00:00+00:00    1716
2022-12-07 00:00:00+00:00    1576
2022-03-02 00:00:00+00:00    1468
2022-08-31 00:00:00+00:00    1378
2022-01-11 00:

In [27]:
# Number of rows per creative code, most frequent first.
Camp_DF['creative_cd'].value_counts()

ALR1_TP1        27167
ALR1_TP2        13124
ALR1_TP3        11789
ALR1_TP4        10802
ALR1_TP1X        8170
ALR1_TP2X        7388
ALR1_TP5_V2      6944
ALR1_TP3X        6496
ALR1_TP4X        5827
ALR1_TP5X_V2     3294
ALR1_TP5_V4      2106
ALR1_TP5_V3      2091
ALR1_TP5X_V3      906
ALR1_TP5X_V4      826
ALR1_TP5_V1       389
ALR1_TP5X_V1      193
Name: creative_cd, dtype: int64

In [28]:
# Number of rows per campaign type description.
Camp_DF['cmpgn_typ_desc'].value_counts()

SHS    107512
Name: cmpgn_typ_desc, dtype: int64

In [30]:
# Per-creative summary: number of non-null account numbers plus the first and last
# campaign date seen for each creative code.
# NOTE(review): 'count' tallies non-null bus_bacct_num rows, not distinct accounts —
# confirm that is the intended meaning of Customer_count.
# Disabled ideas kept for reference: customer share, and churn total / churn rate
# derived from a Telus_Churn_Flag column.
(
    Camp_DF
    .groupby(['creative_cd'])
    .agg(
        Customer_count=('bus_bacct_num', 'count'),
        campaign_date_min=('in_hm_dt', 'min'),
        campaign_date_max=('in_hm_dt', 'max'),
    )
    .reset_index()  # the aggregation already yields a DataFrame; no extra wrapper needed
)

Unnamed: 0,creative_cd,Customer_count,campaign_date_min,campaign_date_max
0,ALR1_TP1,27111,2021-09-22 00:00:00+00:00,2023-02-08 00:00:00+00:00
1,ALR1_TP1X,8154,2021-09-29 00:00:00+00:00,2023-02-08 00:00:00+00:00
2,ALR1_TP2,13091,2021-09-29 00:00:00+00:00,2023-02-08 00:00:00+00:00
3,ALR1_TP2X,7372,2021-10-06 00:00:00+00:00,2023-02-08 00:00:00+00:00
4,ALR1_TP3,11757,2021-10-06 00:00:00+00:00,2023-02-08 00:00:00+00:00
5,ALR1_TP3X,6478,2021-10-13 00:00:00+00:00,2023-02-08 00:00:00+00:00
6,ALR1_TP4,10775,2021-10-13 00:00:00+00:00,2023-02-08 00:00:00+00:00
7,ALR1_TP4X,5809,2021-10-20 00:00:00+00:00,2023-02-08 00:00:00+00:00
8,ALR1_TP5X_V1,193,2021-11-04 00:00:00+00:00,2023-02-08 00:00:00+00:00
9,ALR1_TP5X_V2,3280,2021-10-28 00:00:00+00:00,2023-02-08 00:00:00+00:00


In [33]:
# Same summary as per-creative, additionally split by the control flag (ctrl_flg):
# non-null account-number count plus first and last campaign date per group.
# NOTE(review): 'count' tallies non-null bus_bacct_num rows, not distinct accounts —
# confirm that is the intended meaning of Customer_count.
# Disabled ideas kept for reference: customer share, and churn total / churn rate
# derived from a Telus_Churn_Flag column.
(
    Camp_DF
    .groupby(['creative_cd', 'ctrl_flg'])
    .agg(
        Customer_count=('bus_bacct_num', 'count'),
        campaign_date_min=('in_hm_dt', 'min'),
        campaign_date_max=('in_hm_dt', 'max'),
    )
    .reset_index()  # the aggregation already yields a DataFrame; no extra wrapper needed
)

Unnamed: 0,creative_cd,ctrl_flg,Customer_count,campaign_date_min,campaign_date_max
0,ALR1_TP1,N,15014,2021-09-22 00:00:00+00:00,2023-02-08 00:00:00+00:00
1,ALR1_TP1,Y,12097,2021-09-22 00:00:00+00:00,2023-02-08 00:00:00+00:00
2,ALR1_TP1X,N,7714,2021-09-29 00:00:00+00:00,2023-02-08 00:00:00+00:00
3,ALR1_TP1X,Y,440,2021-11-04 00:00:00+00:00,2023-02-08 00:00:00+00:00
4,ALR1_TP2,N,12550,2021-09-29 00:00:00+00:00,2023-02-08 00:00:00+00:00
5,ALR1_TP2,Y,541,2021-09-29 00:00:00+00:00,2023-01-25 00:00:00+00:00
6,ALR1_TP2X,N,7123,2021-10-06 00:00:00+00:00,2023-02-08 00:00:00+00:00
7,ALR1_TP2X,Y,249,2021-10-13 00:00:00+00:00,2023-02-08 00:00:00+00:00
8,ALR1_TP3,N,11364,2021-10-06 00:00:00+00:00,2023-02-08 00:00:00+00:00
9,ALR1_TP3,Y,393,2021-10-20 00:00:00+00:00,2023-02-08 00:00:00+00:00


In [32]:
# Number of rows per segment description.
Camp_DF['seg_desc'].value_counts()

SHS Recontracting Journey 1.0    107512
Name: seg_desc, dtype: int64

In [42]:
# Row counts per campaign date for the single creative 'ALR1_TP5_V2',
# ordered chronologically rather than by frequency.
(
    Camp_DF
    .loc[Camp_DF.creative_cd == 'ALR1_TP5_V2', 'in_hm_dt']
    .value_counts()
    .sort_index()
)

2021-10-20 00:00:00+00:00      2
2021-10-28 00:00:00+00:00      3
2021-11-04 00:00:00+00:00      8
2021-11-17 00:00:00+00:00     16
2021-11-24 00:00:00+00:00      4
2021-12-01 00:00:00+00:00      6
2021-12-08 00:00:00+00:00     11
2021-12-15 00:00:00+00:00      4
2021-12-22 00:00:00+00:00      6
2021-12-29 00:00:00+00:00     13
2022-01-05 00:00:00+00:00      7
2022-01-11 00:00:00+00:00    675
2022-03-02 00:00:00+00:00      1
2022-03-09 00:00:00+00:00      3
2022-03-16 00:00:00+00:00     17
2022-03-23 00:00:00+00:00     15
2022-03-30 00:00:00+00:00     18
2022-04-06 00:00:00+00:00     13
2022-04-13 00:00:00+00:00     51
2022-04-20 00:00:00+00:00     75
2022-04-27 00:00:00+00:00     83
2022-05-04 00:00:00+00:00    118
2022-05-11 00:00:00+00:00    207
2022-05-18 00:00:00+00:00     15
2022-05-25 00:00:00+00:00    190
2022-06-01 00:00:00+00:00    116
2022-06-08 00:00:00+00:00    144
2022-06-15 00:00:00+00:00     64
2022-06-22 00:00:00+00:00    141
2022-06-29 00:00:00+00:00    126
2022-07-06

In [31]:
# Write the campaign extract to BigQuery, replacing whatever the table already holds.
config = bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Destination given as '<dataset>.<table>'.
# NOTE(review): no project prefix — presumably resolved against the client's
# default project; confirm.
Table_BQ = 'SHS.SHS_ALR_Campaign_data'

# load_table_from_dataframe only starts the load job and returns the job object.
bq_table_instance = bq_client.load_table_from_dataframe(Camp_DF, Table_BQ, job_config=config)
# Block until the job finishes so a failed load raises here instead of passing silently.
bq_table_instance.result();



In [91]:
# Read the second campaign data set (presumably the SAS-side export, judging by the
# variable name — confirm) from a CSV file.
# NOTE(review): the path is relative, so the file must sit in the kernel's current
# working directory.
Camp_SAS=pd.read_csv('SHS_ALR_Campaign.csv')

In [97]:
# Overview of the CSV-sourced campaign data: column names, non-null counts and dtypes.
Camp_SAS.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107385 entries, 0 to 107384
Data columns (total 52 columns):
 #   Column                  Non-Null Count   Dtype         
---  ------                  --------------   -----         
 0   TRACKING                107385 non-null  object        
 1   CAMP_INHOME             107385 non-null  object        
 2   CAMP_ID                 107385 non-null  object        
 3   DBM_VERSION             107385 non-null  object        
 4   DBM_VERSION_DESC        0 non-null       float64       
 5   CAMP_MODEL_DECILE       0 non-null       float64       
 6   MISC_INFO1              0 non-null       float64       
 7   MISC_INFO2              0 non-null       float64       
 8   MISC_INFO3              0 non-null       float64       
 9   MISC_INFO4              0 non-null       float64       
 10  CAMP_CREATIVE           107385 non-null  object        
 11  CAMP_CREATIVE_DESC      0 non-null       float64       
 12  CAMP_TEST               107385

In [93]:
def date_converter(x):
    """Parse a timestamp written as ``'%d%b%Y:%H:%M:%S'`` (e.g. ``'22SEP2021:00:00:00'``).

    Args:
        x: Value to parse, normally a string in the format above.

    Returns:
        The result of ``pd.to_datetime`` when parsing succeeds; otherwise ``x``
        itself, unchanged (best-effort conversion). Note that a column built
        from this function can therefore hold a mix of timestamps and raw values.
    """
    try:
        return pd.to_datetime(x, format='%d%b%Y:%H:%M:%S')
    # Only swallow parsing failures; a bare `except` would also trap
    # KeyboardInterrupt / SystemExit and make the cell impossible to interrupt.
    except (ValueError, TypeError):
        return x

In [94]:
# from datetime import datetime

# pd.to_datetime('22SEP2021:00:00:00', format='%d%b%Y:%H:%M:%S').strftime('%Y-%m-%d')

# datetime.strptime('22SEP2021:00:00:00', '%d%b%Y:%H:%M:%S').strftime('%Y-%m-%d')

In [95]:
# Parse CAMP_INHOME ('%d%b%Y:%H:%M:%S', e.g. '22SEP2021:00:00:00') into a datetime column
# with one vectorized call instead of a row-by-row apply.
# errors='coerce' turns any unparseable entry into NaT, so the column always has a
# datetime dtype and date comparisons on it cannot fail on stray raw strings.
Camp_SAS['Campaign_date'] = pd.to_datetime(
    Camp_SAS['CAMP_INHOME'],
    format='%d%b%Y:%H:%M:%S',
    errors='coerce',
)

In [96]:
# Rows per creative code in the CSV-sourced data, restricted to campaign dates on or
# after 2021-09-22 and listed alphabetically by creative code.
# NOTE(review): 2021-09-22 looks like the earliest campaign date in the BigQuery
# extract — confirm that is why this cutoff was chosen.
(
    Camp_SAS
    .loc[Camp_SAS.Campaign_date >= '2021-09-22', 'CAMP_CREATIVE']
    .value_counts()
    .sort_index()
)

ALR1_TP1        27154
ALR1_TP1X        8167
ALR1_TP2        13118
ALR1_TP2X        7383
ALR1_TP3        11769
ALR1_TP3X        6485
ALR1_TP4        10789
ALR1_TP4X        5813
ALR1_TP5X_V1      191
ALR1_TP5X_V2     3280
ALR1_TP5X_V3      903
ALR1_TP5X_V4      823
ALR1_TP5_V1       387
ALR1_TP5_V2      6933
ALR1_TP5_V3      2087
ALR1_TP5_V4      2103
Name: CAMP_CREATIVE, dtype: int64

In [98]:
# Write the CSV-sourced campaign data to BigQuery, replacing whatever the table already holds.
config = bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Destination given as '<dataset>.<table>'.
# NOTE(review): no project prefix — presumably resolved against the client's
# default project; confirm.
Table_BQ = 'SHS.SHS_ALR_Campaign_data_SAS'

# load_table_from_dataframe only starts the load job and returns the job object.
bq_table_instance = bq_client.load_table_from_dataframe(Camp_SAS, Table_BQ, job_config=config)
# Block until the job finishes so a failed load raises here instead of passing silently.
bq_table_instance.result();