In [37]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
sys.path.insert(0, str(pth_project))
d_project_config = safe_load(pth_creds.open())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [38]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    if sql is not None:
        df = bq_client.query(sql).to_dataframe()
    elif pth_query is not None:
        sql = pth_query.read_text()
        df = bq_client.query(sql).to_dataframe()
    else:
        raise ValueError('`sql` or `pth_query` should be set')  
    return df

In [5]:
Campaign_Query='''


select *
from `cio-datahub-enterprise-pr-183a.ent_cust_intractn.bq_ffh_dbm`
where 
--REGEXP_CONTAINS (creative_cd, r'ALR[1-9]_') and 
cmpgn_cd='HBUR' 

'''

In [6]:
Camp_DF=extract_bq_data(bq_client, sql=Campaign_Query)

In [7]:
Camp_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4846 entries, 0 to 4845
Data columns (total 81 columns):
 #   Column                   Non-Null Count  Dtype              
---  ------                   --------------  -----              
 0   seg_id                   4846 non-null   object             
 1   cmpgn_id                 4846 non-null   object             
 2   lang_cd                  4846 non-null   object             
 3   seg_desc                 0 non-null      object             
 4   vndr_cd                  4846 non-null   object             
 5   vndr_cd_desc             4846 non-null   object             
 6   cntct_med                4846 non-null   object             
 7   cntct_med_desc           4846 non-null   object             
 8   ctrl_flg                 4846 non-null   object             
 9   creative_cd              4846 non-null   object             
 10  creative_grp             0 non-null      object             
 11  creative_desc            0 non

In [8]:
Camp_DF['in_hm_dt'].value_counts()

2022-11-30 00:00:00+00:00    4846
Name: in_hm_dt, dtype: int64

In [9]:
Camp_DF['creative_cd'].value_counts()

ADCNOUSEINCON11.30.2    1906
ADCLOWUSEINCON11.30.    1697
ADCNOUSEOOC11.30.22     1243
Name: creative_cd, dtype: int64

In [10]:
pd.DataFrame(Camp_DF.groupby(['creative_cd']).agg(
    
    Customer_count= ('bus_bacct_num','count'),
    campaign_date_min=('in_hm_dt','min'),
    campaign_date_max=('in_hm_dt','max')

    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,creative_cd,Customer_count,campaign_date_min,campaign_date_max
0,ADCLOWUSEINCON11.30.,1697,2022-11-30 00:00:00+00:00,2022-11-30 00:00:00+00:00
1,ADCNOUSEINCON11.30.2,1906,2022-11-30 00:00:00+00:00,2022-11-30 00:00:00+00:00
2,ADCNOUSEOOC11.30.22,1243,2022-11-30 00:00:00+00:00,2022-11-30 00:00:00+00:00


In [11]:
pd.DataFrame(Camp_DF.groupby(['creative_cd','ctrl_flg']).agg(
    
    Customer_count= ('bus_bacct_num','count'),
    campaign_date_min=('in_hm_dt','min'),
    campaign_date_max=('in_hm_dt','max')

    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,creative_cd,ctrl_flg,Customer_count,campaign_date_min,campaign_date_max
0,ADCLOWUSEINCON11.30.,N,1608,2022-11-30 00:00:00+00:00,2022-11-30 00:00:00+00:00
1,ADCLOWUSEINCON11.30.,Y,89,2022-11-30 00:00:00+00:00,2022-11-30 00:00:00+00:00
2,ADCNOUSEINCON11.30.2,N,1813,2022-11-30 00:00:00+00:00,2022-11-30 00:00:00+00:00
3,ADCNOUSEINCON11.30.2,Y,93,2022-11-30 00:00:00+00:00,2022-11-30 00:00:00+00:00
4,ADCNOUSEOOC11.30.22,N,1168,2022-11-30 00:00:00+00:00,2022-11-30 00:00:00+00:00
5,ADCNOUSEOOC11.30.22,Y,75,2022-11-30 00:00:00+00:00,2022-11-30 00:00:00+00:00


In [12]:
Camp_DF['seg_desc'].value_counts()

Series([], Name: seg_desc, dtype: int64)

In [13]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.HBUR_Campaign_data'

bq_table_instance= bq_client.load_table_from_dataframe(Camp_DF, Table_BQ,job_config=config)



In [6]:
Camp_SAS=pd.read_csv('CAMPAIGN_HBUR_data_SAS.csv')

In [7]:
Camp_SAS.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4932 entries, 0 to 4931
Data columns (total 51 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   TRACKING                4932 non-null   object 
 1   CAMP_INHOME             4932 non-null   object 
 2   CAMP_ID                 4932 non-null   object 
 3   DBM_VERSION             0 non-null      float64
 4   DBM_VERSION_DESC        0 non-null      float64
 5   CAMP_MODEL_DECILE       0 non-null      float64
 6   MISC_INFO1              0 non-null      float64
 7   MISC_INFO2              0 non-null      float64
 8   MISC_INFO3              0 non-null      float64
 9   MISC_INFO4              0 non-null      float64
 10  CAMP_CREATIVE           4932 non-null   object 
 11  CAMP_CREATIVE_DESC      0 non-null      float64
 12  CAMP_TEST               4932 non-null   object 
 13  DBM_PRIME               4932 non-null   object 
 14  CAMP_CONTACT            4932 non-null   

In [8]:
Camp_SAS['CAMP_CREATIVE'].value_counts()

ADCNOUSEINCON11.30.2    1950
ADCLOWUSEINCON11.30.    1715
ADCNOUSEOOC11.30.22     1267
Name: CAMP_CREATIVE, dtype: int64

In [9]:
pd.DataFrame(Camp_SAS.groupby(['CAMP_CREATIVE','CAMP_TEST']).agg(
    
    Customer_count= ('BACCT_NUM','nunique'),
    Opened=('OPENED','sum')
    
    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,CAMP_CREATIVE,CAMP_TEST,Customer_count,Opened
0,ADCLOWUSEINCON11.30.,C,90,0
1,ADCLOWUSEINCON11.30.,R,1625,675
2,ADCNOUSEINCON11.30.2,C,95,0
3,ADCNOUSEINCON11.30.2,R,1855,799
4,ADCNOUSEOOC11.30.22,C,77,0
5,ADCNOUSEOOC11.30.22,R,1190,534


In [10]:
def date_converter(x):
    try:
        return(pd.to_datetime(x, format='%d%b%Y:%H:%M:%S'))
    except:
        return x

In [11]:
Camp_SAS['Campaign_date']=Camp_SAS['CAMP_INHOME'].apply(date_converter)

In [12]:
Camp_SAS['Campaign_date'].value_counts()

2022-11-30    4932
Name: Campaign_date, dtype: int64

In [20]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.HBUR_Campaign_data_SAS'

bq_table_instance= bq_client.load_table_from_dataframe(Camp_SAS, Table_BQ,job_config=config)

In [15]:
ADC_NOOOCON=pd.read_csv('ADC Use Case-1 - Sheet1.csv')
ADC_NOINCON=pd.read_csv('ADC Use Case-2 - Sheet1.csv')
ADC_LOWINCON=pd.read_csv('ADC Use Case-3 - Sheet1.csv')

In [16]:
ADC_NOOOCON['CAMP_CREATIVE']='ADCNOUSEOOC11.30.22'
ADC_NOINCON['CAMP_CREATIVE']='ADCNOUSEINCON11.30.2'
ADC_LOWINCON['CAMP_CREATIVE']='ADCLOWUSEINCON11.30.'

In [17]:
ADC_Control=pd.concat([ADC_NOOOCON,ADC_NOINCON,ADC_LOWINCON])

In [18]:
ADC_Control.head()

Unnamed: 0,BAN,CAMP_CREATIVE
0,602695078,ADCNOUSEOOC11.30.22
1,228855985,ADCNOUSEOOC11.30.22
2,602081872,ADCNOUSEOOC11.30.22
3,603332083,ADCNOUSEOOC11.30.22
4,603566649,ADCNOUSEOOC11.30.22


In [24]:
ADC_Control_1=ADC_Control.merge(Camp_SAS[['BACCT_NUM']], how='left',left_on='BAN',right_on='BACCT_NUM')

In [25]:
ADC_Control_1.head()

Unnamed: 0,BAN,CAMP_CREATIVE,BACCT_NUM
0,602695078,ADCNOUSEOOC11.30.22,
1,228855985,ADCNOUSEOOC11.30.22,
2,602081872,ADCNOUSEOOC11.30.22,
3,603332083,ADCNOUSEOOC11.30.22,
4,603566649,ADCNOUSEOOC11.30.22,


In [26]:
ADC_Control_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19050 entries, 0 to 19049
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   BAN            19050 non-null  int64  
 1   CAMP_CREATIVE  19050 non-null  object 
 2   BACCT_NUM      4932 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 595.3+ KB


In [27]:
ADC_Control_1=ADC_Control_1[ADC_Control_1['BACCT_NUM'].isna()==1]

In [28]:
ADC_Control_1.head()

Unnamed: 0,BAN,CAMP_CREATIVE,BACCT_NUM
0,602695078,ADCNOUSEOOC11.30.22,
1,228855985,ADCNOUSEOOC11.30.22,
2,602081872,ADCNOUSEOOC11.30.22,
3,603332083,ADCNOUSEOOC11.30.22,
4,603566649,ADCNOUSEOOC11.30.22,


In [34]:
ADC_Control_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 14118 entries, 0 to 19049
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   BAN            14118 non-null  int64 
 1   CAMP_CREATIVE  14118 non-null  object
 2   Experiment     14118 non-null  object
dtypes: int64(1), object(2)
memory usage: 957.2+ KB


In [30]:
ADC_Control_1['Experiment']='Control'

In [33]:
ADC_Control_1.drop(['BACCT_NUM'],inplace=True,axis=1)

In [36]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.HBUR_Control_data'

bq_table_instance= bq_client.load_table_from_dataframe(ADC_Control_1, Table_BQ,job_config=config)

In [39]:
ADC_Santa_Campaign_treatement=pd.read_csv('SantaCampaign_data_SAS.csv')

ADC_Santa_Campaign_control=pd.read_csv('ADC -Santa Campaign Use Case - Sheet1.csv')

In [57]:
ADC_Santa_Campaign_treatement.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1966 entries, 0 to 1965
Data columns (total 51 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   TRACKING                1966 non-null   object 
 1   CAMP_INHOME             1966 non-null   object 
 2   CAMP_ID                 1966 non-null   object 
 3   DBM_VERSION             0 non-null      float64
 4   DBM_VERSION_DESC        0 non-null      float64
 5   CAMP_MODEL_DECILE       0 non-null      float64
 6   MISC_INFO1              0 non-null      float64
 7   MISC_INFO2              0 non-null      float64
 8   MISC_INFO3              0 non-null      float64
 9   MISC_INFO4              0 non-null      float64
 10  CAMP_CREATIVE           1966 non-null   object 
 11  CAMP_CREATIVE_DESC      0 non-null      float64
 12  CAMP_TEST               1966 non-null   object 
 13  DBM_PRIME               1966 non-null   object 
 14  CAMP_CONTACT            1966 non-null   

In [44]:
ADC_Santa_Campaign_control['Experiment']='Control'

In [40]:
ADC_Santa_Campaign_treatement.head()

Unnamed: 0,TRACKING,CAMP_INHOME,CAMP_ID,DBM_VERSION,DBM_VERSION_DESC,CAMP_MODEL_DECILE,MISC_INFO1,MISC_INFO2,MISC_INFO3,MISC_INFO4,...,DEVICE,OPERATINGSYSTEM,EMAILCLIENT,CAMP_KEY,LPDS_ID,LIST_GEN_STATUS,MIN_EVENT_DATE,MAX_EVENT_DATE,SNET_PREMISE_TYPE_CD,LIST_NUMBER
0,FFH Contact,06DEC2022:00:00:00,ADC,,,,,,,,...,,,,**********************************************...,12811387.0,9,06DEC2022:00:00:00,06DEC2022:00:00:00,SFU,29042
1,FFH Contact,06DEC2022:00:00:00,ADC,,,,,,,,...,unknown,Unknown,5.0,**********************************************...,21999017.0,9,06DEC2022:00:00:00,07DEC2022:00:00:00,SXU,29042
2,FFH Contact,06DEC2022:00:00:00,ADC,,,,,,,,...,unknown,Unknown,5.0,**********************************************...,2022541.0,9,06DEC2022:00:00:00,07DEC2022:00:00:00,SFU,29042
3,FFH Contact,06DEC2022:00:00:00,ADC,,,,,,,,...,unknown,Unknown,5.0,**********************************************...,18853324.0,9,06DEC2022:00:00:00,06DEC2022:00:00:00,MDU,29042
4,FFH Contact,06DEC2022:00:00:00,ADC,,,,,,,,...,,,,**********************************************...,13567026.0,9,06DEC2022:00:00:00,06DEC2022:00:00:00,SFU,29042


In [45]:
ADC_Santa_Campaign_control_1=ADC_Santa_Campaign_control.merge(ADC_Santa_Campaign_treatement[['BACCT_NUM']], how='left',left_on='BAN',right_on='BACCT_NUM')

In [50]:
ADC_Santa_Campaign_control_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4643 entries, 0 to 6998
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   BAN         4643 non-null   int64  
 1   Experiment  4643 non-null   object 
 2   BACCT_NUM   0 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 145.1+ KB


In [49]:
ADC_Santa_Campaign_control_1=ADC_Santa_Campaign_control_1[ADC_Santa_Campaign_control_1['BACCT_NUM'].isna()==True]

In [51]:
ADC_Santa_Campaign_control_1.head()

Unnamed: 0,BAN,Experiment,BACCT_NUM
0,234349072,Control,
1,123589978,Control,
2,603968749,Control,
3,604752063,Control,
6,604143502,Control,


In [52]:
ADC_Santa_Campaign_control_1.drop(['BACCT_NUM'],inplace=True,axis=1)

In [53]:
ADC_Santa_Campaign_treatement_1=ADC_Santa_Campaign_treatement[['BACCT_NUM']]

In [54]:
ADC_Santa_Campaign_treatement_1.drop_duplicates(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ADC_Santa_Campaign_treatement_1.drop_duplicates(inplace=True)


In [58]:
ADC_Santa_Campaign_treatement_1['Experiment']='Treatment'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ADC_Santa_Campaign_treatement_1['Experiment']='Treatment'


In [67]:
ADC_Santa_Campaign_treatement_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1966 entries, 0 to 1965
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   BAN         1966 non-null   int64 
 1   Experiment  1966 non-null   object
dtypes: int64(1), object(1)
memory usage: 110.6+ KB


In [66]:
ADC_Santa_Campaign_treatement_1.rename({'BACCT_NUM':'BAN'},axis=1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ADC_Santa_Campaign_treatement_1.rename({'BACCT_NUM':'BAN'},axis=1,inplace=True)


In [68]:
ADC_Santa_Campaign_control_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4643 entries, 0 to 6998
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   BAN         4643 non-null   int64 
 1   Experiment  4643 non-null   object
dtypes: int64(1), object(1)
memory usage: 108.8+ KB


In [71]:
ADC_Santa_Campaign=pd.concat([ADC_Santa_Campaign_treatement_1,ADC_Santa_Campaign_control_1])

In [76]:
ADC_Santa_Campaign.head()

Unnamed: 0,BAN,Experiment,CAMP_CREATIVE
0,604430780,Treatment,Santa_Campaign
1,604366056,Treatment,Santa_Campaign
2,600148997,Treatment,Santa_Campaign
3,604519203,Treatment,Santa_Campaign
4,603625752,Treatment,Santa_Campaign


In [78]:
ADC_Santa_Campaign_1=ADC_Santa_Campaign[['BAN','CAMP_CREATIVE','Experiment']]

In [79]:
ADC_Santa_Campaign_1.head()

Unnamed: 0,BAN,CAMP_CREATIVE,Experiment
0,604430780,Santa_Campaign,Treatment
1,604366056,Santa_Campaign,Treatment
2,600148997,Santa_Campaign,Treatment
3,604519203,Santa_Campaign,Treatment
4,603625752,Santa_Campaign,Treatment


In [74]:
ADC_Santa_Campaign['CAMP_CREATIVE']='Santa_Campaign'

In [80]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.Santa_Campaign'

bq_table_instance= bq_client.load_table_from_dataframe(ADC_Santa_Campaign_1, Table_BQ,job_config=config)