In [None]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root is whatever precedes 'notebooks' in the current working directory
# (if 'notebooks' is not in the path, the working directory itself is used).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Make the project root importable so the `core` package below resolves.
sys.path.insert(0, str(pth_project))
# Read the YAML config through a context manager so the file handle is closed
# as soon as parsing finishes instead of being left open for the kernel's lifetime.
with pth_creds.open() as f_project_config:
    d_project_config = safe_load(f_project_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path change above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when a DataFrame/Series is displayed.
pd.options.display.max_rows = 100

In [None]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Args:
        bq_client: Object exposing ``query(sql)`` whose return value has a
            ``to_dataframe()`` method.
        sql: Query text. Used whenever it is not ``None``, even if
            ``pth_query`` is also given.
        pth_query: Object with a ``read_text()`` method returning the query
            text. Only consulted when ``sql`` is ``None``.

    Returns:
        Whatever ``bq_client.query(...).to_dataframe()`` returns.

    Raises:
        ValueError: If both ``sql`` and ``pth_query`` are ``None``.
    """
    # Guard first: nothing to run when neither source is supplied.
    if sql is None and pth_query is None:
        raise ValueError('`sql` or `pth_query` should be set')

    # Inline SQL wins over the query file.
    query_text = sql if sql is not None else pth_query.read_text()
    return bq_client.query(query_text).to_dataframe()

In [None]:
# SQL text: every column of the bq_ffh_dbm table for rows whose cmpgn_cd is 'HBUR'.
# The creative_cd regex condition inside the string is disabled with a SQL `--` comment.
Campaign_Query='''


select *
from `cio-datahub-enterprise-pr-183a.ent_cust_intractn.bq_ffh_dbm`
where 
--REGEXP_CONTAINS (creative_cd, r'ALR[1-9]_') and 
cmpgn_cd='HBUR' 

'''

In [None]:
# Run Campaign_Query against BigQuery and keep the result as a DataFrame.
Camp_DF=extract_bq_data(bq_client, sql=Campaign_Query)

In [None]:
# Column names, dtypes and non-null counts of the extract.
Camp_DF.info()

In [None]:
# Number of rows per distinct in_hm_dt value.
Camp_DF['in_hm_dt'].value_counts()

In [None]:
# Number of rows per distinct creative_cd value.
Camp_DF['creative_cd'].value_counts()

In [None]:
# Per creative_cd: number of non-null bus_bacct_num values and first/last in_hm_dt.
# NOTE(review): 'count' tallies rows, not distinct accounts - confirm that is the
# intended meaning of "Customer_count".
(
    Camp_DF
    .groupby(['creative_cd'])
    .agg(
        Customer_count=('bus_bacct_num', 'count'),
        campaign_date_min=('in_hm_dt', 'min'),
        campaign_date_max=('in_hm_dt', 'max'),
    )
    .reset_index()
)

In [None]:
pd.DataFrame(Camp_DF.groupby(['creative_cd','ctrl_flg']).agg(
    
    Customer_count= ('bus_bacct_num','count'),
    campaign_date_min=('in_hm_dt','min'),
    campaign_date_max=('in_hm_dt','max')

    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

In [None]:
Camp_DF['seg_desc'].value_counts()

In [None]:
# Upload Camp_DF to BigQuery, replacing any existing contents of the table.
config = bigquery.job.LoadJobConfig()
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.HBUR_Campaign_data'

bq_table_instance = bq_client.load_table_from_dataframe(Camp_DF, Table_BQ, job_config=config)
# load_table_from_dataframe only starts the load job. Wait for it to finish so a
# failed load raises in this cell instead of going unnoticed.
bq_table_instance.result()

In [None]:
# Load the HBUR campaign CSV (presumably a SAS export, judging by the file name)
# from the notebook's working directory.
Camp_SAS=pd.read_csv('CAMPAIGN_HBUR_data_SAS.csv')

In [None]:
# Column names, dtypes and non-null counts of the CSV.
Camp_SAS.info()

In [None]:
# Number of rows per distinct CAMP_CREATIVE value.
Camp_SAS['CAMP_CREATIVE'].value_counts()

In [None]:
# Per (CAMP_CREATIVE, CAMP_TEST) pair: distinct BACCT_NUM values and the sum of OPENED.
(
    Camp_SAS
    .groupby(['CAMP_CREATIVE', 'CAMP_TEST'])
    .agg(
        Customer_count=('BACCT_NUM', 'nunique'),
        Opened=('OPENED', 'sum'),
    )
    .reset_index()
)

In [None]:
def date_converter(x):
    """Parse a datetime written in the ``%d%b%Y:%H:%M:%S`` layout.

    Example of a matching value: ``30Nov2022:13:45:10``.

    Args:
        x: Value to parse.

    Returns:
        The result of ``pd.to_datetime(x, format='%d%b%Y:%H:%M:%S')`` when
        parsing succeeds; otherwise ``x`` itself, unchanged.
    """
    try:
        return pd.to_datetime(x, format='%d%b%Y:%H:%M:%S')
    except (ValueError, TypeError, OverflowError):
        # Best-effort conversion: values that do not parse are passed through.
        # Only parse-related errors are caught, so a kernel interrupt or a
        # genuine programming error (e.g. a missing import) is not silently
        # turned into "return the input".
        return x

In [None]:
# Convert CAMP_INHOME element by element; values date_converter cannot parse are
# kept as-is, so the new column may hold a mix of timestamps and raw values.
Camp_SAS['Campaign_date']=Camp_SAS['CAMP_INHOME'].apply(date_converter)

In [None]:
# Number of rows per distinct Campaign_date value.
Camp_SAS['Campaign_date'].value_counts()

In [None]:
# Upload Camp_SAS to BigQuery, replacing any existing contents of the table.
config = bigquery.job.LoadJobConfig()
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.HBUR_Campaign_data_SAS'

bq_table_instance = bq_client.load_table_from_dataframe(Camp_SAS, Table_BQ, job_config=config)
# load_table_from_dataframe only starts the load job. Wait for it to finish so a
# failed load raises in this cell instead of going unnoticed.
bq_table_instance.result()

In [None]:
ADC_NOOOCON=pd.read_csv('ADC Use Case-1 - Sheet1.csv')
ADC_NOINCON=pd.read_csv('ADC Use Case-2 - Sheet1.csv')
ADC_LOWINCON=pd.read_csv('ADC Use Case-3 - Sheet1.csv')

In [None]:
ADC_NOOOCON['CAMP_CREATIVE']='ADCNOUSEOOC11.30.22'
ADC_NOINCON['CAMP_CREATIVE']='ADCNOUSEINCON11.30.2'
ADC_LOWINCON['CAMP_CREATIVE']='ADCLOWUSEINCON11.30.'

In [None]:
ADC_Control=pd.concat([ADC_NOOOCON,ADC_NOINCON,ADC_LOWINCON])

In [None]:
ADC_Control.head()

In [None]:
ADC_Control_1=ADC_Control.merge(Camp_SAS[['BACCT_NUM']], how='left',left_on='BAN',right_on='BACCT_NUM')

In [None]:
ADC_Control_1.head()

In [None]:
ADC_Control_1.info()

In [None]:
ADC_Control_1=ADC_Control_1[ADC_Control_1['BACCT_NUM'].isna()==1]

In [None]:
ADC_Control_1.head()

In [None]:
ADC_Control_1.info()

In [None]:
ADC_Control_1['Experiment']='Control'

In [None]:
ADC_Control_1.drop(['BACCT_NUM'],inplace=True,axis=1)

In [None]:
# Upload ADC_Control_1 to BigQuery, replacing any existing contents of the table.
config = bigquery.job.LoadJobConfig()
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.HBUR_Control_data'

bq_table_instance = bq_client.load_table_from_dataframe(ADC_Control_1, Table_BQ, job_config=config)
# load_table_from_dataframe only starts the load job. Wait for it to finish so a
# failed load raises in this cell instead of going unnoticed.
bq_table_instance.result()

In [None]:
# Treatment-side input for the Santa campaign, read from the working directory.
ADC_Santa_Campaign_treatement=pd.read_csv('SantaCampaign_data_SAS.csv')

# Control-side input for the Santa campaign, read from the working directory.
ADC_Santa_Campaign_control=pd.read_csv('ADC -Santa Campaign Use Case - Sheet1.csv')

In [None]:
# Column names, dtypes and non-null counts of the treatment file.
ADC_Santa_Campaign_treatement.info()

In [None]:
# Mark every row of the control file as belonging to the control group.
ADC_Santa_Campaign_control['Experiment']='Control'

In [None]:
ADC_Santa_Campaign_treatement.head()

In [None]:
ADC_Santa_Campaign_control_1=ADC_Santa_Campaign_control.merge(ADC_Santa_Campaign_treatement[['BACCT_NUM']], how='left',left_on='BAN',right_on='BACCT_NUM')

In [None]:
ADC_Santa_Campaign_control_1.info()

In [None]:
ADC_Santa_Campaign_control_1=ADC_Santa_Campaign_control_1[ADC_Santa_Campaign_control_1['BACCT_NUM'].isna()==True]

In [None]:
ADC_Santa_Campaign_control_1.head()

In [None]:
ADC_Santa_Campaign_control_1.drop(['BACCT_NUM'],inplace=True,axis=1)

In [None]:
ADC_Santa_Campaign_treatement_1=ADC_Santa_Campaign_treatement[['BACCT_NUM']]

In [None]:
ADC_Santa_Campaign_treatement_1.drop_duplicates(inplace=True)

In [None]:
ADC_Santa_Campaign_treatement_1['Experiment']='Treatment'

In [None]:
ADC_Santa_Campaign_treatement_1.info()

In [None]:
ADC_Santa_Campaign_treatement_1.rename({'BACCT_NUM':'BAN'},axis=1,inplace=True)

In [None]:
# Column names, dtypes and non-null counts of the filtered control frame.
ADC_Santa_Campaign_control_1.info()

In [None]:
# Stack treatment rows on top of control rows; columns present in only one of
# the two frames are filled with missing values by pd.concat.
ADC_Santa_Campaign=pd.concat([ADC_Santa_Campaign_treatement_1,ADC_Santa_Campaign_control_1])

In [None]:
ADC_Santa_Campaign.head()

In [None]:
ADC_Santa_Campaign_1=ADC_Santa_Campaign[['BAN','CAMP_CREATIVE','Experiment']]

In [None]:
ADC_Santa_Campaign_1.head()

In [None]:
ADC_Santa_Campaign['CAMP_CREATIVE']='Santa_Campaign'

In [None]:
# Upload ADC_Santa_Campaign_1 to BigQuery, replacing any existing contents of the table.
config = bigquery.job.LoadJobConfig()
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.Santa_Campaign'

bq_table_instance = bq_client.load_table_from_dataframe(ADC_Santa_Campaign_1, Table_BQ, job_config=config)
# load_table_from_dataframe only starts the load job. Wait for it to finish so a
# failed load raises in this cell instead of going unnoticed.
bq_table_instance.result()