In [3]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
sys.path.insert(0, str(pth_project))
d_project_config = safe_load(pth_creds.open())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [4]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    if sql is not None:
        df = bq_client.query(sql).to_dataframe()
    elif pth_query is not None:
        sql = pth_query.read_text()
        df = bq_client.query(sql).to_dataframe()
    else:
        raise ValueError('`sql` or `pth_query` should be set')  
    return df

In [5]:
Query='''


select * from `divgpras-pr-579355.SHS.SHS_GTM_ADC_UseCases_30Nov2022`

'''

In [20]:
Customer_details='''


DECLARE _end_dt_snpsht DATE DEFAULT '2023-01-20';
DECLARE _start_dt_snpsht DATE DEFAULT '2022-12-01';




SELECT customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where date(last_updt_ts) <= _end_dt_snpsht
and date(last_updt_ts) >= _start_dt_snpsht
and  dealer_name in ('TELUS Communications Inc.')
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id







'''

In [21]:
Best_practices_details_Query='''

select customer_id,best_practices_id,best_practices_ind,date(dt_last_calculate_utc) as last_date_calculate
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_best_practice`
where DATE(dt_last_calculate_utc)<'2023-01-20'

QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id,best_practices_id ORDER BY last_updt_ts DESC) = 1
order by customer_id,best_practices_id


'''

In [22]:
Arming_Query=  '''

with date_sql as (
select  count(distinct date(date)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)<= '2023-01-20' and date(date)>= '2022-12-01'

)

select id_cust as customer_id,sum(count_arm_commands) as sum_arm_commands,sum(count_disarm_commands) as sum_disarm_commands,count (distinct date(date)) as number_days_arming_disarming, (select count_of_dates from date_sql ) as count_of_dates_arming
from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< '2023-01-20' and date(date)>= '2022-12-01'
group by customer_id
order by number_days_arming_disarming desc

'''

In [23]:
Login_SQL='''

with date_sql as (
select  count(distinct date(login_dt_utc)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)<= '2023-01-20' and date(login_dt_utc)>= '2022-12-01' 

)



select customer_id,sum(login_count) as sum_login_count,count (distinct date(login_dt_utc)) as number_of_login_days, (select count_of_dates from date_sql ) as count_of_dates_logins
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)<= '2023-01-20' and date(login_dt_utc)>= '2022-12-01' 
group by customer_id
order by number_of_login_days desc


'''

In [24]:
Customer_DF=extract_bq_data(bq_client, sql=Customer_details)

In [26]:
Customer_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 557226 entries, 0 to 557225
Data columns (total 10 columns):
 #   Column              Non-Null Count   Dtype              
---  ------              --------------   -----              
 0   customer_id         557226 non-null  Int64              
 1   dealer_customer_id  557226 non-null  object             
 2   primary_login_id    557226 non-null  Int64              
 3   dealer_name         557226 non-null  object             
 4   join_date           557226 non-null  datetime64[ns, UTC]
 5   account_type_name   557226 non-null  object             
 6   customer_type_name  557226 non-null  object             
 7   primary_email       557226 non-null  object             
 8   primary_phone       557226 non-null  object             
 9   last_updt_ts        557226 non-null  datetime64[ns, UTC]
dtypes: Int64(2), datetime64[ns, UTC](2), object(6)
memory usage: 43.6+ MB


In [28]:
Best_Practices_DF=extract_bq_data(bq_client, sql=Best_practices_details_Query)

In [29]:
Best_Practices_DF['Best_practices_flag']=Best_Practices_DF['best_practices_ind'].apply(lambda x: 1 if x==True else 0)

In [30]:
Best_Practices_DF_wide=Best_Practices_DF.pivot(index='customer_id', columns='best_practices_id', values='Best_practices_flag').reset_index()

In [31]:
Best_Practices_DF_wide.head()

best_practices_id,customer_id,1,2,3,4,5,6
0,287850,1,0,0,0,1,0
1,292877,1,1,0,0,0,0
2,299100,1,1,0,0,1,1
3,355703,1,1,0,1,1,1
4,359052,1,1,0,0,1,1


In [32]:
Best_Practices_DF_wide.columns=['customer_id','Best_partices_1','Best_partices_2','Best_partices_3','Best_partices_4','Best_partices_5','Best_partices_6']

In [33]:
cols_to_sum=['Best_partices_1','Best_partices_2','Best_partices_3','Best_partices_4','Best_partices_5','Best_partices_6']
Best_Practices_DF_wide['Best_Practice_All']=Best_Practices_DF_wide[cols_to_sum].sum(axis=1)

In [34]:
Best_Practices_DF_wide['Best_Practice_All_flag']=Best_Practices_DF_wide['Best_Practice_All'].apply(lambda x: 1 if x==6 else 0)

In [35]:
Best_Practices_DF_wide.head()

Unnamed: 0,customer_id,Best_partices_1,Best_partices_2,Best_partices_3,Best_partices_4,Best_partices_5,Best_partices_6,Best_Practice_All,Best_Practice_All_flag
0,287850,1,0,0,0,1,0,2,0
1,292877,1,1,0,0,0,0,2,0
2,299100,1,1,0,0,1,1,4,0
3,355703,1,1,0,1,1,1,5,0
4,359052,1,1,0,0,1,1,4,0


In [36]:
Arming_DF=extract_bq_data(bq_client, sql=Arming_Query)

In [37]:
max_number_arming_dates=Arming_DF['count_of_dates_arming'].max()
Arming_DF['Arming_Consistency']=Arming_DF['number_days_arming_disarming'].apply(lambda x: x*100/max_number_arming_dates)

In [38]:
Login_DF=extract_bq_data(bq_client, sql=Login_SQL)

In [39]:
max_number_login_dates=Login_DF['count_of_dates_logins'].max()
Login_DF['Login_Consistency']=Login_DF['number_of_login_days'].apply(lambda x: x*100/max_number_login_dates)

In [40]:
Merge_DF=Customer_DF.merge(Best_Practices_DF_wide,on='customer_id',how='left')

In [41]:
Merge_DF_1=Merge_DF.merge(Arming_DF,on='customer_id',how='left')

In [42]:
Merge_DF_2=Merge_DF_1.merge(Login_DF,on='customer_id',how='left')

In [43]:
del Merge_DF_1,Merge_DF

In [46]:
Merge_DF_2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 557226 entries, 0 to 557225
Data columns (total 27 columns):
 #   Column                        Non-Null Count   Dtype              
---  ------                        --------------   -----              
 0   customer_id                   557226 non-null  Int64              
 1   dealer_customer_id            557226 non-null  object             
 2   primary_login_id              557226 non-null  Int64              
 3   dealer_name                   557226 non-null  object             
 4   join_date                     557226 non-null  datetime64[ns, UTC]
 5   account_type_name             557226 non-null  object             
 6   customer_type_name            557226 non-null  object             
 7   primary_email                 557226 non-null  object             
 8   primary_phone                 557226 non-null  object             
 9   last_updt_ts                  557226 non-null  datetime64[ns, UTC]
 10  Best_partices_1     

In [45]:
Merge_DF_2=Merge_DF_2.replace(np.nan, 0)

In [47]:
def Segment_making(row
                   ,min_login_consistency_threshold=50
                   ,min_arming_consistency_threshold=50
                   ,max_login_consistency_threshold=50
                   ,max_arming_consistency_threshold=50):
    
    if row['Best_Practice_All']>=5 and row['Login_Consistency']>=max_login_consistency_threshold  and row['Arming_Consistency']>=max_arming_consistency_threshold:
        return 'Heavy_User'
    elif row['Login_Consistency']>=max_login_consistency_threshold  and row['Arming_Consistency']< min_arming_consistency_threshold:
        return 'Home_automation_Savvy'
    elif row['Login_Consistency']< min_login_consistency_threshold  and row['Arming_Consistency']>=max_login_consistency_threshold:
        return 'Old_Fashion'
    elif row['Login_Consistency']==0  and row['Arming_Consistency']==0:
        return 'Disengaged'
    else:
        return "Moderate_Users"

In [48]:
Merge_DF_2['Segment']=Merge_DF_2.apply(Segment_making,axis=1)

In [49]:
Merge_DF_2['Segment'].value_counts()

Moderate_Users           222120
Home_automation_Savvy    132986
Disengaged               101956
Old_Fashion               60269
Heavy_User                39895
Name: Segment, dtype: int64

In [51]:
Merge_DF_2['Segment'].value_counts(normalize=True)*100

Moderate_Users           39.861744
Home_automation_Savvy    23.865721
Disengaged               18.297064
Old_Fashion              10.815899
Heavy_User                7.159573
Name: Segment, dtype: float64

In [52]:
GTM_data=extract_bq_data(bq_client, sql=Query)

In [53]:
GTM_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4846 entries, 0 to 4845
Data columns (total 81 columns):
 #   Column                   Non-Null Count  Dtype              
---  ------                   --------------  -----              
 0   seg_id                   4846 non-null   object             
 1   cmpgn_id                 4846 non-null   object             
 2   lang_cd                  4846 non-null   object             
 3   seg_desc                 0 non-null      Int64              
 4   vndr_cd                  4846 non-null   object             
 5   vndr_cd_desc             4846 non-null   object             
 6   cntct_med                4846 non-null   object             
 7   cntct_med_desc           4846 non-null   object             
 8   ctrl_flg                 4846 non-null   object             
 9   creative_cd              4846 non-null   object             
 10  creative_grp             0 non-null      Int64              
 11  creative_desc            0 non

In [55]:
GTM_data['creative_cd'].value_counts()

ADCNOUSEINCON11.30.2    1906
ADCLOWUSEINCON11.30.    1697
ADCNOUSEOOC11.30.22     1243
Name: creative_cd, dtype: int64

In [57]:
pd.DataFrame(GTM_data.groupby(['creative_cd','ctrl_flg']).agg(
    
    Customer_count= ('bus_cust_id','count'),
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,creative_cd,ctrl_flg,Customer_count
0,ADCLOWUSEINCON11.30.,N,1608
1,ADCLOWUSEINCON11.30.,Y,89
2,ADCNOUSEINCON11.30.2,N,1813
3,ADCNOUSEINCON11.30.2,Y,93
4,ADCNOUSEOOC11.30.22,N,1168
5,ADCNOUSEOOC11.30.22,Y,75


In [58]:
GTM_NOUse_INCON=GTM_data[GTM_data.creative_cd=='ADCNOUSEINCON11.30.2'].merge(Merge_DF_2,left_on='bus_cust_id',right_on='dealer_customer_id',how='left')

In [59]:
GTM_NOUse_INCON.shape

(1959, 109)

In [66]:
pd.DataFrame(GTM_NOUse_INCON[GTM_NOUse_INCON.ctrl_flg=='N'].groupby(['ctrl_flg','Segment']).agg(
    
    Customer_count= ('bus_cust_id','count'),
    Customer_Share= ('bus_cust_id',lambda x:x.count()*100/GTM_NOUse_INCON[GTM_NOUse_INCON.ctrl_flg=='N'].shape[0])
    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,ctrl_flg,Segment,Customer_count,Customer_Share
0,N,Disengaged,695,37.265416
1,N,Heavy_User,4,0.214477
2,N,Home_automation_Savvy,247,13.243968
3,N,Moderate_Users,903,48.418231
4,N,Old_Fashion,4,0.214477


In [67]:
pd.DataFrame(GTM_NOUse_INCON[GTM_NOUse_INCON.ctrl_flg=='Y'].groupby(['ctrl_flg','Segment']).agg(
    
    Customer_count= ('bus_cust_id','count'),
    Customer_Share= ('bus_cust_id',lambda x:x.count()*100/GTM_NOUse_INCON[GTM_NOUse_INCON.ctrl_flg=='Y'].shape[0])
    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,ctrl_flg,Segment,Customer_count,Customer_Share
0,Y,Disengaged,37,39.361702
1,Y,Home_automation_Savvy,14,14.893617
2,Y,Moderate_Users,41,43.617021
3,Y,Old_Fashion,1,1.06383


In [70]:
GTM_NOUse_OOCON=GTM_data[GTM_data.creative_cd=='ADCNOUSEOOC11.30.22'].merge(Merge_DF_2,left_on='bus_cust_id',right_on='dealer_customer_id',how='left')

In [71]:
GTM_NOUse_OOCON.shape

(1286, 109)

In [72]:
pd.DataFrame(GTM_NOUse_OOCON[GTM_NOUse_OOCON.ctrl_flg=='N'].groupby(['ctrl_flg','Segment']).agg(
    
    Customer_count= ('bus_cust_id','count'),
    Customer_Share= ('bus_cust_id',lambda x:x.count()*100/GTM_NOUse_OOCON[GTM_NOUse_OOCON.ctrl_flg=='N'].shape[0])
    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,ctrl_flg,Segment,Customer_count,Customer_Share
0,N,Disengaged,527,43.553719
1,N,Heavy_User,3,0.247934
2,N,Home_automation_Savvy,121,10.0
3,N,Moderate_Users,536,44.297521
4,N,Old_Fashion,8,0.661157


In [73]:
pd.DataFrame(GTM_NOUse_OOCON[GTM_NOUse_OOCON.ctrl_flg=='Y'].groupby(['ctrl_flg','Segment']).agg(
    
    Customer_count= ('bus_cust_id','count'),
    Customer_Share= ('bus_cust_id',lambda x:x.count()*100/GTM_NOUse_OOCON[GTM_NOUse_OOCON.ctrl_flg=='Y'].shape[0])
    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,ctrl_flg,Segment,Customer_count,Customer_Share
0,Y,Disengaged,37,48.684211
1,Y,Home_automation_Savvy,5,6.578947
2,Y,Moderate_Users,34,44.736842


In [74]:
GTM_LowUse_InCON=GTM_data[GTM_data.creative_cd=='ADCLOWUSEINCON11.30.'].merge(Merge_DF_2,left_on='bus_cust_id',right_on='dealer_customer_id',how='left')

In [75]:
pd.DataFrame(GTM_LowUse_InCON[GTM_LowUse_InCON.ctrl_flg=='N'].groupby(['ctrl_flg','Segment']).agg(
    
    Customer_count= ('bus_cust_id','count'),
    Customer_Share= ('bus_cust_id',lambda x:x.count()*100/GTM_LowUse_InCON[GTM_LowUse_InCON.ctrl_flg=='N'].shape[0])
    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,ctrl_flg,Segment,Customer_count,Customer_Share
0,N,Disengaged,352,21.529052
1,N,Heavy_User,55,3.363914
2,N,Home_automation_Savvy,284,17.370031
3,N,Moderate_Users,817,49.969419
4,N,Old_Fashion,122,7.461774


In [76]:
pd.DataFrame(GTM_LowUse_InCON[GTM_LowUse_InCON.ctrl_flg=='Y'].groupby(['ctrl_flg','Segment']).agg(
    
    Customer_count= ('bus_cust_id','count'),
    Customer_Share= ('bus_cust_id',lambda x:x.count()*100/GTM_LowUse_InCON[GTM_LowUse_InCON.ctrl_flg=='Y'].shape[0])
    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,ctrl_flg,Segment,Customer_count,Customer_Share
0,Y,Disengaged,25,27.777778
1,Y,Heavy_User,5,5.555556
2,Y,Home_automation_Savvy,13,14.444444
3,Y,Moderate_Users,41,45.555556
4,Y,Old_Fashion,6,6.666667
