In [1]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
sys.path.insert(0, str(pth_project))
d_project_config = safe_load(pth_creds.open())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [2]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    if sql is not None:
        df = bq_client.query(sql).to_dataframe()
    elif pth_query is not None:
        sql = pth_query.read_text()
        df = bq_client.query(sql).to_dataframe()
    else:
        raise ValueError('`sql` or `pth_query` should be set')  
    return df

In [3]:
Customer_details='''

DECLARE _end_dt_snpsht DATE DEFAULT '2022-11-30';



with ADC_Customer_Base as (

SELECT date('2022-11-01') as Month_Snapshot,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where date(last_updt_ts) =_end_dt_snpsht
and  dealer_name in ('TELUS Communications Inc.','ADT by TELUS')
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id


)



,Telus_customers as
(
select bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id
--,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,pi_cntrct_start_ts as contract_start_date
--,pi_cntrct_end_ts as contract_end_date
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = _end_dt_snpsht #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
--QUALIFY ROW_NUMBER() OVER (PARTITION BY bacct_bus_bacct_num ORDER BY pi_prod_instnc_stat_ts DESC) = 1
order by cust_bus_cust_id

)



 ,Telus_Customer_Base as (

SELECT a.customer_id,'Telus_Customer' as Dealer_type_flag,b.bacct_bus_bacct_num as BAN, NULL as ADT_site_no
FROM ADC_Customer_Base a
inner join Telus_customers b
on a.dealer_customer_id=b.cust_bus_cust_id
)



, ADT_Customer_Base as (

SELECT customer_id,'ADT_Customer' as Dealer_type_flag,NULL  as BAN,site_no as ADT_site_no 
FROM `divgpras-pr-579355.ADC_updated.ADC_ADT_mapping` 

)



, Union_data as (

SELECT * from Telus_Customer_Base
UNION ALL
SELECT * from ADT_Customer_Base

)

,Merge_data as (

SELECT * from ADC_Customer_Base a
INNER JOIN Union_data b
on a.customer_id=b.customer_id
order by Dealer_type_flag

)

select * from Merge_data



'''

In [4]:
Customer_info=extract_bq_data(bq_client, sql=Customer_details)

In [5]:
Customer_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 556450 entries, 0 to 556449
Data columns (total 15 columns):
 #   Column              Non-Null Count   Dtype              
---  ------              --------------   -----              
 0   Month_Snapshot      556450 non-null  dbdate             
 1   customer_id         556450 non-null  Int64              
 2   dealer_customer_id  556450 non-null  object             
 3   primary_login_id    556450 non-null  Int64              
 4   dealer_name         556450 non-null  object             
 5   join_date           556450 non-null  datetime64[ns, UTC]
 6   account_type_name   556450 non-null  object             
 7   customer_type_name  556450 non-null  object             
 8   primary_email       556450 non-null  object             
 9   primary_phone       556450 non-null  object             
 10  last_updt_ts        556450 non-null  datetime64[ns, UTC]
 11  customer_id_1       556450 non-null  Int64              
 12  Dealer_type_flag

In [6]:
Customer_info['account_type_name'].value_counts()

Security System             417319
Awareness and Automation     91420
Standalone                   47711
Name: account_type_name, dtype: int64

In [7]:
Customer_info['dealer_name'].value_counts()

TELUS Communications Inc.    456805
ADT by TELUS                  99645
Name: dealer_name, dtype: int64

In [8]:
Customer_info['customer_id'].value_counts()

13796294    27
13577026    27
13277319    27
13620646    27
13720388    27
            ..
13557706     1
14383480     1
8878154      1
10734918     1
14459995     1
Name: customer_id, Length: 545639, dtype: Int64

In [9]:
Customer_info.head()

Unnamed: 0,Month_Snapshot,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts,customer_id_1,Dealer_type_flag,BAN,ADT_site_no
0,2022-11-01,1959761,,2119613,TELUS Communications Inc.,2013-04-11 17:35:54+00:00,Security System,Customer,AUZqIaGDQWISx5SAgbLWGbkqpR34FdoEquLolwjZALvvAV...,16042986894,2022-11-30 11:21:22.043651+00:00,1959761,ADT_Customer,,700225358.0
1,2022-11-01,3665878,E0020745,4107879,TELUS Communications Inc.,2015-07-08 11:53:42+00:00,Security System,Customer,AcmLCynkLQ5OaLVoxMw6utZjnssfq8D9knFeFjhdabyEF3...,12503768376,2022-11-30 11:21:22.043651+00:00,3665878,ADT_Customer,,700181906.0
2,2022-11-01,3704772,E2078594,4153473,ADT by TELUS,2015-07-24 12:10:54+00:00,Security System,Customer,ASO4Nlp6zarzH7M75rxDmfZ/O94+0KryeETMdYnGzq4CuE...,16049306299,2022-11-30 11:21:22.043651+00:00,3704772,ADT_Customer,,700210214.0
3,2022-11-01,3802548,,4269154,ADT by TELUS,2015-09-05 09:29:11+00:00,Security System,Customer,AccGahawi1JeKVOc/XdEdzUjaQ7+QYB4OeJEyN//eOhFBFdx,17789917021,2022-11-30 11:21:22.043651+00:00,3802548,ADT_Customer,,700442510.0
4,2022-11-01,4096401,ACG1040E80,4619538,ADT by TELUS,2016-01-29 23:53:37+00:00,Security System,Customer,AcE+517O43J5sW+c5hv5fg9BpKt9oO4sdaSNYzAieHYJEiDO,15149515058,2022-11-30 11:21:22.043651+00:00,4096401,ADT_Customer,,700456989.0


In [10]:
Best_practices_details_Query='''

select customer_id,best_practices_id,best_practices_ind,date(dt_last_calculate_utc) as last_date_calculate
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_best_practice`
where DATE(dt_last_calculate_utc)<'2022-12-01'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id,best_practices_id ORDER BY last_updt_ts DESC) = 1
order by customer_id,best_practices_id


'''

In [11]:
Best_Practices_DF= extract_bq_data(bq_client,sql=Best_practices_details_Query)

In [12]:
Best_Practices_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3698202 entries, 0 to 3698201
Data columns (total 4 columns):
 #   Column               Dtype  
---  ------               -----  
 0   customer_id          Int64  
 1   best_practices_id    Int64  
 2   best_practices_ind   boolean
 3   last_date_calculate  dbdate 
dtypes: Int64(2), boolean(1), dbdate(1)
memory usage: 98.8 MB


In [13]:
Best_Practices_DF.head()

Unnamed: 0,customer_id,best_practices_id,best_practices_ind,last_date_calculate
0,287850,1,True,2022-08-14
1,287850,2,False,2022-08-14
2,287850,3,False,2022-08-14
3,287850,4,False,2022-08-14
4,287850,5,True,2022-08-14


In [14]:
Best_Practices_DF['Best_practices_flag']=Best_Practices_DF['best_practices_ind'].apply(lambda x: 1 if x==True else 0)

In [15]:
Best_Practices_DF_wide=Best_Practices_DF.pivot(index='customer_id', columns='best_practices_id', values='Best_practices_flag').reset_index()

In [16]:
Best_Practices_DF_wide.head()

best_practices_id,customer_id,1,2,3,4,5,6
0,287850,1,0,0,0,1,0
1,292877,1,1,0,0,0,0
2,299100,1,1,0,0,1,1
3,355703,1,1,0,1,1,1
4,359052,1,1,0,0,1,1


In [17]:
Best_Practices_DF_wide.columns=['customer_id','Best_partices_1','Best_partices_2','Best_partices_3','Best_partices_4','Best_partices_5','Best_partices_6']

In [18]:
cols_to_sum=['Best_partices_1','Best_partices_2','Best_partices_3','Best_partices_4','Best_partices_5','Best_partices_6']
Best_Practices_DF_wide['Best_Practice_All']=Best_Practices_DF_wide[cols_to_sum].sum(axis=1)

In [19]:
Best_Practices_DF_wide['Best_Practice_All_flag']=Best_Practices_DF_wide['Best_Practice_All'].apply(lambda x: 1 if x==6 else 0)

In [20]:
Best_Practices_DF_wide.head()

Unnamed: 0,customer_id,Best_partices_1,Best_partices_2,Best_partices_3,Best_partices_4,Best_partices_5,Best_partices_6,Best_Practice_All,Best_Practice_All_flag
0,287850,1,0,0,0,1,0,2,0
1,292877,1,1,0,0,0,0,2,0
2,299100,1,1,0,0,1,1,4,0
3,355703,1,1,0,1,1,1,5,0
4,359052,1,1,0,0,1,1,4,0


In [21]:
Best_Practices_DF_wide['Best_Practice_All'].value_counts(normalize=True)*100

4    28.091867
5    21.742079
3    15.304356
2    11.591795
0     9.924120
6     6.699580
1     6.646203
Name: Best_Practice_All, dtype: float64

In [22]:
Arming_Query=  '''

with date_sql as (
select  count(distinct date(date)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< '2022-12-01' and date(date)>= '2022-11-01'

)

select id_cust as customer_id,sum(count_arm_commands) as sum_arm_commands,sum(count_disarm_commands) as sum_disarm_commands,count (distinct date(date)) as number_days_arming_disarming, (select count_of_dates from date_sql ) as count_of_dates_arming
from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< '2022-12-01' and date(date)>= '2022-11-01'
group by customer_id
order by number_days_arming_disarming desc

'''


In [23]:
Arming_DF=extract_bq_data(bq_client, sql=Arming_Query)

In [24]:
max_number_arming_dates=Arming_DF['count_of_dates_arming'].max()
Arming_DF['Arming_Consistency']=Arming_DF['number_days_arming_disarming'].apply(lambda x: x*100/max_number_arming_dates)

In [25]:
max_number_arming_dates

30

In [26]:
Arming_DF['Arming_Flag']=1

In [27]:
pd.DataFrame(Arming_DF['Arming_Consistency'].value_counts(normalize=True)*100).reset_index().sort_values(by='index',ascending=False)

Unnamed: 0,index,Arming_Consistency
0,100.0,26.315302
2,96.666667,7.671449
4,93.333333,4.907578
5,90.0,3.99092
6,86.666667,3.751872
7,83.333333,2.9464
9,80.0,2.539648
11,76.666667,2.432169
12,73.333333,2.235743
14,70.0,1.822197


In [28]:
Login_SQL='''

with date_sql as (
select  count(distinct date(login_dt_utc)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< '2022-12-01' and date(login_dt_utc)>= '2022-11-01' 

)



select customer_id,sum(login_count) as sum_login_count,count (distinct date(login_dt_utc)) as number_of_login_days, (select count_of_dates from date_sql ) as count_of_dates_logins
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< '2022-12-01' and date(login_dt_utc)>= '2022-11-01' 
group by customer_id
order by number_of_login_days desc


'''

In [29]:
Login_DF=extract_bq_data(bq_client, sql=Login_SQL)

In [30]:
max_number_login_dates=Login_DF['count_of_dates_logins'].max()
Login_DF['Login_Consistency']=Login_DF['number_of_login_days'].apply(lambda x: x*100/max_number_login_dates)

In [31]:
max_number_login_dates

28

In [32]:
Login_DF.head()

Unnamed: 0,customer_id,sum_login_count,number_of_login_days,count_of_dates_logins,Login_Consistency
0,7013212,114,28,28,100.0
1,5013714,159,28,28,100.0
2,7786426,117,28,28,100.0
3,6841955,263,28,28,100.0
4,9258010,187,28,28,100.0


In [33]:
Login_DF['Login_Consistency'].value_counts(normalize=True)*100

53.571429     23.271273
3.571429       9.587142
92.857143      7.573065
7.142857       6.514636
10.714286      5.007614
50.000000      4.583064
14.285714      4.221926
17.857143      3.758245
46.428571      3.726657
21.428571      3.427751
42.857143      3.393099
25.000000      3.290085
28.571429      3.186128
39.285714      3.142753
35.714286      3.058362
32.142857      3.024888
78.571429      1.333761
82.142857      1.244891
57.142857      1.219903
71.428571      1.134097
89.285714      1.010575
85.714286      0.735714
75.000000      0.691396
67.857143      0.675602
64.285714      0.615020
60.714286      0.565516
100.000000     0.006836
Name: Login_Consistency, dtype: float64

In [34]:
TC_issue_query='''

select dealer_name,customer_id,unit_id,device_id,trouble_condition_name,trouble_condition_group_id,trouble_condition_group_desc, start_date_utc,end_date_utc
from `cio-datahub-enterprise-pr-183a.src_adc.bq_troublecondition_data`
where trouble_condition_start_ind='Y'and trouble_condition_closed_ind='Y' 
and date(start_date_utc) >= '2022-09-01' and date(start_date_utc) <'2022-12-01' 


'''

In [35]:
TC_data= extract_bq_data(bq_client, sql=TC_issue_query)

In [36]:
TC_data.head()

Unnamed: 0,dealer_name,customer_id,unit_id,device_id,trouble_condition_name,trouble_condition_group_id,trouble_condition_group_desc,start_date_utc,end_date_utc
0,ADT by TELUS,14730915,104692547,16,Alarm,4,Central Station Condition,2022-10-26 16:49:01+00:00,2022-10-26 16:52:27+00:00
1,ADT by TELUS,14797857,104759489,8,Alarm,4,Central Station Condition,2022-10-26 13:02:43+00:00,2022-10-26 13:05:33+00:00
2,ADT by TELUS,14792384,104754016,12,Alarm,4,Central Station Condition,2022-10-26 15:12:07+00:00,2022-10-26 15:18:53+00:00
3,ADT by TELUS,14730946,104692578,5,Alarm,4,Central Station Condition,2022-10-26 10:27:30+00:00,2022-10-26 10:28:36+00:00
4,ADT by TELUS,14730358,104691990,127,Alarm,4,Central Station Condition,2022-10-26 12:46:56+00:00,2022-10-26 12:48:06+00:00


In [37]:
TC_data['count']=1

TC_pivot_table=pd.pivot_table(TC_data, values=['count'], index=['customer_id'], columns='trouble_condition_name',
                          aggfunc={'count': np.sum})

In [38]:
TC_pivot_table=TC_pivot_table.reset_index()

In [39]:
TC_pivot_table.head()

Unnamed: 0_level_0,customer_id,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count
trouble_condition_name,Unnamed: 1_level_1,ACFailure,Alarm,AuxSupply,BatteryAbsent,BellCircuit,BroadbandCommFailure,CameraNotCommunicating,CameraNotReachable,CarbonMonoxideAlert,...,SystemLocked,TamperEventsDisabled,ThermostatTooCold,ThermostatTooWarm,UploadsOverQuota,VideoDeviceFirmwareOutdated,VideoObjectDetectionRuleNotConfigured,WaterAlert,ZWaveRadioTrouble,ZoneDeviceMaskTrouble
0,210137,1.0,23.0,,1.0,,,,,,...,,,,,,,,,,
1,213860,,,,,,42.0,,,,...,,,,,,,,,,
2,217919,1.0,,,,,,,,,...,,,,,,,,,,
3,218873,,2.0,,,,,,2.0,,...,,,,,,,,,,
4,229153,,7.0,,,,,,,,...,,,,,,,,,,


In [40]:
TC_pivot_table.fillna(0,inplace=True)

In [41]:
TC_pivot_table.columns=['TC_Last3M_'+'_'.join(col).strip() for col in TC_pivot_table.columns.values]

In [42]:
# TC_pivot_table.columns=['customer_id','TC_BroadbandCommFailure_count','TC_CameraNotCommunicating_count','TC_CameraNotReachable_count']

In [43]:
# TC_pivot_table.fillna(0,inplace=True)

In [44]:
TC_pivot_table.head()

Unnamed: 0,TC_Last3M_customer_id_,TC_Last3M_count_ACFailure,TC_Last3M_count_Alarm,TC_Last3M_count_AuxSupply,TC_Last3M_count_BatteryAbsent,TC_Last3M_count_BellCircuit,TC_Last3M_count_BroadbandCommFailure,TC_Last3M_count_CameraNotCommunicating,TC_Last3M_count_CameraNotReachable,TC_Last3M_count_CarbonMonoxideAlert,...,TC_Last3M_count_SystemLocked,TC_Last3M_count_TamperEventsDisabled,TC_Last3M_count_ThermostatTooCold,TC_Last3M_count_ThermostatTooWarm,TC_Last3M_count_UploadsOverQuota,TC_Last3M_count_VideoDeviceFirmwareOutdated,TC_Last3M_count_VideoObjectDetectionRuleNotConfigured,TC_Last3M_count_WaterAlert,TC_Last3M_count_ZWaveRadioTrouble,TC_Last3M_count_ZoneDeviceMaskTrouble
0,210137,1.0,23.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,213860,0.0,0.0,0.0,0.0,0.0,42.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,217919,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,218873,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,229153,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [45]:
TCG_Pivot=pd.pivot_table(TC_data, values=['count'], index=['customer_id'], columns='trouble_condition_group_desc',
                          aggfunc={'count': np.sum})

In [46]:
TCG_Pivot=TCG_Pivot.reset_index()
TCG_Pivot.fillna(0,inplace=True)

In [47]:
TCG_Pivot.head()

Unnamed: 0_level_0,customer_id,count,count,count,count
trouble_condition_group_desc,Unnamed: 1_level_1,Central Station Condition,Critical System Issue,Engagement Issue,System Condition
0,210137,26.0,4.0,0.0,0.0
1,213860,0.0,1.0,0.0,42.0
2,217919,0.0,1.0,0.0,0.0
3,218873,2.0,3.0,1.0,0.0
4,229153,7.0,2.0,0.0,0.0


In [48]:
TCG_Pivot.columns=['TCG_Last3M_'+'_'.join(col).strip() for col in TCG_Pivot.columns.values]

In [49]:
TCG_Pivot.head()

Unnamed: 0,TCG_Last3M_customer_id_,TCG_Last3M_count_Central Station Condition,TCG_Last3M_count_Critical System Issue,TCG_Last3M_count_Engagement Issue,TCG_Last3M_count_System Condition
0,210137,26.0,4.0,0.0,0.0
1,213860,0.0,1.0,0.0,42.0
2,217919,0.0,1.0,0.0,0.0
3,218873,2.0,3.0,1.0,0.0
4,229153,7.0,2.0,0.0,0.0


In [50]:
# TC_pivot_table['TC_BroadbandCommFailure_Flag']=TC_pivot_table['TC_BroadbandCommFailure_count'].apply(lambda x: 1 if x>0 else 0)
# TC_pivot_table['TC_CameraNotCommunicating_Flag']=TC_pivot_table['TC_CameraNotCommunicating_count'].apply(lambda x: 1 if x>0 else 0)
# TC_pivot_table['TC_CameraNotReachable_Flag']=TC_pivot_table['TC_CameraNotReachable_count'].apply(lambda x: 1 if x>0 else 0)

In [51]:
Merge_DF=Customer_info.merge(Best_Practices_DF_wide,on='customer_id',how='left')

In [52]:
Merge_DF_1=Merge_DF.merge(Arming_DF,on='customer_id',how='left')

In [53]:
Merge_DF_2=Merge_DF_1.merge(Login_DF,on='customer_id',how='left')

In [54]:
Merge_DF_3=Merge_DF_2.merge(TC_pivot_table,left_on='customer_id',right_on='TC_Last3M_customer_id_',how='left')

In [55]:
Merge_DF_4=Merge_DF_3.merge(TCG_Pivot,left_on='customer_id',right_on='TCG_Last3M_customer_id_',how='left')

In [56]:
del Merge_DF_1,Merge_DF_2,Merge_DF_3

In [57]:
Merge_DF_4=Merge_DF_4.replace(np.nan, 0)

In [58]:
Merge_DF_4 = Merge_DF_4.drop(['customer_id_1','TC_Last3M_customer_id_','TCG_Last3M_customer_id_'], axis=1)

In [59]:
Merge_DF_4['dealer_name'].value_counts(normalize=True)*100

TELUS Communications Inc.    82.092731
ADT by TELUS                 17.907269
Name: dealer_name, dtype: float64

In [60]:
Merge_DF_4['Dealer_type_flag'].value_counts(normalize=True)*100

Telus_Customer    63.364184
ADT_Customer      36.635816
Name: Dealer_type_flag, dtype: float64

In [61]:
pd.DataFrame(Merge_DF_4.groupby(['dealer_name','Dealer_type_flag']).agg(
    
    Customer_count= ('Dealer_type_flag','count'),
    # Churn_total=('Churn_flag',lambda x: x.sum()),
    # Churn_rate=('Churn_flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,dealer_name,Dealer_type_flag,Customer_count
0,ADT by TELUS,ADT_Customer,99357
1,ADT by TELUS,Telus_Customer,288
2,TELUS Communications Inc.,ADT_Customer,104503
3,TELUS Communications Inc.,Telus_Customer,352302


In [62]:
rep_chars = ' |\|-|:|/'

Merge_DF_4.columns = Merge_DF_4.columns.str.replace(rep_chars, '_')

  Merge_DF_4.columns = Merge_DF_4.columns.str.replace(rep_chars, '_')


In [63]:
Merge_DF_4.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 556450 entries, 0 to 556449
Data columns (total 122 columns):
 #    Column                                                        Dtype              
---   ------                                                        -----              
 0    Month_Snapshot                                                dbdate             
 1    customer_id                                                   Int64              
 2    dealer_customer_id                                            object             
 3    primary_login_id                                              Int64              
 4    dealer_name                                                   object             
 5    join_date                                                     datetime64[ns, UTC]
 6    account_type_name                                             object             
 7    customer_type_name                                            object             
 8    pr

In [64]:
def Segment_making(row
                   ,min_login_consistency_threshold=50
                   ,min_arming_consistency_threshold=50
                   ,max_login_consistency_threshold=50
                   ,max_arming_consistency_threshold=50):
    
    if row['Best_Practice_All']>=5 and row['Login_Consistency']>=max_login_consistency_threshold  and row['Arming_Consistency']>=max_arming_consistency_threshold:
        return 'Heavy_User'
    elif row['Login_Consistency']>=max_login_consistency_threshold  and row['Arming_Consistency']< min_arming_consistency_threshold:
        return 'Home_automation_Savvy'
    elif row['Login_Consistency']< min_login_consistency_threshold  and row['Arming_Consistency']>=max_login_consistency_threshold:
        return 'Old_Fashion'
    elif row['Login_Consistency']==0  and row['Arming_Consistency']==0:
        return 'Disengaged'
    else:
        return "Moderate_Users"

In [65]:
Merge_DF_4['Segment']=Merge_DF_4.apply(Segment_making,axis=1)

In [66]:
Merge_DF_4['Segment'].value_counts()

Moderate_Users           222105
Disengaged               122449
Home_automation_Savvy     94163
Old_Fashion               80042
Heavy_User                37691
Name: Segment, dtype: int64

In [67]:
Merge_DF_4['Segment'].value_counts(normalize=True)*100

Moderate_Users           39.914637
Disengaged               22.005391
Home_automation_Savvy    16.922095
Old_Fashion              14.384401
Heavy_User                6.773475
Name: Segment, dtype: float64

In [68]:
pd.DataFrame(Merge_DF_4.groupby(['Dealer_type_flag','Segment']).agg(
    
    Customer_count= ('customer_id','count'),
    Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Churn_flag',lambda x: x.sum()),
    # Churn_rate=('Churn_flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,Dealer_type_flag,Segment,Customer_count,Customer_Share
0,ADT_Customer,Disengaged,35778,6.429688
1,ADT_Customer,Heavy_User,15699,2.821278
2,ADT_Customer,Home_automation_Savvy,30845,5.543175
3,ADT_Customer,Moderate_Users,73653,13.23623
4,ADT_Customer,Old_Fashion,47885,8.605445
5,Telus_Customer,Disengaged,86671,15.575703
6,Telus_Customer,Heavy_User,21992,3.952197
7,Telus_Customer,Home_automation_Savvy,63318,11.37892
8,Telus_Customer,Moderate_Users,148452,26.678408
9,Telus_Customer,Old_Fashion,32157,5.778956


In [69]:
Merge_DF_4['dealer_name'].value_counts()

TELUS Communications Inc.    456805
ADT by TELUS                  99645
Name: dealer_name, dtype: int64

In [70]:
# Merge_DF_4['dealer_name'=='ADT by TELUS'].to_csv('ADC_Master_Data_Sep2022_ADT.csv',index=False)

In [71]:
# config= bigquery.job.LoadJobConfig()

# # config._properties['timePartitioning'] = {'field': 'Month_Year'}
# config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

# bq_table_instance= bq_client.load_table_from_dataframe(Merge_DF_4, Table_BQ,job_config=config)

In [72]:
Merge_DF_4.drop(['TC_Last3M_count_CredentialsInConflict','TC_Last3M_count_LowControllerBattery'],inplace=True,axis=1)

In [73]:
Merge_DF_4.drop(['TC_Last3M_count_AuxSupply'],inplace=True,axis=1)

In [74]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND

Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

bq_table_instance= bq_client.load_table_from_dataframe(Merge_DF_4, Table_BQ,job_config=config)

In [75]:
# Merge_DF_4.to_csv('ADC_Master_Data_Nov2022.csv',index=False)

In [76]:
# ADC_Master_Query= '''


# select * from `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
# where Month_Snapshot!='2022-09-30'


# '''

In [77]:
# ADC_Master_DF=extract_bq_data(bq_client, sql=ADC_Master_Query)

In [78]:
# ADC_Master_DF.info()

In [79]:
# ADC_Master_DF['Month_Snapshot'].value_counts()

In [80]:
# config= bigquery.job.LoadJobConfig()

# # config._properties['timePartitioning'] = {'field': 'Month_Year'}
# config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

# bq_table_instance= bq_client.load_table_from_dataframe(ADC_Master_DF, Table_BQ,job_config=config)