In [1]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root = the part of the current working directory that precedes the first
# occurrence of the substring 'notebooks'. If the cwd contains no 'notebooks', the
# whole cwd is used as the root.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core.*` imports below resolve.
sys.path.insert(0, str(pth_project))
# Parse the project config inside a context manager so the file handle is closed
# deterministically instead of being left open for the life of the kernel.
with pth_creds.open() as fh_project_config:
    d_project_config = safe_load(fh_project_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
# connect_bq_services is a project helper imported from core.utils.gcp; it is called with the
# 'gcp-project-name' entry of the project config and its return value is used by the rest of
# the notebook as the BigQuery client.
# NOTE(review): how the helper authenticates is not visible from this notebook.
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Let pandas display up to 100 rows before truncating output.
pd.options.display.max_rows = 100

In [2]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Parameters
    ----------
    bq_client
        Object exposing ``query(sql)`` whose return value has ``to_dataframe()``.
    sql : str, optional
        Query text. When given it is used directly and ``pth_query`` is ignored.
    pth_query : optional
        Object with a ``read_text()`` method (e.g. a ``pathlib.Path``) from which the
        query text is read when ``sql`` is None.

    Returns
    -------
    Whatever ``bq_client.query(...).to_dataframe()`` returns.

    Raises
    ------
    ValueError
        If both ``sql`` and ``pth_query`` are None.
    """
    # Guard clause: nothing to run.
    if sql is None and pth_query is None:
        raise ValueError('`sql` or `pth_query` should be set')

    # Inline SQL wins; the file is only read when no SQL string was passed.
    query_text = sql if sql is not None else pth_query.read_text()
    return bq_client.query(query_text).to_dataframe()

In [3]:
# BigQuery script that builds the customer base for one monthly snapshot.
#   * ADC_Customer_Base   - one row per customer_id (latest last_updt_ts) from
#                           bq_customer_account_details, for rows whose last_updt_ts date equals
#                           _end_dt_snpsht and whose dealer_name is one of the two TELUS dealers.
#   * Telus_customers     - product instances of type 'SMHM', source id 1001, status 'A',
#                           consldt_cust_typ_cd 'R', with prod_instnc_ts on _end_dt_snpsht.
#   * Telus_Customer_Base - ADC customers whose dealer_customer_id matches a cust_bus_cust_id
#                           in Telus_customers; tagged 'Telus_Customer' and carrying the BAN.
#   * ADT_Customer_Base   - every customer_id in ADC_ADT_mapping; tagged 'ADT_Customer' and
#                           carrying site_no.
#   * Merge_data          - ADC_Customer_Base inner-joined to the union of the two bases.
#                           Because of `SELECT *` the join key is returned twice; the second copy
#                           shows up in the DataFrame as customer_id_1.
# NOTE(review): Month_Snapshot is hard-coded to '2023-01-01' independently of _end_dt_snpsht,
#   so both literals have to be edited together when the notebook is run for another month.
# NOTE(review): ADT_Customer_Base has no date or dealer filter, so the Dealer_type_flag is
#   driven by the mapping table alone and need not agree with dealer_name - confirm intended.
# NOTE(review): nothing here de-duplicates Telus_customers per cust_bus_cust_id or prevents a
#   customer_id from appearing in both bases - presumably either would yield several output
#   rows for one customer; verify against the data.
Customer_details='''

DECLARE _end_dt_snpsht DATE DEFAULT '2022-12-31';



with ADC_Customer_Base as (

SELECT date('2023-01-01') as Month_Snapshot,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where date(last_updt_ts) =_end_dt_snpsht
and  dealer_name in ('TELUS Communications Inc.','ADT by TELUS')
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id


)



,Telus_customers as
(
select bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id
--,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,pi_cntrct_start_ts as contract_start_date
--,pi_cntrct_end_ts as contract_end_date
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = _end_dt_snpsht #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
--QUALIFY ROW_NUMBER() OVER (PARTITION BY bacct_bus_bacct_num ORDER BY pi_prod_instnc_stat_ts DESC) = 1
order by cust_bus_cust_id

)



 ,Telus_Customer_Base as (

SELECT a.customer_id,'Telus_Customer' as Dealer_type_flag,b.bacct_bus_bacct_num as BAN, NULL as ADT_site_no
FROM ADC_Customer_Base a
inner join Telus_customers b
on a.dealer_customer_id=b.cust_bus_cust_id
)

, ADT_Customer_Base as (

SELECT customer_id,'ADT_Customer' as Dealer_type_flag,NULL  as BAN,site_no as ADT_site_no 
FROM `divgpras-pr-579355.ADC_updated.ADC_ADT_mapping` 

)



, Union_data as (

SELECT * from Telus_Customer_Base
UNION ALL
SELECT * from ADT_Customer_Base

)

,Merge_data as (

SELECT * from ADC_Customer_Base a
INNER JOIN Union_data b

on a.customer_id=b.customer_id
order by Dealer_type_flag
)

select * from Merge_data



'''

In [4]:
Customer_info=extract_bq_data(bq_client, sql=Customer_details)

In [5]:
Customer_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 579091 entries, 0 to 579090
Data columns (total 15 columns):
 #   Column              Non-Null Count   Dtype              
---  ------              --------------   -----              
 0   Month_Snapshot      579091 non-null  dbdate             
 1   customer_id         579091 non-null  Int64              
 2   dealer_customer_id  579091 non-null  object             
 3   primary_login_id    579091 non-null  Int64              
 4   dealer_name         579091 non-null  object             
 5   join_date           579091 non-null  datetime64[ns, UTC]
 6   account_type_name   579091 non-null  object             
 7   customer_type_name  579091 non-null  object             
 8   primary_email       579091 non-null  object             
 9   primary_phone       579091 non-null  object             
 10  last_updt_ts        579091 non-null  datetime64[ns, UTC]
 11  customer_id_1       579091 non-null  Int64              
 12  Dealer_type_flag

In [6]:
Customer_info['account_type_name'].value_counts()

Security System             433109
Awareness and Automation     97939
Standalone                   48043
Name: account_type_name, dtype: int64

In [7]:
Customer_info['dealer_name'].value_counts()

TELUS Communications Inc.    519707
ADT by TELUS                  59384
Name: dealer_name, dtype: int64

In [8]:
Customer_info.head()

Unnamed: 0,Month_Snapshot,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts,customer_id_1,Dealer_type_flag,BAN,ADT_site_no
0,2023-01-01,671094,E0052187,702719,TELUS Communications Inc.,2010-12-19 12:48:45+00:00,Security System,Customer,AebXOBdg0Gy6Nv5AeiQulq2UI1p+Wa9LOW7LIQgTFRqThC...,17809701114,2022-12-31 11:19:10.854776+00:00,671094,ADT_Customer,,700185494.0
1,2023-01-01,755535,105169822,792875,TELUS Communications Inc.,2011-04-21 08:04:59+00:00,Security System,Customer,AYhZsJdBq6VeiYz9WcLoWXHb23woZnJmzfJC9vfQnwj+y3...,16132910101,2022-12-31 11:19:10.854776+00:00,755535,ADT_Customer,,700172928.0
2,2023-01-01,955629,01053756,1005384,TELUS Communications Inc.,2011-07-17 21:34:33+00:00,Security System,Customer,AVpHhLHcTythX21+SRUuPiqcD2jNpdq+8SKJZZVnfFjIbn...,14503131313,2022-12-31 11:19:10.854776+00:00,955629,ADT_Customer,,700305903.0
3,2023-01-01,1165099,06010626,1234011,TELUS Communications Inc.,2012-01-06 11:20:10+00:00,Security System,Customer,AbRZQHiJ0FVQHxArDZh0bpUmyEofU/2dknfs8lSsroHNAc...,14165129161,2022-12-31 11:19:10.854776+00:00,1165099,ADT_Customer,,700258452.0
4,2023-01-01,1463461,09013859,1559134,TELUS Communications Inc.,2012-06-26 09:42:40+00:00,Security System,Customer,Ab8hFN31IqEwCBARPLfYQ3ZvhyPTsyzW1xlQ8lDYbAIdO1...,14502184901,2022-12-31 11:19:10.854776+00:00,1463461,ADT_Customer,,700124001.0


In [9]:
# SQL: one row per (customer_id, best_practices_id) among rows of bq_customer_best_practice
# whose dt_last_calculate_utc date is on or before 2022-12-31, returning the indicator and the
# calculation date.
# NOTE(review): the row kept per pair is the one with the latest last_updt_ts, while the filter
# and the returned date use dt_last_calculate_utc - confirm the two timestamps are meant to
# play these different roles.
# NOTE(review): the cut-off date is a literal here and must be kept in step with the snapshot
# date used in the customer query.
Best_practices_details_Query='''

select customer_id,best_practices_id,best_practices_ind,date(dt_last_calculate_utc) as last_date_calculate
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_best_practice`
where DATE(dt_last_calculate_utc)<='2022-12-31'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id,best_practices_id ORDER BY last_updt_ts DESC) = 1
order by customer_id,best_practices_id


'''

In [10]:
Best_Practices_DF= extract_bq_data(bq_client,sql=Best_practices_details_Query)

In [11]:
Best_Practices_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3729564 entries, 0 to 3729563
Data columns (total 4 columns):
 #   Column               Dtype  
---  ------               -----  
 0   customer_id          Int64  
 1   best_practices_id    Int64  
 2   best_practices_ind   boolean
 3   last_date_calculate  dbdate 
dtypes: Int64(2), boolean(1), dbdate(1)
memory usage: 99.6 MB


In [12]:
Best_Practices_DF.head()

Unnamed: 0,customer_id,best_practices_id,best_practices_ind,last_date_calculate
0,287850,1,True,2022-08-14
1,287850,2,False,2022-08-14
2,287850,3,False,2022-08-14
3,287850,4,False,2022-08-14
4,287850,5,True,2022-08-14


In [13]:
# Turn the nullable boolean indicator into a 0/1 integer flag (True -> 1, False -> 0).
# Done as a vectorised conversion; a missing indicator (<NA>) is counted as 0, whereas the
# previous element-wise `1 if x==True else 0` lambda raises on <NA> because the truth value
# of pd.NA is ambiguous.
Best_Practices_DF['Best_practices_flag']=Best_Practices_DF['best_practices_ind'].fillna(False).astype('int64')

In [14]:
Best_Practices_DF_wide=Best_Practices_DF.pivot(index='customer_id', columns='best_practices_id', values='Best_practices_flag').reset_index()

In [15]:
Best_Practices_DF_wide.head()

best_practices_id,customer_id,1,2,3,4,5,6
0,287850,1,0,0,0,1,0
1,292877,1,1,0,0,0,0
2,299100,1,1,0,0,1,1
3,355703,1,1,0,1,1,1
4,359052,1,1,0,0,1,1


In [16]:
# Positional rename of the pivoted columns: this assumes the pivot produced customer_id
# followed by exactly six best_practices_id columns in the order 1..6; any other set of ids
# makes the assignment fail (length mismatch) or mislabels the columns.
# The 'Best_partices' spelling is kept because the same names are referenced when the flags
# are summed in the following cell.
Best_Practices_DF_wide.columns=['customer_id','Best_partices_1','Best_partices_2','Best_partices_3','Best_partices_4','Best_partices_5','Best_partices_6']

In [17]:
# Per-customer number of satisfied best practices: row-wise sum of the six 0/1 flag columns.
cols_to_sum=[
    'Best_partices_1',
    'Best_partices_2',
    'Best_partices_3',
    'Best_partices_4',
    'Best_partices_5',
    'Best_partices_6',
]
Best_Practices_DF_wide['Best_Practice_All']=Best_Practices_DF_wide.loc[:,cols_to_sum].sum(axis='columns')

In [18]:
# 1 when the customer satisfies all six best practices (sum of the six flags equals 6), else 0.
# Vectorised comparison instead of a Python lambda evaluated once per row.
Best_Practices_DF_wide['Best_Practice_All_flag']=Best_Practices_DF_wide['Best_Practice_All'].eq(6).astype('int64')

In [19]:
Best_Practices_DF_wide.head()

Unnamed: 0,customer_id,Best_partices_1,Best_partices_2,Best_partices_3,Best_partices_4,Best_partices_5,Best_partices_6,Best_Practice_All,Best_Practice_All_flag
0,287850,1,0,0,0,1,0,2,0
1,292877,1,1,0,0,0,0,2,0
2,299100,1,1,0,0,1,1,4,0
3,355703,1,1,0,1,1,1,5,0
4,359052,1,1,0,0,1,1,4,0


In [20]:
Best_Practices_DF_wide['Best_Practice_All'].value_counts(normalize=True)*100

4    27.931898
5    21.611695
3    15.236151
2    11.533895
0    10.363356
1     6.668179
6     6.654826
Name: Best_Practice_All, dtype: float64

In [21]:
# SQL: per-customer arming activity for rows with 2022-12-01 <= date(date) < 2023-01-01 in
# bq_aggregate_daily_arming_commands:
#   sum_arm_commands / sum_disarm_commands - totals of the daily command counts
#   number_days_arming_disarming           - number of distinct dates on which the customer has a row
#   count_of_dates_arming                  - number of distinct dates present in the table for the
#                                            same window (identical on every output row)
# NOTE(review): a date is counted for a customer whenever a row exists, whatever the command
# counts on that row are - confirm rows are only written on days with activity.
Arming_Query=  '''

with date_sql as (
select  count(distinct date(date)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< '2023-01-01' and date(date)>= '2022-12-01'

)

select id_cust as customer_id,sum(count_arm_commands) as sum_arm_commands,sum(count_disarm_commands) as sum_disarm_commands,count (distinct date(date)) as number_days_arming_disarming, (select count_of_dates from date_sql ) as count_of_dates_arming
from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< '2023-01-01' and date(date)>= '2022-12-01'
group by customer_id
order by number_days_arming_disarming desc

'''


In [22]:
Arming_DF=extract_bq_data(bq_client, sql=Arming_Query)

In [23]:
# Denominator: the number of distinct dates the arming table holds for the query window.
# The query returns the same value on every row, so max() simply extracts it.
max_number_arming_dates=Arming_DF['count_of_dates_arming'].max()
# Arming_Consistency = percentage (0-100) of those dates on which the customer has a row.
Arming_DF['Arming_Consistency']=Arming_DF['number_days_arming_disarming'].apply(lambda x: x*100/max_number_arming_dates)

In [24]:
max_number_arming_dates

31

In [25]:
Arming_DF['Arming_Flag']=1

In [26]:
pd.DataFrame(Arming_DF['Arming_Consistency'].value_counts(normalize=True)*100).reset_index().sort_values(by='index',ascending=False)

Unnamed: 0,index,Arming_Consistency
0,100.0,19.270314
2,96.774194,8.216058
4,93.548387,5.023196
5,90.322581,3.852331
7,87.096774,3.334368
8,83.870968,3.041503
9,80.645161,2.797896
11,77.419355,2.553393
12,74.193548,2.355761
14,70.967742,2.210971


In [27]:
# SQL: per-customer login activity for rows with 2022-12-01 <= date(login_dt_utc) < 2023-01-01
# in bq_customer_daily_logins:
#   sum_login_count       - total of the daily login counts
#   number_of_login_days  - number of distinct dates on which the customer has a row
#   count_of_dates_logins - number of distinct dates present in the table for the same window
#                           (identical on every output row)
Login_SQL='''

with date_sql as (
select  count(distinct date(login_dt_utc)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< '2023-01-01' and date(login_dt_utc)>= '2022-12-01' 

)



select customer_id,sum(login_count) as sum_login_count,count (distinct date(login_dt_utc)) as number_of_login_days, (select count_of_dates from date_sql ) as count_of_dates_logins
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< '2023-01-01' and date(login_dt_utc)>= '2022-12-01' 
group by customer_id
order by number_of_login_days desc


'''

In [28]:
Login_DF=extract_bq_data(bq_client, sql=Login_SQL)

In [29]:
# Denominator: the number of distinct dates the login table holds for the query window.
# The query returns the same value on every row, so max() simply extracts it.
# NOTE(review): the recorded run shows 30 here but 31 for the arming table over the same
# month, so the login table appears to be missing one date and the two consistency
# percentages are computed against different denominators - confirm this is acceptable.
max_number_login_dates=Login_DF['count_of_dates_logins'].max()
# Login_Consistency = percentage (0-100) of those dates on which the customer has a row.
Login_DF['Login_Consistency']=Login_DF['number_of_login_days'].apply(lambda x: x*100/max_number_login_dates)

In [30]:
max_number_login_dates

30

In [31]:
Login_DF.head()

Unnamed: 0,customer_id,sum_login_count,number_of_login_days,count_of_dates_logins,Login_Consistency
0,12013831,416,30,30,100.0
1,12444292,1810,30,30,100.0
2,12378229,2986,30,30,100.0
3,11531894,438,30,30,100.0
4,9375771,440,30,30,100.0


In [32]:
Login_DF['Login_Consistency'].value_counts(normalize=True)*100

100.000000    26.901805
3.333333       7.433176
6.666667       5.118232
96.666667      4.031328
10.000000      3.916034
13.333333      3.358115
93.333333      2.951051
16.666667      2.924767
20.000000      2.564527
90.000000      2.451662
23.333333      2.377671
86.666667      2.282917
26.666667      2.208705
30.000000      2.111743
83.333333      2.088110
33.333333      2.037530
80.000000      1.989381
36.666667      1.953820
76.666667      1.950066
40.000000      1.880491
73.333333      1.860834
43.333333      1.823949
70.000000      1.786842
46.666667      1.739355
66.666667      1.738030
50.000000      1.729195
63.333333      1.708875
56.666667      1.700040
53.333333      1.699378
60.000000      1.682371
Name: Login_Consistency, dtype: float64

In [35]:
# SQL: event-level trouble-condition rows from bq_troublecondition_data with
# 2022-10-01 <= date(start_date_utc) < 2023-01-01 (the three months ending at the snapshot)
# where both trouble_condition_start_ind and trouble_condition_closed_ind are 'Y'.
# NOTE(review): requiring closed_ind='Y' presumably excludes conditions still open at
# extraction time - confirm that open conditions should not be counted.
# NOTE(review): there is no dealer filter; restriction to the customer base happens only
# through the later left join on customer_id.
TC_issue_query='''

select dealer_name,customer_id,unit_id,device_id,trouble_condition_name,trouble_condition_group_id,trouble_condition_group_desc, start_date_utc,end_date_utc
from `cio-datahub-enterprise-pr-183a.src_adc.bq_troublecondition_data`
where trouble_condition_start_ind='Y'and trouble_condition_closed_ind='Y' 
and date(start_date_utc) >= '2022-10-01' and date(start_date_utc) <'2023-01-01' 


'''

In [36]:
TC_data= extract_bq_data(bq_client, sql=TC_issue_query)

In [37]:
TC_data.head()

Unnamed: 0,dealer_name,customer_id,unit_id,device_id,trouble_condition_name,trouble_condition_group_id,trouble_condition_group_desc,start_date_utc,end_date_utc
0,ADT by TELUS,14797857,104759489,8,Alarm,4,Central Station Condition,2022-10-26 13:02:43+00:00,2022-10-26 13:05:33+00:00
1,ADT by TELUS,14792384,104754016,12,Alarm,4,Central Station Condition,2022-10-26 15:12:07+00:00,2022-10-26 15:18:53+00:00
2,ADT by TELUS,14730946,104692578,5,Alarm,4,Central Station Condition,2022-10-26 10:27:30+00:00,2022-10-26 10:28:36+00:00
3,ADT by TELUS,14730358,104691990,127,Alarm,4,Central Station Condition,2022-10-26 12:46:56+00:00,2022-10-26 12:48:06+00:00
4,ADT by TELUS,14675942,104637574,5,Alarm,4,Central Station Condition,2022-10-26 09:14:40+00:00,2022-10-26 09:18:56+00:00


In [38]:
# Helper column of ones: summing it per group gives the number of trouble-condition rows.
# It stays on TC_data because the trouble-condition-group pivot further down reuses it.
TC_data['count']=1

# Wide table indexed by customer_id with one ('count', <trouble_condition_name>) column per
# condition name; customer/condition combinations that never occur are NaN.
# The aggregation is given as the string 'sum' rather than np.sum: passing the NumPy callable
# to a pandas aggregation is deprecated in newer pandas releases, and the string selects the
# same built-in sum in every version.
TC_pivot_table=pd.pivot_table(TC_data, values=['count'], index=['customer_id'], columns='trouble_condition_name',
                          aggfunc={'count': 'sum'})

In [39]:
TC_pivot_table=TC_pivot_table.reset_index()

In [40]:
TC_pivot_table.head()

Unnamed: 0_level_0,customer_id,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count
trouble_condition_name,Unnamed: 1_level_1,ACFailure,Alarm,AuxSupply,BatteryAbsent,BellCircuit,BroadbandCommFailure,CameraNotCommunicating,CameraNotReachable,CarbonMonoxideAlert,...,SvrRecordingScheduleNotSetUp,SystemLocked,TamperEventsDisabled,ThermostatTooCold,ThermostatTooWarm,UploadsOverQuota,VideoObjectDetectionRuleNotConfigured,WaterAlert,ZWaveRadioTrouble,ZoneDeviceMaskTrouble
0,210137,1.0,,,,,,,,,...,,,,,,,,,,
1,213860,,,,,,3.0,,,,...,,,,,,,,,,
2,218873,,,,,,,,2.0,,...,,,,,,,,,,
3,229153,,,,,,,,,,...,,,,,,,,,,
4,230623,,1.0,,,,,,,,...,,,,,,,,,,


In [41]:
TC_pivot_table.fillna(0,inplace=True)

In [42]:
# Collapse the two-level pivot header into flat names: 'TC_Last3M_' followed by both levels
# joined with '_'. The customer_id column has an empty second level, so it becomes
# 'TC_Last3M_customer_id_' (with a trailing underscore), which is the key the later merge uses.
flattened_tc_names=[]
for level_pair in TC_pivot_table.columns.values:
    flattened_tc_names.append('TC_Last3M_'+'_'.join(level_pair).strip())
TC_pivot_table.columns=flattened_tc_names

In [43]:
# TC_pivot_table.columns=['customer_id','TC_BroadbandCommFailure_count','TC_CameraNotCommunicating_count','TC_CameraNotReachable_count']

In [44]:
# TC_pivot_table.fillna(0,inplace=True)

In [45]:
TC_pivot_table.head()

Unnamed: 0,TC_Last3M_customer_id_,TC_Last3M_count_ACFailure,TC_Last3M_count_Alarm,TC_Last3M_count_AuxSupply,TC_Last3M_count_BatteryAbsent,TC_Last3M_count_BellCircuit,TC_Last3M_count_BroadbandCommFailure,TC_Last3M_count_CameraNotCommunicating,TC_Last3M_count_CameraNotReachable,TC_Last3M_count_CarbonMonoxideAlert,...,TC_Last3M_count_SvrRecordingScheduleNotSetUp,TC_Last3M_count_SystemLocked,TC_Last3M_count_TamperEventsDisabled,TC_Last3M_count_ThermostatTooCold,TC_Last3M_count_ThermostatTooWarm,TC_Last3M_count_UploadsOverQuota,TC_Last3M_count_VideoObjectDetectionRuleNotConfigured,TC_Last3M_count_WaterAlert,TC_Last3M_count_ZWaveRadioTrouble,TC_Last3M_count_ZoneDeviceMaskTrouble
0,210137,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,213860,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,218873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,229153,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,230623,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [46]:
# Same counting pivot as for individual trouble conditions, but grouped by
# trouble_condition_group_desc. Relies on the helper column TC_data['count'] (all ones)
# added to TC_data earlier in the notebook.
# The aggregation is given as the string 'sum' rather than np.sum: passing the NumPy callable
# to a pandas aggregation is deprecated in newer pandas releases, and the string selects the
# same built-in sum in every version.
TCG_Pivot=pd.pivot_table(TC_data, values=['count'], index=['customer_id'], columns='trouble_condition_group_desc',
                          aggfunc={'count': 'sum'})

In [47]:
# Move customer_id from the index back to a column and treat absent groups as a count of 0.
TCG_Pivot=TCG_Pivot.reset_index().fillna(0)

In [48]:
TCG_Pivot.head()

Unnamed: 0_level_0,customer_id,count,count,count,count
trouble_condition_group_desc,Unnamed: 1_level_1,Central Station Condition,Critical System Issue,Engagement Issue,System Condition
0,210137,0.0,1.0,0.0,0.0
1,213860,0.0,0.0,0.0,3.0
2,218873,0.0,3.0,1.0,0.0
3,229153,0.0,1.0,0.0,0.0
4,230623,1.0,0.0,0.0,0.0


In [49]:
# Collapse the two-level pivot header into flat names: 'TCG_Last3M_' followed by both levels
# joined with '_'. The customer_id column has an empty second level, so it becomes
# 'TCG_Last3M_customer_id_' (with a trailing underscore), which is the key the later merge uses.
flattened_tcg_names=[]
for level_pair in TCG_Pivot.columns.values:
    flattened_tcg_names.append('TCG_Last3M_'+'_'.join(level_pair).strip())
TCG_Pivot.columns=flattened_tcg_names

In [50]:
TCG_Pivot.head()

Unnamed: 0,TCG_Last3M_customer_id_,TCG_Last3M_count_Central Station Condition,TCG_Last3M_count_Critical System Issue,TCG_Last3M_count_Engagement Issue,TCG_Last3M_count_System Condition
0,210137,0.0,1.0,0.0,0.0
1,213860,0.0,0.0,0.0,3.0
2,218873,0.0,3.0,1.0,0.0
3,229153,0.0,1.0,0.0,0.0
4,230623,1.0,0.0,0.0,0.0


In [51]:
# TC_pivot_table['TC_BroadbandCommFailure_Flag']=TC_pivot_table['TC_BroadbandCommFailure_count'].apply(lambda x: 1 if x>0 else 0)
# TC_pivot_table['TC_CameraNotCommunicating_Flag']=TC_pivot_table['TC_CameraNotCommunicating_count'].apply(lambda x: 1 if x>0 else 0)
# TC_pivot_table['TC_CameraNotReachable_Flag']=TC_pivot_table['TC_CameraNotReachable_count'].apply(lambda x: 1 if x>0 else 0)

In [52]:
Merge_DF=Customer_info.merge(Best_Practices_DF_wide,on='customer_id',how='left')

In [53]:
Merge_DF_1=Merge_DF.merge(Arming_DF,on='customer_id',how='left')

In [54]:
Merge_DF_2=Merge_DF_1.merge(Login_DF,on='customer_id',how='left')

In [55]:
Merge_DF_3=Merge_DF_2.merge(TC_pivot_table,left_on='customer_id',right_on='TC_Last3M_customer_id_',how='left')

In [56]:
Merge_DF_4=Merge_DF_3.merge(TCG_Pivot,left_on='customer_id',right_on='TCG_Last3M_customer_id_',how='left')

In [57]:
del Merge_DF_1,Merge_DF_2,Merge_DF_3

In [58]:
# Zero-fill the missing values left by the left joins (customers without best-practice,
# arming, login or trouble-condition rows).
# NOTE(review): the replacement is applied to every column, so the NULL BAN / ADT_site_no
# values produced by the customer query are presumably turned into 0 as well - confirm
# that identifiers should be zero-filled rather than left missing.
Merge_DF_4=Merge_DF_4.replace(np.nan, 0)

In [59]:
Merge_DF_4 = Merge_DF_4.drop(['customer_id_1','TC_Last3M_customer_id_','TCG_Last3M_customer_id_'], axis=1)

In [60]:
Merge_DF_4['dealer_name'].value_counts(normalize=True)*100

TELUS Communications Inc.    89.745308
ADT by TELUS                 10.254692
Name: dealer_name, dtype: float64

In [61]:
Merge_DF_4['Dealer_type_flag'].value_counts(normalize=True)*100

Telus_Customer    64.716599
ADT_Customer      35.283401
Name: Dealer_type_flag, dtype: float64

In [62]:
pd.DataFrame(Merge_DF_4.groupby(['dealer_name','Dealer_type_flag']).agg(
    
    Customer_count= ('Dealer_type_flag','count'),
    # Churn_total=('Churn_flag',lambda x: x.sum()),
    # Churn_rate=('Churn_flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,dealer_name,Dealer_type_flag,Customer_count
0,ADT by TELUS,ADT_Customer,59169
1,ADT by TELUS,Telus_Customer,215
2,TELUS Communications Inc.,ADT_Customer,145154
3,TELUS Communications Inc.,Telus_Customer,374553


In [63]:
# Regular expression of character sequences to replace with '_' in column names so they are
# plain identifiers. Written as a raw string so that `\|` is not an invalid Python escape;
# the pattern characters themselves are unchanged.
# NOTE(review): as written, `\|-` is a single alternative that matches the two-character
# sequence '|-', so a lone '|' or a lone '-' is NOT replaced. If the intent was to replace
# each of space, '|', '-', ':' and '/', the pattern should be r'[ |\-:/]' - confirm before
# changing, because these names feed the BigQuery load.
rep_chars = r' |\|-|:|/'

# regex=True is passed explicitly: the implicit default of str.replace changed to literal
# matching in pandas 2.0, which would turn this statement into a silent no-op.
Merge_DF_4.columns = Merge_DF_4.columns.str.replace(rep_chars, '_', regex=True)

  Merge_DF_4.columns = Merge_DF_4.columns.str.replace(rep_chars, '_')


In [64]:
Merge_DF_4.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 579091 entries, 0 to 579090
Data columns (total 118 columns):
 #    Column                                                        Dtype              
---   ------                                                        -----              
 0    Month_Snapshot                                                dbdate             
 1    customer_id                                                   Int64              
 2    dealer_customer_id                                            object             
 3    primary_login_id                                              Int64              
 4    dealer_name                                                   object             
 5    join_date                                                     datetime64[ns, UTC]
 6    account_type_name                                             object             
 7    customer_type_name                                            object             
 8    pr

In [65]:
def Segment_making(row
                   ,min_login_consistency_threshold=50
                   ,min_arming_consistency_threshold=50
                   ,max_login_consistency_threshold=50
                   ,max_arming_consistency_threshold=50):
    """Assign an engagement segment label to one customer row.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'Best_Practice_All', 'Login_Consistency' and 'Arming_Consistency'.
    min_login_consistency_threshold, min_arming_consistency_threshold : number
        A consistency strictly below the respective value counts as "low".
    max_login_consistency_threshold, max_arming_consistency_threshold : number
        A consistency at or above the respective value counts as "high".

    Returns
    -------
    str
        The first matching label, checked in this order:
        'Heavy_User'            - at least 5 best practices, high login and high arming
        'Home_automation_Savvy' - high login, low arming
        'Old_Fashion'           - low login, high arming
        'Disengaged'            - login and arming consistency both exactly 0
        'Moderate_Users'        - everything else
    """
    login_pct = row['Login_Consistency']
    arming_pct = row['Arming_Consistency']

    high_login = login_pct >= max_login_consistency_threshold
    high_arming = arming_pct >= max_arming_consistency_threshold

    if row['Best_Practice_All'] >= 5 and high_login and high_arming:
        return 'Heavy_User'
    if high_login and arming_pct < min_arming_consistency_threshold:
        return 'Home_automation_Savvy'
    # Arming is compared against the *arming* threshold here. The earlier version used
    # max_login_consistency_threshold, which only gave the right answer while both
    # thresholds shared the same value (as the defaults do).
    if login_pct < min_login_consistency_threshold and high_arming:
        return 'Old_Fashion'
    if login_pct == 0 and arming_pct == 0:
        return 'Disengaged'
    return "Moderate_Users"

In [66]:
Merge_DF_4['Segment']=Merge_DF_4.apply(Segment_making,axis=1)

In [67]:
Merge_DF_4['Segment'].value_counts()

Moderate_Users           210495
Home_automation_Savvy    148440
Disengaged               113873
Old_Fashion               63068
Heavy_User                43215
Name: Segment, dtype: int64

In [68]:
Merge_DF_4['Segment'].value_counts(normalize=True)*100

Moderate_Users           36.349209
Home_automation_Savvy    25.633277
Disengaged               19.664094
Old_Fashion              10.890862
Heavy_User                7.462558
Name: Segment, dtype: float64

In [69]:
Merge_DF_4['dealer_name'].value_counts()

TELUS Communications Inc.    519707
ADT by TELUS                  59384
Name: dealer_name, dtype: int64

In [70]:
# Merge_DF_4['dealer_name'=='ADT by TELUS'].to_csv('ADC_Master_Data_Sep2022_ADT.csv',index=False)

In [71]:
# config= bigquery.job.LoadJobConfig()

# # config._properties['timePartitioning'] = {'field': 'Month_Year'}
# config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

# bq_table_instance= bq_client.load_table_from_dataframe(Merge_DF_4, Table_BQ,job_config=config)

In [72]:
Merge_DF_4.drop(['TC_Last3M_count_CredentialsInConflict','TC_Last3M_count_LowControllerBattery'],inplace=True,axis=1)

In [75]:
Merge_DF_4.drop(['TC_Last3M_count_AuxSupply'],inplace=True,axis=1)

In [76]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
# WRITE_APPEND adds the rows to the existing table, so running this cell twice for the same
# Month_Snapshot loads that snapshot twice.
config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND

Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

bq_table_instance= bq_client.load_table_from_dataframe(Merge_DF_4, Table_BQ,job_config=config)
# load_table_from_dataframe only starts the load job. Block until it has finished so that a
# failed load (e.g. a schema mismatch) raises here instead of passing unnoticed.
bq_table_instance.result()

In [73]:
# Merge_DF_4.to_csv('ADC_Master_Data_Oct2022.csv',index=False)

In [74]:
# ADC_Master_Query= '''


# select * from `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
# where Month_Snapshot!='2022-09-30'


# '''

In [75]:
# ADC_Master_DF=extract_bq_data(bq_client, sql=ADC_Master_Query)

In [76]:
# ADC_Master_DF.info()

In [77]:
# ADC_Master_DF['Month_Snapshot'].value_counts()

In [78]:
# config= bigquery.job.LoadJobConfig()

# # config._properties['timePartitioning'] = {'field': 'Month_Year'}
# config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

# bq_table_instance= bq_client.load_table_from_dataframe(ADC_Master_DF, Table_BQ,job_config=config)