In [1]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root = everything in the current working directory that precedes the
# first occurrence of 'notebooks' (the whole cwd if that text is absent).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Make the project-local `core` package importable from the notebook.
sys.path.insert(0, str(pth_project))
# Read the project config inside a context manager so the file handle is closed.
with pth_creds.open() as config_file:
    d_project_config = safe_load(config_file)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [2]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result of ``to_dataframe()``.

    Args:
        bq_client: object exposing ``query(sql)`` whose result has ``to_dataframe()``.
        sql: query text; takes precedence over ``pth_query`` when not None.
        pth_query: object exposing ``read_text()`` that returns the query text;
            only consulted when ``sql`` is None.

    Returns:
        Whatever ``bq_client.query(...).to_dataframe()`` returns.

    Raises:
        ValueError: if both ``sql`` and ``pth_query`` are None.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Fall back to the query stored on disk.
        sql = pth_query.read_text()
    return bq_client.query(sql).to_dataframe()

In [3]:
# BigQuery script building the customer base for one monthly snapshot.
#   * ADC_Customer_Base   - latest row per customer_id on the snapshot date for the two TELUS dealers.
#   * Telus_customers     - active ('A') 'SMHM' product instances on the snapshot date.
#   * Telus_Customer_Base - ADC customers joined to Telus_customers via dealer_customer_id = cust_bus_cust_id.
#   * ADT_Customer_Base   - customer_id -> site_no pairs from the ADC_ADT_mapping table.
#   * Merge_data          - ADC_Customer_Base inner-joined to the union of both bases.
# The snapshot date is set once in `_end_dt_snpsht`; Month_Snapshot is derived from it
# (first day of that month) so the two can no longer drift apart.
# `SELECT *` over the join returns customer_id twice; the second copy arrives in
# pandas as `customer_id_1`.
# NOTE(review): the joins are not de-duplicated, so one customer_id can yield several
# rows - confirm that this is intended.
Customer_details='''

DECLARE _end_dt_snpsht DATE DEFAULT '2022-09-30';

with ADC_Customer_Base as (

SELECT DATE_TRUNC(_end_dt_snpsht, MONTH) as Month_Snapshot,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details`
where date(last_updt_ts) =_end_dt_snpsht
and  dealer_name in ('TELUS Communications Inc.','ADT by TELUS')
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id

)

,Telus_customers as
(
select bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id
--,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,pi_cntrct_start_ts as contract_start_date
--,pi_cntrct_end_ts as contract_end_date
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht`
WHERE DATE(prod_instnc_ts) = _end_dt_snpsht #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Service type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
--QUALIFY ROW_NUMBER() OVER (PARTITION BY bacct_bus_bacct_num ORDER BY pi_prod_instnc_stat_ts DESC) = 1
order by cust_bus_cust_id

)

 ,Telus_Customer_Base as (

SELECT a.customer_id,'Telus_Customer' as Dealer_type_flag,b.bacct_bus_bacct_num as BAN, NULL as ADT_site_no
FROM ADC_Customer_Base a
inner join Telus_customers b
on a.dealer_customer_id=b.cust_bus_cust_id
)

, ADT_Customer_Base as (

SELECT customer_id,'ADT_Customer' as Dealer_type_flag,NULL  as BAN,site_no as ADT_site_no
FROM `divgpras-pr-579355.ADC_updated.ADC_ADT_mapping`

)

, Union_data as (

SELECT * from Telus_Customer_Base
UNION ALL
SELECT * from ADT_Customer_Base

)

,Merge_data as (

SELECT * from ADC_Customer_Base a
INNER JOIN Union_data b

on a.customer_id=b.customer_id
order by Dealer_type_flag
)

select * from Merge_data

'''

In [4]:
# Execute the Customer_details script and load its result into a DataFrame.
Customer_info=extract_bq_data(bq_client, sql=Customer_details)

In [5]:
Customer_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 533503 entries, 0 to 533502
Data columns (total 15 columns):
 #   Column              Non-Null Count   Dtype              
---  ------              --------------   -----              
 0   Month_Snapshot      533503 non-null  dbdate             
 1   customer_id         533503 non-null  Int64              
 2   dealer_customer_id  533503 non-null  object             
 3   primary_login_id    533503 non-null  Int64              
 4   dealer_name         533503 non-null  object             
 5   join_date           533503 non-null  datetime64[ns, UTC]
 6   account_type_name   533503 non-null  object             
 7   customer_type_name  533503 non-null  object             
 8   primary_email       533503 non-null  object             
 9   primary_phone       533503 non-null  object             
 10  last_updt_ts        533503 non-null  datetime64[ns, UTC]
 11  customer_id_1       533503 non-null  Int64              
 12  Dealer_type_flag

In [6]:
# Rows per customer_id.
# NOTE(review): the output below lists ids with 27 rows and 523316 distinct ids,
# so Customer_info is not unique per customer - confirm this is intended.
Customer_info['customer_id'].value_counts()

13901033    27
13901046    27
13796294    27
13901041    27
13407055    27
            ..
12448127     1
12453530     1
12454969     1
12455010     1
14629185     1
Name: customer_id, Length: 523316, dtype: Int64

In [7]:
Customer_info.isna().sum()*100/Customer_info.shape[0]

Month_Snapshot         0.000000
customer_id            0.000000
dealer_customer_id     0.000000
primary_login_id       0.000000
dealer_name            0.000000
join_date              0.000000
account_type_name      0.000000
customer_type_name     0.000000
primary_email          0.000000
primary_phone          0.000000
last_updt_ts           0.000000
customer_id_1          0.000000
Dealer_type_flag       0.000000
BAN                   37.998287
ADT_site_no           62.001713
dtype: float64

In [8]:
# Customer_info['BAN']=Customer_info['BAN'].fillna(0)

In [9]:
# Customer_info['ADT_site_no']=Customer_info['ADT_site_no'].fillna(0)

In [10]:
Customer_info['account_type_name'].value_counts()

Security System             407419
Awareness and Automation     79008
Standalone                   47076
Name: account_type_name, dtype: int64

In [11]:
Customer_info['dealer_name'].value_counts()

TELUS Communications Inc.    330354
ADT by TELUS                 203149
Name: dealer_name, dtype: int64

In [12]:
# NOTE(review): this preview renders the primary_email and primary_phone columns;
# consider clearing the cell output before the notebook is shared.
Customer_info.head()

Unnamed: 0,Month_Snapshot,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts,customer_id_1,Dealer_type_flag,BAN,ADT_site_no
0,2022-09-01,975124,9031103,1026047,ADT by TELUS,2011-07-26 15:20:45+00:00,Security System,Customer,AcP5K03DwjVQXjEhFdMuYDdPecyXXrxYn0luenJHVzNTF4...,15177040970,2022-09-30 11:27:24.729296+00:00,975124,ADT_Customer,,700152631.0
1,2022-09-01,988732,1040864,1040428,ADT by TELUS,2011-08-01 13:06:17+00:00,Security System,Customer,AQIAOe6TST2/E3v6PDAoUqzhRQatoTuSuNEX8vHvFjKYA5...,15146780540,2022-09-30 11:27:24.729296+00:00,988732,ADT_Customer,,700150387.0
2,2022-09-01,1061341,321293,1117777,ADT by TELUS,2011-09-12 23:45:27+00:00,Security System,Customer,AY68U4ny1c3DktdOk/PWIeoa1SiNMC4rB6vVVVD8o/6vJw==,12505633193,2022-09-30 11:27:24.729296+00:00,1061341,ADT_Customer,,700287739.0
3,2022-09-01,1095359,1016685,1155226,ADT by TELUS,2011-10-18 11:24:39+00:00,Security System,Customer,AZEDdaC/Ghn9vM7Q/Q5uWDzaLGQ9R1mRw2LHqAaBMqztiO2s,15149239614,2022-09-30 11:27:24.729296+00:00,1095359,ADT_Customer,,700112460.0
4,2022-09-01,1222668,9602539,1298722,ADT by TELUS,2012-02-28 11:57:21+00:00,Security System,Customer,AeKBG4RmtIhQ9YtO78XBrtHXqQKAVuWtNPog01mT12XBbW...,14504322858,2022-09-30 11:27:24.729296+00:00,1222668,ADT_Customer,,700306496.0


In [13]:
# One row per (customer_id, best_practices_id): rows calculated on or before
# 2022-09-30, keeping the one with the most recent last_updt_ts.
# NOTE(review): the date filter uses dt_last_calculate_utc while recency is ranked
# by last_updt_ts - confirm the two are meant to differ.
Best_practices_details_Query='''

select customer_id,best_practices_id,best_practices_ind,date(dt_last_calculate_utc) as last_date_calculate
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_best_practice`
where DATE(dt_last_calculate_utc)<='2022-09-30'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id,best_practices_id ORDER BY last_updt_ts DESC) = 1
order by customer_id,best_practices_id


'''

In [14]:
# Load the best-practice indicators (long format) into a DataFrame.
Best_Practices_DF= extract_bq_data(bq_client,sql=Best_practices_details_Query)

In [15]:
Best_Practices_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3652638 entries, 0 to 3652637
Data columns (total 4 columns):
 #   Column               Dtype  
---  ------               -----  
 0   customer_id          Int64  
 1   best_practices_id    Int64  
 2   best_practices_ind   boolean
 3   last_date_calculate  dbdate 
dtypes: Int64(2), boolean(1), dbdate(1)
memory usage: 97.5 MB


In [16]:
Best_Practices_DF.head()

Unnamed: 0,customer_id,best_practices_id,best_practices_ind,last_date_calculate
0,287850,1,True,2022-08-14
1,287850,2,False,2022-08-14
2,287850,3,False,2022-08-14
3,287850,4,False,2022-08-14
4,287850,5,True,2022-08-14


In [17]:
# 1 where best_practices_ind is True, otherwise 0.
# Vectorised and NA-safe: with the nullable `boolean` dtype a missing indicator makes
# the row-wise `1 if x==True else 0` raise ("boolean value of NA is ambiguous");
# here a missing indicator is counted as 0.
Best_Practices_DF['Best_practices_flag'] = (
    Best_Practices_DF['best_practices_ind'].eq(True).fillna(False).astype('int64')
)

In [18]:
# Long -> wide: one row per customer_id, one column per best_practices_id,
# cells holding the 0/1 Best_practices_flag.
Best_Practices_DF_wide = (
    Best_Practices_DF
    .pivot(values='Best_practices_flag', columns='best_practices_id', index='customer_id')
    .reset_index()
)

In [19]:
Best_Practices_DF_wide.head()

best_practices_id,customer_id,1,2,3,4,5,6
0,287850,1,0,0,0,1,0
1,292877,1,1,0,0,0,0
2,299100,1,1,0,0,1,1
3,355703,1,1,0,1,1,1
4,359052,1,1,0,0,1,1


In [20]:
# Positional rename: expects exactly `customer_id` followed by six practice columns.
# The 'Best_partices_*' spelling is kept as-is because later cells refer to it.
Best_Practices_DF_wide.columns = ['customer_id'] + [
    f'Best_partices_{practice_no}' for practice_no in range(1, 7)
]

In [21]:
# Number of best practices met per customer: row-wise sum of the six 0/1 columns.
cols_to_sum = [f'Best_partices_{practice_no}' for practice_no in range(1, 7)]
Best_Practices_DF_wide['Best_Practice_All'] = (
    Best_Practices_DF_wide.loc[:, cols_to_sum].sum(axis=1)
)

In [22]:
# 1 when all six best practices are met (sum equals 6), otherwise 0.
Best_Practices_DF_wide['Best_Practice_All_flag'] = (
    Best_Practices_DF_wide['Best_Practice_All'].eq(6).astype('int64')
)

In [23]:
Best_Practices_DF_wide.head()

Unnamed: 0,customer_id,Best_partices_1,Best_partices_2,Best_partices_3,Best_partices_4,Best_partices_5,Best_partices_6,Best_Practice_All,Best_Practice_All_flag
0,287850,1,0,0,0,1,0,2,0
1,292877,1,1,0,0,0,0,2,0
2,299100,1,1,0,0,1,1,4,0
3,355703,1,1,0,1,1,1,5,0
4,359052,1,1,0,0,1,1,4,0


In [24]:
Best_Practices_DF_wide['Best_Practice_All'].value_counts(normalize=True)*100

4    28.336506
5    21.922950
3    15.389480
2    11.664446
0     9.321208
6     6.769518
1     6.595890
Name: Best_Practice_All, dtype: float64

In [25]:
# Per-customer arming activity for 2022-09-01 .. 2022-09-30: total arm / disarm
# commands and the number of distinct dates with a record.
# `count_of_dates_arming` is the number of distinct dates present in the table for
# that window (the same value on every row); it is not the calendar length of the month.
Arming_Query=  '''

with date_sql as (
select  count(distinct date(date)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< '2022-10-01' and date(date)>= '2022-09-01'

)

select id_cust as customer_id,sum(count_arm_commands) as sum_arm_commands,sum(count_disarm_commands) as sum_disarm_commands,count (distinct date(date)) as number_days_arming_disarming, (select count_of_dates from date_sql ) as count_of_dates_arming
from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< '2022-10-01' and date(date)>= '2022-09-01'
group by customer_id
order by number_days_arming_disarming desc

'''


In [26]:
# Load the per-customer arming aggregates into a DataFrame.
Arming_DF=extract_bq_data(bq_client, sql=Arming_Query)

In [27]:
# Denominator: distinct dates available in the arming table for the window.
max_number_arming_dates = Arming_DF['count_of_dates_arming'].max()
# Percentage of those dates on which the customer has an arming record.
Arming_DF['Arming_Consistency'] = Arming_DF['number_days_arming_disarming'].map(
    lambda active_days: active_days * 100 / max_number_arming_dates
)

In [28]:
max_number_arming_dates

23

In [29]:
# Constant marker: every row of Arming_DF gets Arming_Flag = 1.
Arming_DF['Arming_Flag']=1

In [30]:
Arming_DF['Arming_Consistency'].value_counts(normalize=True)*100

100.000000    28.509748
95.652174      8.484381
4.347826       8.265297
91.304348      5.934806
8.695652       5.343536
86.956522      5.283640
82.608696      3.768950
78.260870      3.315087
13.043478      3.259675
73.913043      3.218998
17.391304      2.691466
21.739130      2.239846
69.565217      2.210058
26.086957      1.972717
65.217391      1.933961
30.434783      1.872143
34.782609      1.782460
60.869565      1.763562
56.521739      1.684128
39.130435      1.649536
52.173913      1.629678
47.826087      1.593804
43.478261      1.592523
Name: Arming_Consistency, dtype: float64

In [31]:
# Per-customer login activity for 2022-09-01 .. 2022-09-30: total login_count and
# the number of distinct login dates.
# `count_of_dates_logins` is the number of distinct dates present in the table for
# that window (the same value on every row).
Login_SQL='''

with date_sql as (
select  count(distinct date(login_dt_utc)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< '2022-10-01' and date(login_dt_utc)>= '2022-09-01' 

)



select customer_id,sum(login_count) as sum_login_count,count (distinct date(login_dt_utc)) as number_of_login_days, (select count_of_dates from date_sql ) as count_of_dates_logins
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< '2022-10-01' and date(login_dt_utc)>= '2022-09-01' 
group by customer_id
order by number_of_login_days desc


'''

In [32]:
# Load the per-customer login aggregates into a DataFrame.
Login_DF=extract_bq_data(bq_client, sql=Login_SQL)

In [33]:
# Denominator: distinct dates available in the login table for the window.
max_number_login_dates = Login_DF['count_of_dates_logins'].max()
# Percentage of those dates on which the customer logged in.
Login_DF['Login_Consistency'] = Login_DF['number_of_login_days'].map(
    lambda login_days: login_days * 100 / max_number_login_dates
)

In [34]:
max_number_login_dates

30

In [35]:
Login_DF.head()

Unnamed: 0,customer_id,sum_login_count,number_of_login_days,count_of_dates_logins,Login_Consistency
0,11901102,132,30,30,100.0
1,12035238,64,30,30,100.0
2,12021990,70,30,30,100.0
3,11810730,151,30,30,100.0
4,9229363,144,30,30,100.0


In [36]:
Login_DF['Login_Consistency'].value_counts(normalize=True)*100

100.000000    27.352993
3.333333       7.340600
6.666667       5.091007
10.000000      3.978940
96.666667      3.937148
13.333333      3.336440
93.333333      2.992011
16.666667      2.889932
20.000000      2.553189
90.000000      2.458796
23.333333      2.414121
86.666667      2.227496
26.666667      2.212124
30.000000      2.166969
83.333333      2.061526
33.333333      2.046875
80.000000      1.973618
36.666667      1.913331
73.333333      1.891714
76.666667      1.880425
40.000000      1.857848
43.333333      1.812933
70.000000      1.767777
46.666667      1.748803
50.000000      1.721181
53.333333      1.708932
66.666667      1.686834
60.000000      1.662575
63.333333      1.660654
56.666667      1.653208
Name: Login_Consistency, dtype: float64

In [37]:
# Trouble-condition rows whose start date falls in 2022-07-01 .. 2022-09-30 and
# whose start and closed indicators are both 'Y'.
TC_issue_query='''

select dealer_name,customer_id,unit_id,device_id,trouble_condition_name,trouble_condition_group_id,trouble_condition_group_desc, start_date_utc,end_date_utc
from `cio-datahub-enterprise-pr-183a.src_adc.bq_troublecondition_data`
where trouble_condition_start_ind='Y'and trouble_condition_closed_ind='Y' 
and date(start_date_utc) >= '2022-07-01' and date(start_date_utc) <'2022-10-01' 


'''

In [38]:
# Load the trouble-condition rows into a DataFrame.
TC_data= extract_bq_data(bq_client, sql=TC_issue_query)

In [39]:
TC_data.head()

Unnamed: 0,dealer_name,customer_id,unit_id,device_id,trouble_condition_name,trouble_condition_group_id,trouble_condition_group_desc,start_date_utc,end_date_utc
0,ADT by TELUS,7494296,97456472,127,KeypadTamper,1,Critical System Issue,2022-07-03 17:21:24+00:00,2022-07-03 17:22:02+00:00
1,ADT by TELUS,7560026,97522199,127,ZWaveRadioTrouble,2,System Condition,2022-07-03 03:50:57+00:00,2022-07-03 03:51:00+00:00
2,ADT by TELUS,7692469,97654632,127,BatteryAbsent,1,Critical System Issue,2022-07-03 05:36:43+00:00,2022-07-03 05:38:44+00:00
3,ADT by TELUS,13963158,103924794,127,PanelAuxPanic,4,Central Station Condition,2022-07-03 10:50:07+00:00,2022-07-03 10:52:04+00:00
4,ADT by TELUS,8117879,98080035,127,BatteryAbsent,1,Critical System Issue,2022-07-03 06:50:47+00:00,2022-07-03 06:55:48+00:00


In [40]:
# Helper column so that summing yields the number of rows per group.
TC_data['count'] = 1

# Rows: customer_id; columns: ('count', trouble_condition_name); cells: number of events.
TC_pivot_table = TC_data.pivot_table(
    index=['customer_id'],
    columns='trouble_condition_name',
    values=['count'],
    aggfunc={'count': np.sum},
)

In [41]:
# Move customer_id from the index back into a regular column.
TC_pivot_table=TC_pivot_table.reset_index()

In [42]:
TC_pivot_table.head()

Unnamed: 0_level_0,customer_id,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count
trouble_condition_name,Unnamed: 1_level_1,ACFailure,Alarm,BatteryAbsent,BellCircuit,BroadbandCommFailure,CameraNotCommunicating,CameraNotReachable,CarbonMonoxideAlert,CarbonMonoxideSensorTrouble,...,SystemLocked,TamperEventsDisabled,ThermostatTooCold,ThermostatTooWarm,UploadsOverQuota,VideoDeviceFirmwareOutdated,VideoObjectDetectionRuleNotConfigured,WaterAlert,ZWaveRadioTrouble,ZoneDeviceMaskTrouble
0,174996,,1.0,,,,,,,,...,,,,,,,,,,
1,210137,,23.0,1.0,,,,,,,...,,,,,,,,,,
2,213860,,,,,41.0,,,,,...,,,,,,,,,,
3,217919,1.0,,,,,,,,,...,,,,,,,,,,
4,218873,,2.0,,,,,,,,...,,,,,,,,,,


In [43]:
# Missing cells are replaced with 0.
TC_pivot_table = TC_pivot_table.fillna(0)

In [44]:
# Flatten the two-level column labels into 'TC_Last3M_<level0>_<level1>'.
# A column whose second level is empty (customer_id) ends with a trailing
# underscore, i.e. 'TC_Last3M_customer_id_'.
TC_pivot_table.columns = [
    f"TC_Last3M_{'_'.join(level_pair).strip()}"
    for level_pair in TC_pivot_table.columns.values
]

In [45]:
# TC_pivot_table.columns=['customer_id','TC_BroadbandCommFailure_count','TC_CameraNotCommunicating_count','TC_CameraNotReachable_count']

In [46]:
# TC_pivot_table.fillna(0,inplace=True)

In [47]:
TC_pivot_table.head()

Unnamed: 0,TC_Last3M_customer_id_,TC_Last3M_count_ACFailure,TC_Last3M_count_Alarm,TC_Last3M_count_BatteryAbsent,TC_Last3M_count_BellCircuit,TC_Last3M_count_BroadbandCommFailure,TC_Last3M_count_CameraNotCommunicating,TC_Last3M_count_CameraNotReachable,TC_Last3M_count_CarbonMonoxideAlert,TC_Last3M_count_CarbonMonoxideSensorTrouble,...,TC_Last3M_count_SystemLocked,TC_Last3M_count_TamperEventsDisabled,TC_Last3M_count_ThermostatTooCold,TC_Last3M_count_ThermostatTooWarm,TC_Last3M_count_UploadsOverQuota,TC_Last3M_count_VideoDeviceFirmwareOutdated,TC_Last3M_count_VideoObjectDetectionRuleNotConfigured,TC_Last3M_count_WaterAlert,TC_Last3M_count_ZWaveRadioTrouble,TC_Last3M_count_ZoneDeviceMaskTrouble
0,174996,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,210137,0.0,23.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,213860,0.0,0.0,0.0,0.0,41.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,217919,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,218873,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [48]:
# Same counting pivot, grouped by trouble_condition_group_desc.
# Sums the 'count' column of TC_data.
TCG_Pivot = TC_data.pivot_table(
    index=['customer_id'],
    columns='trouble_condition_group_desc',
    values=['count'],
    aggfunc={'count': np.sum},
)

In [49]:
# customer_id back to a regular column; missing cells are replaced with 0.
TCG_Pivot = TCG_Pivot.reset_index().fillna(0)

In [50]:
TCG_Pivot.head()

Unnamed: 0_level_0,customer_id,count,count,count,count
trouble_condition_group_desc,Unnamed: 1_level_1,Central Station Condition,Critical System Issue,Engagement Issue,System Condition
0,174996,1.0,0.0,0.0,0.0
1,210137,26.0,3.0,0.0,0.0
2,213860,0.0,1.0,0.0,41.0
3,217919,0.0,1.0,0.0,0.0
4,218873,2.0,0.0,0.0,0.0


In [51]:
# Flatten the two-level column labels into 'TCG_Last3M_<level0>_<level1>'.
# A column whose second level is empty (customer_id) becomes 'TCG_Last3M_customer_id_'.
TCG_Pivot.columns = [
    f"TCG_Last3M_{'_'.join(level_pair).strip()}"
    for level_pair in TCG_Pivot.columns.values
]

In [52]:
TCG_Pivot.head()

Unnamed: 0,TCG_Last3M_customer_id_,TCG_Last3M_count_Central Station Condition,TCG_Last3M_count_Critical System Issue,TCG_Last3M_count_Engagement Issue,TCG_Last3M_count_System Condition
0,174996,1.0,0.0,0.0,0.0
1,210137,26.0,3.0,0.0,0.0
2,213860,0.0,1.0,0.0,41.0
3,217919,0.0,1.0,0.0,0.0
4,218873,2.0,0.0,0.0,0.0


In [53]:
# TC_pivot_table['TC_BroadbandCommFailure_Flag']=TC_pivot_table['TC_BroadbandCommFailure_count'].apply(lambda x: 1 if x>0 else 0)
# TC_pivot_table['TC_CameraNotCommunicating_Flag']=TC_pivot_table['TC_CameraNotCommunicating_count'].apply(lambda x: 1 if x>0 else 0)
# TC_pivot_table['TC_CameraNotReachable_Flag']=TC_pivot_table['TC_CameraNotReachable_count'].apply(lambda x: 1 if x>0 else 0)

In [54]:
# Customer_info.fillna(0,inplace=True)

In [55]:
# Left join: every Customer_info row is kept; best-practice columns are added where available.
Merge_DF = pd.merge(Customer_info, Best_Practices_DF_wide, how='left', on='customer_id')

In [56]:
# Merge_DF.isna().sum()*100/Merge_DF.shape[0]

In [57]:
# Left join of the arming aggregates on customer_id.
Merge_DF_1 = pd.merge(Merge_DF, Arming_DF, how='left', on='customer_id')

In [58]:
# Merge_DF_1.isna().sum()*100/Merge_DF_1.shape[0]

In [59]:
# Left join of the login aggregates on customer_id.
Merge_DF_2 = pd.merge(Merge_DF_1, Login_DF, how='left', on='customer_id')

In [60]:
# Left join of the per-condition counts; the pivot's key column is 'TC_Last3M_customer_id_'.
Merge_DF_3 = pd.merge(
    Merge_DF_2,
    TC_pivot_table,
    how='left',
    left_on='customer_id',
    right_on='TC_Last3M_customer_id_',
)

In [61]:
# Left join of the per-group counts; the pivot's key column is 'TCG_Last3M_customer_id_'.
Merge_DF_4 = pd.merge(
    Merge_DF_3,
    TCG_Pivot,
    how='left',
    left_on='customer_id',
    right_on='TCG_Last3M_customer_id_',
)

In [62]:
# Drop the references to the intermediate merge results.
# Merge_DF (the first merge result) is not deleted here.
del Merge_DF_1,Merge_DF_2,Merge_DF_3

In [63]:
# Merge_DF_4.isna().sum()*100/Merge_DF_4.shape[0]

In [64]:
# Replace NaN with 0 across the whole frame, not only the joined feature columns.
# NOTE(review): this also applies to BAN / ADT_site_no, which the earlier null-share
# output shows as partly missing - confirm that 0 is an acceptable placeholder there.
Merge_DF_4=Merge_DF_4.replace(np.nan, 0)

In [65]:
# Merge_DF_4.isna().sum()*100/Merge_DF_4.shape[0]

In [66]:
# Remove the redundant key columns: the duplicate id returned by the SQL join
# and the two pivot key columns.
redundant_key_columns = ['customer_id_1', 'TC_Last3M_customer_id_', 'TCG_Last3M_customer_id_']
Merge_DF_4 = Merge_DF_4.drop(columns=redundant_key_columns)

In [67]:
Merge_DF_4['dealer_name'].value_counts(normalize=True)*100

TELUS Communications Inc.    61.921676
ADT by TELUS                 38.078324
Name: dealer_name, dtype: float64

In [68]:
Merge_DF_4['Dealer_type_flag'].value_counts(normalize=True)*100

Telus_Customer    62.001713
ADT_Customer      37.998287
Name: Dealer_type_flag, dtype: float64

In [69]:
# Row counts for each (dealer_name, Dealer_type_flag) combination.
(
    Merge_DF_4
    .groupby(['dealer_name', 'Dealer_type_flag'])
    .agg(Customer_count=('Dealer_type_flag', 'count'))
    .reset_index()
)

Unnamed: 0,dealer_name,Dealer_type_flag,Customer_count
0,ADT by TELUS,ADT_Customer,202678
1,ADT by TELUS,Telus_Customer,471
2,TELUS Communications Inc.,ADT_Customer,44
3,TELUS Communications Inc.,Telus_Customer,330310


In [70]:
# Replace space, '|', '-', ':' and '/' in column names with '_'.
# Raw string + character class: the previous ' |\|-|:|/' relied on an invalid '\|'
# escape in a normal string and on str.replace defaulting to regex mode.
# regex=True is explicit because pandas >= 2.0 defaults to literal matching, which
# would silently leave names such as 'TCG_Last3M_count_System Condition' untouched.
rep_chars = r'[ |\-:/]'

Merge_DF_4.columns = Merge_DF_4.columns.str.replace(rep_chars, '_', regex=True)

  Merge_DF_4.columns = Merge_DF_4.columns.str.replace(rep_chars, '_')


In [71]:
# Merge_DF_4.info(verbose=True)

In [72]:
def Segment_making(row
                   ,min_login_consistency_threshold=50
                   ,min_arming_consistency_threshold=50
                   ,max_login_consistency_threshold=50
                   ,max_arming_consistency_threshold=50):
    """Assign an engagement segment label to one customer row.

    Args:
        row: mapping-like object (e.g. a DataFrame row) providing
            'Best_Practice_All', 'Login_Consistency' and 'Arming_Consistency'.
        min_login_consistency_threshold: login consistency below this is "low".
        min_arming_consistency_threshold: arming consistency below this is "low".
        max_login_consistency_threshold: login consistency at/above this is "high".
        max_arming_consistency_threshold: arming consistency at/above this is "high".

    Returns:
        The first matching label, in this order:
        'Heavy_User'            - at least 5 best practices, high login and high arming;
        'Home_automation_Savvy' - high login, low arming;
        'Old_Fashion'           - low login, high arming;
        'Disengaged'            - login and arming consistency both exactly 0;
        'Moderate_Users'        - everything else.
    """
    best_practices_met = row['Best_Practice_All']
    login_consistency = row['Login_Consistency']
    arming_consistency = row['Arming_Consistency']

    high_login = login_consistency >= max_login_consistency_threshold
    # Arming is compared with the arming threshold; the 'Old_Fashion' rule previously
    # used max_login_consistency_threshold here, which only worked while both
    # defaults happened to be equal.
    high_arming = arming_consistency >= max_arming_consistency_threshold

    if best_practices_met >= 5 and high_login and high_arming:
        return 'Heavy_User'
    if high_login and arming_consistency < min_arming_consistency_threshold:
        return 'Home_automation_Savvy'
    if login_consistency < min_login_consistency_threshold and high_arming:
        return 'Old_Fashion'
    if login_consistency == 0 and arming_consistency == 0:
        return 'Disengaged'
    return "Moderate_Users"

In [73]:
# Label every row with its segment; Segment_making is called once per row with
# its default thresholds.
Merge_DF_4['Segment']=Merge_DF_4.apply(Segment_making,axis=1)

In [74]:
Merge_DF_4['Segment'].value_counts()

Moderate_Users           187511
Home_automation_Savvy    128906
Disengaged               109639
Old_Fashion               61041
Heavy_User                46406
Name: Segment, dtype: int64

In [75]:
Merge_DF_4['Segment'].value_counts(normalize=True)*100

Moderate_Users           35.147131
Home_automation_Savvy    24.162188
Disengaged               20.550775
Old_Fashion              11.441548
Heavy_User                8.698358
Name: Segment, dtype: float64

In [76]:
# Merge_DF_4.to_csv('ADC_Master_Data_Sep2022.csv',index=False)

In [77]:
Merge_DF_4['dealer_name'].value_counts()

TELUS Communications Inc.    330354
ADT by TELUS                 203149
Name: dealer_name, dtype: int64

In [78]:
# Merge_DF_4['dealer_name'=='ADT by TELUS'].to_csv('ADC_Master_Data_Sep2022_ADT.csv',index=False)

In [79]:
# config= bigquery.job.LoadJobConfig()

# # config._properties['timePartitioning'] = {'field': 'Month_Year'}
# config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

# bq_table_instance= bq_client.load_table_from_dataframe(Merge_DF_4, Table_BQ,job_config=config)

In [80]:
# Merge_DF_4.drop(['TC_Last3M_count_CredentialsInConflict'],inplace=True,axis=1)

In [81]:
# Load-job configuration for writing the master table.
config = bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}

# WRITE_TRUNCATE overwrites the destination table on every run
# (alternative kept for reference: bigquery.WriteDisposition.WRITE_APPEND).
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

# load_table_from_dataframe only starts the load job and returns its handle.
bq_table_instance = bq_client.load_table_from_dataframe(Merge_DF_4, Table_BQ, job_config=config)

# Wait for the job to finish; result() raises if the load failed, so a broken
# upload no longer goes unnoticed.
bq_table_instance.result()

In [82]:
# ADC_Master_Query= '''


# select * from `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
# where Month_Snapshot!='2022-09-30'


# '''

In [83]:
# ADC_Master_DF=extract_bq_data(bq_client, sql=ADC_Master_Query)

In [84]:
# ADC_Master_DF.info()

In [85]:
# ADC_Master_DF['Month_Snapshot'].value_counts()

In [86]:
# config= bigquery.job.LoadJobConfig()

# # config._properties['timePartitioning'] = {'field': 'Month_Year'}
# config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

# bq_table_instance= bq_client.load_table_from_dataframe(ADC_Master_DF, Table_BQ,job_config=config)