In [62]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
sys.path.insert(0, str(pth_project))
d_project_config = safe_load(pth_creds.open())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [63]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    if sql is not None:
        df = bq_client.query(sql).to_dataframe()
    elif pth_query is not None:
        sql = pth_query.read_text()
        df = bq_client.query(sql).to_dataframe()
    else:
        raise ValueError('`sql` or `pth_query` should be set')  
    return df

In [64]:
Customer_details='''

--DECLARE _end_dt_snpsht_tmp DATE DEFAULT CURRENT_DATE();
DECLARE _end_dt_snpsht_tmp DATE DEFAULT '2023-02-04';

WITH date_sql as (

SELECT 

last_day(date_sub(_end_dt_snpsht_tmp, interval 1 month), month) as last_date_prev_month
,date_trunc(date_sub(_end_dt_snpsht_tmp, interval 1 month), month) as first_date_prev_month

)



, ADC_Customer_Base as (

SELECT date((select first_date_prev_month from date_sql)) as Month_Snapshot,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where date(last_updt_ts) =(select last_date_prev_month from date_sql)
and  dealer_name in ('TELUS Communications Inc.','ADT by TELUS')
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id



)


, ADT_Customer_Base as (

SELECT customer_id,'ADT_Customer' as Dealer_type_flag,NULL  as BAN,site_no as ADT_site_no 
FROM `divgpras-pr-579355.ADC_updated.ADC_ADT_mapping` 

)


/*
,Telus_customers as
(
select bacct_bus_bacct_num as BAN,cust_bus_cust_id,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,pi_cntrct_start_ts as contract_start_date,pi_cntrct_end_ts as contract_end_date
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = (select last_date_prev_month from date_sql) #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
order by cust_bus_cust_id

)

*/

,Telus_customers as
(
select bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id
--,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,pi_cntrct_start_ts as contract_start_date
--,pi_cntrct_end_ts as contract_end_date
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = (select last_date_prev_month from date_sql) #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
--QUALIFY ROW_NUMBER() OVER (PARTITION BY bacct_bus_bacct_num ORDER BY pi_prod_instnc_stat_ts DESC) = 1
order by cust_bus_cust_id

)



 ,Telus_Customer_Base as (

SELECT a.customer_id,'Telus_Customer' as Dealer_type_flag,b.bacct_bus_bacct_num as BAN, NULL as ADT_site_no
FROM ADC_Customer_Base a
inner join Telus_customers b
on a.dealer_customer_id=b.cust_bus_cust_id
)



, Union_data as (

SELECT * from Telus_Customer_Base
UNION ALL
SELECT * from ADT_Customer_Base

)

,Merge_data as (

SELECT * from ADC_Customer_Base a
INNER JOIN Union_data b

on a.customer_id=b.customer_id
order by Dealer_type_flag
)


/*
 ,Merge_data as (

SELECT *
FROM ADC_Customer_Base a
left join Telus_customers b
on a.dealer_customer_id=b.cust_bus_cust_id

)
*/



select * from Merge_data



'''

In [65]:
Customer_info=extract_bq_data(bq_client, sql=Customer_details)

In [66]:
Customer_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 584280 entries, 0 to 584279
Data columns (total 15 columns):
 #   Column              Non-Null Count   Dtype              
---  ------              --------------   -----              
 0   Month_Snapshot      584280 non-null  dbdate             
 1   customer_id         584280 non-null  Int64              
 2   dealer_customer_id  584280 non-null  object             
 3   primary_login_id    584280 non-null  Int64              
 4   dealer_name         584280 non-null  object             
 5   join_date           584280 non-null  datetime64[ns, UTC]
 6   account_type_name   584280 non-null  object             
 7   customer_type_name  584280 non-null  object             
 8   primary_email       584280 non-null  object             
 9   primary_phone       584280 non-null  object             
 10  last_updt_ts        584280 non-null  datetime64[ns, UTC]
 11  customer_id_1       584280 non-null  Int64              
 12  Dealer_type_flag

In [67]:
Customer_info.head()

Unnamed: 0,Month_Snapshot,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts,customer_id_1,Dealer_type_flag,BAN,ADT_site_no
0,2023-01-01,644850,E0342199,674917,ADT by TELUS,2010-10-29 16:21:51+00:00,Security System,Customer,AbY0zgUL1NTpjZjjoLqROoaeJbZuZMqqzyUaJmqHo99H8Wk=,17806372289,2023-01-31 11:27:54.202827+00:00,644850,ADT_Customer,,700225256.0
1,2023-01-01,1229785,,1306661,TELUS Communications Inc.,2012-03-05 12:11:21+00:00,Security System,Customer,AcZPuhEwJ6JfZ5L1i77L4A2OzD3PpLKMZr/+sFjKeeiycyk=,17788659737,2023-01-31 11:27:54.202827+00:00,1229785,ADT_Customer,,700302749.0
2,2023-01-01,1261094,09054513,1341534,TELUS Communications Inc.,2012-03-30 14:39:28+00:00,Security System,Customer,ARroS5JlgBAcAMHZtjmHpwJfZE+oVS9Mu1FrfJmycOmW/l0=,14504619797,2023-01-31 11:27:54.202827+00:00,1261094,ADT_Customer,,700329147.0
3,2023-01-01,1372440,,1462046,TELUS Communications Inc.,2012-05-25 15:40:15+00:00,Security System,Customer,AZ7xOvNxKU2SoJNAVecT/S4rFubpXppJzMvqup0Jnw52a2...,17807101825,2023-01-31 11:27:54.202827+00:00,1372440,ADT_Customer,,700343753.0
4,2023-01-01,1429415,DV102746,1522880,TELUS Communications Inc.,2012-06-14 08:29:02+00:00,Security System,Customer,AaT/nwJRDQmurQl2qAg/WN0odZEF6JcqpMSpnBTWLVsZcI...,19057850788,2023-01-31 11:27:54.202827+00:00,1429415,ADT_Customer,,700235752.0


In [68]:
Best_practices_details_Query='''

--DECLARE _end_dt_snpsht_tmp DATE DEFAULT CURRENT_DATE();
DECLARE _end_dt_snpsht_tmp DATE DEFAULT '2023-02-04';

WITH date_sql as (

SELECT 

last_day(date_sub(_end_dt_snpsht_tmp, interval 1 month), month) as last_date_prev_month
,date_trunc(date_sub(_end_dt_snpsht_tmp, interval 1 month), month) as first_date_prev_month

)

select customer_id,best_practices_id,best_practices_ind,date(dt_last_calculate_utc) as last_date_calculate
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_best_practice`
where DATE(dt_last_calculate_utc) < (select last_date_prev_month from date_sql)
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id,best_practices_id ORDER BY last_updt_ts DESC) = 1
order by last_date_calculate


'''

In [69]:
Best_Practices_DF= extract_bq_data(bq_client,sql=Best_practices_details_Query)

In [70]:
Best_Practices_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4432506 entries, 0 to 4432505
Data columns (total 4 columns):
 #   Column               Dtype  
---  ------               -----  
 0   customer_id          Int64  
 1   best_practices_id    Int64  
 2   best_practices_ind   boolean
 3   last_date_calculate  dbdate 
dtypes: Int64(2), boolean(1), dbdate(1)
memory usage: 118.4 MB


In [71]:
Best_Practices_DF.head()

Unnamed: 0,customer_id,best_practices_id,best_practices_ind,last_date_calculate
0,7809038,2,False,2022-08-03
1,8215017,1,True,2022-08-03
2,9166652,1,True,2022-08-03
3,9535850,1,True,2022-08-03
4,10025855,3,True,2022-08-03


In [72]:
Best_Practices_DF['last_date_calculate'].max()

datetime.date(2023, 1, 30)

In [73]:
Best_Practices_DF['Best_practices_flag']=Best_Practices_DF['best_practices_ind'].apply(lambda x: 1 if x==True else 0)
Best_Practices_DF_wide=Best_Practices_DF.pivot(index='customer_id', columns='best_practices_id', values='Best_practices_flag').reset_index()
Best_Practices_DF_wide.columns=['customer_id','Best_partices_1','Best_partices_2','Best_partices_3','Best_partices_4','Best_partices_5','Best_partices_6']
cols_to_sum=['Best_partices_1','Best_partices_2','Best_partices_3','Best_partices_4','Best_partices_5','Best_partices_6']
Best_Practices_DF_wide['Best_Practice_All']=Best_Practices_DF_wide[cols_to_sum].sum(axis=1)
Best_Practices_DF_wide['Best_Practice_All_flag']=Best_Practices_DF_wide['Best_Practice_All'].apply(lambda x: 1 if x==6 else 0)

In [74]:
Best_Practices_DF_wide.head()

Unnamed: 0,customer_id,Best_partices_1,Best_partices_2,Best_partices_3,Best_partices_4,Best_partices_5,Best_partices_6,Best_Practice_All,Best_Practice_All_flag
0,174996,0,0,0,0,0,0,0,0
1,210137,1,1,0,0,0,0,2,0
2,213860,1,1,1,1,1,0,5,0
3,217919,1,1,0,0,1,1,4,0
4,218873,1,1,0,1,1,1,5,0


In [75]:
Arming_Query=  '''

--DECLARE _end_dt_snpsht_tmp DATE DEFAULT CURRENT_DATE();
DECLARE _end_dt_snpsht_tmp DATE DEFAULT '2023-02-04';

WITH date_sql as (

SELECT 

last_day(date_sub(_end_dt_snpsht_tmp, interval 1 month), month) as last_date_prev_month
,date_trunc(date_sub(_end_dt_snpsht_tmp, interval 1 month), month) as first_date_prev_month

)


, date_sql_arming as (
select  count(distinct date(date)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< (select last_date_prev_month from date_sql)
and date(date)>= (select first_date_prev_month from date_sql)

)

select id_cust as customer_id,sum(count_arm_commands) as sum_arm_commands,sum(count_disarm_commands) as sum_disarm_commands,count (distinct date(date)) as number_days_arming_disarming, (select count_of_dates from date_sql_arming ) as count_of_dates_arming
from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< (select last_date_prev_month from date_sql)
and date(date)>= (select first_date_prev_month from date_sql)
group by customer_id
order by number_days_arming_disarming desc

'''

In [76]:
Arming_DF=extract_bq_data(bq_client, sql=Arming_Query)

In [77]:
Arming_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 331323 entries, 0 to 331322
Data columns (total 5 columns):
 #   Column                        Non-Null Count   Dtype
---  ------                        --------------   -----
 0   customer_id                   331323 non-null  Int64
 1   sum_arm_commands              331323 non-null  Int64
 2   sum_disarm_commands           331323 non-null  Int64
 3   number_days_arming_disarming  331323 non-null  Int64
 4   count_of_dates_arming         331323 non-null  Int64
dtypes: Int64(5)
memory usage: 14.2 MB


In [78]:
Arming_DF.head()

Unnamed: 0,customer_id,sum_arm_commands,sum_disarm_commands,number_days_arming_disarming,count_of_dates_arming
0,8551361,28,27,28,28
1,5279041,29,29,28,28
2,8044359,27,27,28,28
3,12563275,37,37,28,28
4,8892623,33,34,28,28


In [79]:
max_number_arming_dates=Arming_DF['count_of_dates_arming'].max()
Arming_DF['Arming_Consistency']=Arming_DF['number_days_arming_disarming'].apply(lambda x: x*100/max_number_arming_dates)

In [80]:
Login_SQL='''


--DECLARE _end_dt_snpsht_tmp DATE DEFAULT CURRENT_DATE();
DECLARE _end_dt_snpsht_tmp DATE DEFAULT '2023-02-04';

WITH date_sql as (

SELECT 

last_day(date_sub(_end_dt_snpsht_tmp, interval 1 month), month) as last_date_prev_month
,date_trunc(date_sub(_end_dt_snpsht_tmp, interval 1 month), month) as first_date_prev_month

)


, date_sql_login as (
select  count(distinct date(login_dt_utc)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< (select last_date_prev_month from date_sql)
and date(login_dt_utc)>= (select first_date_prev_month from date_sql)

)



select customer_id,sum(login_count) as sum_login_count,count (distinct date(login_dt_utc)) as number_of_login_days, (select count_of_dates from date_sql_login ) as count_of_dates_logins
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< (select last_date_prev_month from date_sql)
and date(login_dt_utc)>= (select first_date_prev_month from date_sql) 
group by customer_id
order by number_of_login_days desc


'''

In [81]:
Login_DF=extract_bq_data(bq_client, sql=Login_SQL)

In [82]:
Login_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 457369 entries, 0 to 457368
Data columns (total 4 columns):
 #   Column                 Non-Null Count   Dtype
---  ------                 --------------   -----
 0   customer_id            457369 non-null  Int64
 1   sum_login_count        457369 non-null  Int64
 2   number_of_login_days   457369 non-null  Int64
 3   count_of_dates_logins  457369 non-null  Int64
dtypes: Int64(4)
memory usage: 15.7 MB


In [83]:
Login_DF.head()

Unnamed: 0,customer_id,sum_login_count,number_of_login_days,count_of_dates_logins
0,9967604,558,30,30
1,11913281,1552,30,30
2,9813913,866,30,30
3,11958463,956,30,30
4,13123488,593,30,30


In [84]:
max_number_login_dates=Login_DF['count_of_dates_logins'].max()
Login_DF['Login_Consistency']=Login_DF['number_of_login_days'].apply(lambda x: x*100/max_number_login_dates)

In [85]:
Login_DF.head()

Unnamed: 0,customer_id,sum_login_count,number_of_login_days,count_of_dates_logins,Login_Consistency
0,9967604,558,30,30,100.0
1,11913281,1552,30,30,100.0
2,9813913,866,30,30,100.0
3,11958463,956,30,30,100.0
4,13123488,593,30,30,100.0


In [86]:
TC_issue_query='''

--DECLARE _end_dt_snpsht_tmp DATE DEFAULT CURRENT_DATE();
DECLARE _end_dt_snpsht_tmp DATE DEFAULT '2023-02-04';

WITH date_sql as (

SELECT 

last_day(date_sub(_end_dt_snpsht_tmp, interval 1 month), month) as last_date_prev_month
,date_trunc(date_sub(_end_dt_snpsht_tmp, interval 1 month), month) as first_date_prev_month
,date_trunc(date_sub(_end_dt_snpsht_tmp, interval 4 month), month) as first_date_prev_3_month

)


select dealer_name,customer_id,unit_id,device_id,trouble_condition_name,trouble_condition_group_id,trouble_condition_group_desc, start_date_utc,end_date_utc
from `cio-datahub-enterprise-pr-183a.src_adc.bq_troublecondition_data`
where trouble_condition_start_ind='Y'and trouble_condition_closed_ind='Y' 
and date(start_date_utc) >= (select first_date_prev_3_month from date_sql) 
and date(start_date_utc) <(select first_date_prev_month from date_sql)


'''

In [87]:
TC_data= extract_bq_data(bq_client, sql=TC_issue_query)

In [88]:
TC_data.head()

Unnamed: 0,dealer_name,customer_id,unit_id,device_id,trouble_condition_name,trouble_condition_group_id,trouble_condition_group_desc,start_date_utc,end_date_utc
0,ADT by TELUS,14722908,104684540,3,Alarm,4,Central Station Condition,2022-10-29 15:44:05+00:00,2022-10-29 15:44:11+00:00
1,ADT by TELUS,14818580,104780212,5,Alarm,4,Central Station Condition,2022-10-29 11:40:10+00:00,2022-10-29 11:48:30+00:00
2,ADT by TELUS,14674283,104635915,2,Alarm,4,Central Station Condition,2022-10-29 14:40:08+00:00,2022-10-29 14:45:40+00:00
3,ADT by TELUS,14823260,104784892,3,Alarm,4,Central Station Condition,2022-10-29 21:32:09+00:00,2022-10-29 21:36:11+00:00
4,ADT by TELUS,14741837,104703469,5,Alarm,4,Central Station Condition,2022-10-29 09:45:50+00:00,2022-10-29 09:47:09+00:00


In [89]:
TC_data['trouble_condition_name'].nunique()

82

In [90]:
TC_data['start_date_utc'].min()

Timestamp('2022-10-01 00:00:04+0000', tz='UTC')

In [91]:
TC_data['start_date_utc'].max()

Timestamp('2022-12-31 23:58:46+0000', tz='UTC')

In [92]:
TC_data['count']=1

TC_pivot_table=pd.pivot_table(TC_data, values=['count'], index=['customer_id'], columns='trouble_condition_name',
                          aggfunc={'count': 'sum'})

TC_pivot_table=TC_pivot_table.reset_index()

TC_pivot_table.fillna(0,inplace=True)

In [93]:
TC_pivot_table.columns=['TC_Last3M_'+'_'.join(col).strip() for col in TC_pivot_table.columns.values]

In [94]:
# TC_pivot_cols_required=TC_pivot_table.columns.to_list()

In [95]:
# TC_pivot_cols_required

In [96]:
# import yaml
# with open(r'/home/jupyter/ADC/notebooks/Feature_data_store/TC_pivot_cols_required.yaml', 'w') as file:
#     documents = yaml.dump(TC_pivot_cols_required, file)

# import json
    
# with open(r'/home/jupyter/ADC/notebooks/Feature_data_store/TC_pivot_cols_required.json', 'w') as Ffp:
#     json.dump(TC_pivot_cols_required, Ffp)


In [97]:
TC_pivot_table.head()

Unnamed: 0,TC_Last3M_customer_id_,TC_Last3M_count_ACFailure,TC_Last3M_count_Alarm,TC_Last3M_count_AuxSupply,TC_Last3M_count_BatteryAbsent,TC_Last3M_count_BellCircuit,TC_Last3M_count_BroadbandCommFailure,TC_Last3M_count_CameraNotCommunicating,TC_Last3M_count_CameraNotReachable,TC_Last3M_count_CarbonMonoxideAlert,...,TC_Last3M_count_SvrRecordingScheduleNotSetUp,TC_Last3M_count_SystemLocked,TC_Last3M_count_TamperEventsDisabled,TC_Last3M_count_ThermostatTooCold,TC_Last3M_count_ThermostatTooWarm,TC_Last3M_count_UploadsOverQuota,TC_Last3M_count_VideoObjectDetectionRuleNotConfigured,TC_Last3M_count_WaterAlert,TC_Last3M_count_ZWaveRadioTrouble,TC_Last3M_count_ZoneDeviceMaskTrouble
0,210137,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,213860,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,218873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,229153,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,230623,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [98]:
def add_missing_dummy_columns(d, cols_req_model):
    ''' 
    This function adds missing columns to the data so that the model doesn't fail.
    d is the Dataset. 
    cols_req_model is the list of columns required by the model.
    
    '''
    missing_cols = list(set(cols_req_model) - set(d.columns))
    print('Adding columns', missing_cols, 'to the data and populating them with 0.')
    for c in missing_cols:
        d[c] = 0

In [99]:
import json
with open(r'/home/jupyter/ADC/notebooks/Feature_data_store/TC_pivot_cols_required.json') as json_file:
    final_feature_cols_TC = json.load(json_file)

In [100]:
# final_feature_cols_TC

In [101]:
add_missing_dummy_columns(TC_pivot_table,final_feature_cols_TC)

Adding columns ['TC_Last3M_count_RfJamTrouble', 'TC_Last3M_count_ControllerPowerFailure', 'TC_Last3M_count_VideoDeviceFirmwareOutdated', 'TC_Last3M_count_NoActivityPanel', 'TC_Last3M_count_MediumLeak'] to the data and populating them with 0.


In [102]:
def remove_extra_columns(d,cols_req_model):
    
    ''' 
    This function drops extra columns from the data so that the model doesn't fail.
    d is the Dataset. 
    cols_req_model is the list of columns required by the model.
    
    '''
    extra_cols= list(set( d.columns ) - set( cols_req_model ))
    print('Dropping', extra_cols, 'columns from the data.')
    d = d.drop(extra_cols, axis=1)
    return d

In [103]:
TC_pivot_table=remove_extra_columns(TC_pivot_table,final_feature_cols_TC)

Dropping [] columns from the data.


In [104]:
TCG_Pivot=pd.pivot_table(TC_data, values=['count'], index=['customer_id'], columns='trouble_condition_group_desc',
                          aggfunc={'count': np.sum})
TCG_Pivot=TCG_Pivot.reset_index()
TCG_Pivot.fillna(0,inplace=True)
TCG_Pivot.columns=['TCG_Last3M_'+'_'.join(col).strip() for col in TCG_Pivot.columns.values]

In [105]:
TCG_Pivot.head()

Unnamed: 0,TCG_Last3M_customer_id_,TCG_Last3M_count_Central Station Condition,TCG_Last3M_count_Critical System Issue,TCG_Last3M_count_Engagement Issue,TCG_Last3M_count_System Condition
0,210137,0.0,1.0,0.0,0.0
1,213860,0.0,0.0,0.0,3.0
2,218873,0.0,3.0,1.0,0.0
3,229153,0.0,1.0,0.0,0.0
4,230623,1.0,0.0,0.0,0.0


In [106]:
Merge_DF=Customer_info.merge(Best_Practices_DF_wide,on='customer_id',how='left')

In [107]:
Merge_DF_1=Merge_DF.merge(Arming_DF,on='customer_id',how='left')
Merge_DF_2=Merge_DF_1.merge(Login_DF,on='customer_id',how='left')
Merge_DF_3=Merge_DF_2.merge(TC_pivot_table,left_on='customer_id',right_on='TC_Last3M_customer_id_',how='left')
Merge_DF_4=Merge_DF_3.merge(TCG_Pivot,left_on='customer_id',right_on='TCG_Last3M_customer_id_',how='left')

In [108]:
del Merge_DF_1,Merge_DF_2,Merge_DF_3

In [109]:
rep_chars = ' |\|-|:|/'

Merge_DF_4.columns = Merge_DF_4.columns.str.replace(rep_chars, '_')

  Merge_DF_4.columns = Merge_DF_4.columns.str.replace(rep_chars, '_')


In [110]:
def Segment_making(row
                   ,min_login_consistency_threshold=50
                   ,min_arming_consistency_threshold=50
                   ,max_login_consistency_threshold=50
                   ,max_arming_consistency_threshold=50):
    
    if row['Best_Practice_All']>=5 and row['Login_Consistency']>=max_login_consistency_threshold  and row['Arming_Consistency']>=max_arming_consistency_threshold:
        return 'Heavy_User'
    elif row['Login_Consistency']>=max_login_consistency_threshold  and row['Arming_Consistency']< min_arming_consistency_threshold:
        return 'Home_automation_Savvy'
    elif row['Login_Consistency']< min_login_consistency_threshold  and row['Arming_Consistency']>=max_login_consistency_threshold:
        return 'Old_Fashion'
    elif row['Login_Consistency']==0  and row['Arming_Consistency']==0:
        return 'Disengaged'
    else:
        return "Moderate_Users"

In [111]:
Merge_DF_4['Segment']=Merge_DF_4.apply(Segment_making,axis=1)

In [112]:
Merge_DF_4['Segment'].value_counts(normalize=True)*100

Moderate_Users           78.420963
Heavy_User                9.213391
Home_automation_Savvy     7.151366
Old_Fashion               5.214281
Name: Segment, dtype: float64

In [113]:
Merge_DF_4.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 584280 entries, 0 to 584279
Data columns (total 126 columns):
 #    Column                                                        Dtype              
---   ------                                                        -----              
 0    Month_Snapshot                                                dbdate             
 1    customer_id                                                   Int64              
 2    dealer_customer_id                                            object             
 3    primary_login_id                                              Int64              
 4    dealer_name                                                   object             
 5    join_date                                                     datetime64[ns, UTC]
 6    account_type_name                                             object             
 7    customer_type_name                                            object             
 8    pr

In [114]:
Merge_DF_4.head()

Unnamed: 0,Month_Snapshot,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,...,TC_Last3M_count_ControllerPowerFailure,TC_Last3M_count_VideoDeviceFirmwareOutdated,TC_Last3M_count_NoActivityPanel,TC_Last3M_count_MediumLeak,TCG_Last3M_customer_id_,TCG_Last3M_count_Central_Station_Condition,TCG_Last3M_count_Critical_System_Issue,TCG_Last3M_count_Engagement_Issue,TCG_Last3M_count_System_Condition,Segment
0,2023-01-01,644850,E0342199,674917,ADT by TELUS,2010-10-29 16:21:51+00:00,Security System,Customer,AbY0zgUL1NTpjZjjoLqROoaeJbZuZMqqzyUaJmqHo99H8Wk=,17806372289,...,0.0,0.0,0.0,0.0,644850.0,0.0,0.0,0.0,2.0,Moderate_Users
1,2023-01-01,1229785,,1306661,TELUS Communications Inc.,2012-03-05 12:11:21+00:00,Security System,Customer,AcZPuhEwJ6JfZ5L1i77L4A2OzD3PpLKMZr/+sFjKeeiycyk=,17788659737,...,,,,,,,,,,Moderate_Users
2,2023-01-01,1261094,09054513,1341534,TELUS Communications Inc.,2012-03-30 14:39:28+00:00,Security System,Customer,ARroS5JlgBAcAMHZtjmHpwJfZE+oVS9Mu1FrfJmycOmW/l0=,14504619797,...,0.0,0.0,0.0,0.0,1261094.0,0.0,0.0,0.0,9.0,Moderate_Users
3,2023-01-01,1372440,,1462046,TELUS Communications Inc.,2012-05-25 15:40:15+00:00,Security System,Customer,AZ7xOvNxKU2SoJNAVecT/S4rFubpXppJzMvqup0Jnw52a2...,17807101825,...,,,,,,,,,,Moderate_Users
4,2023-01-01,1429415,DV102746,1522880,TELUS Communications Inc.,2012-06-14 08:29:02+00:00,Security System,Customer,AaT/nwJRDQmurQl2qAg/WN0odZEF6JcqpMSpnBTWLVsZcI...,19057850788,...,0.0,0.0,0.0,0.0,1429415.0,1.0,2.0,0.0,0.0,Old_Fashion


In [115]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND

Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

bq_table_instance= bq_client.load_table_from_dataframe(Merge_DF_4, Table_BQ,job_config=config)

BadRequest: 400 POST https://bigquery.googleapis.com/upload/bigquery/v2/projects/divgpras-pr-579355/jobs?uploadType=resumable: Provided Schema does not match Table divgpras-pr-579355:ADC_Feature_Datastore.ADC_Master_Data. Cannot add fields (field: customer_id_1)

In [178]:
Query_test='''

declare end_date date;

set end_date = DATE('{score_date}');



WITH date_sql as (

SELECT 

last_day(date_sub(end_date, interval 1 month), month) as last_date_prev_month
,date_trunc(date_sub(end_date, interval 1 month), month) as first_date_prev_month

)


, date_sql_arming as (
select  count(distinct date(date)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< (select last_date_prev_month from date_sql)
and date(date)>= (select first_date_prev_month from date_sql)

)

select id_cust as customer_id,sum(count_arm_commands) as sum_arm_commands,sum(count_disarm_commands) as sum_disarm_commands,count (distinct date(date)) as number_days_arming_disarming, (select count_of_dates from date_sql_arming ) as count_of_dates_arming
from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< (select last_date_prev_month from date_sql)
and date(date)>= (select first_date_prev_month from date_sql)
group by customer_id
order by number_days_arming_disarming desc


'''

In [179]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    if sql is not None:
        df = bq_client.query(sql).to_dataframe()
    elif pth_query is not None:
        sql = pth_query.read_text()
        df = bq_client.query(sql).to_dataframe()
    else:
        raise ValueError('`sql` or `pth_query` should be set')  
    return df

In [182]:
from datetime import date
SCORE_DATE = date.today().strftime('%Y-%m-%d')

In [183]:
SCORE_DATE

'2023-01-26'

In [1]:
# DF=extract_bq_data(bq_client, sql=Query_test).format(score_date='2023-01-26')