In [None]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root: the part of the working directory that precedes the first
# occurrence of 'notebooks' (the whole cwd when that substring is absent).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core.*` imports below resolve.
sys.path.insert(0, str(pth_project))
# read_text() opens, reads and closes the file; the previous
# `safe_load(pth_creds.open())` left the file handle open.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Create the BigQuery client for the GCP project named in the local config
bq_client = connect_bq_services(
    d_project_config['gcp-project-name'],
)
# Render up to 100 rows when a frame or series is displayed
pd.set_option('display.max_rows', 100)

In [None]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Parameters
    ----------
    bq_client : object exposing ``query(sql)`` whose result has ``to_dataframe()``
    sql : query text; takes precedence over ``pth_query`` when both are given
    pth_query : object exposing ``read_text()`` (e.g. a ``pathlib.Path``) that
        yields the query text; only consulted when ``sql`` is None

    Raises
    ------
    ValueError
        If neither ``sql`` nor ``pth_query`` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Fall back to the query stored on disk
        sql = pth_query.read_text()
    return bq_client.query(sql).to_dataframe()

In [None]:
# Customer-base query (BigQuery script).
#   ADC_Customer_Base   : account rows whose last_updt_ts date equals _end_dt_snpsht,
#                         restricted to the two listed dealers, one row per customer_id
#                         (latest last_updt_ts).
#   Telus_customers     : product-instance snapshot rows for the same date, filtered to
#                         type 'SMHM', source id 1001, status 'A', customer type 'R'.
#   Telus_Customer_Base : ADC customers matched on dealer_customer_id = cust_bus_cust_id.
#   ADT_Customer_Base   : every row of the ADC_ADT_mapping table.
#   Merge_data          : ADC_Customer_Base inner-joined to the union of the two bases.
# NOTE(review): Month_Snapshot is hard-coded to 2022-12-01 while the filter date is
# 2022-12-31; both literals have to be edited together when the month changes.
# NOTE(review): `SELECT *` over the join returns customer_id from both sides; a column
# named 'customer_id_1' is dropped later in the notebook - presumably that duplicate,
# confirm.
Customer_details='''

DECLARE _end_dt_snpsht DATE DEFAULT '2022-12-31';



with ADC_Customer_Base as (

SELECT date('2022-12-01') as Month_Snapshot,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where date(last_updt_ts) =_end_dt_snpsht
and  dealer_name in ('TELUS Communications Inc.','ADT by TELUS')
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id


)



,Telus_customers as
(
select bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id
--,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,pi_cntrct_start_ts as contract_start_date
--,pi_cntrct_end_ts as contract_end_date
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = _end_dt_snpsht #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
--QUALIFY ROW_NUMBER() OVER (PARTITION BY bacct_bus_bacct_num ORDER BY pi_prod_instnc_stat_ts DESC) = 1
order by cust_bus_cust_id

)



 ,Telus_Customer_Base as (

SELECT a.customer_id,'Telus_Customer' as Dealer_type_flag,b.bacct_bus_bacct_num as BAN, NULL as ADT_site_no
FROM ADC_Customer_Base a
inner join Telus_customers b
on a.dealer_customer_id=b.cust_bus_cust_id
)



, ADT_Customer_Base as (

SELECT customer_id,'ADT_Customer' as Dealer_type_flag,NULL  as BAN,site_no as ADT_site_no 
FROM `divgpras-pr-579355.ADC_updated.ADC_ADT_mapping` 

)



, Union_data as (

SELECT * from Telus_Customer_Base
UNION ALL
SELECT * from ADT_Customer_Base

)

,Merge_data as (

SELECT * from ADC_Customer_Base a
INNER JOIN Union_data b
on a.customer_id=b.customer_id
order by Dealer_type_flag

)

select * from Merge_data



'''

In [None]:
# Run the customer-base query; this frame is the left side of every later merge.
Customer_info=extract_bq_data(bq_client, sql=Customer_details)

In [None]:
# Column dtypes and non-null counts of the extract.
Customer_info.info()

In [None]:
# Row counts per account type.
Customer_info['account_type_name'].value_counts()

In [None]:
# Row counts per dealer.
Customer_info['dealer_name'].value_counts()

In [None]:
# Rows per customer_id. The query keeps one ADC row per customer, so a count above 1
# means the unioned Telus/ADT base matched that customer more than once.
Customer_info['customer_id'].value_counts()

In [None]:
Customer_info.head()

In [None]:
# Best-practice indicators calculated before 2023-01-01, keeping for each
# (customer_id, best_practices_id) pair only the row with the latest last_updt_ts.
Best_practices_details_Query='''

select customer_id,best_practices_id,best_practices_ind,date(dt_last_calculate_utc) as last_date_calculate
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_best_practice`
where DATE(dt_last_calculate_utc)<'2023-01-01'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id,best_practices_id ORDER BY last_updt_ts DESC) = 1
order by customer_id,best_practices_id


'''

In [None]:
# Long format: one row per customer and best-practice id.
Best_Practices_DF= extract_bq_data(bq_client,sql=Best_practices_details_Query)

In [None]:
Best_Practices_DF.info()

In [None]:
Best_Practices_DF.head()

In [None]:
# 1 when best_practices_ind equals True, 0 for anything else.
Best_Practices_DF['Best_practices_flag']=Best_Practices_DF['best_practices_ind'].apply(lambda x: 1 if x==True else 0)

In [None]:
# Long -> wide: one row per customer_id, one column per best_practices_id.
# DataFrame.pivot raises on duplicate (customer_id, best_practices_id) pairs; the
# query above keeps a single row per pair.
Best_Practices_DF_wide=Best_Practices_DF.pivot(index='customer_id', columns='best_practices_id', values='Best_practices_flag').reset_index()

In [None]:
Best_Practices_DF_wide.head()

In [None]:
# Positional rename: assumes the pivot produced exactly six best_practices_id columns
# after customer_id (the assignment fails with a length mismatch otherwise).
# NOTE(review): confirm the ids sort in the order these labels imply.
Best_Practices_DF_wide.columns=['customer_id','Best_partices_1','Best_partices_2','Best_partices_3','Best_partices_4','Best_partices_5','Best_partices_6']

In [None]:
# Number of best practices met per customer (row-wise sum of the six flags).
cols_to_sum=['Best_partices_1','Best_partices_2','Best_partices_3','Best_partices_4','Best_partices_5','Best_partices_6']
Best_Practices_DF_wide['Best_Practice_All']=Best_Practices_DF_wide[cols_to_sum].sum(axis=1)

In [None]:
# 1 only when all six flags are set.
Best_Practices_DF_wide['Best_Practice_All_flag']=Best_Practices_DF_wide['Best_Practice_All'].apply(lambda x: 1 if x==6 else 0)

In [None]:
Best_Practices_DF_wide.head()

In [None]:
# Percentage of customers at each best-practice count.
Best_Practices_DF_wide['Best_Practice_All'].value_counts(normalize=True)*100

In [None]:
# Per-customer arming activity for 2022-12-01 <= date < 2023-01-01: summed arm and
# disarm command counts and the number of distinct dates with a record. The date_sql
# CTE counts the distinct dates present in the table for the same window; that single
# value is repeated on every output row as count_of_dates_arming.
Arming_Query=  '''

with date_sql as (
select  count(distinct date(date)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< '2023-01-01' and date(date)>= '2022-12-01'

)

select id_cust as customer_id,sum(count_arm_commands) as sum_arm_commands,sum(count_disarm_commands) as sum_disarm_commands,count (distinct date(date)) as number_days_arming_disarming, (select count_of_dates from date_sql ) as count_of_dates_arming
from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< '2023-01-01' and date(date)>= '2022-12-01'
group by customer_id
order by number_days_arming_disarming desc

'''


In [None]:
Arming_DF=extract_bq_data(bq_client, sql=Arming_Query)

In [None]:
# count_of_dates_arming is the same on every row, so max() just extracts that value.
max_number_arming_dates=Arming_DF['count_of_dates_arming'].max()
# Percentage of the available dates on which the customer has an arming record.
Arming_DF['Arming_Consistency']=Arming_DF['number_days_arming_disarming'].apply(lambda x: x*100/max_number_arming_dates)

In [None]:
max_number_arming_dates

In [None]:
# Marks every customer present in the arming extract.
Arming_DF['Arming_Flag']=1

In [None]:
# Share (in %) of Arming_DF rows at each Arming_Consistency value, highest value first.
# The axis and the series are named explicitly so the result does not depend on the
# column labels reset_index() generates: pandas 1.x calls the value column 'index',
# pandas 2.x uses the series name, which made the old sort_values(by='index') fail.
(
    Arming_DF['Arming_Consistency']
    .value_counts(normalize=True)
    .mul(100)
    .sort_index(ascending=False)
    .rename_axis('Arming_Consistency')
    .rename('share_pct')
    .reset_index()
)

In [None]:
# Per-customer login activity for 2022-12-01 <= login_dt_utc < 2023-01-01: summed
# login_count and the number of distinct login dates. The date_sql CTE counts the
# distinct dates present in the table for the same window; that single value is
# repeated on every output row as count_of_dates_logins.
Login_SQL='''

with date_sql as (
select  count(distinct date(login_dt_utc)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< '2023-01-01' and date(login_dt_utc)>= '2022-12-01' 

)



select customer_id,sum(login_count) as sum_login_count,count (distinct date(login_dt_utc)) as number_of_login_days, (select count_of_dates from date_sql ) as count_of_dates_logins
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< '2023-01-01' and date(login_dt_utc)>= '2022-12-01' 
group by customer_id
order by number_of_login_days desc


'''

In [None]:
Login_DF=extract_bq_data(bq_client, sql=Login_SQL)

In [None]:
# count_of_dates_logins is the same on every row, so max() just extracts that value.
max_number_login_dates=Login_DF['count_of_dates_logins'].max()
# Percentage of the available dates on which the customer logged in.
Login_DF['Login_Consistency']=Login_DF['number_of_login_days'].apply(lambda x: x*100/max_number_login_dates)

In [None]:
max_number_login_dates

In [None]:
Login_DF.head()

In [None]:
# Percentage of customers at each Login_Consistency value.
Login_DF['Login_Consistency'].value_counts(normalize=True)*100

In [None]:
# Trouble-condition rows whose start and closed indicators are both 'Y' and whose
# start date falls in 2022-10-01 <= start_date_utc < 2023-01-01.
TC_issue_query='''

select dealer_name,customer_id,unit_id,device_id,trouble_condition_name,trouble_condition_group_id,trouble_condition_group_desc, start_date_utc,end_date_utc
from `cio-datahub-enterprise-pr-183a.src_adc.bq_troublecondition_data`
where trouble_condition_start_ind='Y'and trouble_condition_closed_ind='Y' 
and date(start_date_utc) >= '2022-10-01' and date(start_date_utc) <'2023-01-01' 


'''

In [None]:
TC_data= extract_bq_data(bq_client, sql=TC_issue_query)

In [None]:
TC_data.head()

In [None]:
# Helper column of ones so that summing it in the pivot counts rows.
TC_data['count']=1

# One row per customer, one column per trouble_condition_name, cell = number of rows.
TC_pivot_table=pd.pivot_table(TC_data, values=['count'], index=['customer_id'], columns='trouble_condition_name',
                          aggfunc={'count': np.sum})

In [None]:
# customer_id becomes a regular column; column labels stay two-level tuples.
TC_pivot_table=TC_pivot_table.reset_index()

In [None]:
TC_pivot_table.head()

In [None]:
# Customer/condition combinations with no rows become 0.
TC_pivot_table.fillna(0,inplace=True)

In [None]:
# Flatten the two-level labels to 'TC_Last3M_<level0>_<level1>'. The customer_id column
# has an empty second level and therefore ends up as 'TC_Last3M_customer_id_'.
TC_pivot_table.columns=['TC_Last3M_'+'_'.join(col).strip() for col in TC_pivot_table.columns.values]

In [None]:
# TC_pivot_table.columns=['customer_id','TC_BroadbandCommFailure_count','TC_CameraNotCommunicating_count','TC_CameraNotReachable_count']

In [None]:
# TC_pivot_table.fillna(0,inplace=True)

In [None]:
TC_pivot_table.head()

In [None]:
# Same counting pivot as TC_pivot_table, but with one column per
# trouble_condition_group_desc. Relies on the TC_data['count'] column of ones
# created in an earlier cell.
TCG_Pivot=pd.pivot_table(TC_data, values=['count'], index=['customer_id'], columns='trouble_condition_group_desc',
                          aggfunc={'count': np.sum})

In [None]:
# customer_id back to a column; combinations with no rows become 0.
TCG_Pivot=TCG_Pivot.reset_index()
TCG_Pivot.fillna(0,inplace=True)

In [None]:
TCG_Pivot.head()

In [None]:
# Flatten the two-level labels to 'TCG_Last3M_<level0>_<level1>'; the customer_id
# column ends up as 'TCG_Last3M_customer_id_'.
TCG_Pivot.columns=['TCG_Last3M_'+'_'.join(col).strip() for col in TCG_Pivot.columns.values]

In [None]:
TCG_Pivot.head()

In [None]:
# TC_pivot_table['TC_BroadbandCommFailure_Flag']=TC_pivot_table['TC_BroadbandCommFailure_count'].apply(lambda x: 1 if x>0 else 0)
# TC_pivot_table['TC_CameraNotCommunicating_Flag']=TC_pivot_table['TC_CameraNotCommunicating_count'].apply(lambda x: 1 if x>0 else 0)
# TC_pivot_table['TC_CameraNotReachable_Flag']=TC_pivot_table['TC_CameraNotReachable_count'].apply(lambda x: 1 if x>0 else 0)

In [None]:
# Build the master frame: left-join each feature table onto the customer base so every
# customer row is kept even when it has no features in a given table.
Merge_DF=Customer_info.merge(Best_Practices_DF_wide,on='customer_id',how='left')

In [None]:
Merge_DF_1=Merge_DF.merge(Arming_DF,on='customer_id',how='left')

In [None]:
Merge_DF_2=Merge_DF_1.merge(Login_DF,on='customer_id',how='left')

In [None]:
# The pivot tables carry their customer key under the flattened, prefixed name.
Merge_DF_3=Merge_DF_2.merge(TC_pivot_table,left_on='customer_id',right_on='TC_Last3M_customer_id_',how='left')

In [None]:
Merge_DF_4=Merge_DF_3.merge(TCG_Pivot,left_on='customer_id',right_on='TCG_Last3M_customer_id_',how='left')

In [None]:
# Release the intermediate merge results; only Merge_DF_4 is used from here on.
del Merge_DF_1,Merge_DF_2,Merge_DF_3

In [None]:
# Replace every NaN with 0. This applies to all columns of the frame, not only to the
# numeric feature columns introduced by the left joins.
Merge_DF_4=Merge_DF_4.replace(np.nan, 0)

In [None]:
# Remove the redundant key columns brought in by the pivot merges.
# NOTE(review): 'customer_id_1' is presumably the duplicate customer_id returned by the
# `SELECT *` join in Customer_details - confirm it exists in the extract.
Merge_DF_4 = Merge_DF_4.drop(['customer_id_1','TC_Last3M_customer_id_','TCG_Last3M_customer_id_'], axis=1)

In [None]:
# Percentage of rows per dealer name.
Merge_DF_4['dealer_name'].value_counts(normalize=True)*100

In [None]:
# Percentage of rows per dealer type flag.
Merge_DF_4['Dealer_type_flag'].value_counts(normalize=True)*100

In [None]:
# Number of rows for every (dealer_name, Dealer_type_flag) pair, shown as a flat table.
pd.DataFrame(
    Merge_DF_4
    .groupby(['dealer_name', 'Dealer_type_flag'])
    .agg(
        Customer_count=('Dealer_type_flag', 'count'),
        # Churn_total=('Churn_flag',lambda x: x.sum()),
        # Churn_rate=('Churn_flag',lambda x: x.mean()*100)
    )
    .reset_index()
)

In [None]:
# Regex of substrings to turn into '_' in the column names.
# Raw string: '\|' is not a valid escape sequence in a normal string literal and
# triggers an invalid-escape warning on current Python versions.
# NOTE(review): as a regex this matches a space, the two-character sequence '|-',
# a colon, or a slash. A lone '-' or '|' is NOT matched - confirm that is intended.
rep_chars = r' |\|-|:|/'

# regex=True is explicit because the default of str.replace changed to a literal
# match in pandas 2.0, which would make this pattern match nothing.
Merge_DF_4.columns = Merge_DF_4.columns.str.replace(rep_chars, '_', regex=True)

In [None]:
# List every column with its dtype and non-null count to check the renamed columns.
Merge_DF_4.info(verbose=True)

In [None]:
def Segment_making(row
                   ,min_login_consistency_threshold=50
                   ,min_arming_consistency_threshold=50
                   ,max_login_consistency_threshold=50
                   ,max_arming_consistency_threshold=50):
    """Assign an engagement segment label to one customer row.

    Parameters
    ----------
    row : mapping or pandas Series with the keys 'Best_Practice_All',
        'Login_Consistency' and 'Arming_Consistency'.
    min_login_consistency_threshold, min_arming_consistency_threshold :
        a consistency strictly below this value counts as "low".
    max_login_consistency_threshold, max_arming_consistency_threshold :
        a consistency at or above this value counts as "high".

    Returns
    -------
    str
        The first matching label, checked in this order:
        'Heavy_User'            - at least 5 best practices, high login and high arming
        'Home_automation_Savvy' - high login, low arming
        'Old_Fashion'           - low login, high arming
        'Disengaged'            - login and arming consistency both exactly 0
        'Moderate_Users'        - everything else
    """
    login = row['Login_Consistency']
    arming = row['Arming_Consistency']

    if row['Best_Practice_All']>=5 and login>=max_login_consistency_threshold and arming>=max_arming_consistency_threshold:
        return 'Heavy_User'
    elif login>=max_login_consistency_threshold and arming< min_arming_consistency_threshold:
        return 'Home_automation_Savvy'
    # Arming is compared against the arming threshold; this branch previously used
    # max_login_consistency_threshold, which only worked while both defaults were equal.
    elif login< min_login_consistency_threshold and arming>=max_arming_consistency_threshold:
        return 'Old_Fashion'
    elif login==0 and arming==0:
        return 'Disengaged'
    else:
        return "Moderate_Users"

In [None]:
# Label every customer row; Segment_making is called once per row with its default
# thresholds.
Merge_DF_4['Segment']=Merge_DF_4.apply(Segment_making,axis=1)

In [None]:
# Number of customers per segment.
Merge_DF_4['Segment'].value_counts()

In [None]:
# Percentage of customers per segment.
Merge_DF_4['Segment'].value_counts(normalize=True)*100

In [None]:
# Size of each (Dealer_type_flag, Segment) group and its percentage of all rows in
# Merge_DF_4, shown as a flat table.
pd.DataFrame(
    Merge_DF_4
    .groupby(['Dealer_type_flag', 'Segment'])
    .agg(
        Customer_count=('customer_id', 'count'),
        Customer_Share=('customer_id', lambda ids: ids.count() * 100 / len(Merge_DF_4)),
        # Churn_total=('Churn_flag',lambda x: x.sum()),
        # Churn_rate=('Churn_flag',lambda x: x.mean()*100)
    )
    .reset_index()
)

In [None]:
# Absolute row counts per dealer name in the final merged frame.
Merge_DF_4['dealer_name'].value_counts()

In [None]:
# Merge_DF_4['dealer_name'=='ADT by TELUS'].to_csv('ADC_Master_Data_Sep2022_ADT.csv',index=False)

In [None]:
# config= bigquery.job.LoadJobConfig()

# # config._properties['timePartitioning'] = {'field': 'Month_Year'}
# config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

# bq_table_instance= bq_client.load_table_from_dataframe(Merge_DF_4, Table_BQ,job_config=config)

In [None]:
# Remove two trouble-condition count columns before the frame is loaded to BigQuery.
# NOTE(review): presumably they are not part of the target table schema - confirm.
# These columns come from a pivot over the observed trouble conditions, so they may be
# missing from a given extract; errors='ignore' plus reassignment (instead of an
# in-place drop) keeps the cell from raising KeyError then, or when it is re-run.
Merge_DF_4 = Merge_DF_4.drop(
    columns=['TC_Last3M_count_CredentialsInConflict','TC_Last3M_count_LowControllerBattery'],
    errors='ignore',
)

In [None]:
# Remove the AuxSupply trouble-condition count column before the BigQuery load.
# The column comes from a pivot over the observed data and may be absent;
# errors='ignore' plus reassignment keeps the cell safe to re-run.
Merge_DF_4 = Merge_DF_4.drop(columns=['TC_Last3M_count_AuxSupply'], errors='ignore')

In [None]:
# Scratch check of today's date; the value is only displayed, not assigned.
# NOTE(review): this import sits mid-notebook rather than in the top import cell.
from datetime import date
date.today()

In [None]:
# Same date rendered as an ISO 'YYYY-MM-DD' string (display only).
date.today().strftime('%Y-%m-%d')

In [None]:
# Append the master frame to the feature-store table.
# WARNING: WRITE_APPEND is not idempotent - every execution of this cell adds the
# full frame again, so re-running it for the same snapshot duplicates rows.
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND

Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

bq_table_instance= bq_client.load_table_from_dataframe(Merge_DF_4, Table_BQ,job_config=config)
# load_table_from_dataframe only starts the load job; result() blocks until it
# finishes and raises if the load failed (e.g. on a schema mismatch), so a failed
# append no longer passes silently.
bq_table_instance.result()

In [None]:
# Merge_DF_4.to_csv('ADC_Master_Data_Nov2022.csv',index=False)

In [None]:
# ADC_Master_Query= '''


# select * from `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
# where Month_Snapshot!='2022-09-30'


# '''

In [None]:
# ADC_Master_DF=extract_bq_data(bq_client, sql=ADC_Master_Query)

In [None]:
# ADC_Master_DF.info()

In [None]:
# ADC_Master_DF['Month_Snapshot'].value_counts()

In [None]:
# config= bigquery.job.LoadJobConfig()

# # config._properties['timePartitioning'] = {'field': 'Month_Year'}
# config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Table_BQ = 'ADC_Feature_Datastore.ADC_Master_Data'

# bq_table_instance= bq_client.load_table_from_dataframe(ADC_Master_DF, Table_BQ,job_config=config)