In [None]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root: everything in the working-directory path before the first
# occurrence of 'notebooks'.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package can be imported.
sys.path.insert(0, str(pth_project))
# Parse the local project config. Reading the text up front avoids leaving an
# open file handle behind, which passing `pth_creds.open()` to safe_load did.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Create the BigQuery client for the GCP project named in the project config.
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Allow up to 100 rows when a frame is displayed.
pd.set_option('display.max_rows', 100)

In [None]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result of ``to_dataframe()``.

    Parameters
    ----------
    bq_client
        Object exposing ``query(sql)``; the value it returns must provide
        ``to_dataframe()``.
    sql : str, optional
        Query text. Takes precedence over ``pth_query`` when both are given.
    pth_query : pathlib.Path or str, optional
        File holding the query text; only read when ``sql`` is None. Any object
        with a ``read_text()`` method is used as is, anything else is wrapped
        in ``Path`` so plain string paths work as well.

    Returns
    -------
    The value returned by ``bq_client.query(sql).to_dataframe()``.

    Raises
    ------
    ValueError
        If neither ``sql`` nor ``pth_query`` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Keep duck-typed inputs working; only coerce things that cannot be read directly.
        query_file = pth_query if hasattr(pth_query, 'read_text') else Path(pth_query)
        sql = query_file.read_text()
    return bq_client.query(sql).to_dataframe()

In [None]:
# SQL for the segment under study: every column of the Customer_Segment table,
# restricted to rows whose Segment equals 'Home_automation_Savvy'.
# NOTE(review): the project/dataset id is hard-coded in the query text.
Segment_query='''


select * from `divgpras-pr-579355.ADC.Customer_Segment`
where Segment='Home_automation_Savvy'

'''

In [None]:
# Run the segment query; the result is the base frame that later cells join onto.
Segment_DF= extract_bq_data(bq_client, sql=Segment_query)

In [None]:
# Column names, dtypes and non-null counts of the segment extract.
Segment_DF.info()

In [None]:
# Frequency of each distinct Avg_Arming_per_day value in the segment extract.
Segment_DF['Avg_Arming_per_day'].value_counts()

In [None]:
# Summary statistics of Avg_Arming_per_day in the segment extract.
Segment_DF['Avg_Arming_per_day'].describe()

In [None]:
# Summary statistics of Avg_DisArming_per_day in the segment extract.
Segment_DF['Avg_DisArming_per_day'].describe()

In [None]:
# Summary statistics of Avg_logins_per_day in the segment extract.
Segment_DF['Avg_logins_per_day'].describe()

In [None]:
# Login activity aggregated per (customer_id, login_type_desc):
#   sum_login_count       - sum of login_count
#   number_of_login_days  - distinct login dates for that customer and login type
#   count_of_dates_logins - distinct login dates over the whole table (from the
#                           date_sql CTE), repeated on every output row
# The query has no WHERE clause, so it covers every row of bq_customer_daily_logins.
Login_SQL='''

with date_sql as (
select  count(distinct date(login_dt_utc)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
)



select customer_id,login_type_desc,sum(login_count) as sum_login_count,count (distinct date(login_dt_utc)) as number_of_login_days, (select count_of_dates from date_sql ) as count_of_dates_logins
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
group by customer_id,login_type_desc
order by customer_id


'''

In [None]:
# Run the login aggregation: one row per customer_id and login_type_desc.
Login_DF= extract_bq_data(bq_client, sql=Login_SQL)

In [None]:
# First rows of the long-format login aggregates.
Login_DF.head()

In [None]:
# Reshape to one row per customer: each login type becomes a pair of columns
# (total login count, number of login days). reset_index turns customer_id back
# into an ordinary column.
Login_DF_wide = (
    Login_DF
    .pivot(
        index='customer_id',
        columns='login_type_desc',
        values=['sum_login_count', 'number_of_login_days'],
    )
    .reset_index()
)


In [None]:
# Column labels, dtypes and non-null counts of the pivoted frame.
Login_DF_wide.info()

In [None]:
# Replace the pivoted column labels with flat names.
# NOTE(review): the assignment is positional - it assumes the pivot produced exactly
# three login types and that they come out in the order Mobile, VCD, Web within each
# value column. Confirm against the .info() output of the pivoted frame; a different
# order would silently mislabel the columns.
Login_DF_wide.columns=['customer_id','Mobile_login_Counts','VCD_Login_Counts','Web_login_Counts','Mobile_login_days','VCD_Login_days','Web_login_days']

In [None]:
# First rows of the wide login frame after renaming.
Login_DF_wide.head()

In [None]:
# Replace every missing value in the wide login frame with 0, so a customer with no
# row for a login type counts as zero logins / zero login days. Reassigning the
# result instead of using inplace=True avoids mutating the frame in place.
Login_DF_wide = Login_DF_wide.fillna(0)

In [None]:

# Total logins per customer across the three login types. Column arithmetic replaces
# the row-by-row apply(axis=1); missing values propagate the same way as with `+`.
Login_DF_wide['All_login_Counts'] = (
    Login_DF_wide['Mobile_login_Counts']
    + Login_DF_wide['VCD_Login_Counts']
    + Login_DF_wide['Web_login_Counts']
)


In [None]:

# Share (in percent) of each login type in the customer's total logins.
# Computed column-wise rather than with apply(axis=1): besides being much faster,
# the row-wise version can raise ZeroDivisionError for a customer whose total is 0
# when the row is materialised as Python objects. On numeric columns a zero total
# yields a missing/non-finite share here instead of an exception.
Login_DF_wide['Mobile_login_share'] = Login_DF_wide['Mobile_login_Counts'] * 100 / Login_DF_wide['All_login_Counts']
Login_DF_wide['VCD_login_share'] = Login_DF_wide['VCD_Login_Counts'] * 100 / Login_DF_wide['All_login_Counts']
Login_DF_wide['Web_login_share'] = Login_DF_wide['Web_login_Counts'] * 100 / Login_DF_wide['All_login_Counts']



In [None]:
# Summary statistics of the mobile login share over all customers in the login data.
Login_DF_wide['Mobile_login_share'].describe()

In [None]:
# Summary statistics of the VCD login share over all customers in the login data.
Login_DF_wide['VCD_login_share'].describe()

In [None]:
# Summary statistics of the web login share over all customers in the login data.
Login_DF_wide['Web_login_share'].describe()

In [None]:
# Attach the per-customer login features to the segment rows. The left join keeps
# every segment row; customers absent from Login_DF_wide get missing login columns.
Segment_DF_1 = pd.merge(
    Segment_DF,
    Login_DF_wide,
    how='left',
    on='customer_id',
)

In [None]:
# First rows of the segment frame with login features attached.
Segment_DF_1.head()

In [None]:
# Summary statistics of the mobile login share, restricted to the segment's rows.
Segment_DF_1['Mobile_login_share'].describe()

In [None]:
# Summary statistics of the VCD login share, restricted to the segment's rows.
Segment_DF_1['VCD_login_share'].describe()

In [None]:
# Summary statistics of the web login share, restricted to the segment's rows.
Segment_DF_1['Web_login_share'].describe()

In [None]:
# SQL pulling every column and every row of the Device_Type_mapping table.
# NOTE(review): the project/dataset id is hard-coded in the query text.
Device_types_SQL='''

select * from `divgpras-pr-579355.ADC.Device_Type_mapping`

'''

In [None]:
# Run the device-type query and keep the result for the join onto the segment frame.
Device_type_df= extract_bq_data(bq_client, sql=Device_types_SQL)

In [None]:
# Column names, dtypes and non-null counts of the raw device-type extract.
Device_type_df.info()

In [None]:
# Prefix every device column with 'device_type_'. Renaming by label (rather than
# assigning a positional list of names) leaves 'customer_id' untouched wherever it
# sits in the table, so columns cannot be mislabelled when it is not the first one.
# feature_cols keeps the original, unprefixed column names.
feature_cols = Device_type_df.drop('customer_id', axis=1).columns.to_list()

Device_type_df = Device_type_df.rename(
    columns={col: 'device_type_' + str(col) for col in feature_cols}
)

In [None]:
# Re-check the column names after adding the 'device_type_' prefix.
Device_type_df.info()

In [None]:
# First rows of the device-type frame with prefixed columns.
Device_type_df.head()

In [None]:
# Marker column: constant 1 on every device-table row, so that after the left join
# a missing Device_Flag identifies rows with no device record.
Device_type_df['Device_Flag']=1

In [None]:
# Add the device-type columns to the segment frame. The left join keeps every row
# of Segment_DF_1; customers without a device record get missing device columns.
Segment_DF_2 = pd.merge(
    Segment_DF_1,
    Device_type_df,
    how='left',
    on='customer_id',
)

In [None]:
# Drop the notebook's reference to the original segment extract now that the joined
# frames exist. NOTE(review): after this runs, the earlier cell that merges
# Segment_DF with Login_DF_wide cannot be re-run without re-running the extract.
del Segment_DF

In [None]:
# Device_Flag total as a percentage of the row count of Segment_DF_2, i.e. the share
# of rows that picked up a device record in the join.
100 * Segment_DF_2['Device_Flag'].sum() / len(Segment_DF_2)

In [None]:
# Summary statistics of device_type_Total_types_of_devices across the segment rows.
Segment_DF_2['device_type_Total_types_of_devices'].describe()

In [None]:
# Frequency of each distinct device_type_Total_types_of_devices value.
Segment_DF_2['device_type_Total_types_of_devices'].value_counts()

In [None]:
# SQL pulling every column and every row of the
# Trouble_Condition_Group_last3months_Aug2022 table.
# NOTE(review): the project/dataset id and the dated table name are hard-coded.
TC_Query='''

select * from `divgpras-pr-579355.ADC.Trouble_Condition_Group_last3months_Aug2022`
'''

In [None]:
# Run the trouble-condition query and keep the result for the join onto the segment frame.
TC_Data=extract_bq_data(bq_client, sql=TC_Query)

In [None]:
# Column names, dtypes and non-null counts of the trouble-condition extract.
TC_Data.info()

In [None]:
# Add the trouble-condition columns to the segment frame. The left join keeps every
# row of Segment_DF_2; customers absent from TC_Data get missing values.
Segment_DF_3 = pd.merge(
    Segment_DF_2,
    TC_Data,
    how='left',
    on='customer_id',
)

In [None]:
# Percentage of rows with a non-missing ref_end_date.
# NOTE(review): ref_end_date presumably comes from TC_Data, which would make this the
# share of rows that matched a trouble-condition record - confirm.
(1 - Segment_DF_3['ref_end_date'].isna().sum() / len(Segment_DF_3)) * 100

In [None]:
# Percentage of rows with at least one Central Station Condition issue in the
# last-3-months count column.
Segment_DF_3['Central_Station_Condition_Issue_count_last3M'].gt(0).sum() * 100 / len(Segment_DF_3)

In [None]:
# Percentage of rows with at least one Critical System issue in the
# last-3-months count column.
Segment_DF_3['Critical_System_Issue_count_last3M'].gt(0).sum() * 100 / len(Segment_DF_3)

In [None]:
# Percentage of rows with at least one Engagement issue in the
# last-3-months count column.
Segment_DF_3['Engagement_Issue_count_last3M'].gt(0).sum() * 100 / len(Segment_DF_3)

In [None]:
# Percentage of rows with at least one System Condition issue in the
# last-3-months count column.
Segment_DF_3['System_Condition_Issue_count_last3M'].gt(0).sum() * 100 / len(Segment_DF_3)

In [None]:
# Summary statistics of the Central Station Condition issue count.
Segment_DF_3['Central_Station_Condition_Issue_count_last3M'].describe()

In [None]:
# Summary statistics of the Critical System issue count.
Segment_DF_3['Critical_System_Issue_count_last3M'].describe()

In [None]:
# Summary statistics of the Engagement issue count.
Segment_DF_3['Engagement_Issue_count_last3M'].describe()

In [None]:
# Summary statistics of the System Condition issue count.
Segment_DF_3['System_Condition_Issue_count_last3M'].describe()

In [None]:
def _avg_issue_duration_hours(frame, count_col, duration_sum_col):
    """Return duration sum divided by issue count, with 0 where the count is 0.

    Rows whose count is missing are not treated as zero: their result is whatever
    the division yields (a missing value when both inputs are missing).
    """
    issue_count = frame[count_col]
    avg_hours = frame[duration_sum_col] / issue_count
    # Only genuine zero counts are forced to 0; a missing comparison result counts as "not zero".
    is_zero_count = issue_count.eq(0).fillna(False).astype(bool)
    return avg_hours.mask(is_zero_count, 0)


# Average duration (hours) per issue for each trouble-condition group, computed
# column-wise instead of with four copy-pasted row-by-row apply(axis=1) calls.
# The output column names are kept exactly as before (including their inconsistent
# 'Issue' wording) because later cells read them by name.
Segment_DF_3['Central_Station_Condition_Issue_Avg_duration_hours_last3M'] = _avg_issue_duration_hours(
    Segment_DF_3,
    'Central_Station_Condition_Issue_count_last3M',
    'Central_Station_Condition_Issue_duration_hours_sum_last3M',
)
Segment_DF_3['Critical_System_Avg_duration_hours_last3M'] = _avg_issue_duration_hours(
    Segment_DF_3,
    'Critical_System_Issue_count_last3M',
    'Critical_System_Issue_duration_hours_sum_last3M',
)
Segment_DF_3['Engagement_Issue_Issue_Avg_duration_hours_last3M'] = _avg_issue_duration_hours(
    Segment_DF_3,
    'Engagement_Issue_count_last3M',
    'Engagement_Issue_duration_hours_sum_last3M',
)
Segment_DF_3['System_Condition_Issue_Avg_duration_hours_last3M'] = _avg_issue_duration_hours(
    Segment_DF_3,
    'System_Condition_Issue_count_last3M',
    'System_Condition_Issue_duration_hours_sum_last3M',
)

In [None]:
# Summary statistics of the average duration (hours) per Central Station Condition issue.
Segment_DF_3['Central_Station_Condition_Issue_Avg_duration_hours_last3M'].describe()

In [None]:
# Summary statistics of the average duration (hours) per Critical System issue.
Segment_DF_3['Critical_System_Avg_duration_hours_last3M'].describe()

In [None]:
# Summary statistics of the average duration (hours) per Engagement issue.
Segment_DF_3['Engagement_Issue_Issue_Avg_duration_hours_last3M'].describe()

In [None]:
# Summary statistics of the average duration (hours) per System Condition issue.
Segment_DF_3['System_Condition_Issue_Avg_duration_hours_last3M'].describe()