In [None]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root: the part of the current working directory that precedes the
# first occurrence of 'notebooks' (the whole cwd if that substring is absent).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package imported below resolves.
sys.path.insert(0, str(pth_project))
# Parse the YAML config from its text: read_text() opens and closes the file,
# whereas handing pth_creds.open() straight to safe_load left the handle unclosed.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when pandas objects are displayed.
pd.options.display.max_rows = 100

In [None]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return its result as a DataFrame.

    Parameters
    ----------
    bq_client
        Object exposing ``query(sql)`` whose return value has ``to_dataframe()``.
    sql : str, optional
        Query text. Takes precedence over ``pth_query`` when both are given.
    pth_query : str or path-like with ``read_text()``, optional
        Location of a file holding the query text; only read when ``sql`` is None.

    Returns
    -------
    The value of ``bq_client.query(sql).to_dataframe()``.

    Raises
    ------
    ValueError
        If neither ``sql`` nor ``pth_query`` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Plain string paths are accepted too; objects that already offer
        # read_text() (e.g. pathlib.Path) are used as they are.
        if isinstance(pth_query, str):
            pth_query = Path(pth_query)
        sql = pth_query.read_text()
    return bq_client.query(sql).to_dataframe()

In [None]:
# BigQuery script (kept as text) that assembles the churn-analysis dataset:
#   * ADC_data: ADC_Master_Data rows with Month_Snapshot = _mnth_snpsht and
#     dealer_name = 'TELUS Communications Inc.', plus a derived `Package` column
#     ('Smart_Camera', 'Smart_Automation_Plus', otherwise 'Monitored').
#   * Telus_customers: one row per cust_bus_cust_id (latest prod_instnc_ts) among
#     status 'A', customer type 'R', product type 'SMHM', source id 1001
#     product instances dated _end_dt_snpsht, with contract start/end timestamps.
#   * Telus_internet_customers: same date/status/customer-type filters for product
#     type 'HSIC', exposed as Telus_Internet_customers = 1.
#   * Telus_SMHM_Deacts: distinct BANs from SHS_DEACTS_DEC2022, exposed as
#     Telus_Churn_Flag = 1.
# The final SELECT * inner-joins ADC_data to Telus_customers and left-joins the
# two flag CTEs, so both flags are NULL for rows without a match.
# NOTE(review): the two snapshot dates and the deactivation table name are
# hard-coded; they presumably have to be changed together for another month.
# NOTE(review): SELECT * over the joined CTEs returns the join keys
# (cust_bus_cust_id, BAN) more than once - confirm how the duplicates are named
# in the resulting DataFrame.
Query='''

DECLARE _end_dt_snpsht DATE DEFAULT '2022-11-30';
DECLARE _mnth_snpsht DATE DEFAULT '2022-11-01';

WITH
  ADC_data AS(
  SELECT
    customer_id,
    dealer_customer_id,
    BAN,
    Best_partices_1,
    Best_partices_2,
    Best_partices_3,
    Best_partices_4,
    Best_partices_5,
    Best_partices_6,
    Best_Practice_All,
    Best_Practice_All_flag,
    number_days_arming_disarming,
    Arming_Consistency,
    number_of_login_days,
    Login_Consistency,
    Segment,
    TC_Last3M_count_BroadbandCommFailure,
    TC_Last3M_count_CameraNotCommunicating,
    TC_Last3M_count_CameraNotReachable,
    
    
    CASE
      WHEN account_type_name='Standalone' THEN 'Smart_Camera'
      WHEN account_type_name='Awareness and Automation' THEN 'Smart_Automation_Plus'
    ELSE
    'Monitored'
  END
    AS Package
  FROM
    `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
  WHERE
    Month_Snapshot=_mnth_snpsht
    AND dealer_name='TELUS Communications Inc.' ),
    


Telus_customers as(

select cust_bus_cust_id,pi_cntrct_end_ts as contract_end_date,pi_cntrct_start_ts as contract_start_date
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = _end_dt_snpsht #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY prod_instnc_ts DESC) = 1
order by cust_bus_cust_id

)



,Telus_internet_customers as

(

select cust_bus_cust_id as cust_bus_cust_id,1 as Telus_Internet_customers
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = _end_dt_snpsht  #Snapshot of the last day of the month
and pi_prod_instnc_stat_cd in ('A')
and pi_prod_instnc_typ_cd ='HSIC'
and consldt_cust_typ_cd = 'R'
QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY prod_instnc_ts DESC) = 1

)


, Telus_SMHM_Deacts as


(


select distinct BILLING_ACCOUNT_NUM as BAN ,1 as Telus_Churn_Flag  

--from `divgpras-pr-579355.SHS.SHS_DEACTS_OCT2022`
--from `divgpras-pr-579355.SHS.SHS_DEACTS_NOV2022`
from `divgpras-pr-579355.SHS.SHS_DEACTS_DEC2022`

)


select * from ADC_data as ADC
inner join Telus_customers as Telus
on ADC.dealer_customer_id=Telus.cust_bus_cust_id
left join Telus_internet_customers as Telus_int
on ADC.dealer_customer_id=Telus_int.cust_bus_cust_id
left join Telus_SMHM_Deacts c
on ADC.BAN=c.BAN


'''

In [None]:
# Execute the SQL script held in `Query` and keep the result as DF.
DF=extract_bq_data(bq_client, sql=Query)

In [None]:
# Column names, dtypes and non-null counts of the extracted frame.
DF.info()

In [None]:
# Preview the first rows of the extracted frame.
DF.head()

In [None]:
# Replace every missing value in DF with 0; this turns the NULL flags produced
# by the left joins (Telus_Internet_customers, Telus_Churn_Flag) into 0.
# NOTE(review): the fill is applied to all columns, including non-numeric ones
# such as Segment and the contract dates - confirm that is intended.
DF = DF.fillna(0)

In [None]:
# Percentage of rows per distinct Arming_Consistency value.
DF['Arming_Consistency'].value_counts(normalize=True).mul(100)

In [None]:
def Arming_category_making(row):
    """Return the arming-consistency band label for one row.

    Parameters
    ----------
    row : mapping-like
        Anything supporting ``row['Arming_Consistency']`` (e.g. the Series that
        ``DataFrame.apply(..., axis=1)`` passes in).

    Returns
    -------
    str
        'Arming_0%' when the value equals 0, 'Arming_0_20%' for (0, 20],
        'Arming_20_50%' for (20, 50], 'Arming_50_more%' above 50, and
        "None_of_Above" when no comparison holds (negative values, NaN).
    """
    consistency = row['Arming_Consistency']

    if consistency == 0:
        return 'Arming_0%'
    if 0 < consistency <= 20:
        # Label previously ended in a stray doubled '%%', unlike its siblings.
        return 'Arming_0_20%'
    if 20 < consistency <= 50:
        return 'Arming_20_50%'
    if consistency > 50:
        return 'Arming_50_more%'
    return "None_of_Above"

In [None]:

# Label each row with its arming-consistency band. Only the column the
# classifier reads is fed to the row-wise apply, so pandas does not have to
# build a full-width row object for every record.
DF['Arming_category'] = DF[['Arming_Consistency']].apply(Arming_category_making, axis=1)

In [None]:
# Percentage of rows in each arming band.
DF['Arming_category'].value_counts(normalize=True).mul(100)

In [None]:
# Percentage of rows per distinct Login_Consistency value.
DF['Login_Consistency'].value_counts(normalize=True).mul(100)

In [None]:
def login_category_making(row):
    """Return the login-consistency band label for one row.

    Parameters
    ----------
    row : mapping-like
        Anything supporting ``row['Login_Consistency']`` (e.g. the Series that
        ``DataFrame.apply(..., axis=1)`` passes in).

    Returns
    -------
    str
        'Login_0%' when the value equals 0, 'Login_0_20%' for (0, 20],
        'Login_20_50%' for (20, 50], 'Login_50_more%' above 50, and
        "None_of_Above" when no comparison holds (negative values, NaN).
    """
    consistency = row['Login_Consistency']

    if consistency == 0:
        return 'Login_0%'
    if 0 < consistency <= 20:
        # Label previously ended in a stray doubled '%%', unlike its siblings.
        return 'Login_0_20%'
    if 20 < consistency <= 50:
        return 'Login_20_50%'
    if consistency > 50:
        return 'Login_50_more%'
    return "None_of_Above"

In [None]:
# Label each row with its login-consistency band. Only the column the
# classifier reads is fed to the row-wise apply, so pandas does not have to
# build a full-width row object for every record.
DF['Login_category'] = DF[['Login_Consistency']].apply(login_category_making, axis=1)

In [None]:
# Percentage of rows in each login band.
DF['Login_category'].value_counts(normalize=True).mul(100)

In [None]:
# Row counts per Telus_Churn_Flag value.
DF['Telus_Churn_Flag'].value_counts()

In [None]:
# Percentage of rows per Telus_Churn_Flag value.
DF['Telus_Churn_Flag'].value_counts(normalize=True).mul(100)

In [None]:
# DF['Telus_Churn_Flag'].fillna(0,inplace=True)

In [None]:
# Percentage of rows per Telus_Churn_Flag value (repeat of the earlier check;
# the column-level fillna in the preceding cell is commented out).
DF['Telus_Churn_Flag'].value_counts(normalize=True).mul(100)

In [None]:
# Preview the frame again now that the category columns exist.
DF.head()

In [None]:
# DF.to_csv('SHS_Churn_Analysis_OCT2022.csv',index=False)

In [None]:
# Rows per BAN; any count above 1 means a BAN appears on several rows.
DF['BAN'].value_counts()

In [None]:
# Rows per customer_id; any count above 1 means a customer appears on several rows.
DF['customer_id'].value_counts()

In [None]:
# Number of non-null customer_id entries.
DF['customer_id'].count()

In [None]:
# Row counts per Telus_Internet_customers value.
DF['Telus_Internet_customers'].value_counts()

In [None]:
# Share of rows (as a fraction, not a percentage) per Telus_Internet_customers value.
DF['Telus_Internet_customers'].value_counts(normalize=True)

In [None]:
# 0/1 indicators derived from the three-month trouble-condition counts:
# "*_flag" is 1 when the count is above 0, "*_morethan5_flag" when it is above 5.
# Vectorized comparisons replace the per-element Python lambdas; missing counts
# still map to 0 because NaN compares as False.
DF['TC_Last3M_count_BroadbandCommFailure_flag'] = DF['TC_Last3M_count_BroadbandCommFailure'].gt(0).astype('int64')
DF['TC_Last3M_count_CameraNotCommunicating_flag'] = DF['TC_Last3M_count_CameraNotCommunicating'].gt(0).astype('int64')
DF['TC_Last3M_count_CameraNotReachable_flag'] = DF['TC_Last3M_count_CameraNotReachable'].gt(0).astype('int64')
DF['TC_Last3M_count_BroadbandCommFailure_morethan5_flag'] = DF['TC_Last3M_count_BroadbandCommFailure'].gt(5).astype('int64')


In [None]:
# Keep only the rows whose Package is 'Monitored'.
DF_monitored = DF.loc[DF['Package'].eq('Monitored')]

In [None]:
# Share of monitored rows (as a fraction) per Telus_Internet_customers value.
DF_monitored['Telus_Internet_customers'].value_counts(normalize=True)

In [None]:
# Column names, dtypes and non-null counts of the monitored subset.
DF_monitored.info()

In [None]:
# Monitored customers by arming band x login band:
# row count, distinct customers, churned total and churn rate in percent.
(
    DF_monitored
    .groupby(['Arming_category', 'Login_category'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='count'),
        Customer_count_1=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored customers by Segment x Best_Practice_All:
# row count, distinct customers, churned total and churn rate in percent.
(
    DF_monitored
    .groupby(['Segment', 'Best_Practice_All'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='count'),
        Customer_count_1=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored customers by Segment: row count, distinct customers and the sum of
# each best-practice indicator column.
(
    DF_monitored
    .groupby(['Segment'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='count'),
        Customer_count_1=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Best_partices_1=pd.NamedAgg(column='Best_partices_1', aggfunc='sum'),
        Best_partices_2=pd.NamedAgg(column='Best_partices_2', aggfunc='sum'),
        Best_partices_3=pd.NamedAgg(column='Best_partices_3', aggfunc='sum'),
        Best_partices_4=pd.NamedAgg(column='Best_partices_4', aggfunc='sum'),
        Best_partices_5=pd.NamedAgg(column='Best_partices_5', aggfunc='sum'),
        Best_partices_6=pd.NamedAgg(column='Best_partices_6', aggfunc='sum'),
        Best_partices_All_flag=pd.NamedAgg(column='Best_Practice_All_flag', aggfunc='sum'),
    )
    .reset_index()
)

In [None]:
# Monitored rows with Best_partices_1 == 1, per Segment:
# distinct customers, churned total and churn rate in percent.
(
    DF_monitored
    .loc[DF_monitored['Best_partices_1'].eq(1)]
    .groupby(['Segment', 'Best_partices_1'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored rows with Best_partices_2 == 1, per Segment:
# distinct customers, churned total and churn rate in percent.
(
    DF_monitored
    .loc[DF_monitored['Best_partices_2'].eq(1)]
    .groupby(['Segment', 'Best_partices_2'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored rows with Best_partices_3 == 1, per Segment:
# distinct customers, churned total and churn rate in percent.
(
    DF_monitored
    .loc[DF_monitored['Best_partices_3'].eq(1)]
    .groupby(['Segment', 'Best_partices_3'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored rows with Best_partices_4 == 1, per Segment:
# distinct customers, churned total and churn rate in percent.
(
    DF_monitored
    .loc[DF_monitored['Best_partices_4'].eq(1)]
    .groupby(['Segment', 'Best_partices_4'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored rows with Best_partices_5 == 1, per Segment:
# distinct customers, churned total and churn rate in percent.
(
    DF_monitored
    .loc[DF_monitored['Best_partices_5'].eq(1)]
    .groupby(['Segment', 'Best_partices_5'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored rows with Best_partices_6 == 1, per Segment:
# distinct customers, churned total and churn rate in percent.
(
    DF_monitored
    .loc[DF_monitored['Best_partices_6'].eq(1)]
    .groupby(['Segment', 'Best_partices_6'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored rows with Best_Practice_All_flag == 1, per Segment:
# distinct customers, churned total and churn rate in percent.
(
    DF_monitored
    .loc[DF_monitored['Best_Practice_All_flag'].eq(1)]
    .groupby(['Segment', 'Best_Practice_All_flag'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored customers by Telus_Internet_customers: row count, distinct
# customers, number of rows with each trouble-condition flag set, churned
# total and churn rate in percent.
(
    DF_monitored
    .groupby(['Telus_Internet_customers'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='count'),
        Customer_count_1=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        TC_Last3M_count_CameraNotReachable_flag=pd.NamedAgg(
            column='TC_Last3M_count_CameraNotReachable_flag', aggfunc='sum'),
        TC_Last3M_count_CameraNotCommunicating_flag=pd.NamedAgg(
            column='TC_Last3M_count_CameraNotCommunicating_flag', aggfunc='sum'),
        TC_Last3M_count_BroadbandCommFailure_flag=pd.NamedAgg(
            column='TC_Last3M_count_BroadbandCommFailure_flag', aggfunc='sum'),
        TC_Last3M_count_BroadbandCommFailure_morethan5_flag=pd.NamedAgg(
            column='TC_Last3M_count_BroadbandCommFailure_morethan5_flag', aggfunc='sum'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored rows whose broadband-comm-failure flag is set, by
# Telus_Internet_customers: row count, distinct customers, churned total and
# churn rate in percent.
(
    DF_monitored
    .loc[DF_monitored['TC_Last3M_count_BroadbandCommFailure_flag'].gt(0)]
    .groupby(['Telus_Internet_customers', 'TC_Last3M_count_BroadbandCommFailure_flag'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='count'),
        Customer_count_1=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored rows whose camera-not-communicating flag is set, by
# Telus_Internet_customers: row count, distinct customers, churned total and
# churn rate in percent.
(
    DF_monitored
    .loc[DF_monitored['TC_Last3M_count_CameraNotCommunicating_flag'].gt(0)]
    .groupby(['Telus_Internet_customers', 'TC_Last3M_count_CameraNotCommunicating_flag'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='count'),
        Customer_count_1=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored rows whose camera-not-reachable flag is set, by
# Telus_Internet_customers: row count, distinct customers, churned total and
# churn rate in percent.
(
    DF_monitored
    .loc[DF_monitored['TC_Last3M_count_CameraNotReachable_flag'].gt(0)]
    .groupby(['Telus_Internet_customers', 'TC_Last3M_count_CameraNotReachable_flag'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='count'),
        Customer_count_1=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# Monitored rows whose more-than-5 broadband-comm-failure flag is set, by
# Telus_Internet_customers: row count, distinct customers, churned total and
# churn rate in percent.
(
    DF_monitored
    .loc[DF_monitored['TC_Last3M_count_BroadbandCommFailure_morethan5_flag'].gt(0)]
    .groupby(['Telus_Internet_customers', 'TC_Last3M_count_BroadbandCommFailure_morethan5_flag'])
    .agg(
        Customer_count=pd.NamedAgg(column='customer_id', aggfunc='count'),
        Customer_count_1=pd.NamedAgg(column='customer_id', aggfunc='nunique'),
        Churn_total=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.sum()),
        Churn_rate=pd.NamedAgg(column='Telus_Churn_Flag', aggfunc=lambda flags: flags.mean() * 100),
    )
    .reset_index()
)

In [None]:
# config= bigquery.job.LoadJobConfig()

# # config._properties['timePartitioning'] = {'field': 'Month_Year'}
# config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Table_BQ = 'SHS.SHS_Churn_Analysis_OCT2022'

# bq_table_instance= bq_client.load_table_from_dataframe(DF, Table_BQ,job_config=config)