In [None]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# The project root is taken to be everything that precedes the first occurrence
# of the substring 'notebooks' in the current working directory.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core.*` imports below resolve.
sys.path.insert(0, str(pth_project))
# Read the YAML config through a context manager so the file handle is closed
# deterministically (a bare `.open()` leaves it open until garbage collection).
with pth_creds.open() as f_config:
    d_project_config = safe_load(f_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [None]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through `bq_client` and return the result as a DataFrame.

    The query text is `sql` when it is given; otherwise it is read from
    `pth_query` with `read_text()`.

    Args:
        bq_client: object exposing `query(sql)` whose result exposes
            `to_dataframe()`.
        sql: query text. Takes precedence over `pth_query` when both are set.
        pth_query: object with a `read_text()` method returning the query text.

    Returns:
        Whatever `bq_client.query(...).to_dataframe()` returns.

    Raises:
        ValueError: if both `sql` and `pth_query` are None.
    """
    # Guard first: there is nothing to run without a query source.
    if sql is None and pth_query is None:
        raise ValueError('`sql` or `pth_query` should be set')
    query_text = sql if sql is not None else pth_query.read_text()
    return bq_client.query(query_text).to_dataframe()

In [None]:
# SQL text: ADC accounts of dealer 'TELUS Communications Inc.' (rows updated on
# 2022-08-31, non-standalone, customer_type 'Customer') inner-joined on
# dealer_customer_id = cust_bus_cust_id to active ('A') SMHM product instances
# from the 2022-08-31 product-instance snapshot. Each CTE keeps one row per
# customer via QUALIFY ROW_NUMBER() ... = 1; pi_cntrct_end_ts is exposed as
# contract_end_date.
Telus_Customer_details='''



with ADC_customer_base as(

SELECT customer_id,dealer_customer_id,dealer_name
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where date(last_updt_ts) ='2022-08-31'
and account_type_name!='Standalone'
and customer_type_name='Customer'
and dealer_name='TELUS Communications Inc.'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id
)
,

Telus_customers as
(

select bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,pi_cntrct_end_ts as contract_end_date
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = "2022-08-31" #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY pi_prod_instnc_stat_ts DESC) = 1
order by cust_bus_cust_id

)








select * from ADC_customer_base as adc
inner join Telus_customers as telus
on telus.cust_bus_cust_id=adc.dealer_customer_id

'''

In [None]:
Telus_Customer_info=extract_bq_data(bq_client, sql=Telus_Customer_details)

In [None]:
Telus_Customer_info.info()

In [None]:
Telus_Customer_info.head()

In [None]:
# SQL text: every row of the ADC residential base table left-joined (on
# dealer_customer_id = cust_bus_cust_id) to TELUS SMHM product instances with
# status 'C' whose status timestamp falls in September 2022. Matched rows get
# Telus_Churn = 1 and churn_date_telus; unmatched rows get NULL for both.
# NOTE(review): the in-SQL comment says "last day of the month" but the snapshot
# filter is 2022-10-01 — confirm which snapshot date is intended.
ADC_Resi_DF_query='''



with ADC_resi_base as


(

select * from `divgpras-pr-579355.ADC_updated.ADC_resi_customers_base_31Aug2022`


),

telus_churn as (


select cust_bus_cust_id,1 as Telus_Churn,pi_prod_instnc_stat_ts as churn_date_telus
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = "2022-10-01" #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('C')
and DATE(pi_prod_instnc_stat_ts)>='2022-09-01' and DATE(pi_prod_instnc_stat_ts)<'2022-10-01' #Date on which the status was updated
QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY pi_prod_instnc_stat_ts DESC) = 1
order by pi_cntrct_end_ts


)



select * from ADC_resi_base as adc
left join telus_churn as telus
on telus.cust_bus_cust_id=adc.dealer_customer_id
order by Telus_Churn desc

'''

In [None]:
ADC_resi_DF=extract_bq_data(bq_client, sql=ADC_Resi_DF_query)

In [None]:
ADC_resi_DF.head()

In [None]:
ADC_resi_DF['Telus_Churn'].sum()

In [None]:
# SQL text: ADC terminated-account records whose term_date falls in September
# 2022 (>= 2022-09-01 and < 2022-10-01). Churn_flag is the constant 1 on every
# returned row.
Churn_Customers='''


select customer_id,term_date,term_reason,1 as Churn_flag from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_terminated_details`
where date(term_date) >= '2022-09-01' and date(term_date) < '2022-10-01'


'''

In [None]:
Churn_DF_Sep2022=extract_bq_data(bq_client, sql=Churn_Customers)

In [None]:
Churn_DF_Sep2022.info()

In [None]:
Churn_DF_Sep2022['term_date'].min()

In [None]:
Churn_DF_Sep2022['term_date'].max()

In [None]:
Churn_DF_Sep2022.info()

In [None]:
All_DF=ADC_resi_DF.merge(Churn_DF_Sep2022,on='customer_id',how='left')

In [None]:
All_DF.info()

In [None]:
# Per-dealer customer count plus churn totals and churn rates (in %).
# Telus_Churn and Churn_flag both come from left joins, so at this point
# non-churners hold NaN rather than 0. `mean()` skips NaN, which would report
# a 100% churn rate for every dealer, so the flags are zero-filled inside the
# aggregation before they are summed or averaged.
pd.DataFrame(All_DF.groupby(['dealer_name']).agg(

    Customer_count= ('customer_id','nunique'),
    Churn_total_telus=('Telus_Churn',lambda x: x.fillna(0).sum()),
    Churn_rate_telus=('Telus_Churn',lambda x: x.fillna(0).mean()*100),
    Churn_total=('Churn_flag',lambda x: x.fillna(0).sum()),
    Churn_rate=('Churn_flag',lambda x: x.fillna(0).mean()*100)).reset_index())

In [None]:
# NOTE(review): ad-hoc difference of two hard-coded counts; where 3424 and 2890
# come from is not shown in this notebook — TODO compute them from the data.
3424-2890

In [None]:
# Customers with no September termination record have no Churn_flag after the
# left merge; mark them as 0. The result is assigned back to the column because
# `All_DF['Churn_flag'].fillna(0, inplace=True)` is chained assignment: it acts
# on a temporary column object and does not update All_DF under Copy-on-Write.
All_DF['Churn_flag']=All_DF['Churn_flag'].fillna(0)

In [None]:
Best_Practices_Query= '''

select * from `divgpras-pr-579355.ADC.Best_Practices_Customers`

'''

In [None]:
Best_Practices_DF=extract_bq_data(bq_client, sql=Best_Practices_Query)

In [None]:
Best_Practices_DF.head()

In [None]:
# SQL text: per id_cust (exposed as customer_id), the summed arm and disarm
# command counts and the number of distinct dates with a record, restricted to
# dates before 2022-09-01. count_of_dates_arming repeats on every row the number
# of distinct dates present in the table over that same period.
Arming_Query='''



with date_sql as (
select  count(distinct date(date)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< '2022-09-01'
)



select id_cust as customer_id,sum(count_arm_commands) as sum_arm_commands,sum(count_disarm_commands) as sum_disarm_commands,count (distinct date(date)) as number_days_arming_disarming, (select count_of_dates from date_sql ) as count_of_dates_arming
from `cio-datahub-enterprise-pr-183a.src_adc.bq_aggregate_daily_arming_commands`
where date(date)< '2022-09-01'
group by customer_id
order by number_days_arming_disarming desc





'''

In [None]:
Arming_DF=extract_bq_data(bq_client, sql=Arming_Query)

In [None]:
Arming_DF.info()

In [None]:
Arming_DF.head()

In [None]:

# Arming_Consistency: the customer's number of distinct arming/disarming days
# as a percentage of the number of distinct dates in the arming table.
# Computed as one column-wise expression on float64 values.
max_number_arming_dates=Arming_DF['count_of_dates_arming'].max()
arming_days=Arming_DF['number_days_arming_disarming'].astype('float64')
Arming_DF['Arming_Consistency']=arming_days*100/max_number_arming_dates


In [None]:
Arming_DF['Arming_Flag']=1

In [None]:
# SQL text: per customer_id, the summed login_count and the number of distinct
# login dates before 2022-09-01. count_of_dates_logins repeats on every row the
# number of distinct login dates present in the table over that same period.
Login_SQL='''

with date_sql as (
select  count(distinct date(login_dt_utc)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< '2022-09-01'

)



select customer_id,sum(login_count) as sum_login_count,count (distinct date(login_dt_utc)) as number_of_login_days, (select count_of_dates from date_sql ) as count_of_dates_logins
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)< '2022-09-01'
group by customer_id
order by number_of_login_days desc


'''

In [None]:
Login_DF=extract_bq_data(bq_client, sql=Login_SQL)

In [None]:
Login_DF.info()

In [None]:
# Login_Consistency: the customer's number of distinct login days as a
# percentage of the number of distinct dates in the login table.
# Computed as one column-wise expression on float64 values.
max_number_login_dates=Login_DF['count_of_dates_logins'].max()
login_days=Login_DF['number_of_login_days'].astype('float64')
Login_DF['Login_Consistency']=login_days*100/max_number_login_dates

In [None]:
Merge_DF=All_DF.merge(Best_Practices_DF,on='customer_id',how='left')

In [None]:
Merge_DF_1=Merge_DF.merge(Arming_DF,on='customer_id',how='left')

In [None]:
Merge_DF_2=Merge_DF_1.merge(Login_DF,on='customer_id',how='left')

In [None]:
Merge_DF_2.head()

In [None]:
Merge_DF_2.info()

In [None]:
# Replace every missing value in the merged frame with 0, so customers absent
# from the best-practice, arming or login tables get 0 in those columns.
# NOTE(review): fillna(0) applies to all columns, including any date or text
# columns carried over from the joins — confirm that is intended.
Merge_DF_2.fillna(0,inplace=True)

In [None]:
def Segment_making(row
                   ,min_login_consistency_threshold=50
                   ,min_arming_consistency_threshold=50
                   ,max_login_consistency_threshold=50
                   ,max_arming_consistency_threshold=50):
    """Assign an engagement segment label to one customer row.

    Rules are evaluated in order and the first match wins:

    1. 'Heavy_User': Best_Practice_All >= 5, login consistency >= max login
       threshold and arming consistency >= max arming threshold.
    2. 'Home_automation_Savvy': login consistency >= max login threshold and
       arming consistency < min arming threshold.
    3. 'Old_Fashion': login consistency < min login threshold and arming
       consistency >= max arming threshold.
    4. 'Disengaged': login and arming consistency are both exactly 0.
    5. 'Moderate_Users': everything else.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with the keys
            'Best_Practice_All', 'Login_Consistency' and 'Arming_Consistency'.
        min_login_consistency_threshold: login value below which a customer
            counts as a low-login user (rule 3).
        min_arming_consistency_threshold: arming value below which a customer
            counts as a low-arming user (rule 2).
        max_login_consistency_threshold: login value from which a customer
            counts as a high-login user (rules 1 and 2).
        max_arming_consistency_threshold: arming value from which a customer
            counts as a high-arming user (rules 1 and 3).

    Returns:
        One of the five segment label strings listed above.
    """
    login = row['Login_Consistency']
    arming = row['Arming_Consistency']

    if row['Best_Practice_All']>=5 and login>=max_login_consistency_threshold and arming>=max_arming_consistency_threshold:
        return 'Heavy_User'
    elif login>=max_login_consistency_threshold and arming<min_arming_consistency_threshold:
        return 'Home_automation_Savvy'
    # Fix: the arming value is compared with the *arming* threshold. It was
    # previously compared with max_login_consistency_threshold, which only
    # gave the right answer while both thresholds shared the same value.
    elif login<min_login_consistency_threshold and arming>=max_arming_consistency_threshold:
        return 'Old_Fashion'
    elif login==0 and arming==0:
        return 'Disengaged'
    else:
        return "Moderate_Users"

In [None]:
Merge_DF_2['Segment']=Merge_DF_2.apply(Segment_making,axis=1)

In [None]:
Merge_DF_2['Segment'].value_counts(normalize=True)*100

In [None]:
Merge_DF_2['Segment'].value_counts()

In [None]:
Merge_DF_2['Churn_flag'].value_counts(normalize=True)*100

In [None]:
pd.DataFrame(Merge_DF_2.groupby(['dealer_name']).agg(

    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)


).reset_index())

In [None]:
pd.DataFrame(Merge_DF_2.groupby(['Segment']).agg(
       Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).sort_values('Customer_count',ascending=False).reset_index())

In [None]:
pd.DataFrame(Merge_DF_2.groupby(['Best_Practice_All']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Merge_DF_2.groupby(['Best_partices_1']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Merge_DF_2.groupby(['Best_partices_2']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Merge_DF_2.groupby(['Best_partices_3']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Merge_DF_2.groupby(['Best_partices_4']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Merge_DF_2.groupby(['Best_partices_5']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Merge_DF_2.groupby(['Best_partices_6']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
Telus_internet_customers_query='''

select * from `divgpras-pr-579355.ADC_updated.ADC_Telus_internet_customers_31Aug2022`

'''

In [None]:
Telus_internet_DF=extract_bq_data(bq_client, sql=Telus_internet_customers_query)

In [None]:
Telus_internet_DF.info()

In [None]:
Merge_DF_3=Merge_DF_2.merge(Telus_internet_DF[['customer_id','Telus_internet_flag']],how='left',on='customer_id')

In [None]:
Merge_DF_3['Telus_internet_flag'].sum()

In [None]:
# Rows for the TELUS dealer only. An explicit copy makes Telus_DF an
# independent frame, so later column updates on it are unambiguous and do not
# raise SettingWithCopyWarning.
Telus_DF=Merge_DF_3[Merge_DF_3['dealer_name']=='TELUS Communications Inc.'].copy()

In [None]:
Telus_DF['Telus_internet_flag'].sum()

In [None]:
# Customers missing from the TELUS internet table have no flag after the left
# merge; mark them as 0. A new frame is bound to the name instead of calling
# `fillna(..., inplace=True)` on a column of a filtered frame, which is chained
# assignment and is not guaranteed to update Telus_DF.
Telus_DF=Telus_DF.fillna({'Telus_internet_flag':0})

In [None]:
Telus_DF['Churn_flag'].value_counts(normalize=True)*100

In [None]:
pd.DataFrame(Telus_DF.groupby(['Telus_internet_flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
# SQL text: trouble-condition records flagged both started ('Y') and closed
# ('Y'), with start_date_utc from 2022-06-01 up to but excluding 2022-09-01,
# limited to the three condition names CameraNotReachable,
# CameraNotCommunicating and BroadbandCommFailure.
Camera_TC_issue_query='''

select dealer_name,customer_id,unit_id,device_id,trouble_condition_name,trouble_condition_group_id,trouble_condition_group_desc, start_date_utc,end_date_utc
from `cio-datahub-enterprise-pr-183a.src_adc.bq_troublecondition_data`
where trouble_condition_start_ind='Y'and trouble_condition_closed_ind='Y' and date(start_date_utc) >= '2022-06-01' and date(start_date_utc) <'2022-09-01' and trouble_condition_name in ('CameraNotReachable','CameraNotCommunicating','BroadbandCommFailure')


'''

In [None]:
Camera_TC= extract_bq_data(bq_client, sql=Camera_TC_issue_query)

In [None]:
Camera_TC.info()

In [None]:
# Give every trouble-condition record a weight of 1, then pivot to one row per
# customer with the number of records per trouble-condition name. Combinations
# that never occur are left as NaN.
Camera_TC['count_instance']=1

# The aggregation is named by the string 'sum': passing the numpy callable
# `np.sum` is deprecated in recent pandas and emits a FutureWarning.
TC_pivot_table=pd.pivot_table(Camera_TC, values=['count_instance'], index=['customer_id'], columns='trouble_condition_name',
                          aggfunc={'count_instance': 'sum'})

In [None]:
TC_pivot_table=TC_pivot_table.reset_index()

In [None]:
TC_pivot_table.head()

In [None]:
# Replace the pivot's column labels with flat names.
# NOTE(review): this positional rename assumes exactly three trouble-condition
# columns follow customer_id, in this order — confirm against
# TC_pivot_table.head() before relying on it.
TC_pivot_table.columns=['customer_id','BroadbandCommFailure_count','CameraNotCommunicating_count','CameraNotReachable_count']

In [None]:
# Binary indicator per trouble condition: 1 when the customer's count is above
# zero, otherwise 0 (a missing count compares False and therefore maps to 0).
for tc_name in ('BroadbandCommFailure','CameraNotCommunicating','CameraNotReachable'):
    TC_pivot_table[tc_name+'_Flag']=TC_pivot_table[tc_name+'_count'].gt(0).astype(int)

In [None]:
Merge_DF_4=Merge_DF_3.merge(TC_pivot_table,on='customer_id',how='left')

In [None]:
Merge_DF_5=Merge_DF_4.merge(Telus_Customer_info[['customer_id','contract_end_date']],on='customer_id',how='left')

In [None]:
Merge_DF_5.info()

In [None]:
Merge_DF_5['Telus_Churn'].isnull().sum()

In [None]:
Merge_DF_5['Telus_Churn'].value_counts()

In [None]:
Merge_DF_5['Churn_flag'].isna().sum()

In [None]:
Merge_DF_5['Churn_flag'].value_counts()

In [None]:
# def telus_delta(row):
#     if row['Telus_Churn']!=row['Churn_flag']:
#         1
#     else:
#         0
    
    

In [None]:
# Delta_Telus is 1 on rows where Telus_Churn and Churn_flag disagree, else 0.
Merge_DF_5['Delta_Telus']=Merge_DF_5['Telus_Churn'].ne(Merge_DF_5['Churn_flag']).astype(int)

In [None]:
Telus_delta_DF=Merge_DF_5[(Merge_DF_5['dealer_name']=='TELUS Communications Inc.') & (Merge_DF_5['Delta_Telus']==1) ]

In [None]:
Telus_delta_DF.info()

In [None]:
Telus_delta_DF.head()

In [None]:
Telus_delta_DF['Telus_Churn'].sum()

In [None]:
Telus_delta_missed_DF=Telus_delta_DF[Telus_delta_DF['Telus_Churn']==1]

In [None]:
Telus_delta_missed_DF['Delta_Telus'].sum()

In [None]:
Telus_delta_missed_DF['Churn_flag'].sum()

In [None]:
Telus_delta_missed_DF.columns.to_list()

In [None]:
# Columns kept for the export of TELUS-dealer rows where Telus_Churn is 1 but
# Churn_flag differs.
# NOTE(review): 'term_date_y' and 'term_reason_y' exist only if an earlier merge
# produced suffixed duplicates of term_date/term_reason — confirm against
# Telus_delta_missed_DF.columns.
cols_req=['customer_id','dealer_name','dealer_customer_id','Telus_Churn','churn_date_telus','Churn_flag','term_date_y','term_reason_y','Telus_internet_flag','Segment','contract_end_date','Delta_Telus',]
Telus_delta_missed_DF=Telus_delta_missed_DF[cols_req]

In [None]:
# Load the missed-churn rows into BigQuery, replacing the table's contents.
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
# WRITE_TRUNCATE: overwrite existing table data instead of appending.
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'ADC_updated.Telus_ADC_missed_churn'

bq_table_instance= bq_client.load_table_from_dataframe(Telus_delta_missed_DF, Table_BQ,job_config=config)
# The call above only starts the load job. Wait for it to finish so that a
# failed load raises here instead of going unnoticed.
bq_table_instance.result()

In [None]:
# Customers in the 'Disengaged' segment. An explicit copy makes Disengaged_DF
# an independent frame, so the column fills applied to it later are unambiguous
# and do not raise SettingWithCopyWarning.
Disengaged_DF=Merge_DF_5[Merge_DF_5['Segment']=='Disengaged'].copy()

In [None]:
Disengaged_DF.info()

In [None]:
Disengaged_DF['dealer_name'].value_counts()

In [None]:
Disengaged_DF['contract_end_date'].isna().sum()

In [None]:
Disengaged_DF['Telus_Churn'].sum()

In [None]:
Disengaged_DF['Churn_flag'].sum()

In [None]:
pd.DataFrame(Disengaged_DF.groupby(['Best_Practice_All']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Telus_Churn_total=('Telus_Churn',lambda x: x.sum()),
    Telus_Churn_rate=('Telus_Churn',lambda x: x.mean()*100),    
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)

).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF.groupby(['Best_partices_1']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
# Number of disengaged customers carrying each trouble-condition flag, per
# Best_Practice_All value. The aggregation is named by the string 'sum':
# passing the numpy callable `np.sum` to `agg` is deprecated in recent pandas.
pd.DataFrame(Disengaged_DF.groupby(['Best_Practice_All']).agg(
    BroadbandCommFailure_count=('BroadbandCommFailure_Flag','sum'),
    CameraNotCommunicating_count=('CameraNotCommunicating_Flag','sum'),
    CameraNotReachable_count=('CameraNotReachable_Flag','sum')).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF.groupby(['BroadbandCommFailure_Flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF.groupby(['CameraNotCommunicating_Flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF.groupby(['CameraNotReachable_Flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF.groupby(['CameraNotCommunicating_count']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).sort_values('Churn_total',ascending=False).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF.groupby(['CameraNotCommunicating_count']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).sort_values('Churn_total',ascending=False).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF.groupby(['BroadbandCommFailure_count']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).sort_values('Churn_total',ascending=False).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF.groupby(['CameraNotReachable_count']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).sort_values('Churn_total',ascending=False).reset_index())

In [None]:
# Missing internet flags become 0. A new frame is bound to the name instead of
# calling `fillna(..., inplace=True)` on a column of a filtered frame, which is
# chained assignment and is not guaranteed to update Disengaged_DF.
Disengaged_DF=Disengaged_DF.fillna({'Telus_internet_flag':0})

In [None]:
Disengaged_DF.info()

In [None]:
Disengaged_DF['Churn_flag'].isna().sum()
 

In [None]:
# Customers with no trouble-condition record have NaN in these columns after
# the left merge; treat them as zero occurrences. All six columns are filled in
# one call that returns a new frame, instead of six chained
# `fillna(..., inplace=True)` calls that are not guaranteed to update
# Disengaged_DF.
tc_fill_values={
    'BroadbandCommFailure_Flag':0,
    'CameraNotCommunicating_Flag':0,
    'CameraNotReachable_Flag':0,
    'BroadbandCommFailure_count':0,
    'CameraNotCommunicating_count':0,
    'CameraNotReachable_count':0,
}
Disengaged_DF=Disengaged_DF.fillna(tc_fill_values)

# Disengaged_DF['Churn_Flag'].fillna(0,inplace=True)


In [None]:
pd.DataFrame(Disengaged_DF[Disengaged_DF['dealer_name']=='TELUS Communications Inc.'].groupby(['Telus_internet_flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Best_Practice_All']<3) ].groupby(['Telus_internet_flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Telus_Churn_total=('Telus_Churn',lambda x: x.sum()),
    Telus_Churn_rate=('Telus_Churn',lambda x: x.mean()*100),    
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Best_Practice_All']==0) ].groupby(['Telus_internet_flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Telus_Churn_total=('Telus_Churn',lambda x: x.sum()),
    Telus_Churn_rate=('Telus_Churn',lambda x: x.mean()*100),    
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Best_partices_1']==0) ].groupby(['Telus_internet_flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Telus_Churn_total=('Telus_Churn',lambda x: x.sum()),
    Telus_Churn_rate=('Telus_Churn',lambda x: x.mean()*100),    
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Best_partices_1']==1) ].groupby(['Telus_internet_flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Telus_Churn_total=('Telus_Churn',lambda x: x.sum()),
    Telus_Churn_rate=('Telus_Churn',lambda x: x.mean()*100),   
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Best_partices_2']==0) ].groupby(['Telus_internet_flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Telus_Churn_total=('Telus_Churn',lambda x: x.sum()),
    Telus_Churn_rate=('Telus_Churn',lambda x: x.mean()*100),   
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Best_partices_3']==0) ].groupby(['Telus_internet_flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Telus_Churn_total=('Telus_Churn',lambda x: x.sum()),
    Telus_Churn_rate=('Telus_Churn',lambda x: x.mean()*100),   
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Best_partices_4']==0) ].groupby(['Telus_internet_flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Telus_Churn_total=('Telus_Churn',lambda x: x.sum()),
    Telus_Churn_rate=('Telus_Churn',lambda x: x.mean()*100),   
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Best_partices_5']==0) ].groupby(['Telus_internet_flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Telus_Churn_total=('Telus_Churn',lambda x: x.sum()),
    Telus_Churn_rate=('Telus_Churn',lambda x: x.mean()*100),   
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Best_partices_6']==0) ].groupby(['Telus_internet_flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Telus_Churn_total=('Telus_Churn',lambda x: x.sum()),
    Telus_Churn_rate=('Telus_Churn',lambda x: x.mean()*100),   
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Telus_internet_flag']==0) ].groupby(['CameraNotReachable_Flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Telus_Churn_total=('Telus_Churn',lambda x: x.sum()),
    Telus_Churn_rate=('Telus_Churn',lambda x: x.mean()*100),   
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Telus_internet_flag']==0) ].groupby(['BroadbandCommFailure_Flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
pd.DataFrame(Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Telus_internet_flag']==0) ].groupby(['CameraNotCommunicating_Flag']).agg(
    
    Customer_count= ('customer_id','nunique'),
    Churn_total=('Churn_flag',lambda x: x.sum()),
    Churn_rate=('Churn_flag',lambda x: x.mean()*100)).reset_index())

In [None]:
Disengaged_DF['contract_end_date'].value_counts()

In [None]:
sum(Disengaged_DF['contract_end_date']=='2024-06-24')

In [None]:
GTM_data_1=Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Best_partices_1']==0)] 

In [None]:
GTM_data_2=Disengaged_DF[(Disengaged_DF['dealer_name']=='TELUS Communications Inc.') & (Disengaged_DF['Best_Practice_All']<3)] 

In [None]:
GTM_data_2.shape

In [None]:
GTM_data_1.shape

In [None]:
# Write next to the notebooks under the project root instead of a hard-coded
# absolute home-directory path, so the export works on any checkout.
# `pth_project` is the project root derived from the working directory in the
# setup cell.
GTM_data_1.to_csv(pth_project / 'notebooks' / 'GTM_data_1.csv',index=False)

In [None]:
# Write next to the notebooks under the project root instead of a hard-coded
# absolute home-directory path, so the export works on any checkout.
# `pth_project` is the project root derived from the working directory in the
# setup cell.
GTM_data_2.to_csv(pth_project / 'notebooks' / 'GTM_data_2.csv',index=False)

In [None]:
Disengaged_DF.info()

### Decision Tree 

In [None]:
feature_cols=['dealer_name','Best_partices_1','Best_partices_2','Best_partices_3','Best_partices_4','Best_partices_5','Best_partices_6','Best_Practice_All','Telus_internet_flag','BroadbandCommFailure_count','CameraNotCommunicating_count','CameraNotReachable_count']
Target_col=['Churn_flag']

In [None]:
percent_missing = Disengaged_DF[feature_cols].isnull().sum() * 100 / Disengaged_DF.shape[0]
missing_value_df = pd.DataFrame({'column_name': Disengaged_DF[feature_cols].columns,
                                 'percent_missing': percent_missing})


In [None]:
missing_value_df

In [None]:
Disengaged_DF[feature_cols].info()

In [None]:
X=pd.get_dummies(Disengaged_DF[feature_cols])
Y=Disengaged_DF[Target_col]

In [None]:
X.columns

In [None]:
X.info()

In [None]:
from sklearn import tree
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure
# Shallow (depth 3) entropy-based tree so the splits stay readable when plotted.
# random_state is fixed because scikit-learn permutes the features at every
# split, so equally good splits can otherwise be chosen differently from run to
# run and the fitted tree would not be reproducible.
dt = tree.DecisionTreeClassifier(criterion='entropy',max_depth = 3,random_state=42)

In [None]:
decision_tree=dt.fit(X, Y)

In [None]:
from sklearn.tree import export_text
r = export_text(decision_tree, feature_names=X.columns.tolist())


In [None]:

figure(figsize=(100, 100))

tree.plot_tree(decision_tree,feature_names=X.columns.tolist())


In [None]:
# NOTE(review): ad-hoc percentage from hard-coded counts; where 411 and 13104
# come from is not shown in this notebook — TODO compute them from the data.
411*100/13104

In [None]:
# NOTE(review): ad-hoc percentage from hard-coded counts; where 1305 and 49442
# come from is not shown in this notebook — TODO compute them from the data.
1305*100/49442