In [48]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root = the part of the current working directory that precedes the
# first occurrence of 'notebooks'.
# NOTE(review): assumes the kernel is started somewhere under <root>/notebooks;
# if 'notebooks' is not in the cwd, the cwd itself is used as the root.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package below resolves.
sys.path.insert(0, str(pth_project))
# Read the YAML config through a context manager so the file handle is closed
# as soon as parsing finishes (previously the handle was left open).
with pth_creds.open() as f_project_config:
    d_project_config = safe_load(f_project_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
# The project name is read from the 'gcp-project-name' key of the config file.
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Let pandas display up to 100 rows before truncating output.
pd.options.display.max_rows = 100

In [49]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return its result as a dataframe.

    The query text comes from ``sql`` when it is given; otherwise it is read
    from ``pth_query``. When both are supplied, ``sql`` wins and ``pth_query``
    is ignored.

    Args:
        bq_client: Object exposing ``query(sql)`` whose return value has a
            ``to_dataframe()`` method.
        sql: Query text, or ``None`` to fall back to ``pth_query``.
        pth_query: Object with a ``read_text()`` method (e.g. a
            ``pathlib.Path``) returning the query text; only used when
            ``sql`` is ``None``.

    Returns:
        Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises:
        ValueError: If both ``sql`` and ``pth_query`` are ``None``.
    """
    if sql is None:
        # No inline query: the text has to come from the file, or we give up.
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        sql = pth_query.read_text()

    query_job = bq_client.query(sql)
    return query_job.to_dataframe()

In [50]:
# BigQuery SQL assembling the customer base as of the 2022-08-31 snapshot.
# CTEs:
#   * ADC_customer_base        - rows of bq_customer_account_details with
#                                date(last_updt_ts) = 2022-08-31 and dealer_name
#                                'TELUS Communications Inc.', keeping the latest
#                                row per dealer_customer_id.
#   * Telus_customers          - rows of bq_prod_instnc_snpsht for the same date,
#                                product type 'SMHM', source id 1001, status 'A',
#                                consldt_cust_typ_cd 'R', keeping the latest
#                                pi_prod_instnc_stat_ts per cust_bus_cust_id.
#   * Telus_internet_customers - distinct customer_id / Telus_internet_flag.
#   * Telus_SMHM_Deact_Sep2022 - distinct CUST_ID (cast to STRING) with
#                                Deact_Flag exposed as Telus_Churn_Flag.
# Final select: ADC_customer_base INNER JOIN Telus_customers on
# dealer_customer_id = cust_bus_cust_id, then LEFT JOINs to the internet and
# deactivation CTEs, so their columns are NULL where there is no match.
# NOTE(review): the snapshot date is hard-coded in two places inside the query.
Telus_Query ='''


with ADC_customer_base as(

SELECT distinct customer_id,dealer_customer_id,dealer_name,account_type_name,customer_type_name
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where date(last_updt_ts) = '2022-08-31'
--and account_type_name!='Standalone'
--and customer_type_name='Customer'
and dealer_name='TELUS Communications Inc.'
QUALIFY ROW_NUMBER() OVER (PARTITION BY dealer_customer_id ORDER BY last_updt_ts DESC) = 1
--order by dealer_name,join_date,customer_id
)
,

Telus_customers as
(
select bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,pi_cntrct_end_ts as contract_end_date
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = "2022-08-31" #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY pi_prod_instnc_stat_ts DESC) = 1
order by cust_bus_cust_id

)


,Telus_internet_customers as 
(

select distinct (customer_id) as cust_id_telus,Telus_internet_flag from `divgpras-pr-579355.ADC_updated.Telus_internet_31Aug2022`

)


, Telus_SMHM_Deact_Sep2022 as 


(


select distinct(CAST (CUST_ID as STRING)) as CUST_ID ,Deact_Flag as Telus_Churn_Flag  

from `divgpras-pr-579355.ADC_updated.Telus_SMHM_Sep2022_Deacts`


)

select * from ADC_customer_base as ADC
inner join Telus_customers as Telus
on ADC.dealer_customer_id=Telus.cust_bus_cust_id
left join Telus_internet_customers
on ADC.customer_id=Telus_internet_customers.cust_id_telus
left join Telus_SMHM_Deact_Sep2022
on ADC.dealer_customer_id=Telus_SMHM_Deact_Sep2022.CUST_ID

'''


In [51]:
# Execute the customer-base query and keep the result for the rest of the notebook.
Data_Telus = extract_bq_data(bq_client=bq_client, sql=Telus_Query)

In [52]:
# Inspect the query result: column names, non-null counts and dtypes.
Data_Telus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 305606 entries, 0 to 305605
Data columns (total 16 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   customer_id             305606 non-null  Int64 
 1   dealer_customer_id      305606 non-null  object
 2   dealer_name             305606 non-null  object
 3   account_type_name       305606 non-null  object
 4   customer_type_name      305606 non-null  object
 5   bacct_bus_bacct_num     305606 non-null  Int64 
 6   bacct_billg_acct_id     305606 non-null  Int64 
 7   cust_bus_cust_id        305606 non-null  object
 8   pi_prod_instnc_typ_cd   305606 non-null  object
 9   pi_prod_instnc_stat_ts  305606 non-null  object
 10  prod_instnc_ts          305606 non-null  object
 11  contract_end_date       305606 non-null  object
 12  cust_id_telus           216722 non-null  Int64 
 13  Telus_internet_flag     216722 non-null  Int64 
 14  CUST_ID                 3207 non-nul

In [91]:
# dealer_customer_id arrives as an object column; keep an int64 copy so it can
# be matched against the integer `cust_id` key of the ARPU table later on.
Data_Telus['Cust_id_1'] = Data_Telus['dealer_customer_id'].astype(np.int64)

In [92]:
# Load the ARPU extract from a CSV file.
# NOTE(review): the path is relative, so it resolves against the kernel's
# current working directory — confirm where this file is expected to live.
ARPU = pd.read_csv(filepath_or_buffer='Telus_SMHM_ARPU_Sep_Apr2022.csv')

In [93]:
# Inspect the ARPU table: column names, non-null counts and dtypes.
ARPU.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 453959 entries, 0 to 453958
Data columns (total 7 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   cust_id    453959 non-null  int64  
 1   ffh_amt_0  453959 non-null  float64
 2   ffh_amt_1  453959 non-null  float64
 3   ffh_amt_2  453959 non-null  float64
 4   ffh_amt_3  453959 non-null  float64
 5   ffh_amt_4  453959 non-null  float64
 6   ffh_amt_5  453959 non-null  float64
dtypes: float64(6), int64(1)
memory usage: 24.2 MB


In [94]:
# ARPU['cust_id']=ARPU['cust_id'].astype('O')

In [95]:
# Preview the first rows of the ARPU table.
ARPU.head()

Unnamed: 0,cust_id,ffh_amt_0,ffh_amt_1,ffh_amt_2,ffh_amt_3,ffh_amt_4,ffh_amt_5
0,100187113,66.94,76.94,76.94,76.94,66.94,66.94
1,100343137,0.0,0.0,0.0,0.0,0.0,0.0
2,100402370,71.73,71.73,71.73,71.73,71.73,71.73
3,100449914,60.0,60.0,60.0,60.0,60.0,60.0
4,100468394,69.78,69.78,69.78,69.78,69.78,69.78


In [96]:
# Constant marker on every ARPU row: after a left join onto another table, a
# missing value in this column identifies rows that found no ARPU match.
ARPU['ARPU_flag'] = 1

In [97]:
# Spot-check a single customer id (one that also appears in the ARPU preview).
# cust_bus_cust_id is an object column, hence the string literal.
Data_Telus.loc[Data_Telus['cust_bus_cust_id'].eq('100402370')]

Unnamed: 0,customer_id,dealer_customer_id,dealer_name,account_type_name,customer_type_name,bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,contract_end_date,cust_id_telus,Telus_internet_flag,CUST_ID,Telus_Churn_Flag,Cust_id_1
22708,12639786,100402370,TELUS Communications Inc.,Security System,Customer,604179426,100951772,100402370,SMHM,2021-12-01 00:00:00,2022-08-31 00:00:00+00:00,2024-12-01 00:00:00,12639786,1,,,100402370


In [99]:
# Attach the ARPU columns to every customer row; customers without an ARPU
# record are kept (left join) and get missing values in the ARPU columns.
# NOTE(review): if `cust_id` is not unique in ARPU this join duplicates
# customer rows — worth confirming key uniqueness.
Data_Telus_merge = pd.merge(
    Data_Telus,
    ARPU,
    how='left',
    left_on='Cust_id_1',
    right_on='cust_id',
)

In [102]:
# Percentage of merged rows that have no ARPU match (ARPU_flag is missing).
Data_Telus_merge['ARPU_flag'].isna().sum() * 100 / len(Data_Telus_merge)

5.901716589333979

In [101]:
# Preview the first rows of the merged customer + ARPU table.
Data_Telus_merge.head()

Unnamed: 0,customer_id,dealer_customer_id,dealer_name,account_type_name,customer_type_name,bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,...,Telus_Churn_Flag,Cust_id_1,cust_id,ffh_amt_0,ffh_amt_1,ffh_amt_2,ffh_amt_3,ffh_amt_4,ffh_amt_5,ARPU_flag
0,10606955,100194243,TELUS Communications Inc.,Standalone,Customer,604128596,100498352,100194243,SMHM,2021-02-02 00:00:00,...,,100194243,100194243.0,24.0,24.0,21.0,27.0,34.0,34.0,1.0
1,8560253,100379251,TELUS Communications Inc.,Security System,Customer,604171428,100914554,100379251,SMHM,2020-02-04 00:00:00,...,,100379251,100379251.0,40.0,50.0,50.0,50.0,40.0,40.0,1.0
2,13269842,100412210,TELUS Communications Inc.,Awareness and Automation,Customer,604181997,100969397,100412210,SMHM,2022-03-18 00:00:00,...,,100412210,100412210.0,46.39,46.39,46.39,46.39,49.69,0.0,1.0
3,8680384,100413544,TELUS Communications Inc.,Security System,Customer,604182411,100975756,100413544,SMHM,2020-02-23 00:00:00,...,,100413544,100413544.0,50.0,50.0,50.0,50.0,50.0,50.0,1.0
4,8762117,100445217,TELUS Communications Inc.,Security System,Customer,604193529,101025697,100445217,SMHM,2020-03-08 00:00:00,...,,100445217,100445217.0,38.0,38.0,38.0,38.0,38.0,38.0,1.0


In [117]:
# Per account type: number of distinct customers and the mean of ffh_amt_0
# (rows with a missing ffh_amt_0 are skipped by the mean).
(
    Data_Telus_merge
    .groupby(['account_type_name'])
    .agg(
        Customer_count=('customer_id', 'nunique'),
        ARPU_Avg=('ffh_amt_0', 'mean'),
    )
)

Unnamed: 0_level_0,Customer_count,ARPU_Avg
account_type_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Awareness and Automation,70994,34.526738
Security System,190273,58.083216
Standalone,44339,17.453874
