In [80]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
sys.path.insert(0, str(pth_project))
d_project_config = safe_load(pth_creds.open())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [81]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    if sql is not None:
        df = bq_client.query(sql).to_dataframe()
    elif pth_query is not None:
        sql = pth_query.read_text()
        df = bq_client.query(sql).to_dataframe()
    else:
        raise ValueError('`sql` or `pth_query` should be set')  
    return df

In [29]:
Telus_resi_customers='''



with ADC_customer_base as(

SELECT customer_id,dealer_customer_id,dealer_name
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where date(last_updt_ts) ='2022-08-31'
and account_type_name!='Standalone'
and customer_type_name='Customer'
and dealer_name='TELUS Communications Inc.'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id
)
,

Telus_customers as
(select bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = "2022-08-31" #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY pi_prod_instnc_stat_ts DESC) = 1
order by cust_bus_cust_id)

select * from ADC_customer_base as adc
inner join Telus_customers as telus
on telus.cust_bus_cust_id=adc.dealer_customer_id


'''

In [30]:
Telus_Customer_DF=extract_bq_data(bq_client, sql=Telus_resi_customers)

In [31]:
Telus_Customer_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260495 entries, 0 to 260494
Data columns (total 9 columns):
 #   Column                  Non-Null Count   Dtype              
---  ------                  --------------   -----              
 0   customer_id             260495 non-null  Int64              
 1   dealer_customer_id      260495 non-null  object             
 2   dealer_name             260495 non-null  object             
 3   bacct_bus_bacct_num     260495 non-null  Int64              
 4   bacct_billg_acct_id     260495 non-null  Int64              
 5   cust_bus_cust_id        260495 non-null  object             
 6   pi_prod_instnc_typ_cd   260495 non-null  object             
 7   pi_prod_instnc_stat_ts  260495 non-null  datetime64[ns]     
 8   prod_instnc_ts          260495 non-null  datetime64[ns, UTC]
dtypes: Int64(3), datetime64[ns, UTC](1), datetime64[ns](1), object(4)
memory usage: 18.6+ MB


In [32]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'ADC_updated.ADC_Telus_Customer_31Aug2022'

bq_table_instance= bq_client.load_table_from_dataframe(Telus_Customer_DF, Table_BQ,job_config=config)

In [33]:
ADT_Customer_Base='''


select * from `divgpras-pr-579355.ADC.ADC_ADT_mapping`


'''

In [34]:
ADT_Customer_Base=extract_bq_data(bq_client, sql=ADT_Customer_Base)

In [35]:
ADT_Customer_Base.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 212501 entries, 0 to 212500
Data columns (total 74 columns):
 #   Column                          Non-Null Count   Dtype              
---  ------                          --------------   -----              
 0   customer_id                     212501 non-null  Int64              
 1   dealer_customer_id              212501 non-null  object             
 2   primary_login_id                212501 non-null  Int64              
 3   dealer_name                     212501 non-null  object             
 4   join_date                       212501 non-null  datetime64[ns, UTC]
 5   account_type_name               212501 non-null  object             
 6   customer_type_name              212501 non-null  object             
 7   primary_email                   212501 non-null  object             
 8   primary_phone                   212501 non-null  object             
 9   last_updt_ts                    212501 non-null  datetime64[ns, UTC]
 

In [36]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'ADC_updated.ADT_Customer_base'

bq_table_instance= bq_client.load_table_from_dataframe(ADT_Customer_Base, Table_BQ,job_config=config)

In [51]:
ADT_resi_customers='''



with ADC_customer_base as(

SELECT customer_id,dealer_customer_id,dealer_name
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where date(last_updt_ts) ='2022-08-31'
and account_type_name!='Standalone'
and customer_type_name='Customer'
and dealer_name='ADT by TELUS'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id
)
,

ADT_customers as
(select * from `divgpras-pr-579355.ADC_updated.ADT_Customer_base` )

select * from ADC_customer_base as adc
inner join ADT_customers as ADT
on ADT.customer_id=adc.customer_id


''' 

In [52]:
ADT_Customer_DF=extract_bq_data(bq_client, sql=ADT_resi_customers)

In [53]:
ADT_Customer_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 203903 entries, 0 to 203902
Data columns (total 77 columns):
 #   Column                          Non-Null Count   Dtype              
---  ------                          --------------   -----              
 0   customer_id                     203903 non-null  Int64              
 1   dealer_customer_id              203903 non-null  object             
 2   dealer_name                     203903 non-null  object             
 3   customer_id_1                   203903 non-null  Int64              
 4   dealer_customer_id_1            203903 non-null  object             
 5   primary_login_id                203903 non-null  Int64              
 6   dealer_name_1                   203903 non-null  object             
 7   join_date                       203903 non-null  datetime64[ns, UTC]
 8   account_type_name               203903 non-null  object             
 9   customer_type_name              203903 non-null  object             
 

In [54]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'ADC_updated.ADC_ADT_Customer_31Aug2022'

bq_table_instance= bq_client.load_table_from_dataframe(ADT_Customer_DF, Table_BQ,job_config=config)

In [66]:
All_resi_customers= '''


with ADC_Customer_Base as (

SELECT *
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where date(last_updt_ts) ='2022-08-31'
and date(join_date)<'2022-08-01'
and account_type_name!='Standalone'
and customer_type_name='Customer'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id )



, Telus_Customer_Base as (

SELECT customer_id,'Telus_Customer' as Dealer_type_flag,COUNT(*) OVER (PARTITION BY 1) as RowCnt 
FROM `divgpras-pr-579355.ADC_updated.ADC_Telus_Customer_31Aug2022` 
)


, ADT_Customer_Base as (

SELECT customer_id,'ADT_Customer' as Dealer_type_flag,COUNT(*) OVER (PARTITION BY 1) as RowCnt 
FROM `divgpras-pr-579355.ADC_updated.ADC_ADT_Customer_31Aug2022` 
)


, Union_data as (

SELECT * from Telus_Customer_Base
UNION ALL
SELECT * from ADT_Customer_Base

)

,Merge_data as (

SELECT * from ADC_Customer_Base a
INNER JOIN Union_data b

on a.customer_id=b.customer_id
order by Dealer_type_flag
)

select * from Merge_data


'''

In [67]:
All_resi_DF=extract_bq_data(bq_client, sql=All_resi_customers)

In [68]:
All_resi_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 447141 entries, 0 to 447140
Data columns (total 53 columns):
 #   Column                            Non-Null Count   Dtype              
---  ------                            --------------   -----              
 0   dealer_id                         447141 non-null  Int64              
 1   dealer_name                       447141 non-null  object             
 2   dealer_branch_id                  182721 non-null  object             
 3   dealer_branch_desc                447141 non-null  object             
 4   customer_id                       447141 non-null  Int64              
 5   title                             447141 non-null  object             
 6   lastname                          447141 non-null  object             
 7   firstname                         447141 non-null  object             
 8   address1                          447141 non-null  object             
 9   address2                          447141 non-nul

In [69]:
All_resi_DF.head()

Unnamed: 0,dealer_id,dealer_name,dealer_branch_id,dealer_branch_desc,customer_id,title,lastname,firstname,address1,address2,...,primary_email,primary_phone,file_name,create_ts,create_user_id,last_updt_ts,last_updt_user_id,customer_id_1,Dealer_type_flag,RowCnt
0,343,ADT by TELUS,,,745444,,CHU,YUKLING,58 PRINCE EDWARD BOULEVARD,,...,AY1RJ2rfJBAY7ekjMvmfPjH7e6NSi+Z6JqzefLPLlyKEpcnl,19057642929,CustomerAccountDetails-20220830,2022-08-31 11:59:52.034161+00:00,etl_cust,2022-08-31 11:59:52.034161+00:00,etl_cust,745444,ADT_Customer,203903
1,343,ADT by TELUS,5954.0,hPro Quebec,752150,,Choronzey,Sylvie,5661 Chateaubriand,,...,AUv9UBmID3PvqoljOsKwyHdZWNJez+QrWrvWoPlmU+YBMl...,15146456478,CustomerAccountDetails-20220830,2022-08-31 11:59:52.034161+00:00,etl_cust,2022-08-31 11:59:52.034161+00:00,etl_cust,752150,ADT_Customer,203903
2,343,ADT by TELUS,5954.0,hPro Quebec,771230,,Navet,Fabien,1901 Champlain #4,,...,AftWuZ9Zvl1iEHT9u+hovMC/nM52BYEbLD6q2kAgoPBmzeWp,15148276545,CustomerAccountDetails-20220830,2022-08-31 11:59:52.034161+00:00,etl_cust,2022-08-31 11:59:52.034161+00:00,etl_cust,771230,ADT_Customer,203903
3,343,ADT by TELUS,,,1024449,,ALI,BRIAN,100 TORRENCE WOOD,,...,Adi1njkSP5LBODUmeFQK048KJK4LqoK8cTw3irqFVSoeEf...,19054974441,CustomerAccountDetails-20220830,2022-08-31 11:59:52.034161+00:00,etl_cust,2022-08-31 11:59:52.034161+00:00,etl_cust,1024449,ADT_Customer,203903
4,343,ADT by TELUS,5954.0,hPro Quebec,1284620,,Beaulieu,Michel,1625 Delage,,...,AX2Aif0u8Ou3BNMNDjv0Fh8cqmTH4Dw7ocbT6Zv93is5p9...,14504346504,CustomerAccountDetails-20220830,2022-08-31 11:59:52.034161+00:00,etl_cust,2022-08-31 11:59:52.034161+00:00,etl_cust,1284620,ADT_Customer,203903


In [70]:
All_resi_DF['dealer_name'].value_counts()

TELUS Communications Inc.    245862
ADT by TELUS                 201279
Name: dealer_name, dtype: int64

In [71]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'ADC_updated.ADC_resi_customers_base_31Aug2022'

bq_table_instance= bq_client.load_table_from_dataframe(All_resi_DF, Table_BQ,job_config=config)

In [74]:
ADC_Telus_internet_customers='''

with  Telus_internet_customers as

(select cust_bus_cust_id, pi_prod_instnc_typ_cd as pi_prod_instnc_typ_cd_internet
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = "2022-08-31" #Snapshot of the last day of the month
and pi_prod_instnc_stat_cd in ('A')
and pi_prod_instnc_typ_cd ='HSIC'
and consldt_cust_typ_cd = 'R'
)

,
Telus_customers as
(select bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = "2022-08-31" #Snapshot of the last day of the month
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY pi_prod_instnc_stat_ts DESC) = 1
order by cust_bus_cust_id)
,

ADC_customer_base as(

SELECT customer_id,dealer_customer_id,dealer_name
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where date(last_updt_ts) ='2022-08-31'
and account_type_name!='Standalone'
and customer_type_name='Customer'
and dealer_name='TELUS Communications Inc.'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id

)
,
merge_data as(

select distinct adc.customer_id,adc.dealer_customer_id, adc.dealer_name,telus.bacct_bus_bacct_num, telus.bacct_billg_acct_id, telus.cust_bus_cust_id,
       telus.pi_prod_instnc_typ_cd, telus.pi_prod_instnc_stat_ts, telus.prod_instnc_ts,b.cust_bus_cust_id as cust_bus_cust_id_internet ,
       b.pi_prod_instnc_typ_cd_internet 
from ADC_customer_base adc
inner join Telus_customers telus
on telus.cust_bus_cust_id=adc.dealer_customer_id
inner join Telus_internet_customers b
on adc.dealer_customer_id=b.cust_bus_cust_id
order by telus.bacct_bus_bacct_num
)




select distinct customer_id, dealer_customer_id, dealer_name,
       bacct_bus_bacct_num, bacct_billg_acct_id, cust_bus_cust_id,
       pi_prod_instnc_typ_cd, pi_prod_instnc_stat_ts, prod_instnc_ts,cust_bus_cust_id_internet,
       pi_prod_instnc_typ_cd_internet, 1 as Telus_internet_flag 
       from merge_data


'''

In [75]:
ADC_Telus_internet=extract_bq_data(bq_client, sql=ADC_Telus_internet_customers)

In [76]:
ADC_Telus_internet.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 184624 entries, 0 to 184623
Data columns (total 12 columns):
 #   Column                          Non-Null Count   Dtype              
---  ------                          --------------   -----              
 0   customer_id                     184624 non-null  Int64              
 1   dealer_customer_id              184624 non-null  object             
 2   dealer_name                     184624 non-null  object             
 3   bacct_bus_bacct_num             184624 non-null  Int64              
 4   bacct_billg_acct_id             184624 non-null  Int64              
 5   cust_bus_cust_id                184624 non-null  object             
 6   pi_prod_instnc_typ_cd           184624 non-null  object             
 7   pi_prod_instnc_stat_ts          184624 non-null  datetime64[ns]     
 8   prod_instnc_ts                  184624 non-null  datetime64[ns, UTC]
 9   cust_bus_cust_id_internet       184624 non-null  object             
 

In [77]:
ADC_Telus_internet.head()

Unnamed: 0,customer_id,dealer_customer_id,dealer_name,bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,cust_bus_cust_id_internet,pi_prod_instnc_typ_cd_internet,Telus_internet_flag
0,6562192,2254123,TELUS Communications Inc.,200336887,1283751,2254123,SMHM,2018-09-11,2022-08-31 00:00:00+00:00,2254123,HSIC,1
1,6865838,2269164,TELUS Communications Inc.,200843930,1297437,2269164,SMHM,2019-01-04,2022-08-31 00:00:00+00:00,2269164,HSIC,1
2,6959209,19044912,TELUS Communications Inc.,602031087,84870392,19044912,SMHM,2019-02-03,2022-08-31 00:00:00+00:00,19044912,HSIC,1
3,7043028,38019196,TELUS Communications Inc.,217802400,35774849,38019196,SMHM,2019-02-25,2022-08-31 00:00:00+00:00,38019196,HSIC,1
4,7164953,18846246,TELUS Communications Inc.,602358631,88774004,18846246,SMHM,2019-03-29,2022-08-31 00:00:00+00:00,18846246,HSIC,1


In [78]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'ADC_updated.ADC_Telus_internet_customers_31Aug2022'

bq_table_instance= bq_client.load_table_from_dataframe(ADC_Telus_internet, Table_BQ,job_config=config)

FileNotFoundError: [Errno 2] No such file or directory: 'jovyan/work/ADC/notebooks/ADT_Churn_Since_Jan2021.csv'