In [1]:
# Import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root: everything in the working directory path that precedes the
# first occurrence of 'notebooks'.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package can be imported.
sys.path.insert(0, str(pth_project))
# read_text() opens, reads and closes the file; the previous
# `safe_load(pth_creds.open())` left the file handle open.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Open the BigQuery client for the GCP project named in the local config.
bq_client = connect_bq_services(
    d_project_config['gcp-project-name'],
)
# Allow pandas to render up to 100 rows before truncating a displayed frame.
pd.set_option('display.max_rows', 100)

In [2]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    The query text comes from ``sql`` when it is not None; otherwise it is
    read from ``pth_query`` via ``read_text()``.

    Args:
        bq_client: Object exposing ``query(sql)`` whose result has
            ``to_dataframe()``.
        sql: Query text. Takes precedence over ``pth_query``.
        pth_query: Path-like object with ``read_text()`` returning query text.

    Returns:
        Whatever ``bq_client.query(...).to_dataframe()`` returns.

    Raises:
        ValueError: If both ``sql`` and ``pth_query`` are None.
    """
    # Guard clause: nothing to run without at least one query source.
    if sql is None and pth_query is None:
        raise ValueError('`sql` or `pth_query` should be set')

    query_text = sql if sql is not None else pth_query.read_text()
    return bq_client.query(query_text).to_dataframe()

In [8]:
def build_never_paid_usage_query(base_table):
    """Build the SQL joining a never-pay customer table to monthly ADC usage.

    One CTE is generated per snapshot (Sep 2022 through Feb 2023). Each CTE
    averages ``Login_consistency``, ``number_of_login_days`` and
    ``Arming_Consistency`` per ``dealer_customer_id`` and adds a constant ``1``
    flag column; after the left join the flag is NULL for customers that have
    no row in that snapshot. ``GROUP BY dealer_customer_id`` already yields one
    row per customer, so no ``DISTINCT`` is needed.

    Args:
        base_table: Fully-qualified BigQuery table name
            (``project.dataset.table``) holding the customer rows. The query
            reads its ``CUSTOMER_ID`` column and orders the result by ``BAN``.

    Returns:
        str: The complete query text.

    Raises:
        ValueError: If ``base_table`` contains a backtick, which would break
            out of the quoted table identifier.
    """
    if '`' in base_table:
        raise ValueError('`base_table` must not contain backticks')

    master_table = 'divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data'
    daily_table = 'bi-srv-divg-speech-pr-79f6e9.adc_feature_store.ADC_Feature_data_store_daily'

    # (join alias, CTE name, column suffix, flag column, source table,
    #  snapshot date column, snapshot date)
    # Suffix and flag spellings are kept exactly as in the original output
    # columns (e.g. `_sep` / `Sept_data_flag`) so downstream consumers still work.
    snapshots = [
        ('b', 'ADC_data_Sep_2022', 'sep', 'Sept_data_flag', master_table, 'Month_Snapshot', '2022-09-01'),
        ('c', 'ADC_data_oct_2022', 'Oct', 'Oct_data_flag', master_table, 'Month_Snapshot', '2022-10-01'),
        ('d', 'ADC_data_Nov_2022', 'Nov', 'Nov_data_flag', master_table, 'Month_Snapshot', '2022-11-01'),
        ('e', 'ADC_data_Dec_2022', 'Dec', 'Dec_data_flag', master_table, 'Month_Snapshot', '2022-12-01'),
        ('f', 'ADC_data_Jan_2023', 'Jan', 'Jan_data_flag', master_table, 'Month_Snapshot', '2023-01-01'),
        # February is read from the daily feature store rather than the monthly master table.
        ('g', 'ADC_data_Feb_2023', 'Feb', 'Feb_data_flag', daily_table, 'daily_snapshot_start_date', '2023-02-01'),
    ]

    cte_template = '''
, {cte} as (
  select
    dealer_customer_id as Telus_ID,
    avg(Login_consistency) as Login_consistency_{suffix},
    avg(number_of_login_days) as Login_days_{suffix},
    avg(Arming_Consistency) as Arming_consistency_{suffix},
    1 as {flag}
  from `{table}`
  where {date_column}='{snapshot}'
  group by dealer_customer_id
)
'''
    column_template = (
        ',{alias}.Login_consistency_{suffix},{alias}.Login_days_{suffix}'
        ',{alias}.Arming_consistency_{suffix},{alias}.{flag}'
    )
    join_template = 'left join {cte} {alias}\non a.Telus_customer_ID={alias}.Telus_ID'

    ctes, columns, joins = [], [], []
    for alias, cte, suffix, flag, table, date_column, snapshot in snapshots:
        ctes.append(cte_template.format(
            cte=cte, suffix=suffix, flag=flag, table=table,
            date_column=date_column, snapshot=snapshot,
        ))
        columns.append(column_template.format(alias=alias, suffix=suffix, flag=flag))
        joins.append(join_template.format(cte=cte, alias=alias))

    # dealer_customer_id is joined against a STRING cast of CUSTOMER_ID.
    base_cte = [
        'with base_data as (',
        'select *, cast(CUSTOMER_ID as STRING) as Telus_customer_ID  from `{}`'.format(base_table),
        ')',
    ]
    return '\n'.join(
        base_cte
        + ctes
        + ['select a.*'] + columns
        + ['from base_data a'] + joins
        + ['order by BAN', '']
    )


# Usage query for the 2022 never-pay cohort.
Query = build_never_paid_usage_query('divgpras-pr-579355.SHS.Never_pay_customers_2022')

In [9]:
# Run the 2022 never-pay usage query and load the result into a DataFrame.
DF_2022 = extract_bq_data(
    bq_client,
    sql=Query,
)

In [11]:
# Summarise the 2022 extract: row count, column dtypes and non-null counts.
DF_2022.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 117352 entries, 0 to 117351
Data columns (total 56 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   BAN                     117352 non-null  Int64  
 1   CUSTOMER_ID             117352 non-null  Int64  
 2   FULL_NM                 117084 non-null  object 
 3   START_SERVICE_DT        117352 non-null  object 
 4   BAN_STATUS              117352 non-null  object 
 5   TENURE_MTHS             117352 non-null  Int64  
 6   LANGUAGE_IND            114081 non-null  object 
 7   COMMUNICATION_CHANNEL   76956 non-null   object 
 8   E_BILL_IND              117352 non-null  object 
 9   START_COLLECTION_DT     64011 non-null   object 
 10  COLLECTION_STATUS_CD    64493 non-null   object 
 11  INCOLL_STATUS_IND       117352 non-null  object 
 12  SERVICES_ALL            117352 non-null  object 
 13  SMHM_ONLY_IND           117352 non-null  object 
 14  BILL_CYC_DT         

In [12]:
# Preview the first 10 rows of the 2022 extract.
# NOTE(review): the rendered output includes customer names (FULL_NM) —
# consider clearing this cell's output before sharing the notebook.
DF_2022.head(10)

Unnamed: 0,BAN,CUSTOMER_ID,FULL_NM,START_SERVICE_DT,BAN_STATUS,TENURE_MTHS,LANGUAGE_IND,COMMUNICATION_CHANNEL,E_BILL_IND,START_COLLECTION_DT,...,Arming_consistency_Dec,Dec_data_flag,Login_consistency_Jan,Login_days_Jan,Arming_consistency_Jan,Jan_data_flag,Login_consistency_Feb,Login_days_Feb,Arming_consistency_Feb,Feb_data_flag
0,601368567,78929393,MASTER STEPVAN,06Jul2012,O,120,EN,EMAIL,Y,,...,5.376344,1,16.666667,5.0,5.376344,1,0.0,,5.287356,1
1,601368567,78929393,MASTER STEPVAN,06Jul2012,O,124,EN,EMAIL,Y,24Nov2022,...,5.376344,1,16.666667,5.0,5.376344,1,0.0,,5.287356,1
2,601368567,78929393,MASTER STEPVAN,06Jul2012,O,123,EN,EMAIL,Y,,...,5.376344,1,16.666667,5.0,5.376344,1,0.0,,5.287356,1
3,602083523,31412669,GURINDER ATWAL,25Jul2014,O,100,EN,EMAIL,Y,,...,54.83871,1,63.333333,19.0,54.83871,1,0.0,,65.517241,1
4,602083523,31412669,GURINDER ATWAL,25Jul2014,O,99,EN,EMAIL,Y,,...,54.83871,1,63.333333,19.0,54.83871,1,0.0,,65.517241,1
5,602102375,79790144,KATHLEEN MERX,13Aug2014,O,94,EN,EMAIL,Y,03Jul2022,...,0.0,1,0.0,0.0,0.0,1,0.0,,0.0,1
6,602102375,79790144,KATHLEEN MERX,13Aug2014,O,93,EN,EMAIL,Y,,...,0.0,1,0.0,0.0,0.0,1,0.0,,0.0,1
7,602187320,1366908,STEPHEN BOYD,29Oct2014,O,93,EN,EMAIL,Y,,...,0.0,1,3.333333,1.0,0.0,1,0.0,,0.0,1
8,602187320,1366908,STEPHEN BOYD,29Oct2014,O,94,EN,EMAIL,Y,,...,0.0,1,3.333333,1.0,0.0,1,0.0,,0.0,1
9,602292620,20513226,RANBEER DHILLON,23Feb2015,O,90,EN,EMAIL,Y,,...,0.0,1,0.0,0.0,0.0,1,0.0,,0.0,1


In [13]:
# Write the 2022 extract to a CSV in the current working directory,
# leaving out the DataFrame index.
DF_2022.to_csv(
    'Never_Paid_Usage_2022.csv',
    index=False,
)

In [21]:
# The 2023 cohort query uses the same monthly ADC CTEs, joins and output
# columns as `Query`; the only real difference is the base customer table
# (the previous copy also spelled two Feb column names with different letter
# case, which BigQuery treats as the same columns). Derive it from `Query`
# instead of keeping a second full copy of the SQL that can drift.
_BASE_TABLE_2022 = 'divgpras-pr-579355.SHS.Never_pay_customers_2022'
_BASE_TABLE_2023 = 'divgpras-pr-579355.SHS.Never_pay_customers_2023'

# Fail loudly rather than silently re-running the 2022 query if the base
# table reference in `Query` ever changes.
if Query.count(_BASE_TABLE_2022) != 1:
    raise ValueError(
        'expected exactly one reference to {} in Query'.format(_BASE_TABLE_2022)
    )

Query_2023 = Query.replace(_BASE_TABLE_2022, _BASE_TABLE_2023)

In [22]:
# Run the 2023 never-pay usage query and load the result into a DataFrame.
DF_2023 = extract_bq_data(
    bq_client,
    sql=Query_2023,
)

In [23]:
# Summarise the 2023 extract: row count, column dtypes and non-null counts.
DF_2023.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35328 entries, 0 to 35327
Data columns (total 56 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   BAN                     35328 non-null  Int64  
 1   CUSTOMER_ID             35328 non-null  Int64  
 2   FULL_NM                 35279 non-null  object 
 3   START_SERVICE_DT        35328 non-null  object 
 4   BAN_STATUS              35328 non-null  object 
 5   TENURE_MTHS             35328 non-null  Int64  
 6   LANGUAGE_IND            34737 non-null  object 
 7   COMMUNICATION_CHANNEL   26733 non-null  object 
 8   E_BILL_IND              35328 non-null  object 
 9   START_COLLECTION_DT     18927 non-null  object 
 10  COLLECTION_STATUS_CD    19017 non-null  object 
 11  INCOLL_STATUS_IND       35328 non-null  object 
 12  SERVICES_ALL            35328 non-null  object 
 13  SMHM_ONLY_IND           35328 non-null  object 
 14  BILL_CYC_DT             35328 non-null

In [24]:
# Preview the first rows of the 2023 extract.
# NOTE(review): the rendered output includes customer names (FULL_NM) —
# consider clearing this cell's output before sharing the notebook.
DF_2023.head()

Unnamed: 0,BAN,CUSTOMER_ID,FULL_NM,START_SERVICE_DT,BAN_STATUS,TENURE_MTHS,LANGUAGE_IND,COMMUNICATION_CHANNEL,E_BILL_IND,START_COLLECTION_DT,...,Arming_consistency_Dec,Dec_data_flag,Login_consistency_Jan,Login_days_Jan,Arming_consistency_Jan,Jan_data_flag,Login_consistency_Feb,Login_days_Feb,Arming_consistency_Feb,Feb_data_flag
0,601368567,78929393,MASTER STEPVAN,06Jul2012,O,126,EN,EMAIL,Y,,...,5.376344,1,16.666667,5.0,5.376344,1,0.0,,5.287356,1
1,602215572,88695748,JOANNA REID,27Nov2014,O,98,EN,,Y,13Feb2023,...,0.0,1,0.0,0.0,0.0,1,0.0,,0.0,1
2,602215572,88695748,JOANNA REID,27Nov2014,O,97,EN,,Y,,...,0.0,1,0.0,0.0,0.0,1,0.0,,0.0,1
3,602215572,88695748,JOANNA REID,27Nov2014,O,99,EN,,Y,13Feb2023,...,0.0,1,0.0,0.0,0.0,1,0.0,,0.0,1
4,602234155,78244019,TERRI M BEEBE,17Dec2014,O,98,EN,,Y,27Feb2023,...,0.0,1,53.333333,16.0,0.0,1,0.0,,0.0,1


In [25]:
# Write the 2023 extract to a CSV in the current working directory,
# leaving out the DataFrame index.
DF_2023.to_csv(
    'Never_Paid_Usage_2023.csv',
    index=False,
)