In [3]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# The project root is whatever precedes the first 'notebooks' substring in the
# current working directory (the cwd itself when 'notebooks' does not occur).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Make the project's `core` package importable from this notebook.
sys.path.insert(0, str(pth_project))
# read_text() opens, reads and closes the file; passing an open handle straight
# to safe_load() left the file descriptor open for the life of the kernel.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path tweak above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when a DataFrame/Series is displayed.
pd.options.display.max_rows = 100

In [4]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return ``.to_dataframe()`` of the result.

    Args:
        bq_client: object exposing ``query(sql)`` whose return value has a
            ``to_dataframe()`` method.
        sql: query text. Used whenever it is not ``None``, even if
            ``pth_query`` is also supplied.
        pth_query: object exposing ``read_text()`` that yields the query text;
            only consulted when ``sql`` is ``None``.

    Returns:
        Whatever ``bq_client.query(...).to_dataframe()`` returns.

    Raises:
        ValueError: when both ``sql`` and ``pth_query`` are ``None``.
    """
    # Guard clause: nothing to run.
    if sql is None and pth_query is None:
        raise ValueError('`sql` or `pth_query` should be set')

    # Inline SQL wins over the query file.
    query_text = sql if sql is not None else pth_query.read_text()
    return bq_client.query(query_text).to_dataframe()

In [5]:
# BigQuery script that builds one customer-level extract for the snapshot window
# declared at its top (start_dt_snpsht = 2023-03-01, end_dt_snpsht = 2023-04-25).
#
# CTEs, in order:
#   date_sql               - computes eligibility_date (start date minus 3 months,
#                            truncated to month). NOTE(review): nothing later in the
#                            script references this CTE - confirm it is still needed.
#   ADC_Customer_Base      - rows from bq_customer_account_details updated inside the
#                            window for dealer 'TELUS Communications Inc.', keeping the
#                            most recent row per customer_id. Derives Package from
#                            account_type_name, Tenure_months from join_date, and a
#                            constant Interactivity_flag = 1.
#   Telus_customers        - rows from bq_prod_instnc_snpsht dated end_dt_snpsht with
#                            product type 'SMHM', source id 1001, status 'A' and
#                            customer type 'R', keeping the most recent row per
#                            cust_bus_cust_id. Buckets the contract end date into
#                            contract_end_status.
#                            NOTE(review): a contract end date of 9999-12-31 is labelled
#                            'contract_expired' - confirm that is the intended meaning.
#   Telus_Billing          - billing province code per BAN on end_dt_snpsht.
#   ADT_migrated_customers - DISTINCT rows from bq_customer for branch_no 999100/999500
#                            with a non-null udf4 (exposed as BAN), flagged with 1.
#   Merge_data             - Telus_customers LEFT JOINed to the three CTEs above.
#
# NOTE(review): ADT_migrated_customers is DISTINCT over several columns, not over BAN
# alone, and ADC_Customer_Base is unique per customer_id while the join key is
# dealer_customer_id, so the LEFT JOINs may return more than one row per BAN.
# Later cells drop every BAN that appears more than once - confirm this is intended.
# The ORDER BY clauses inside the CTEs do not define the order of the final result.
Customer_details='''


DECLARE
  start_dt_snpsht DATE DEFAULT '2023-03-01';
DECLARE
  end_dt_snpsht DATE DEFAULT '2023-04-25';
WITH
  date_sql AS (
  SELECT
    DATE_TRUNC(DATE_SUB(start_dt_snpsht, INTERVAL 3 month), month) AS eligibility_date ),
  
  ADC_Customer_Base AS (
  SELECT
    dealer_customer_id,
    customer_id,
    primary_login_id,
    dealer_name,
    DATE(join_date) AS join_date,
    account_type_name,
    customer_type_name,
    CASE
      WHEN account_type_name='Standalone' THEN 'Smart_Camera'
      WHEN account_type_name='Awareness and Automation' THEN 'Smart_Automation_Plus'
    ELSE
    'Monitored'
  END
    AS Package,
    DATE_DIFF(end_dt_snpsht,DATE(join_date), MONTH) AS Tenure_months,
    1 as Interactivity_flag #Interactivity=Having ADC account

  FROM
    `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details`
  WHERE
    DATE(last_updt_ts) <= end_dt_snpsht
    AND DATE(last_updt_ts) >= start_dt_snpsht
    AND dealer_name in ('TELUS Communications Inc.')
    QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
  ORDER BY
    join_date,
    customer_id ),
  
Telus_customers AS (
  SELECT
    cust_bus_cust_id as Telus_ID,
    cast (bacct_bus_bacct_num as STRING) AS BAN,
    DATE(pi_cntrct_start_ts) AS contract_start_date,
    DATE(pi_cntrct_end_ts) AS contract_end_date,
    DATE(pi_actvn_ts) as Intial_activation_date,
    CASE
      WHEN DATE(pi_cntrct_end_ts) = "9999-12-31" THEN 'contract_expired'
      WHEN DATE_DIFF(DATE(pi_cntrct_end_ts),end_dt_snpsht, DAY) < 0 THEN 'contract_expired'
      WHEN DATE_DIFF(DATE(pi_cntrct_end_ts),end_dt_snpsht, DAY) <= 90 THEN 'contract_expiring_within_90_days'
      WHEN DATE_DIFF(DATE(pi_cntrct_end_ts),end_dt_snpsht, DAY) > 90 THEN 'contract_expiring_after_90_days'
    ELSE
    'Undefined_contract_end_date'
  END
    AS contract_end_status,
    DATE_DIFF(end_dt_snpsht,DATE(pi_cntrct_start_ts), MONTH) AS Contract_start_months
  FROM
    `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht`
  WHERE
    DATE(prod_instnc_ts) = end_dt_snpsht
    AND pi_prod_instnc_typ_cd ='SMHM' #Serice type
    AND bus_prod_instnc_src_id = 1001 #BANs that are FOR home services
    AND pi_prod_instnc_stat_cd IN ('A')
    AND consldt_cust_typ_cd = 'R' QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY prod_instnc_ts DESC) = 1
  ORDER BY
    cust_bus_cust_id ),
    
Telus_Billing as (

select CAST (bus_bacct_num as STRING) as BAN,billg_addr_prov_cd as Province_code
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_billg_acct_snpsht`
WHERE DATE(billg_acct_ts)=end_dt_snpsht
QUALIFY ROW_NUMBER() OVER (PARTITION BY bus_bacct_num ORDER BY billg_acct_ts DESC) = 1
)

    
,ADT_migrated_customers as (

SELECT distinct udf4 as BAN
      ,cust_no
      ,co_no
      ,branch_no
      ,custype_id
      ,custstat_id
      , 1 as ADT_migrated_customer
      

  FROM `cio-datahub-enterprise-pr-183a.src_mastermind.bq_customer`
  where branch_no in (999100,999500)
  and udf4 is not NULL


),

Merge_data AS (
  SELECT
    a.*,b.*,c.ADT_migrated_customer,d.Province_code
  FROM
    Telus_customers a
  LEFT JOIN
    ADC_Customer_Base b
  ON
    a.Telus_ID=b.dealer_customer_id
LEFT JOIN
ADT_migrated_customers c
on a.BAN=c.BAN
LEFT JOIN
Telus_Billing d
on a.BAN=d.BAN


)

SELECT
  *
FROM
  Merge_data
  
  
  
  
'''

In [6]:
# Run the Customer_details script on BigQuery and load the full result into memory.
Customer_info=extract_bq_data(bq_client, sql=Customer_details)

In [7]:
# Column dtypes and non-null counts of the raw extract (shows which joined columns are sparse).
Customer_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 697271 entries, 0 to 697270
Data columns (total 19 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   Telus_ID                697271 non-null  object
 1   BAN                     697271 non-null  object
 2   contract_start_date     477519 non-null  object
 3   contract_end_date       697271 non-null  object
 4   Intial_activation_date  697271 non-null  object
 5   contract_end_status     697271 non-null  object
 6   Contract_start_months   477519 non-null  Int64 
 7   dealer_customer_id      521556 non-null  object
 8   customer_id             521556 non-null  Int64 
 9   primary_login_id        521556 non-null  Int64 
 10  dealer_name             521556 non-null  object
 11  join_date               521556 non-null  object
 12  account_type_name       521556 non-null  object
 13  customer_type_name      521556 non-null  object
 14  Package                 521556 non-n

In [13]:
# Customer_info['join_date'].min()

In [14]:
# Row count per contract start date; value_counts() leaves out nulls by default.
Customer_info['contract_start_date'].value_counts()

2021-04-01    2518
2016-06-06    2296
2017-11-01    1256
2021-12-01    1090
2022-12-01     860
              ... 
2013-12-04       1
2012-02-08       1
2009-05-10       1
2016-05-07       1
2013-03-20       1
Name: contract_start_date, Length: 3745, dtype: int64

In [15]:
# Row count per billing-address province code (Province_code comes from billg_addr_prov_cd).
Customer_info['Province_code'].value_counts()

AB    223554
BC    203571
ON    133316
QC    101420
MB     14368
SK     10930
NS      4926
NB      2826
NL      2024
PE       230
NT        35
NU        26
YT        25
CA         5
TX         3
MA         1
MT         1
MI         1
WA         1
SC         1
LA         1
NY         1
CT         1
CO         1
Name: Province_code, dtype: int64

In [16]:
# Row count per contract_end_status bucket produced by the CASE expression in the query.
Customer_info['contract_end_status'].value_counts()

contract_expiring_after_90_days     421867
contract_expired                    260343
contract_expiring_within_90_days     15061
Name: contract_end_status, dtype: int64

In [17]:
# Row count per dealer; the query filters ADC rows to a single dealer, so only matched rows are counted here.
Customer_info['dealer_name'].value_counts()

TELUS Communications Inc.    521556
Name: dealer_name, dtype: int64

In [18]:
# Row count per derived Package label (rows without an ADC match are null and not counted).
Customer_info['Package'].value_counts()

Monitored                355790
Smart_Automation_Plus    116628
Smart_Camera              49138
Name: Package, dtype: int64

In [19]:
# Fill the nulls that the LEFT JOINs leave on rows without an ADC / ADT match.
# Each column is assigned back explicitly: calling fillna(..., inplace=True) on a
# column selection is chained in-place mutation, which pandas 2.x deprecates and
# which does not update the parent frame once Copy-on-Write is enabled.
Customer_info['Package'] = Customer_info['Package'].fillna('Unknown_Package')
# Customer_info['Tenure_months'].fillna('Unknown_Tenure',inplace=True)
Customer_info['dealer_name'] = Customer_info['dealer_name'].fillna('Unknown_Dealer')
# Flag columns: no match means the flag is 0.
Customer_info['Interactivity_flag'] = Customer_info['Interactivity_flag'].fillna(0)
Customer_info['ADT_migrated_customer'] = Customer_info['ADT_migrated_customer'].fillna(0)

In [20]:
# Percentage of rows per ADT_migrated_customer flag value.
Customer_info['ADT_migrated_customer'].value_counts(normalize=True)*100

0    63.413651
1    36.586349
Name: ADT_migrated_customer, dtype: Float64

In [21]:
# Percentage of rows per Interactivity_flag value.
Customer_info['Interactivity_flag'].value_counts(normalize=True)*100

1    74.799612
0    25.200388
Name: Interactivity_flag, dtype: Float64

In [22]:
# Number of extract rows per BAN, as a two-column frame (BAN, unique_counts).
BAN_list = (
    Customer_info['BAN']
    .value_counts()
    .rename_axis('BAN')
    .reset_index(name='unique_counts')
)

In [23]:
# Keep only the BANs that occur on exactly one row of the extract.
BAN_list_1 = BAN_list.loc[BAN_list['unique_counts'] == 1]

In [24]:
# How many BANs survive the "exactly one row" filter.
BAN_list_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 671111 entries, 12012 to 683122
Data columns (total 2 columns):
 #   Column         Non-Null Count   Dtype 
---  ------         --------------   ----- 
 0   BAN            671111 non-null  object
 1   unique_counts  671111 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 15.4+ MB


In [25]:
# Restrict the extract to single-row BANs via an inner join on BAN.
Customer_info_1 = pd.merge(
    Customer_info,
    BAN_list_1[['BAN']],
    how='inner',
    on='BAN',
)

In [26]:
# Sanity check: per-BAN row counts in the filtered frame (each is expected to be 1).
Customer_info_1['BAN'].value_counts().rename_axis('BAN').reset_index(name='unique_counts')

Unnamed: 0,BAN,unique_counts
0,605811173,1
1,605733401,1
2,605046050,1
3,605048936,1
4,605818197,1
...,...,...
671106,605526632,1
671107,605567588,1
671108,605578277,1
671109,605655711,1


In [27]:
# Column dtypes and non-null counts after the fills and the single-row-BAN filter.
Customer_info_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 671111 entries, 0 to 671110
Data columns (total 19 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   Telus_ID                671111 non-null  object
 1   BAN                     671111 non-null  object
 2   contract_start_date     456425 non-null  object
 3   contract_end_date       671111 non-null  object
 4   Intial_activation_date  671111 non-null  object
 5   contract_end_status     671111 non-null  object
 6   Contract_start_months   456425 non-null  Int64 
 7   dealer_customer_id      496186 non-null  object
 8   customer_id             496186 non-null  Int64 
 9   primary_login_id        496186 non-null  Int64 
 10  dealer_name             671111 non-null  object
 11  join_date               496186 non-null  object
 12  account_type_name       496186 non-null  object
 13  customer_type_name      496186 non-null  object
 14  Package                 671111 non-n

In [28]:
# Peek at the first rows of the filtered frame.
Customer_info_1.head()

Unnamed: 0,Telus_ID,BAN,contract_start_date,contract_end_date,Intial_activation_date,contract_end_status,Contract_start_months,dealer_customer_id,customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,Package,Tenure_months,Interactivity_flag,ADT_migrated_customer,Province_code
0,106017614,605811173,,9999-12-31,2023-03-15,contract_expired,,106017614,6199998,7140235,TELUS Communications Inc.,2018-05-17,Security System,Customer,Monitored,59,1,1,BC
1,19071476,603845847,2023-02-14,2028-02-14,2019-03-27,contract_expiring_after_90_days,2.0,19071476,7154541,8323448,TELUS Communications Inc.,2019-03-27,Security System,Customer,Monitored,49,1,0,BC
2,2327117,601978796,,9999-12-31,2019-01-13,contract_expired,,2327117,6886864,7998250,TELUS Communications Inc.,2019-01-13,Security System,Customer,Monitored,51,1,0,AB
3,106153693,605871197,,9999-12-31,2023-03-28,contract_expired,,106153693,6760626,7836449,TELUS Communications Inc.,2018-11-21,Security System,Customer,Monitored,53,1,1,QC
4,106255588,605894663,,9999-12-31,2023-04-06,contract_expired,,106255588,3770360,4230678,TELUS Communications Inc.,2015-08-21,Security System,Customer,Monitored,92,1,1,ON


In [29]:
# Distribution of months between contract start and the snapshot end date (nulls excluded).
Customer_info_1['Contract_start_months'].value_counts()

1        15932
5        15686
3        15144
8        14933
4        14874
         ...  
12040        1
9616         1
322          1
9845         1
256          1
Name: Contract_start_months, Length: 339, dtype: Int64

In [30]:
# Distribution of months between ADC join_date and the snapshot end date (nulls excluded).
Customer_info_1['Tenure_months'].value_counts()

5      16854
1      16830
3      15983
4      15415
6      15161
       ...  
157        2
165        2
155        2
150        1
154        1
Name: Tenure_months, Length: 158, dtype: Int64

In [31]:
# Inspect rows with Tenure_months == 0, i.e. the query's month-level DATE_DIFF between
# the snapshot end date and join_date is zero.
Customer_info_1[Customer_info_1.Tenure_months==0]

Unnamed: 0,Telus_ID,BAN,contract_start_date,contract_end_date,Intial_activation_date,contract_end_status,Contract_start_months,dealer_customer_id,customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,Package,Tenure_months,Interactivity_flag,ADT_migrated_customer,Province_code
234435,560783,208612537,2023-04-13,2026-04-13,2023-04-13,contract_expiring_after_90_days,0,560783,15783720,18714307,TELUS Communications Inc.,2023-04-13,Awareness and Automation,Customer,Smart_Automation_Plus,0,1,0,AB
234436,98599830,603682436,2023-04-18,2026-04-18,2023-04-18,contract_expiring_after_90_days,0,98599830,15811512,18749033,TELUS Communications Inc.,2023-04-19,Awareness and Automation,Customer,Smart_Automation_Plus,0,1,0,AB
234437,100249642,604141161,2023-04-08,2028-04-08,2023-04-07,contract_expiring_after_90_days,0,100249642,15752321,18675115,TELUS Communications Inc.,2023-04-07,Security System,Customer,Monitored,0,1,0,AB
234438,103730928,605021532,2023-04-22,2026-04-22,2023-04-22,contract_expiring_after_90_days,0,103730928,15837255,18780696,TELUS Communications Inc.,2023-04-22,Awareness and Automation,Customer,Smart_Automation_Plus,0,1,0,BC
234439,106327089,605931741,2023-04-16,2028-04-16,2023-04-16,contract_expiring_after_90_days,0,106327089,15797731,18731801,TELUS Communications Inc.,2023-04-16,Security System,Customer,Monitored,0,1,0,ON
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245328,106195345,605882829,2023-04-05,2026-04-05,2023-04-04,contract_expiring_after_90_days,0,106195345,15731471,18649204,TELUS Communications Inc.,2023-04-04,Security System,Customer,Monitored,0,1,0,AB
245329,2337070,211154247,2023-04-19,2026-04-19,2023-04-14,contract_expiring_after_90_days,0,2337070,15792938,18726042,TELUS Communications Inc.,2023-04-21,Security System,Customer,Monitored,0,1,0,AB
245330,99196550,603850502,2023-04-05,2028-04-05,2023-04-05,contract_expiring_after_90_days,0,99196550,15737119,18656350,TELUS Communications Inc.,2023-04-05,Security System,Customer,Monitored,0,1,0,AB
245331,106300630,605918634,2023-04-22,2026-04-22,2023-04-22,contract_expiring_after_90_days,0,106300630,15836318,18779616,TELUS Communications Inc.,2023-04-22,Security System,Customer,Monitored,0,1,0,SK


In [33]:
# Load-job settings: overwrite the destination table's contents on every run.
config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

# config._properties['timePartitioning'] = {'field': 'Month_Year'}

# Destination given as '<dataset>.<table>'.
Table_BQ = 'SHS.SHS_churn_model_base_score_data_mar23_Apr23'

# load_table_from_dataframe() only starts the load job and returns a job handle.
bq_table_instance = bq_client.load_table_from_dataframe(Customer_info_1, Table_BQ, job_config=config)
# Block until the job finishes; result() raises if the load failed, so a broken
# upload can no longer pass silently.
bq_table_instance.result()