In [1]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root = the part of the working directory that precedes the first
# occurrence of 'notebooks' (the whole cwd if 'notebooks' does not occur).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package imports below resolve.
sys.path.insert(0, str(pth_project))
# read_text() opens and closes the file itself; the previous bare
# `pth_creds.open()` handle was never closed.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services; the returned object is used below as the
# BigQuery client (`.query`, `.load_table_from_dataframe`).
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows before pandas truncates displayed output.
pd.set_option('display.max_rows', 100)

In [2]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Args:
        bq_client: Object exposing ``query(sql)``; the job it returns must
            provide ``to_dataframe()``.
        sql: Query text. Takes precedence over ``pth_query`` when both are given.
        pth_query: Location of a file holding the query text; only read when
            ``sql`` is None. May be a ``pathlib.Path`` or a plain string path.

    Returns:
        Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises:
        ValueError: If neither ``sql`` nor ``pth_query`` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Wrapping in Path lets callers pass a string path as well as a Path.
        sql = Path(pth_query).read_text()
    return bq_client.query(sql).to_dataframe()

In [3]:
# BigQuery script: for the snapshot date `end_dt_snpsht`, take the latest ADC
# account row per customer_id and the latest matching product-instance row per
# cust_bus_cust_id, then inner-join them on dealer_customer_id = cust_bus_cust_id.
# Compared with the earlier version, the unreferenced eligibility-date CTE and
# the sorts inside the CTEs were dropped: neither affected the rows returned.
Customer_details = '''
-- Take LAST day SNAPSHOT FOR active customers -- remove the eligibility criteria -- AND use it AS contract start feature
DECLARE
  start_dt_snpsht DATE DEFAULT '2022-10-15';  -- no longer referenced by the query below
DECLARE
  end_dt_snpsht DATE DEFAULT '2023-01-15';
WITH
  ADC_Customer_Base AS (
  SELECT
    customer_id,
    dealer_customer_id,
    primary_login_id,
    dealer_name,
    DATE(join_date) AS join_date,
    account_type_name,
    customer_type_name,
    CASE
      WHEN account_type_name='Standalone' THEN 'Smart_Camera'
      WHEN account_type_name='Awareness and Automation' THEN 'Smart_Automation_Plus'
      ELSE 'Monitored'
    END AS Package,
    DATE_DIFF(end_dt_snpsht, DATE(join_date), MONTH) AS Tenure_months
  FROM
    `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details`
  WHERE
    DATE(last_updt_ts) = end_dt_snpsht
    -- keep only the most recently updated row per customer
    QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1 ),
  Telus_customers AS (
  SELECT
    bacct_bus_bacct_num AS BAN,
    cust_bus_cust_id,
    DATE(pi_cntrct_start_ts) AS contract_start_date,
    DATE(pi_cntrct_end_ts) AS contract_end_date,
    CASE
      -- 9999-12-31 is tested first so it is labelled contract_expired instead of
      -- falling into the "after 90 days" branch (looks like an open-ended
      -- placeholder date; confirm the intended label)
      WHEN DATE(pi_cntrct_end_ts) = "9999-12-31" THEN 'contract_expired'
      WHEN DATE_DIFF(DATE(pi_cntrct_end_ts), end_dt_snpsht, DAY) < 0 THEN 'contract_expired'
      WHEN DATE_DIFF(DATE(pi_cntrct_end_ts), end_dt_snpsht, DAY) <= 90 THEN 'contract_expiring_within_90_days'
      WHEN DATE_DIFF(DATE(pi_cntrct_end_ts), end_dt_snpsht, DAY) > 90 THEN 'contract_expiring_after_90_days'
      ELSE 'Undefined_contract_end_date'
    END AS contract_end_status,
    DATE_DIFF(end_dt_snpsht, DATE(pi_cntrct_start_ts), MONTH) AS Contract_start_months
  FROM
    `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht`
  WHERE
    DATE(prod_instnc_ts) = end_dt_snpsht
    AND pi_prod_instnc_typ_cd = 'SMHM'  -- Service type
    AND bus_prod_instnc_src_id = 1001   -- BANs that are FOR home services
    AND pi_prod_instnc_stat_cd IN ('A')
    AND consldt_cust_typ_cd = 'R'
    -- keep only the most recent product-instance row per customer
    QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY prod_instnc_ts DESC) = 1 ),
  Merge_data AS (
  SELECT
    *
  FROM
    ADC_Customer_Base a
  INNER JOIN
    Telus_customers b
  ON
    a.dealer_customer_id = b.cust_bus_cust_id )
SELECT
  *
FROM
  Merge_data
'''

In [4]:
# Execute the snapshot script and load the result into a DataFrame.
Customer_info = extract_bq_data(bq_client=bq_client, sql=Customer_details)

In [5]:
# Column dtypes and non-null counts of the raw extract. In the recorded output
# only contract_start_date / Contract_start_months contain nulls.
Customer_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 368584 entries, 0 to 368583
Data columns (total 15 columns):
 #   Column                 Non-Null Count   Dtype 
---  ------                 --------------   ----- 
 0   customer_id            368584 non-null  Int64 
 1   dealer_customer_id     368584 non-null  object
 2   primary_login_id       368584 non-null  Int64 
 3   dealer_name            368584 non-null  object
 4   join_date              368584 non-null  object
 5   account_type_name      368584 non-null  object
 6   customer_type_name     368584 non-null  object
 7   Package                368584 non-null  object
 8   Tenure_months          368584 non-null  Int64 
 9   BAN                    368584 non-null  Int64 
 10  cust_bus_cust_id       368584 non-null  object
 11  contract_start_date    326908 non-null  object
 12  contract_end_date      368584 non-null  object
 13  contract_end_status    368584 non-null  object
 14  Contract_start_months  326908 non-null  Int64 
dtype

In [6]:
# Earliest join date in the extract (sanity check on the tenure range).
Customer_info['join_date'].min()

datetime.date(2010, 3, 18)

In [7]:
# Row count per contract start date; value_counts() leaves out missing dates by default.
Customer_info['contract_start_date'].value_counts()

2023-01-06    804
2022-11-24    707
2023-01-11    698
2022-11-23    693
2022-12-01    688
             ... 
2006-02-22      1
2001-10-17      1
2013-10-24      1
2014-05-09      1
2015-11-08      1
Name: contract_start_date, Length: 3490, dtype: int64

In [8]:
# Row count per contract status label. The recorded output contains no
# 'Undefined_contract_end_date' rows, i.e. the CASE fallback was never hit.
Customer_info['contract_end_status'].value_counts()

contract_expiring_after_90_days     288888
contract_expired                     71895
contract_expiring_within_90_days      7801
Name: contract_end_status, dtype: int64

In [9]:
# Row count per dealer.
# NOTE(review): the recorded output includes 'TELUS Pre-Production' rows —
# presumably test accounts; confirm whether they should be excluded before loading.
Customer_info['dealer_name'].value_counts()

TELUS Communications Inc.        368308
ADT by TELUS                        221
TELUS Pre-Production                 47
TELUS Custom Security Systems         8
Name: dealer_name, dtype: int64

In [10]:
# Row count per derived Package value (the CASE on account_type_name in the query).
Customer_info['Package'].value_counts()

Monitored                223397
Smart_Automation_Plus     98750
Smart_Camera              46437
Name: Package, dtype: int64

In [11]:
# Count how many Customer_info rows carry each BAN and return it as a
# two-column frame (BAN, unique_counts), most frequent BAN first.
BAN_list = (
    Customer_info['BAN']
    .value_counts()
    .rename_axis('BAN')
    .reset_index(name='unique_counts')
)

In [12]:
# Keep only the BANs whose row count is exactly 1.
BAN_list_1 = BAN_list.loc[BAN_list['unique_counts'].eq(1)]

In [13]:
# Size check on the single-occurrence BAN list.
BAN_list_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 360097 entries, 4046 to 364142
Data columns (total 2 columns):
 #   Column         Non-Null Count   Dtype
---  ------         --------------   -----
 0   BAN            360097 non-null  Int64
 1   unique_counts  360097 non-null  Int64
dtypes: Int64(2)
memory usage: 8.9 MB


In [14]:
# Restrict the extract to customers whose BAN appears in BAN_list_1; the inner
# join on BAN drops every row whose BAN is not in that list.
Customer_info_1 = Customer_info.merge(
    BAN_list_1[['BAN']],
    how='inner',
    on='BAN',
)

In [15]:
# Re-count rows per BAN after filtering; every count should be 1.
Customer_info_1['BAN'].value_counts().rename_axis('BAN').reset_index(name='unique_counts')

Unnamed: 0,BAN,unique_counts
0,605588323,1
1,603577884,1
2,605579465,1
3,605587046,1
4,605587495,1
...,...,...
360092,605163696,1
360093,604056155,1
360094,229759849,1
360095,605609859,1


In [16]:
# Column dtypes and non-null counts of the filtered frame that is loaded to BigQuery below.
Customer_info_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 360097 entries, 0 to 360096
Data columns (total 15 columns):
 #   Column                 Non-Null Count   Dtype 
---  ------                 --------------   ----- 
 0   customer_id            360097 non-null  Int64 
 1   dealer_customer_id     360097 non-null  object
 2   primary_login_id       360097 non-null  Int64 
 3   dealer_name            360097 non-null  object
 4   join_date              360097 non-null  object
 5   account_type_name      360097 non-null  object
 6   customer_type_name     360097 non-null  object
 7   Package                360097 non-null  object
 8   Tenure_months          360097 non-null  Int64 
 9   BAN                    360097 non-null  Int64 
 10  cust_bus_cust_id       360097 non-null  object
 11  contract_start_date    319404 non-null  object
 12  contract_end_date      360097 non-null  object
 13  contract_end_status    360097 non-null  object
 14  Contract_start_months  319404 non-null  Int64 
dtype

In [17]:
# Preview the first rows of the filtered frame.
Customer_info_1.head()

Unnamed: 0,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,Package,Tenure_months,BAN,cust_bus_cust_id,contract_start_date,contract_end_date,contract_end_status,Contract_start_months
0,1175274,105220977,1245513,TELUS Communications Inc.,2012-01-17,Security System,Customer,Monitored,132,605588323,105220977,2022-01-23,2025-01-23,contract_expiring_after_90_days,12.0
1,2191014,105167762,2378159,TELUS Communications Inc.,2013-06-27,Security System,Customer,Monitored,115,605567827,105167762,,9999-12-31,contract_expired,
2,2209667,105233747,2398935,TELUS Communications Inc.,2013-07-03,Security System,Customer,Monitored,114,605597517,105233747,2022-05-01,2024-05-01,contract_expiring_after_90_days,8.0
3,2377229,105214885,2586096,TELUS Communications Inc.,2013-08-26,Security System,Customer,Monitored,113,605589480,105214885,,9999-12-31,contract_expired,
4,2898226,105168087,3197570,TELUS Communications Inc.,2014-06-11,Security System,Customer,Monitored,103,605567112,105168087,,9999-12-31,contract_expired,


In [18]:
# Row count per months-since-contract-start value.
# NOTE(review): the recorded output contains values close to 9850 months, which
# suggests placeholder contract start dates in the source — TODO confirm and
# decide how to treat them before using this feature.
Customer_info_1['Contract_start_months'].value_counts()

2       15494
1       15125
5       14468
3       14183
4       13618
        ...  
9854        1
289         1
9844        1
9842        1
282         1
Name: Contract_start_months, Length: 293, dtype: Int64

In [19]:
# Row count per tenure (months between join_date and the snapshot date).
Customer_info_1['Tenure_months'].value_counts()

2      17963
1      16956
5      16128
3      16005
4      15075
       ...  
118        1
146        1
147        1
144        1
142        1
Name: Tenure_months, Length: 148, dtype: Int64

In [20]:
# Rows with Tenure_months == 0; in the recorded output these are customers whose
# join_date falls in the same calendar month as the snapshot date.
Customer_info_1[Customer_info_1.Tenure_months==0]

Unnamed: 0,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,Package,Tenure_months,BAN,cust_bus_cust_id,contract_start_date,contract_end_date,contract_end_status,Contract_start_months
3496,15091016,19005873,17859108,TELUS Communications Inc.,2023-01-05,Security System,Customer,Monitored,0,605546842,19005873,2022-12-14,2025-12-14,contract_expiring_after_90_days,1
3540,15121746,105276639,17897153,TELUS Communications Inc.,2023-01-07,Security System,Customer,Monitored,0,605607047,105276639,2022-12-20,2025-12-20,contract_expiring_after_90_days,1
3593,15181672,105324499,17970274,TELUS Communications Inc.,2023-01-02,Security System,Customer,Monitored,0,605617311,105324499,2023-01-02,2028-01-02,contract_expiring_after_90_days,0
3594,15182237,105332950,17971119,TELUS Communications Inc.,2023-01-02,Security System,Customer,Monitored,0,605619042,105332950,2023-01-03,2028-01-03,contract_expiring_after_90_days,0
3595,15183221,105220784,17972371,TELUS Communications Inc.,2023-01-03,Security System,Customer,Monitored,0,605590887,105220784,2023-01-03,2026-01-03,contract_expiring_after_90_days,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360092,15248419,104112232,18054022,TELUS Communications Inc.,2023-01-13,Awareness and Automation,Customer,Smart_Automation_Plus,0,605163696,104112232,,9999-12-31,contract_expired,
360093,15249685,99879743,18055551,TELUS Communications Inc.,2023-01-13,Awareness and Automation,Customer,Smart_Automation_Plus,0,604056155,99879743,2023-01-14,2026-01-14,contract_expiring_after_90_days,0
360094,15249943,19819948,18055848,TELUS Communications Inc.,2023-01-13,Awareness and Automation,Customer,Smart_Automation_Plus,0,229759849,19819948,2023-01-13,2026-01-13,contract_expiring_after_90_days,0
360095,15250270,105289145,18056251,TELUS Communications Inc.,2023-01-13,Awareness and Automation,Commitment,Smart_Automation_Plus,0,605609859,105289145,2023-01-13,2026-01-13,contract_expiring_after_90_days,0


In [21]:
# Load job configuration for writing the filtered frame to BigQuery.
config = bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
# WRITE_TRUNCATE replaces the destination table's contents on each run
# instead of appending to them.
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Destination given as dataset.table; no project id is part of this string.
Table_BQ = 'SHS.SHS_churn_model_base_score_data_oct2022_jan2023'

# load_table_from_dataframe only starts the load job and returns it.
bq_table_instance = bq_client.load_table_from_dataframe(Customer_info_1, Table_BQ, job_config=config)
# Block until the job finishes so a failed load raises here instead of
# passing silently when the notebook ends.
bq_table_instance.result()