In [1]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
sys.path.insert(0, str(pth_project))
d_project_config = safe_load(pth_creds.open())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [2]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    if sql is not None:
        df = bq_client.query(sql).to_dataframe()
    elif pth_query is not None:
        sql = pth_query.read_text()
        df = bq_client.query(sql).to_dataframe()
    else:
        raise ValueError('`sql` or `pth_query` should be set')  
    return df

In [3]:
Customer_details='''


  --Take LAST day SNAPSHOT FOR active customers -- remove the eligibilty criteria --AND use it AS contract start feature
  
DECLARE
  start_dt_snpsht DATE DEFAULT '2022-12-01';
DECLARE
  end_dt_snpsht DATE DEFAULT '2023-02-28';
WITH
  date_sql AS (
  SELECT
    DATE_TRUNC(DATE_SUB(start_dt_snpsht, INTERVAL 3 month), month) AS eligibility_date ),
  
  ADC_Customer_Base AS (
  SELECT
    dealer_customer_id,
    customer_id,
    primary_login_id,
    dealer_name,
    DATE(join_date) AS join_date,
    account_type_name,
    customer_type_name,
    CASE
      WHEN account_type_name='Standalone' THEN 'Smart_Camera'
      WHEN account_type_name='Awareness and Automation' THEN 'Smart_Automation_Plus'
    ELSE
    'Monitored'
  END
    AS Package,
    DATE_DIFF(end_dt_snpsht,DATE(join_date), MONTH) AS Tenure_months,
    1 as Interactivity_flag #Interactivity=Having ADC account

  FROM
    `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details`
  WHERE
    DATE(last_updt_ts) <= end_dt_snpsht
    AND DATE(last_updt_ts) >= start_dt_snpsht
    AND dealer_name in ('TELUS Communications Inc.')
    QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
  ORDER BY
    join_date,
    customer_id ),
  
Telus_customers AS (
  SELECT
    cust_bus_cust_id as Telus_ID,
    cast (bacct_bus_bacct_num as STRING) AS BAN,
    DATE(pi_cntrct_start_ts) AS contract_start_date,
    DATE(pi_cntrct_end_ts) AS contract_end_date,
    DATE(pi_actvn_ts) as Intial_activation_date,
    CASE
      WHEN DATE(pi_cntrct_end_ts) = "9999-12-31" THEN 'contract_expired'
      WHEN DATE_DIFF(DATE(pi_cntrct_end_ts),end_dt_snpsht, DAY) < 0 THEN 'contract_expired'
      WHEN DATE_DIFF(DATE(pi_cntrct_end_ts),end_dt_snpsht, DAY) <= 90 THEN 'contract_expiring_within_90_days'
      WHEN DATE_DIFF(DATE(pi_cntrct_end_ts),end_dt_snpsht, DAY) > 90 THEN 'contract_expiring_after_90_days'
    ELSE
    'Undefined_contract_end_date'
  END
    AS contract_end_status,
    DATE_DIFF(end_dt_snpsht,DATE(pi_cntrct_start_ts), MONTH) AS Contract_start_months
  FROM
    `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht`
  WHERE
    DATE(prod_instnc_ts) = end_dt_snpsht
    AND pi_prod_instnc_typ_cd ='SMHM' #Serice type
    AND bus_prod_instnc_src_id = 1001 #BANs that are FOR home services
    AND pi_prod_instnc_stat_cd IN ('A')
    AND consldt_cust_typ_cd = 'R' QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY prod_instnc_ts DESC) = 1
  ORDER BY
    cust_bus_cust_id ),
    
ADT_migrated_customers as (

SELECT distinct udf4 as BAN
      ,cust_no
      ,co_no
      ,branch_no
      ,custype_id
      ,custstat_id
      , 1 as ADT_migrated_customer
      

  FROM `cio-datahub-enterprise-pr-183a.src_mastermind.bq_customer`
  where branch_no in (999100,999500)
  and udf4 is not NULL


),

Merge_data AS (
  SELECT
    a.*,b.*,c.ADT_migrated_customer
  FROM
    Telus_customers a
  LEFT JOIN
    ADC_Customer_Base b
  ON
    a.Telus_ID=b.dealer_customer_id
LEFT JOIN
ADT_migrated_customers c
on a.BAN=c.BAN)

SELECT
  *
FROM
  Merge_data
  
  
'''

In [4]:
Customer_info=extract_bq_data(bq_client, sql=Customer_details)

In [5]:
Customer_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545128 entries, 0 to 545127
Data columns (total 18 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   Telus_ID                545128 non-null  object
 1   BAN                     545128 non-null  object
 2   contract_start_date     395553 non-null  object
 3   contract_end_date       545128 non-null  object
 4   Intial_activation_date  545128 non-null  object
 5   contract_end_status     545128 non-null  object
 6   Contract_start_months   395553 non-null  Int64 
 7   dealer_customer_id      431480 non-null  object
 8   customer_id             431480 non-null  Int64 
 9   primary_login_id        431480 non-null  Int64 
 10  dealer_name             431480 non-null  object
 11  join_date               431480 non-null  object
 12  account_type_name       431480 non-null  object
 13  customer_type_name      431480 non-null  object
 14  Package                 431480 non-n

In [7]:
Customer_info.isna().sum()*100/Customer_info.shape[0]

Telus_ID                   0.000000
BAN                        0.000000
contract_start_date       27.438510
contract_end_date          0.000000
Intial_activation_date     0.000000
contract_end_status        0.000000
Contract_start_months     27.438510
dealer_customer_id        20.847948
customer_id               20.847948
primary_login_id          20.847948
dealer_name               20.847948
join_date                 20.847948
account_type_name         20.847948
customer_type_name        20.847948
Package                   20.847948
Tenure_months             20.847948
Interactivity_flag        20.847948
ADT_migrated_customer     77.683590
dtype: float64

In [8]:
Customer_info['Package'].fillna('Unknown_Package',inplace=True)
# Customer_info['Tenure_months'].fillna('Unknown_Tenure',inplace=True)
Customer_info['dealer_name'].fillna('Unknown_Dealer',inplace=True)
Customer_info['Interactivity_flag'].fillna(0,inplace=True)
Customer_info['ADT_migrated_customer'].fillna(0,inplace=True)

In [9]:
Customer_info['contract_start_date'].value_counts()

2016-06-06    2360
2021-04-01    1671
2017-11-01    1313
2023-01-06     898
2021-12-01     850
              ... 
2005-07-28       1
2010-01-08       1
2011-10-17       1
2013-05-03       1
1999-03-01       1
Name: contract_start_date, Length: 3736, dtype: int64

In [10]:
Customer_info['contract_end_status'].value_counts()

contract_expiring_after_90_days     345023
contract_expired                    190735
contract_expiring_within_90_days      9370
Name: contract_end_status, dtype: int64

In [11]:
Customer_info['dealer_name'].value_counts()

TELUS Communications Inc.    431480
Unknown_Dealer               113648
Name: dealer_name, dtype: int64

In [12]:
Customer_info['ADT_migrated_customer'].value_counts()

0    423475
1    121653
Name: ADT_migrated_customer, dtype: Int64

In [13]:
Customer_info['ADT_migrated_customer'].value_counts(normalize=True)*100

0    77.68359
1    22.31641
Name: ADT_migrated_customer, dtype: Float64

In [14]:
Customer_info['Interactivity_flag'].value_counts(normalize=True)*100

1    79.152052
0    20.847948
Name: Interactivity_flag, dtype: Float64

In [15]:
pd.DataFrame(Customer_info.groupby(['ADT_migrated_customer','Interactivity_flag']).agg(
    
    Customer_count= ('Telus_ID','nunique'),
    Activation_date=('Intial_activation_date','min')
    # Email_Opened_count=('OPENED','sum'),
    # CT_Opened_count=('CLICKTHROUGH','sum'),
    # softbounce_count=('SOFTBOUNCE','sum'),
    # Hardbounce_count=('HARDBOUNCE','sum'),
    # # Unsub_count=('UNSUBSCRIBE','sum'),
    # campaign_date_min=('Campaign_date','min'),
    # campaign_date_max=('Campaign_date','max')

    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,ADT_migrated_customer,Interactivity_flag,Customer_count,Activation_date
0,0,0,45472,2018-07-06
1,0,1,367745,2018-06-13
2,1,0,65918,2022-08-17
3,1,1,49051,2022-03-10


In [369]:

Customer_info[Customer_info['BAN']=='605508523']

Unnamed: 0,Telus_ID,BAN,contract_start_date,contract_end_date,Intial_activation_date,contract_end_status,Contract_start_months,dealer_customer_id,customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,Package,Tenure_months,Interactivity_flag,ADT_migrated_customer
56682,105026666,605508523,,9999-12-31,2022-11-11,contract_expired,,,,,Unknown_Dealer,,,,Unknown_Package,,0,1


In [370]:
Customer_info['Interactivity_flag'].value_counts(normalize=True)*100

1    76.692117
0    23.307883
Name: Interactivity_flag, dtype: Float64

In [371]:
# ADT_migrated_customers=Customer_info[(Customer_info['ADT_migrated_customer']==1) & (Customer_info['contract_end_status']=='contract_expired') ]
# [['Telus_ID','BAN','dealer_name']]

In [372]:
# ADT_migrated_customers.info()

In [373]:
# ADT_migrated_customers.head()[['Telus_ID','BAN']]

In [374]:
# ADT_migrated_customers['dealer_name'].value_counts()

In [375]:
Customer_info['Package'].value_counts()

Monitored                231671
Unknown_Package          115760
Smart_Automation_Plus    102590
Smart_Camera              46635
Name: Package, dtype: int64

In [376]:
BAN_list=Customer_info['BAN'].value_counts().rename_axis('BAN').reset_index(name='unique_counts')

In [377]:
BAN_list

Unnamed: 0,BAN,unique_counts
0,605255312,27
1,604663242,17
2,601368567,15
3,604318996,12
4,605038967,11
...,...,...
482745,605535063,1
482746,605534917,1
482747,605537911,1
482748,605534339,1


In [378]:
BAN_list_1=BAN_list[BAN_list.unique_counts==1]

In [379]:
BAN_list_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 470413 entries, 12337 to 482749
Data columns (total 2 columns):
 #   Column         Non-Null Count   Dtype 
---  ------         --------------   ----- 
 0   BAN            470413 non-null  object
 1   unique_counts  470413 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 10.8+ MB


In [380]:
Customer_info_1=Customer_info.merge(BAN_list_1[['BAN']],on='BAN',how='inner')

In [381]:
Customer_info_1['BAN'].value_counts().rename_axis('BAN').reset_index(name='unique_counts')

Unnamed: 0,BAN,unique_counts
0,124963305,1
1,604954988,1
2,604949306,1
3,604951430,1
4,604918507,1
...,...,...
470408,605616689,1
470409,605615734,1
470410,605612678,1
470411,605610932,1


In [382]:
Customer_info_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 470413 entries, 0 to 470412
Data columns (total 18 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   Telus_ID                470413 non-null  object
 1   BAN                     470413 non-null  object
 2   contract_start_date     338253 non-null  object
 3   contract_end_date       470413 non-null  object
 4   Intial_activation_date  470413 non-null  object
 5   contract_end_status     470413 non-null  object
 6   Contract_start_months   338253 non-null  Int64 
 7   dealer_customer_id      360226 non-null  object
 8   customer_id             360226 non-null  Int64 
 9   primary_login_id        360226 non-null  Int64 
 10  dealer_name             470413 non-null  object
 11  join_date               360226 non-null  object
 12  account_type_name       360226 non-null  object
 13  customer_type_name      360226 non-null  object
 14  Package                 470413 non-n

In [383]:
Customer_info_1.head()

Unnamed: 0,Telus_ID,BAN,contract_start_date,contract_end_date,Intial_activation_date,contract_end_status,Contract_start_months,dealer_customer_id,customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,Package,Tenure_months,Interactivity_flag,ADT_migrated_customer
0,10029227,124963305,,9999-12-31,2019-06-13,contract_expired,,10029227,7486745,8729565,TELUS Communications Inc.,2019-06-13,Security System,Customer,Monitored,43,1,0
1,10034968,125458749,2018-10-31,2021-10-31,2018-10-30,contract_expired,51.0,10034968,6701443,7761156,TELUS Communications Inc.,2018-10-30,Security System,Customer,Monitored,51,1,0
2,100615457,604238304,2020-04-14,2023-04-14,2020-04-14,contract_expiring_within_90_days,33.0,100615457,8931781,10496253,TELUS Communications Inc.,2020-04-19,Security System,Customer,Monitored,33,1,0
3,100704347,604257986,2017-01-01,2020-01-01,2020-05-20,contract_expired,72.0,100704347,2808186,3093101,TELUS Communications Inc.,2014-04-29,Security System,Customer,Monitored,105,1,0
4,100720822,604262437,2020-07-31,2023-07-31,2020-05-22,contract_expiring_after_90_days,30.0,100720822,1989596,2153711,TELUS Communications Inc.,2013-04-26,Security System,Customer,Monitored,117,1,0


In [384]:
Customer_info_1['Contract_start_months'].value_counts()

2       15371
5       14235
3       14205
0       13774
1       13495
        ...  
9868        1
9654        1
253         1
9858        1
311         1
Name: Contract_start_months, Length: 332, dtype: Int64

In [385]:
Customer_info_1['Tenure_months'].value_counts()

2      17613
3      15796
5      15627
4      14703
6      14513
       ...  
142        1
154        1
129        1
147        1
144        1
Name: Tenure_months, Length: 148, dtype: Int64

In [386]:
# Customer_info_1[Customer_info_1.Tenure_months==0]

In [387]:
pd.DataFrame(Customer_info_1.groupby(['ADT_migrated_customer','Interactivity_flag']).agg(
    
    Customer_count= ('Telus_ID','nunique'),
    # Email_Opened_count=('OPENED','sum'),
    # CT_Opened_count=('CLICKTHROUGH','sum'),
    # softbounce_count=('SOFTBOUNCE','sum'),
    # Hardbounce_count=('HARDBOUNCE','sum'),
    # # Unsub_count=('UNSUBSCRIBE','sum'),
    # campaign_date_min=('Campaign_date','min'),
    # campaign_date_max=('Campaign_date','max')

    
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,ADT_migrated_customer,Interactivity_flag,Customer_count
0,0,0,45625
1,0,1,350787
2,1,0,64562
3,1,1,9439


In [357]:
config= bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.SHS_churn_model_base_trn_data_dec22_feb23'

bq_table_instance= bq_client.load_table_from_dataframe(Customer_info_1, Table_BQ,job_config=config)