In [40]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root: the part of the working directory that precedes the first
# 'notebooks' substring (the whole cwd when that substring is absent).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on the import path so the `core.*` import below resolves.
sys.path.insert(0, str(pth_project))
# Read the YAML config inside a context manager so the file handle is closed
# as soon as parsing finishes instead of being left open.
with pth_creds.open() as config_file:
    d_project_config = safe_load(config_file)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when a DataFrame is displayed.
pd.options.display.max_rows = 100

In [41]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Parameters
    ----------
    bq_client
        Client object exposing ``query(sql)``; the returned job must provide
        ``to_dataframe()``.
    sql : str, optional
        Query text. Takes precedence over ``pth_query`` when both are given.
    pth_query : str or pathlib.Path, optional
        Path of a file holding the query text. Only read when ``sql`` is None.

    Returns
    -------
    Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises
    ------
    ValueError
        If neither ``sql`` nor ``pth_query`` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Path() accepts both str and Path, so plain string paths work as well.
        sql = Path(pth_query).read_text()
    return bq_client.query(sql).to_dataframe()

In [42]:
# Fully-qualified BigQuery tables read by the query (back-quoted, as in SQL).
BASE_TABLE = '`pras-pr-223186.pras_pr_dataset.Early Tenure Invol Churn`'
ADC_TABLE = '`bi-srv-divg-speech-pr-79f6e9.adc_feature_store.bq_adc_feature_store_daily`'

# One monthly snapshot per entry: (month label, year, daily_snapshot_end_date).
# Adding a month here adds its CTE, its output columns and its join.
ADC_SNAPSHOTS = (
    ('Jan', 2023, '2023-01-31'),
    ('Feb', 2023, '2023-02-28'),
    ('Mar', 2023, '2023-03-31'),
    ('Apr', 2023, '2023-04-30'),
    ('May', 2023, '2023-05-31'),
    ('Jun', 2023, '2023-06-30'),
)

# Number of best-practice indicator columns pulled (best_partices_1 .. best_partices_N).
N_BEST_PRACTICES = 6


def _adc_month_cte(month, year, snapshot_date):
    """Return the CTE text with one snapshot's usage features per BAN and Segment."""
    suffix = f'{month}{year}'
    # No DISTINCT: GROUP BY already yields exactly one row per (ban, Segment).
    # NOTE(review): grouping by Segment as well as ban can return several rows for a
    # BAN, which would duplicate base rows in the left joins — confirm each BAN has a
    # single Segment per snapshot date.
    return '\n'.join([
        f'ADC_data_{month}_{year} as (',
        '  select',
        '    ban as BAN,',
        f'    avg(login_consistency) as Login_consistency_{suffix},',
        f'    avg(Arming_Consistency) as Arming_consistency_{suffix},',
        f'    Segment as Segment_{suffix},',
        f'    1 as Data_flag_{suffix}',
        f'  from {ADC_TABLE}',
        f"  where daily_snapshot_end_date = '{snapshot_date}'",
        '  group by ban, Segment',
        ')',
    ])


def _build_usage_query(snapshots=ADC_SNAPSHOTS, n_best_practices=N_BEST_PRACTICES):
    """Assemble the usage query: base accounts left-joined to monthly ADC features.

    The result keeps every column of ``base_data`` followed by, in this order:
    Login/Arming consistency and data flag per month, Segment per month, and the
    BP_1..BP_N best-practice columns.
    """
    suffixes = [f'{month}{year}' for month, year, _ in snapshots]
    cte_names = [f'ADC_data_{month}_{year}' for month, year, _ in snapshots]
    aliases = [suffix.lower() for suffix in suffixes]
    bp_numbers = range(1, n_best_practices + 1)

    base_cte = '\n'.join([
        'base_data as (',
        '  select',
        '    Bill_Account_Number as BAN,',
        '    date(Activation_Dt) as Activation_Date,',
        '    date(Deactivation_Dt) as Deactivation_Date',
        f'  from {BASE_TABLE}',
        ')',
    ])

    # The `best_partices_N` spelling is taken from the original query — presumably the
    # feature-store column names; verify before "correcting" it.
    # This CTE has no snapshot filter: max() runs over every row of the table per BAN.
    bp_select = ['    ban as BAN'] + [
        f'    max(best_partices_{i}) as BP_{i}' for i in bp_numbers
    ]
    bp_cte = '\n'.join([
        'ADC_Best_Practices as (',
        '  select',
        ',\n'.join(bp_select),
        f'  from {ADC_TABLE}',
        '  group by ban',
        ')',
    ])

    ctes = [base_cte]
    ctes.extend(_adc_month_cte(*snapshot) for snapshot in snapshots)
    ctes.append(bp_cte)

    columns = ['a.*']
    for alias, suffix in zip(aliases, suffixes):
        columns.append(f'{alias}.Login_consistency_{suffix}')
        columns.append(f'{alias}.Arming_consistency_{suffix}')
        columns.append(f'{alias}.Data_flag_{suffix}')
    columns.extend(
        f'{alias}.Segment_{suffix}' for alias, suffix in zip(aliases, suffixes)
    )
    columns.extend(f'bp.BP_{i}' for i in bp_numbers)

    joins = [
        f'left join {cte_name} {alias} on a.BAN = {alias}.BAN'
        for cte_name, alias in zip(cte_names, aliases)
    ]
    joins.append('left join ADC_Best_Practices bp on a.BAN = bp.BAN')

    return '\n'.join([
        'with ' + '\n, '.join(ctes),
        'select',
        '  ' + '\n  , '.join(columns),
        'from base_data a',
        *joins,
    ]) + '\n'


Query = _build_usage_query()

In [43]:
# Execute the usage query on BigQuery and keep the result as a DataFrame.
DF = extract_bq_data(bq_client=bq_client, sql=Query)

In [44]:
# Summarise the query result: column dtypes and non-null counts.
# NOTE(review): in the saved output below, the Jan/Feb 2023 columns have 0 non-null
# values — confirm the feature store holds snapshots for those dates.
DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 287 entries, 0 to 286
Data columns (total 33 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   BAN                         287 non-null    Int64  
 1   Activation_Date             287 non-null    dbdate 
 2   Deactivation_Date           287 non-null    dbdate 
 3   Login_consistency_Jan2023   0 non-null      float64
 4   Arming_consistency_Jan2023  0 non-null      float64
 5   Data_flag_Jan2023           0 non-null      Int64  
 6   Login_consistency_Feb2023   0 non-null      float64
 7   Arming_consistency_Feb2023  0 non-null      float64
 8   Data_flag_Feb2023           0 non-null      Int64  
 9   Login_consistency_Mar2023   34 non-null     float64
 10  Arming_consistency_Mar2023  34 non-null     float64
 11  Data_flag_Mar2023           34 non-null     Int64  
 12  Login_consistency_Apr2023   44 non-null     float64
 13  Arming_consistency_Apr2023  44 non-

In [36]:
# Peek at the first ten rows of DF.
DF.head(n=10)

Unnamed: 0,BAN,Activation_Date,Deactivation_Date,Login_consistency_Jan2023,Arming_consistency_Jan2023,Data_flag_Jan2023,Login_consistency_Feb2023,Arming_consistency_Feb2023,Data_flag_Feb2023,Login_consistency_Mar2023,...,Segment_Mar2023,Segment_Apr2023,Segment_May2023,Segment_Jun2023,BP_1,BP_2,BP_3,BP_4,BP_5,BP_6
0,605975768,2023-04-28,2023-08-28,,,,,,,,...,,,,,,,,,,
1,605620376,2023-01-05,2023-05-20,,,,,,,33.333333,...,Moderate_Users,Moderate_Users,,,1.0,1.0,0.0,0.0,1.0,1.0
2,605620138,2023-01-13,2023-05-29,,,,,,,,...,,,,,,,,,,
3,605645619,2023-01-25,2023-06-13,,,,,,,,...,,,,,,,,,,
4,605672262,2023-02-02,2023-06-20,,,,,,,,...,,,,,,,,,,
5,605675422,2023-02-13,2023-07-08,,,,,,,,...,,,,,,,,,,
6,605743079,2023-02-15,2023-07-05,,,,,,,,...,,,,,,,,,,
7,605742881,2023-02-27,2023-06-28,,,,,,,,...,,,,,,,,,,
8,605816800,2023-03-21,2023-08-03,,,,,,,,...,,,,,,,,,,
9,605828162,2023-03-21,2023-07-25,,,,,,,,...,,,,,,,,,,


In [37]:
# Replace every missing value with 0, rebinding DF rather than mutating it in place.
# NOTE(review): this also writes the integer 0 into the text Segment_* columns,
# mixing it with labels such as 'Moderate_Users' — confirm that is intended.
DF = DF.fillna(value=0)

In [38]:
# Show the first ten rows again to inspect the frame after the null fill.
DF.head(n=10)

Unnamed: 0,BAN,Activation_Date,Deactivation_Date,Login_consistency_Jan2023,Arming_consistency_Jan2023,Data_flag_Jan2023,Login_consistency_Feb2023,Arming_consistency_Feb2023,Data_flag_Feb2023,Login_consistency_Mar2023,...,Segment_Mar2023,Segment_Apr2023,Segment_May2023,Segment_Jun2023,BP_1,BP_2,BP_3,BP_4,BP_5,BP_6
0,605975768,2023-04-28,2023-08-28,0.0,0.0,0,0.0,0.0,0,0.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,605620376,2023-01-05,2023-05-20,0.0,0.0,0,0.0,0.0,0,33.333333,...,Moderate_Users,Moderate_Users,0,0,1.0,1.0,0.0,0.0,1.0,1.0
2,605620138,2023-01-13,2023-05-29,0.0,0.0,0,0.0,0.0,0,0.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,605645619,2023-01-25,2023-06-13,0.0,0.0,0,0.0,0.0,0,0.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
4,605672262,2023-02-02,2023-06-20,0.0,0.0,0,0.0,0.0,0,0.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
5,605675422,2023-02-13,2023-07-08,0.0,0.0,0,0.0,0.0,0,0.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
6,605743079,2023-02-15,2023-07-05,0.0,0.0,0,0.0,0.0,0,0.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
7,605742881,2023-02-27,2023-06-28,0.0,0.0,0,0.0,0.0,0,0.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
8,605816800,2023-03-21,2023-08-03,0.0,0.0,0,0.0,0.0,0,0.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
9,605828162,2023-03-21,2023-07-25,0.0,0.0,0,0.0,0.0,0,0.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
# Write DF to a CSV file (relative path, so it lands in the current working
# directory), leaving out the index column.
DF.to_csv(path_or_buf='Early_tenure_Invol_Churn_Usage.csv', index=False)