In [19]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root is the part of the current working directory that precedes the
# first occurrence of 'notebooks'; if that substring is absent the whole CWD
# is used unchanged.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Make the project root importable so the `core` package below resolves.
sys.path.insert(0, str(pth_project))
# Read the project config inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with pth_creds.open() as f_project_config:
    d_project_config = safe_load(f_project_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path tweak above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when a DataFrame is displayed.
pd.options.display.max_rows = 100

In [20]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Parameters
    ----------
    bq_client
        Object exposing ``query(sql)`` whose result has ``to_dataframe()``.
    sql : str, optional
        Query text. Takes precedence over ``pth_query`` when both are given.
    pth_query : pathlib.Path or str, optional
        Location of a file containing the query text; only read when ``sql``
        is None. A plain string is converted to a ``Path`` first.

    Returns
    -------
    Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises
    ------
    ValueError
        If neither ``sql`` nor ``pth_query`` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Accept string paths as well as objects that already offer read_text().
        if isinstance(pth_query, str):
            pth_query = Path(pth_query)
        sql = pth_query.read_text()
    return bq_client.query(sql).to_dataframe()

In [11]:
Query='''



with base_data as (
select BAN from `pras-pr-223186.pras_pr_dataset.ADC_usage_data_Achint`
)


, ADC_data_Sep_2023 as 

(

SELECT distinct
    ban as BAN,

    avg(login_consistency) as Login_consistency_sep2023 ,
    avg(Arming_Consistency) as Arming_consistency_sep2023,
    1 as Data_flag_sep2023
   FROM
    `bi-srv-divg-speech-pr-79f6e9.adc_feature_store.bq_adc_feature_store_daily`
  WHERE
daily_snapshot_end_date='2023-09-30'
  group by ban
    
)

, ADC_data_Aug_2023 as 

(

SELECT distinct
    ban as BAN,

    avg(login_consistency) as Login_consistency_Aug2023 ,
    avg(Arming_Consistency) as Arming_consistency_Aug2023,
    1 as Data_flag_Aug2023
   FROM
    `bi-srv-divg-speech-pr-79f6e9.adc_feature_store.bq_adc_feature_store_daily`
  WHERE
daily_snapshot_end_date='2023-08-31'
  group by ban
    
)


, ADC_data_Jul_2023 as 

(

SELECT distinct
    ban as BAN,

    avg(login_consistency) as Login_consistency_Jul2023 ,
    avg(Arming_Consistency) as Arming_consistency_Jul2023,
    1 as Data_flag_Jul2023
   FROM
    `bi-srv-divg-speech-pr-79f6e9.adc_feature_store.bq_adc_feature_store_daily`
  WHERE
daily_snapshot_end_date='2023-07-31'
  group by ban
    
)

, ADC_data_Jun_2023 as 

(

SELECT distinct
    ban as BAN,

    avg(login_consistency) as Login_consistency_Jun2023 ,
    avg(Arming_Consistency) as Arming_consistency_Jun2023,
    1 as Data_flag_Jun2023
   FROM
    `bi-srv-divg-speech-pr-79f6e9.adc_feature_store.bq_adc_feature_store_daily`
  WHERE
daily_snapshot_end_date='2023-06-30'
  group by ban
    
)


, ADC_data_May_2023 as 

(

SELECT distinct
    ban as BAN,
    avg(login_consistency) as Login_consistency_May2023 ,
    avg(Arming_Consistency) as Arming_consistency_May2023,
    1 as Data_flag_May2023
   FROM
    `bi-srv-divg-speech-pr-79f6e9.adc_feature_store.bq_adc_feature_store_daily`
  WHERE
daily_snapshot_end_date='2023-05-31'
  group by ban
    
)


, ADC_data_Apr_2023 as 

(

SELECT distinct
    ban as BAN,
    avg(login_consistency) as Login_consistency_Apr2023 ,
    avg(Arming_Consistency) as Arming_consistency_Apr2023,
    1 as Data_flag_Apr2023
   FROM
    `bi-srv-divg-speech-pr-79f6e9.adc_feature_store.bq_adc_feature_store_daily`
  WHERE
daily_snapshot_end_date='2023-04-30'
  group by ban
    
)



select a.* 
,b.Login_consistency_sep2023
,b.Arming_consistency_sep2023
,b.Data_flag_sep2023
,c.Login_consistency_Aug2023
,c.Arming_consistency_Aug2023
,c.Data_flag_Aug2023
,d.Login_consistency_Jul2023
,d.Arming_consistency_Jul2023
,d.Data_flag_Jul2023
,e.Login_consistency_Jun2023
,e.Arming_consistency_Jun2023
,e.Data_flag_Jun2023
,f.Login_consistency_May2023
,f.Arming_consistency_May2023
,f.Data_flag_May2023
,g.Login_consistency_Apr2023
,g.Arming_consistency_Apr2023
,g.Data_flag_Apr2023


from base_data a
left join ADC_data_Sep_2023 b
on a.BAN=b.BAN
left join ADC_data_Aug_2023 c
on a.BAN=c.BAN
left join ADC_data_Jul_2023 d
on a.BAN=d.BAN
left join ADC_data_Jun_2023 e
on a.BAN=e.BAN
left join ADC_data_May_2023 f
on a.BAN=f.BAN
left join ADC_data_Apr_2023 g
on a.BAN=g.BAN

order by Data_flag_sep2023 desc




'''

In [12]:
# Execute the SQL held in `Query` against BigQuery and keep the result in DF.
DF = extract_bq_data(bq_client, sql=Query)

In [13]:
# Print the frame summary: row count, column dtypes and non-null counts per column.
DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36426 entries, 0 to 36425
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   BAN                         36426 non-null  Int64  
 1   Login_consistency_sep2023   4714 non-null   float64
 2   Arming_consistency_sep2023  4714 non-null   float64
 3   Data_flag_sep2023           4714 non-null   Int64  
 4   Login_consistency_Aug2023   6498 non-null   float64
 5   Arming_consistency_Aug2023  6498 non-null   float64
 6   Data_flag_Aug2023           6498 non-null   Int64  
 7   Login_consistency_Jul2023   8828 non-null   float64
 8   Arming_consistency_Jul2023  8828 non-null   float64
 9   Data_flag_Jul2023           8828 non-null   Int64  
 10  Login_consistency_Jun2023   10976 non-null  float64
 11  Arming_consistency_Jun2023  10976 non-null  float64
 12  Data_flag_Jun2023           10976 non-null  Int64  
 13  Login_consistency_May2023   123

In [17]:
# Preview the first 10 rows.
# NOTE(review): this cell's execution count (17) is later than the fillna
# cell's (15), so the stored output presumably shows the 0-filled frame rather
# than the raw query result -- re-run top to bottom to confirm.
DF.head(10)

Unnamed: 0,BAN,Login_consistency_sep2023,Arming_consistency_sep2023,Data_flag_sep2023,Login_consistency_Aug2023,Arming_consistency_Aug2023,Data_flag_Aug2023,Login_consistency_Jul2023,Arming_consistency_Jul2023,Data_flag_Jul2023,Login_consistency_Jun2023,Arming_consistency_Jun2023,Data_flag_Jun2023,Login_consistency_May2023,Arming_consistency_May2023,Data_flag_May2023,Login_consistency_Apr2023,Arming_consistency_Apr2023,Data_flag_Apr2023
0,123737261,83.333333,0.0,1,96.666667,0.0,1,93.333333,0.0,1,64.0,0.0,1,70.0,0.0,1,76.666667,0.0,1
1,123737261,83.333333,0.0,1,96.666667,0.0,1,93.333333,0.0,1,64.0,0.0,1,70.0,0.0,1,76.666667,0.0,1
2,124201615,100.0,0.0,1,93.333333,0.0,1,100.0,0.0,1,100.0,0.0,1,100.0,0.0,1,100.0,0.0,1
3,124686728,0.0,0.0,1,0.0,0.0,1,0.0,0.0,1,0.0,0.0,1,0.0,0.0,1,0.0,0.0,1
4,124686728,0.0,0.0,1,0.0,0.0,1,0.0,0.0,1,0.0,0.0,1,0.0,0.0,1,0.0,0.0,1
5,125732991,0.0,0.0,1,16.666667,0.0,1,20.0,0.0,1,32.0,0.0,1,13.333333,0.0,1,13.333333,0.0,1
6,125732991,0.0,0.0,1,16.666667,0.0,1,20.0,0.0,1,32.0,0.0,1,13.333333,0.0,1,13.333333,0.0,1
7,200303391,46.666667,54.166667,1,43.333333,56.666667,1,30.0,58.62069,1,52.0,71.428571,1,30.0,83.333333,1,13.333333,33.333333,1
8,200421863,13.333333,16.666667,1,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,100.0,16.666667,1
9,200623247,3.333333,0.0,1,3.333333,0.0,1,3.333333,0.0,1,8.0,0.0,1,6.666667,0.0,1,13.333333,0.0,1


In [15]:
# Replace every missing value with 0 (months where a BAN had no match in the
# left joins come back as nulls) and rebind DF to the filled frame.
DF = DF.fillna(0)

In [16]:
# Write the frame to a CSV in the current working directory, without the index column.
DF.to_csv(
    'Customer_neverPay_Oct2023_updated.csv',
    index=False,
)