In [None]:
#### import global modules
import os
import sys
import pandas as pd
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery

# Set global vars
# Project root = the part of the current working directory that precedes the
# first occurrence of the substring 'notebooks' (the whole cwd if it is absent).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` import below resolves.
sys.path.insert(0, str(pth_project))
# read_text() opens and closes the file itself; the previous
# `safe_load(pth_creds.open())` left the file handle open.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
# NOTE(review): connect_bq_services is project-local; its return value is used
# below as an object exposing `.query(sql).to_dataframe()` - confirm it is a
# BigQuery client.
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when pandas renders a frame or series.
pd.options.display.max_rows = 100

In [None]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Parameters
    ----------
    bq_client
        Object exposing ``query(sql)`` whose result has ``to_dataframe()``.
    sql : str, optional
        Query text. Takes precedence over ``pth_query`` when both are given.
    pth_query : str or os.PathLike, optional
        Path of a file holding the query text. Only read when ``sql`` is None.

    Returns
    -------
    The value of ``bq_client.query(sql).to_dataframe()``.

    Raises
    ------
    ValueError
        If neither ``sql`` nor ``pth_query`` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Wrapping in Path() lets callers pass a plain string path as well as
        # a pathlib.Path object.
        sql = Path(pth_query).read_text()
    return bq_client.query(sql).to_dataframe()

In [None]:
# SQL text: every column and row of bq_customer_account_terminated_details.
Query_Termination_details = """

select * from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_terminated_details`

"""

In [None]:
# SQL text: every column and row of bq_termination_reason.
Query_Termination_reason_lookup = """

select * from `cio-datahub-enterprise-pr-183a.src_adc.bq_termination_reason`

"""

In [None]:
# Run the termination-details query and bind the resulting frame.
Termination_DF = extract_bq_data(
    bq_client,
    sql=Query_Termination_details,
)

In [None]:
# Print the summary produced by .info() for the termination-details frame.
Termination_DF.info()

In [None]:
# Smallest and largest value found in the `term_date` column, shown as a tuple.
(
    Termination_DF["term_date"].min(),
    Termination_DF["term_date"].max(),
)

In [None]:
# Frequency of each distinct `term_reason` value.
Termination_DF["term_reason"].value_counts()

In [None]:
# Frequency of each distinct `account_recycle_reason_id` value.
Termination_DF["account_recycle_reason_id"].value_counts()

In [None]:
# Frequency of each distinct `account_recycle_reason_type_desc` value.
Termination_DF["account_recycle_reason_type_desc"].value_counts()

In [None]:
# Keep rows whose `term_date` falls in the half-open window
# ['2022-08-01', '2022-09-01').
# NOTE(review): the bounds are plain strings; how the comparison behaves
# depends on the dtype of `term_date`, which is not visible here - confirm.
Termination_DF_Aug2022 = Termination_DF[
    (Termination_DF["term_date"] >= "2022-08-01")
    & (Termination_DF["term_date"] < "2022-09-01")
]

In [None]:
# Print the summary produced by .info() for the date-filtered subset.
Termination_DF_Aug2022.info()

In [None]:
# Run the termination-reason lookup query and bind the resulting frame.
Termination_lookup_DF = extract_bq_data(
    bq_client,
    sql=Query_Termination_reason_lookup,
)

In [None]:
# Print the summary produced by .info() for the termination-reason lookup frame.
Termination_lookup_DF.info()

In [None]:
# Display the lookup rows ordered by `term_id` (returns a sorted copy).
Termination_lookup_DF.sort_values(by="term_id")

In [None]:
# SQL text: rows of bq_customer_account_details that have no matching
# customer_id in bq_customer_account_terminated_details (left join + IS NULL),
# restricted to rows carrying the table's maximum last_updt_ts.
Customer_Detail_Query = """

SELECT a.*  FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` as a
left join `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_terminated_details` as b
on a.customer_id=b.customer_id WHERE b.customer_id IS NULL
and a.last_updt_ts=(SELECT MAX(st1.last_updt_ts) FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` AS st1) 



"""

In [None]:
# Run the customer-detail query and bind the resulting frame.
Customer_DF = extract_bq_data(
    bq_client,
    sql=Customer_Detail_Query,
)

In [None]:
# Keep only rows whose `customer_type_name` equals 'Customer'.
# The name Customer_DF is rebound, so the unfiltered frame is no longer
# reachable after this cell runs.
Customer_DF = Customer_DF.loc[Customer_DF["customer_type_name"] == "Customer"]

In [None]:
# Print the summary produced by .info() for the customer frame.
Customer_DF.info()

In [None]:
# Frequency of each distinct `dealer_name` value.
Customer_DF["dealer_name"].value_counts()

In [None]:
# Frequency of each distinct `customer_type_name` value.
# NOTE(review): an earlier cell rebinds Customer_DF to rows where this column
# equals 'Customer'; when cells run top to bottom only that value should appear.
Customer_DF["customer_type_name"].value_counts()

In [None]:
# Frequency of each distinct `account_recycle_reason_type_desc` value.
Customer_DF["account_recycle_reason_type_desc"].value_counts()

In [None]:
# Frequency of each distinct `account_type_name` value.
Customer_DF["account_type_name"].value_counts()

In [None]:
# Exclude rows whose `account_type_name` equals 'Standalone'.
Customer_DF_1 = Customer_DF.loc[Customer_DF["account_type_name"] != "Standalone"]

In [None]:
# Frequency of each distinct `dealer_name` value in the subset without
# 'Standalone' account types.
Customer_DF_1["dealer_name"].value_counts()

In [None]:
# Print the summary produced by .info() for the subset without 'Standalone' account types.
Customer_DF_1.info()