In [None]:
#### import global modules
import os
import sys
import pandas as pd
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery

# Set global vars
# Project root = the part of the working-directory path that precedes the first
# occurrence of 'notebooks'; if 'notebooks' is absent this is the cwd itself.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package below resolves.
sys.path.insert(0, str(pth_project))
# Read the config through a context manager so the file handle is closed
# as soon as parsing finishes (a bare `.open()` leaves it dangling).
with pth_creds.open() as f_config:
    d_project_config = safe_load(f_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [None]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return ``to_dataframe()`` of the result.

    Parameters
    ----------
    bq_client
        Object exposing ``query(sql)`` whose return value has ``to_dataframe()``.
    sql : str, optional
        Query text. Takes precedence over ``pth_query`` when both are given.
    pth_query : str or os.PathLike, optional
        Path of a file holding the query text; only read when ``sql`` is None.

    Returns
    -------
    Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises
    ------
    ValueError
        If neither ``sql`` nor ``pth_query`` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Wrap in Path so plain string paths work as well as Path objects.
        sql = Path(pth_query).read_text()
    return bq_client.query(sql).to_dataframe()

In [None]:
# All columns from bq_customer_account_details for rows whose last_updt_ts falls
# in [2022-08-01, 2022-09-01), keeping only the most recently updated row per
# customer_id (QUALIFY ROW_NUMBER() ... = 1).
Customer_Detail_Query= '''


SELECT *  FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details`
Where last_updt_ts < '2022-09-01' and last_updt_ts >= '2022-08-01' 
--and join_date < '2022-05-01'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1 --get the latest information of customer



'''

In [None]:
# Run the customer-details query and keep the result as a DataFrame.
Customer_Details_DF= extract_bq_data(bq_client,sql=Customer_Detail_Query)

In [None]:
# Summary (columns, dtypes, non-null counts) of the raw customer extract.
Customer_Details_DF.info()

In [None]:
# Row counts per account_type_name; 'Standalone' rows are dropped in a later cell.
Customer_Details_DF['account_type_name'].value_counts()

In [None]:
# Earliest and latest join_date in the extract, shown as a (min, max) tuple.
Customer_Details_DF['join_date'].min(),Customer_Details_DF['join_date'].max()

In [None]:
# Row counts per customer_type_name; only 'Customer' rows are kept in a later cell.
Customer_Details_DF['customer_type_name'].value_counts()

In [None]:
# Row counts per dealer_name.
Customer_Details_DF['dealer_name'].value_counts()

In [None]:
# Row counts per term_reason.
Customer_Details_DF['term_reason'].value_counts()

In [None]:
# Row counts per upsell_account_type.
Customer_Details_DF['upsell_account_type'].value_counts()

In [None]:
# Row counts per account_recycle_reason_type_desc.
Customer_Details_DF['account_recycle_reason_type_desc'].value_counts()

In [None]:
# Row counts per dealer_customer_id.
Customer_Details_DF['dealer_customer_id'].value_counts()

In [None]:
# Keep every row whose account type is anything other than 'Standalone'.
Customer_Details_DF = Customer_Details_DF.loc[
    lambda frame: frame['account_type_name'] != 'Standalone'
]

In [None]:
# Keep only rows whose customer type is exactly 'Customer'.
Customer_Details_DF = Customer_Details_DF.loc[
    lambda frame: frame['customer_type_name'] == 'Customer'
]

In [None]:
# Re-check the frame summary after the account-type and customer-type filters.
Customer_Details_DF.info()

In [None]:
# Narrow the filtered customer frame to the five columns used downstream.
Active_Customer_Aug2022 = Customer_Details_DF.loc[:, [
    'customer_id',
    'join_date',
    'last_updt_ts',
    'customer_type_name',
    'dealer_name',
]]

In [None]:
# Summary (columns, dtypes, non-null counts) of the narrowed customer frame.
Active_Customer_Aug2022.info()

In [None]:
# Row counts per dealer_name after the filters.
Active_Customer_Aug2022['dealer_name'].value_counts()

In [None]:
# customer_id, term_date and term_reason for rows of
# bq_customer_account_terminated_details with term_date in [2022-08-01, 2022-09-01).
Query_Termination_details='''

select customer_id,term_date,term_reason from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_terminated_details`
where term_date >= '2022-08-01' and term_date< '2022-09-01' 

'''

In [None]:
# Run the termination query and keep the result as a DataFrame.
Terminated_Account_Aug2022=extract_bq_data(bq_client,sql=Query_Termination_details)

In [None]:
# Constant flag: every row returned by the termination query gets Account_terminated = 1.
Terminated_Account_Aug2022['Account_terminated']=1

In [None]:
# Summary (columns, dtypes, non-null counts) of the termination extract.
Terminated_Account_Aug2022.info()

In [None]:
# Preview the first rows of the termination extract, including the new flag column.
Terminated_Account_Aug2022.head()

In [None]:
# Full_active_base_Aug2022=Active_Customer_Aug2022.merge(Terminated_Account_Aug2022,on='customer_id')

In [None]:
#

In [None]:
# Every column and row of the ADC.Trouble_Condition_Group table; the date
# filtering is done later in pandas rather than in SQL.
Trouble_condition_query='''


select * from `divgpras-pr-579355.ADC.Trouble_Condition_Group`

'''

In [None]:
# Run the trouble-condition query and keep the result as a DataFrame.
Trouble_condition_DF= extract_bq_data(bq_client,sql=Trouble_condition_query)

In [None]:
# Summary (columns, dtypes, non-null counts) of the trouble-condition extract.
Trouble_condition_DF.info()

In [None]:
# Preview the first rows of the trouble-condition extract.
Trouble_condition_DF.head()

In [None]:
# Order rows by customer, then by month within each customer.
Trouble_condition_DF = Trouble_condition_DF.sort_values(
    by=['customer_id', 'Month_Year'],
)

In [None]:
# Half-open window [start_date, end_date) on Month_Year: start is inclusive,
# end is exclusive.
start_date, end_date = '2022-05-01', '2022-08-01'
last_3months = (
    Trouble_condition_DF['Month_Year'].ge(start_date)
    & Trouble_condition_DF['Month_Year'].lt(end_date)
)

In [None]:
# Preview the sorted frame; building the date mask did not modify it.
Trouble_condition_DF.head()

In [None]:
# Keep only the rows selected by the last_3months boolean mask.
Trouble_condition_DF_1 = Trouble_condition_DF.loc[last_3months]

In [None]:
# Summary (columns, dtypes, non-null counts) of the date-filtered rows.
Trouble_condition_DF_1.info()

In [None]:
# One row per customer: sum each issue-count column and each mean-duration
# column over the rows in Trouble_condition_DF_1.
Trouble_condition_DF_wide = (
    Trouble_condition_DF_1
    .groupby(['customer_id'])
    .agg(dict.fromkeys(
        [
            'Central_Station_Condition_Issue_count',
            'Critical_System_Issue_count',
            'Engagement_Issue_count',
            'System_Condition_Issue_count',
            'Central_Station_Condition_Issue_duration_mean_hours',
            'Critical_System_Issue_duration_mean_hours',
            'Engagement_Issue_duration_mean_hours',
            'System_Condition_Issue_duration_mean_hours',
        ],
        'sum',
    ))
    .reset_index()
)

In [None]:
# Spot-check: all rows of the unfiltered frame for a single customer_id.
# NOTE(review): compares against the int 287850 — assumes customer_id is numeric; confirm dtype.
Trouble_condition_DF[Trouble_condition_DF['customer_id']==287850]

In [None]:
# Preview the per-customer aggregated frame.
Trouble_condition_DF_wide.head()

In [None]:
# Show the current column order; the next cell renames columns by position.
Trouble_condition_DF_wide.columns
    

In [None]:
# Positional rename: the list must follow the existing column order
# (customer_id, four count columns, four duration columns).
Trouble_condition_DF_wide.columns = [
    'customer_id',
    # issue counts
    'Central_Station_Condition_Issue_count_last3M',
    'Critical_System_Issue_count_last3M',
    'Engagement_Issue_count_last3M',
    'System_Condition_Issue_count_last3M',
    # summed mean durations (hours)
    'Central_Station_Condition_Issue_duration_mean_hours_sum_last3M',
    'Critical_System_Issue_duration_mean_hours_sum_last3M',
    'Engagement_Issue_duration_mean_hours_sum_last3M',
    'System_Condition_Issue_duration_mean_hours_sum_last3M',
]

In [None]:
# Preview the aggregated frame with its renamed columns.
Trouble_condition_DF_wide.head()

In [None]:
# Stamp every row with the same reference date, 2022-08-01.
Trouble_condition_DF_wide['ref_date'] = pd.Timestamp('2022-08-01')

In [None]:
## reordering dataframe
# ref_date first, then customer_id, then the eight last-3-month feature columns.
Trouble_condition_DF_wide = Trouble_condition_DF_wide.loc[:, [
    'ref_date',
    'customer_id',
    'Central_Station_Condition_Issue_count_last3M',
    'Critical_System_Issue_count_last3M',
    'Engagement_Issue_count_last3M',
    'System_Condition_Issue_count_last3M',
    'Central_Station_Condition_Issue_duration_mean_hours_sum_last3M',
    'Critical_System_Issue_duration_mean_hours_sum_last3M',
    'Engagement_Issue_duration_mean_hours_sum_last3M',
    'System_Condition_Issue_duration_mean_hours_sum_last3M',
]]

In [None]:
# Final preview of the frame that is uploaded to BigQuery in the next cell.
Trouble_condition_DF_wide.head()

In [None]:
# Upload the aggregated frame to BigQuery.
# WRITE_TRUNCATE replaces the destination table's contents on every run, so
# re-running this cell does not append duplicate rows. No explicit schema is
# passed to the load job.
config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

# Destination given as dataset.table, with no project qualifier.
TC_Table_BQ = 'ADC.Trouble_Condition_Group_last3months_Aug2022'

job_device_type = bq_client.load_table_from_dataframe(
    Trouble_condition_DF_wide, TC_Table_BQ, job_config=config
)
# load_table_from_dataframe only starts the job; wait for it to finish so a
# failed load raises here instead of passing silently.
job_device_type.result()