In [None]:

#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root is whatever precedes the first occurrence of 'notebooks' in the
# current working directory; if 'notebooks' is absent, the cwd itself is used.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Make the project root importable so `core.*` modules below resolve.
sys.path.insert(0, str(pth_project))
# read_text() opens and closes the file itself, so no handle is left open
# (the previous `safe_load(pth_creds.open())` never closed it).
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when a DataFrame is displayed in a cell.
pd.options.display.max_rows = 100

In [None]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result of ``to_dataframe()``.

    Args:
        bq_client: Object exposing ``query(sql)`` whose result has
            ``to_dataframe()`` (presumably a ``bigquery.Client`` -- confirm
            against ``connect_bq_services``).
        sql: Query text. Takes precedence over ``pth_query`` when both are given.
        pth_query: Object with ``read_text()`` (e.g. a ``pathlib.Path``) that
            yields the query text; only consulted when ``sql`` is None.

    Returns:
        Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises:
        ValueError: If both ``sql`` and ``pth_query`` are None.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # No inline SQL supplied: fall back to the query stored on disk.
        sql = pth_query.read_text()
    return bq_client.query(sql).to_dataframe()

In [None]:
# Load the mapping file; the path is relative, so it resolves against the
# notebook's current working directory.
ADC_ADT_mapping = pd.read_csv(
    'ADC_ADT_mapping.csv',
    low_memory=False,
)

In [None]:
# Column dtypes and non-null counts of the freshly loaded mapping frame.
ADC_ADT_mapping.info()

In [None]:
# Trim surrounding whitespace so the exact-match filter on sitetype_id == 'R'
# used in the merge cell is not defeated by padded values.
ADC_ADT_mapping['sitetype_id'] = (
    ADC_ADT_mapping['sitetype_id'].str.strip()
)

In [None]:
# Frequency of each sitetype_id value; only rows with 'R' are kept by the merge cell.
ADC_ADT_mapping['sitetype_id'].value_counts()

In [None]:
# ADC_ADT_mapping['CustomerId_1']=ADC_ADT_mapping['CustomerId'].astype('object')

In [None]:
# Preview the first rows of the mapping frame.
ADC_ADT_mapping.head()

In [None]:
# BigQuery SQL: account details for dealer 'ADT by TELUS', reduced to one row
# per customer_id (the row with the most recent last_updt_ts, via QUALIFY).
# NOTE(review): the selection includes primary_email / primary_phone, so any
# displayed rows of the result contain contact details -- clear cell outputs
# before sharing the notebook.
Customer_Details_Query='''

SELECT customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where dealer_name='ADT by TELUS'
--and account_type_name!='Standalone'
--and customer_type_name='Customer'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id


'''

In [None]:
# Execute the customer-details query and pull the result into memory.
Customer_DF = extract_bq_data(
    bq_client,
    sql=Customer_Details_Query,
)

In [None]:
# Column dtypes and non-null counts of the BigQuery extract.
Customer_DF.info()

In [None]:
# Sanity check: the query filters on dealer_name='ADT by TELUS', so that
# should be the only value listed here.
Customer_DF['dealer_name'].value_counts()

In [None]:
# Mark every row of the mapping file with a constant 1; after the inner merge
# this column can be summed to count matched rows.
ADC_ADT_mapping['ADT_customer_flag'] = 1

In [None]:
# Inner-join BigQuery customers to the mapping rows whose sitetype_id is 'R',
# matching customer_id (BigQuery) against CustomerId (CSV).
# NOTE(review): the join silently drops rows if the two key columns have
# different dtypes -- confirm both are the same type.
Merge_data = Customer_DF.merge(
    ADC_ADT_mapping.loc[ADC_ADT_mapping['sitetype_id'] == 'R'],
    how='inner',
    left_on='customer_id',
    right_on='CustomerId',
)

In [None]:
# ADT_customer_flag is 1 on every mapping row, so after the inner join this
# sum equals the number of rows in Merge_data.
Merge_data['ADT_customer_flag'].sum()

In [None]:
# Distribution of customer_type_name among the matched rows.
Merge_data['customer_type_name'].value_counts()

In [None]:
# Distribution of dealer_name among the matched rows.
Merge_data['dealer_name'].value_counts()

In [None]:
# Preview the merged frame. It carries primary_email / primary_phone from the
# query, so clear this output before sharing the notebook.
Merge_data.head()

In [None]:
# Column names and dtypes of the merged frame before the column-name clean-up.
Merge_data.info()

In [None]:
# Characters to replace in column names: space, backslash, pipe, hyphen, colon
# and forward slash. Written as a raw-string character class so each character
# matches on its own. The previous alternation ' |\|-|:|/' parsed `\|-` as the
# literal two-character sequence "|-", so a lone hyphen was never replaced,
# and '\|' in a non-raw string is an invalid escape sequence.
rep_chars = r'[ \\|\-:/]'

# regex=True must be explicit: from pandas 2.0 `str.replace` treats the
# pattern as a literal string by default, which made this cell a silent no-op.
Merge_data.columns = Merge_data.columns.str.replace(rep_chars, '_', regex=True)

In [None]:
# Re-inspect the frame to confirm the column names after the clean-up step.
Merge_data.info()

In [None]:
config = bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
# WRITE_TRUNCATE: each run replaces the destination table's contents.
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Destination given as 'dataset.table'; no project is specified here, so it
# presumably resolves against bq_client's default project -- confirm.
Table_BQ = 'ADC_updated.ADC_ADT_mapping'

bq_table_instance = bq_client.load_table_from_dataframe(Merge_data, Table_BQ, job_config=config)
# load_table_from_dataframe only starts the load job. Block until it finishes
# so a failed upload raises here instead of passing unnoticed.
bq_table_instance.result()