In [1]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root = the part of the working directory path that precedes the first
# occurrence of 'notebooks' (the whole cwd when 'notebooks' does not appear).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package imported below resolves.
sys.path.insert(0, str(pth_project))
# Parse the project config inside a context manager so the file handle is
# closed as soon as the YAML has been loaded.
with pth_creds.open() as f_project_config:
    d_project_config = safe_load(f_project_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Let pandas display up to 100 rows before truncating the output.
pd.options.display.max_rows = 100

In [2]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through `bq_client` and return the result as a DataFrame.

    The query text is taken from `sql` when it is given; otherwise it is read
    from the file at `pth_query`. When both are passed, `sql` wins and
    `pth_query` is ignored.

    Args:
        bq_client: Object exposing `query(sql)` whose return value has a
            `to_dataframe()` method.
        sql: Query text to execute, or None to read it from `pth_query`.
        pth_query: Location of a file holding the query text. Accepts a
            `pathlib.Path` as well as a plain string path.

    Returns:
        Whatever `bq_client.query(sql).to_dataframe()` returns.

    Raises:
        ValueError: If neither `sql` nor `pth_query` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Coerce to Path so callers may pass a str path, not only a Path object.
        sql = Path(pth_query).read_text()
    return bq_client.query(sql).to_dataframe()

In [3]:
# BigQuery SQL for the TELUS <-> ADC customer id mapping:
#   - reads rows of the ADC customer account details table whose dealer is
#     'TELUS Communications Inc.' and whose dealer_customer_id is not empty;
#   - QUALIFY ROW_NUMBER() ... = 1 keeps only the most recently updated row
#     (by last_updt_ts) for each ADC customer_id;
#   - returns both ids, the account type name and the last update date.
Query='''


select dealer_customer_id as Telus_customer_id,customer_id as ADC_customer_id,account_type_name,date(last_updt_ts) as last_update_date,
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where dealer_name='TELUS Communications Inc.'
and dealer_customer_id !=''
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by last_updt_ts desc

'''

In [4]:
# Execute the mapping query and keep the result in DF.
DF = extract_bq_data(bq_client=bq_client, sql=Query)

In [5]:
# Summarise DF: number of rows, column names, non-null counts, dtypes and memory use.
DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 622310 entries, 0 to 622309
Data columns (total 4 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   Telus_customer_id  622310 non-null  object
 1   ADC_customer_id    622310 non-null  Int64 
 2   account_type_name  622310 non-null  object
 3   last_update_date   622310 non-null  dbdate
dtypes: Int64(1), dbdate(1), object(2)
memory usage: 19.6+ MB


In [6]:
# Count rows per Telus_customer_id, most frequent first. The query keeps a
# single row per ADC customer_id, so a count above 1 means several ADC
# customer ids share the same TELUS customer id.
DF['Telus_customer_id'].value_counts()

99178853     133
35173897      61
99178857      56
99178855      47
385339        36
            ... 
106023301      1
10147645       1
1000228        1
92807356       1
97011213       1
Name: Telus_customer_id, Length: 593414, dtype: int64

In [7]:
# Show every row for one of the TELUS ids that appears multiple times.
DF.loc[DF['Telus_customer_id'].eq('385339')]

Unnamed: 0,Telus_customer_id,ADC_customer_id,account_type_name,last_update_date
8672,385339,11491466,Security System,2023-04-04
8701,385339,11588265,Security System,2023-04-04
15798,385339,11684102,Security System,2023-04-04
19441,385339,11873887,Security System,2023-04-04
30000,385339,11474646,Security System,2023-04-04
30054,385339,11678149,Security System,2023-04-04
30702,385339,14407775,Security System,2023-04-04
37136,385339,11550087,Security System,2023-04-04
44310,385339,11743313,Security System,2023-04-04
54890,385339,11541770,Security System,2023-04-04


In [8]:
# Write the mapping to a CSV file in the current working directory, leaving out the index.
DF.to_csv(path_or_buf='Telus_ADC_customer_mapping_04Apr2023.csv', index=False)