In [25]:

#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# The project root is whatever precedes the first occurrence of the substring
# 'notebooks' in the working directory; if that substring is absent, the whole
# working directory is used as the root.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package below is importable.
sys.path.insert(0, str(pth_project))
# read_text() opens and closes the file itself; passing a bare `pth_creds.open()`
# to safe_load left the handle open until garbage collection.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path.insert above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Let pandas render up to 100 rows before truncating displayed frames.
pd.options.display.max_rows = 100

In [26]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Args:
        bq_client: Object exposing ``query(sql)`` whose return value provides
            ``to_dataframe()``.
        sql: Query text. Takes precedence over ``pth_query`` when both are given.
        pth_query: Location of a file holding the query text. May be a
            ``pathlib.Path``, a ``str`` or any other path-like object.

    Returns:
        Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises:
        ValueError: If neither ``sql`` nor ``pth_query`` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Coerce to Path so plain string paths work as well as Path objects.
        sql = Path(pth_query).read_text()
    return bq_client.query(sql).to_dataframe()

In [27]:
# Load the ADC/ADT mapping extract. The path is relative, so it is resolved
# against the kernel's current working directory (not pth_data).
ADC_ADT_mapping=pd.read_csv('ADC_ADT_mapping.csv',low_memory=False)

In [28]:
# Inspect column names, non-null counts and dtypes of the loaded mapping.
ADC_ADT_mapping.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 306650 entries, 0 to 306649
Data columns (total 64 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   ImportationDate                 306650 non-null  object 
 1   CustomerId                      306650 non-null  int64  
 2   DealerCustomerId                125825 non-null  object 
 3   JoinDate                        306650 non-null  object 
 4   StreetAddress1                  306649 non-null  object 
 5   StreetAddress2                  7827 non-null    object 
 6   City                            306649 non-null  object 
 7   State                           306246 non-null  object 
 8   ZipCode                         306649 non-null  object 
 9   FirstName                       306627 non-null  object 
 10  LastName                        306636 non-null  object 
 11  CompanyName                     1071 non-null    object 
 12  ModemSerial     

In [29]:
# Strip surrounding whitespace so the later equality filter on sitetype_id == 'R' matches.
ADC_ADT_mapping['sitetype_id']=ADC_ADT_mapping['sitetype_id'].str.strip()

In [30]:
# Check which site types remain after stripping.
ADC_ADT_mapping['sitetype_id'].value_counts()

R    306650
Name: sitetype_id, dtype: int64

In [31]:
# ADC_ADT_mapping['CustomerId_1']=ADC_ADT_mapping['CustomerId'].astype('object')

In [32]:
# Preview the first rows of the mapping.
# NOTE(review): the rendered output includes customer names and street addresses;
# clear this cell's output before sharing or committing the notebook.
ADC_ADT_mapping.head()

Unnamed: 0,ImportationDate,CustomerId,DealerCustomerId,JoinDate,StreetAddress1,StreetAddress2,City,State,ZipCode,FirstName,...,Last7DaysLogin,LastMonthLogin,TotalImageSensors,TotalCaptured,TotalUploaded,LastUpload,TotalPeekedIn,LastPeekInTime,rn,ADT_customer_flag
0,2019-09-13 00:00:00.000,7606283,,2019-07-31 13:13:03,5510 MILLE ILES DES,,ST FRANCOIS LVL,QC,H7L1K5,ADAN MR,...,,,,,,,,,1,1
1,2022-08-13 00:00:00.000,14161472,,2022-07-29 08:52:33,122 GILLES LABARRE,,CHATEAUGUAY,QC,J6J5T4,"STEPHANE/PICHETTE,NATHALI",...,,,,,,,,,1,1
2,2022-09-23 00:00:00.000,14583072,,2022-09-21 16:11:17,429 WOODLEA AVE,,MONT-ROYAL,QC,H3P1R7,SARDI,...,,,,,,,,,1,1
3,2020-12-24 00:00:00.000,10231254,,2020-12-23 09:14:10,460 SOMMET BLEU DU,,ST ADELE,QC,J0R1L0,MAURICE,...,,,,,,,,,1,1
4,2021-01-14 00:00:00.000,10475995,,2021-01-13 09:02:28,1775 RENE LAENNEC BLVD,,LAVAL,QC,H7M5C8,JOSE,...,,,,,,,,,1,1


In [33]:
# Account details for dealer 'ADT by TELUS', keeping only the most recent row
# (by last_updt_ts) for each customer_id.
Customer_Details_Query='''

SELECT customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts
FROM `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details` 
where dealer_name='ADT by TELUS'
--and account_type_name!='Standalone'
--and customer_type_name='Customer'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
order by dealer_name,join_date,customer_id


'''

In [34]:
# Execute Customer_Details_Query through bq_client and keep the result as a DataFrame.
Customer_DF=extract_bq_data(bq_client, sql=Customer_Details_Query)

In [35]:
# Inspect column names, non-null counts and dtypes of the query result.
Customer_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 279038 entries, 0 to 279037
Data columns (total 10 columns):
 #   Column              Non-Null Count   Dtype              
---  ------              --------------   -----              
 0   customer_id         279038 non-null  Int64              
 1   dealer_customer_id  279038 non-null  object             
 2   primary_login_id    279038 non-null  Int64              
 3   dealer_name         279038 non-null  object             
 4   join_date           279038 non-null  datetime64[ns, UTC]
 5   account_type_name   279038 non-null  object             
 6   customer_type_name  279038 non-null  object             
 7   primary_email       279038 non-null  object             
 8   primary_phone       279038 non-null  object             
 9   last_updt_ts        279038 non-null  datetime64[ns, UTC]
dtypes: Int64(2), datetime64[ns, UTC](2), object(6)
memory usage: 21.8+ MB


In [45]:
# Sanity check: the query filters on dealer_name, so only one dealer should appear.
Customer_DF['dealer_name'].value_counts()

ADT by TELUS    279038
Name: dealer_name, dtype: int64

In [46]:
# Mark every row of the mapping as an ADT customer.
# NOTE(review): the earlier head() output already lists an ADT_customer_flag column,
# so this cell appears to have been run before that preview (or the CSV already
# contains the column) — confirm the notebook still works on Restart & Run All.
ADC_ADT_mapping['ADT_customer_flag']=1

In [47]:
# Inner-join the BigQuery customer records to the mapping rows whose sitetype_id
# is 'R', matching customer_id (query side) to CustomerId (mapping side).
Merge_data = pd.merge(
    Customer_DF,
    ADC_ADT_mapping.loc[ADC_ADT_mapping['sitetype_id'] == 'R'],
    how='inner',
    left_on='customer_id',
    right_on='CustomerId',
)

In [48]:
# The flag is 1 on every mapping row, so this sum equals the number of merged rows.
Merge_data['ADT_customer_flag'].sum()

218151

In [49]:
# Distribution of customer types among the merged rows.
Merge_data['customer_type_name'].value_counts()

Customer         218039
Commitment           60
From Recycle         41
Demo - Normal        11
Name: customer_type_name, dtype: int64

In [50]:
# Sanity check: the merged rows should still belong to a single dealer.
Merge_data['dealer_name'].value_counts()

ADT by TELUS    218151
Name: dealer_name, dtype: int64

In [40]:
# Preview the first merged rows.
# NOTE(review): the rendered output includes customer phone numbers and other
# account identifiers; clear this cell's output before sharing or committing.
Merge_data.head()

Unnamed: 0,customer_id,dealer_customer_id,primary_login_id,dealer_name,join_date,account_type_name,customer_type_name,primary_email,primary_phone,last_updt_ts,...,Last7DaysLogin,LastMonthLogin,TotalImageSensors,TotalCaptured,TotalUploaded,LastUpload,TotalPeekedIn,LastPeekInTime,rn,ADT_customer_flag
0,287850,,306491,ADT by TELUS,2009-07-08 10:46:52+00:00,Security System,Customer,AeFd5ZDQfT+2dlYTQE+NVn9VE9Q+PP6Jz99bl40h0FllVg==,14034619748,2022-11-17 11:20:18.335370+00:00,...,,,,,,,,,1,1
1,292877,,311600,ADT by TELUS,2009-07-16 00:34:52+00:00,Security System,Customer,AYM0XZ60owqcI4xfVSe4qu/D79HXcWIz03BAhCe4DwvAvE...,14035565543,2022-11-17 11:20:18.335370+00:00,...,,,,,,,,,1,1
2,359052,,382402,ADT by TELUS,2010-03-12 18:12:23+00:00,Security System,Customer,AXEModsEPR/On9XxBpHhTOsK4/7JD1zj2pQ1KuslUwk7EH...,17806557428,2022-11-16 11:26:59.835886+00:00,...,,,,,,,,,1,1
3,361396,50001346.0,384875,ADT by TELUS,2010-03-18 11:18:44+00:00,Security System,Customer,AfweuuQerY2xv70GPiAMA4NyzuShjslGEwWmcqdjO/sDFQ==,17804463443,2022-10-28 11:19:58.768721+00:00,...,,,,,,,,,1,1
4,361475,,384957,ADT by TELUS,2010-03-18 13:21:35+00:00,Security System,Customer,AVv7q8XdOTYLZMlyJe3bzqe+CmymXcYqvfpmS2pm1TvBaL...,14166533767,2022-10-21 11:27:36.706033+00:00,...,,,,,,,,,1,1


In [41]:
# Inspect the merged frame's columns and dtypes before renaming columns.
Merge_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 218151 entries, 0 to 218150
Data columns (total 74 columns):
 #   Column                          Non-Null Count   Dtype              
---  ------                          --------------   -----              
 0   customer_id                     218151 non-null  Int64              
 1   dealer_customer_id              218151 non-null  object             
 2   primary_login_id                218151 non-null  Int64              
 3   dealer_name                     218151 non-null  object             
 4   join_date                       218151 non-null  datetime64[ns, UTC]
 5   account_type_name               218151 non-null  object             
 6   customer_type_name              218151 non-null  object             
 7   primary_email                   218151 non-null  object             
 8   primary_phone                   218151 non-null  object             
 9   last_updt_ts                    218151 non-null  datetime64[ns, UTC]
 

In [42]:
# Replace separator characters in the column names with '_' (presumably so the
# names are acceptable to the BigQuery load that follows — confirm).
# The previous pattern ' |\|-|:|/' relied on an invalid '\|' escape in a non-raw
# string and, as a regex, parsed into the alternatives ' ', '|-', ':' and '/', so a
# lone '-' or '|' was never replaced. The character class below states the set
# explicitly: space, backslash, pipe, dash, colon and forward slash.
rep_chars = r'[ \\|\-:/]'

# regex=True is explicit because pandas >= 2.0 treats the pattern as a literal
# string by default, which would silently turn this rename into a no-op.
Merge_data.columns = Merge_data.columns.str.replace(rep_chars, '_', regex=True)

  Merge_data.columns = Merge_data.columns.str.replace(rep_chars, '_')


In [43]:
# Re-inspect the frame to confirm the column names after the rename.
Merge_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 218151 entries, 0 to 218150
Data columns (total 74 columns):
 #   Column                          Non-Null Count   Dtype              
---  ------                          --------------   -----              
 0   customer_id                     218151 non-null  Int64              
 1   dealer_customer_id              218151 non-null  object             
 2   primary_login_id                218151 non-null  Int64              
 3   dealer_name                     218151 non-null  object             
 4   join_date                       218151 non-null  datetime64[ns, UTC]
 5   account_type_name               218151 non-null  object             
 6   customer_type_name              218151 non-null  object             
 7   primary_email                   218151 non-null  object             
 8   primary_phone                   218151 non-null  object             
 9   last_updt_ts                    218151 non-null  datetime64[ns, UTC]
 

In [44]:
# Load job configuration for writing Merge_data to BigQuery.
config = bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
# WRITE_TRUNCATE replaces the destination table's contents on every run.
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Destination in '<dataset>.<table>' form; no project is given here, so the
# client's own project setting is expected to apply.
Table_BQ = 'ADC_updated.ADC_ADT_mapping'

# load_table_from_dataframe only starts the load job and returns a job handle.
bq_table_instance = bq_client.load_table_from_dataframe(Merge_data, Table_BQ, job_config=config)
# Block until the job has finished; result() raises if the load failed, so a
# broken upload can no longer pass unnoticed.
bq_table_instance.result()
print(f'Loaded {bq_table_instance.output_rows} rows into {Table_BQ}')