In [7]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root = the part of the working directory that precedes the first
# occurrence of 'notebooks' (the whole cwd if 'notebooks' is not in the path).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Make the project root importable so the `core` package below resolves.
sys.path.insert(0, str(pth_project))
# read_text() opens, reads and closes the config file; passing a bare
# `pth_creds.open()` to safe_load left the file handle open.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
# (must come after the sys.path.insert above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
# The returned object is used in later cells as a BigQuery client.
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [8]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return it as a dataframe.

    Args:
        bq_client: Object exposing ``query(sql)`` whose result has a
            ``to_dataframe()`` method.
        sql: Query text. When given, it is used and ``pth_query`` is ignored.
        pth_query: Object with a ``read_text()`` method (e.g. a
            ``pathlib.Path``) holding the query text; used only when ``sql``
            is None.

    Returns:
        Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises:
        ValueError: If both ``sql`` and ``pth_query`` are None.
    """
    if sql is None:
        # No inline query: fall back to the query file, if one was supplied.
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        sql = pth_query.read_text()
    return bq_client.query(sql).to_dataframe()

In [9]:
# Load the order-details extract; the path is relative to the working directory.
# NOTE(review): CUST_ID and BILL_ACCOUNT_NUMBER contain nulls, so pandas reads
# them as float64 -- confirm float IDs are acceptable downstream.
Order_DF = pd.read_csv(filepath_or_buffer='ORDER_DETAILS_JAN2023_MAR2023.csv')

In [10]:
# Check column dtypes and non-null counts of the order extract.
Order_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44764 entries, 0 to 44763
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   SALES_AGENT_ID        44711 non-null  object 
 1   CUST_ID               44763 non-null  float64
 2   CHANNEL_GROUP         44764 non-null  object 
 3   CHANNEL               44756 non-null  object 
 4   PROD_INSTNC_ID        44764 non-null  int64  
 5   CREATED_DT            44764 non-null  object 
 6   ORDER_STATUS          44764 non-null  object 
 7   PRODUCT_NAME          44764 non-null  object 
 8   BILL_ACCOUNT_NUMBER   44654 non-null  float64
 9   IS_DIY                44764 non-null  int64  
 10  IS_EXISTING_CUSTOMER  44764 non-null  int64  
 11  SELF_INSTALL          44764 non-null  int64  
 12  CURRENT_ORDER_STATUS  44764 non-null  object 
 13  IS_TEST_ACCOUNT       44764 non-null  int64  
 14  ACTIVATION_DT         44764 non-null  object 
dtypes: float64(2), int6

In [11]:
# Preview the first rows of the order extract.
Order_DF.head()

Unnamed: 0,SALES_AGENT_ID,CUST_ID,CHANNEL_GROUP,CHANNEL,PROD_INSTNC_ID,CREATED_DT,ORDER_STATUS,PRODUCT_NAME,BILL_ACCOUNT_NUMBER,IS_DIY,IS_EXISTING_CUSTOMER,SELF_INSTALL,CURRENT_ORDER_STATUS,IS_TEST_ACCOUNT,ACTIVATION_DT
0,1100043889-KHRH,19033735.0,CORP STORES,TELUS Store / Tillicum Centre,1135051759,24JAN2023:00:00:00,Installed,Smart Automation Plus,224030389.0,0,1,0,Installed,0,08FEB2023:00:00:00
1,1100052004-F3VD,105893421.0,D2C,D2C: D2D,1137125713,08MAR2023:00:00:00,Installed,Smart Automation Plus,605791112.0,0,0,0,Installed,0,25MAR2023:00:00:00
2,X261942,100985494.0,CSS,CSS,1137563685,17MAR2023:00:00:00,Installed,Smart Automation Plus,604327868.0,1,1,1,Installed,0,17MAR2023:00:00:00
3,1100030105-KG5D,106139796.0,CORP STORES,TELUS Store / The Village Shopping Centre,1137927707,25MAR2023:00:00:00,Installed,Smart Automation Plus,605863932.0,1,0,1,Installed,0,25MAR2023:00:00:00
4,1100000224-CBW2,106024978.0,TQ,ORIZON MOBILE,1137424744,15MAR2023:00:00:00,Installed,Control,605816205.0,0,0,0,Installed,0,21MAR2023:00:00:00


In [12]:
# Upload the order extract to BigQuery.
config = bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
# WRITE_TRUNCATE: replace the destination table's contents on every run.
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.Order_details_Jan2023_Mar2023'

bq_table_instance = bq_client.load_table_from_dataframe(Order_DF, Table_BQ, job_config=config)
# load_table_from_dataframe only starts the load job. Wait for it to finish so
# that a failed load raises here instead of passing silently.
bq_table_instance.result()

In [13]:
# Load the involuntary- and voluntary-churn extracts; both paths are relative
# to the working directory.
Invol_DF = pd.read_csv(filepath_or_buffer='SMHH_Involchurn_JAN2023_MAR2023.csv')
Vol_DF = pd.read_csv(filepath_or_buffer='SMHH_VC_JAN2023_MAR2023.csv')

In [14]:
# Preview the first rows of the involuntary-churn extract.
Invol_DF.head()

Unnamed: 0,BILLING_ACCOUNT_NUM,DLY_PROD_INSTNC_ACTVY_TS,NET_DEACT
0,604868277,02JAN2023:00:00:00,1
1,605318623,02JAN2023:00:00:00,1
2,604967835,02JAN2023:00:00:00,2
3,604823649,02JAN2023:00:00:00,1
4,605365300,02JAN2023:00:00:00,1


In [15]:
# Preview the first rows of the voluntary-churn extract.
Vol_DF.head()

Unnamed: 0,ACTIVITY_DATE,CUST_ID,BILLING_ACCOUNT_NUM,BUS_BILLING_ACCOUNT_NUM,BUS_PROD_INSTNC_ID,ACTIVITY_REASON_TXT,ACTIVITY_REASON_TYPE_CD,ACTIVITY_REASON_SUB_TYPE_CD,INITIAL_ACTIVATION_TS,ACTIVATION_TS,...,BI_CHANNEL_TAG_CD,ORIG_TEAM_MEMBER_KEY,ORIG_TEAM_MEMBER_ID,ORIG_TM_WORK_LOCN_NM,MAX_PRODUCT_PRICE_AMT,PROCESS_TYPE_TXT,ORDER_ACTION_DUE_TS,ORDER_FMS_ADDRESS_ID,TRANSACTION_TYPE_TXT,PRICE_PLAN_TXT
0,01JAN2023:00:00:00,1582660,605530428,605530428,1133170791,Other,DEACT,V,30NOV2022:00:00:00,30NOV2022:00:00:00,...,CARE-WLN,4076036.0,191920.0,TI Philippines (TI Philippines),0.0,N/AVAIL,01JAN2023:21:25:20,1637282.0,Cease,Smart Automation Plus (V2)
1,01JAN2023:00:00:00,2133792,601488436,601488436,1111212968,Other,DEACT,V,04AUG2020:00:00:00,04AUG2020:00:00:00,...,CARE-WLN,4099422.0,374606.0,TI El Salvador (TI El Salvador),0.0,N/AVAIL,01JAN2023:19:00:00,3113263.0,Cease,Control Plus Video (V1)
2,01JAN2023:00:00:00,2421437,220062258,220062258,1133633157,Other,DEACT,V,08DEC2022:00:00:00,08DEC2022:00:00:00,...,CARE-WLN,4096370.0,363361.0,TI Guatemala (TI Guatemala),0.0,N/AVAIL,31DEC2022:23:21:31,2955203.0,Cease,Smart Camera (V2)
3,01JAN2023:00:00:00,2863982,215974930,215974930,1108679944,Churn to another provider,DEACT,V,20DEC2019:00:00:00,20DEC2019:00:00:00,...,CARE-WLN,4102204.0,381720.0,UNKNOWN,0.0,N/AVAIL,31DEC2022:22:47:01,715872.0,Cease,Control Plus Video
4,01JAN2023:00:00:00,10105793,602833943,602833943,1118298004,Other,DEACT,V,12SEP2021:00:00:00,12SEP2021:00:00:00,...,CARE-WLN,4099421.0,374605.0,TI El Salvador (TI El Salvador),0.0,N/AVAIL,31DEC2022:22:17:49,1395301.0,Cease,SmartHome Security - Camera


In [18]:
# Upload the voluntary-churn extract to BigQuery.
config = bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
# WRITE_TRUNCATE: replace the destination table's contents on every run.
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.VolChurn_Jan2023_Mar2023'

bq_table_instance = bq_client.load_table_from_dataframe(Vol_DF, Table_BQ, job_config=config)
# load_table_from_dataframe only starts the load job. Wait for it to finish so
# that a failed load raises here instead of passing silently.
bq_table_instance.result()

In [19]:
# Upload the involuntary-churn extract to BigQuery.
config = bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
# WRITE_TRUNCATE: replace the destination table's contents on every run.
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

Table_BQ = 'SHS.InVolChurn_Jan2023_Mar2023'

bq_table_instance = bq_client.load_table_from_dataframe(Invol_DF, Table_BQ, job_config=config)
# load_table_from_dataframe only starts the load job. Wait for it to finish so
# that a failed load raises here instead of passing silently.
bq_table_instance.result()