In [33]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root is whatever precedes the first 'notebooks' substring in the
# current working directory (the whole cwd when that substring is absent).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package below resolves.
sys.path.insert(0, str(pth_project))
# read_text() opens AND closes the file; passing an open handle straight to
# safe_load() left it open until garbage collection.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path tweak above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when a DataFrame/Series is displayed.
pd.options.display.max_rows = 100

In [34]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Parameters
    ----------
    bq_client
        Object exposing ``query(sql)`` whose return value provides
        ``to_dataframe()``.
    sql : str, optional
        Query text. Takes precedence over ``pth_query`` when both are given.
    pth_query : str or pathlib.Path, optional
        File holding the query text; only read when ``sql`` is None.

    Returns
    -------
    The value returned by ``bq_client.query(sql).to_dataframe()``.

    Raises
    ------
    ValueError
        If both ``sql`` and ``pth_query`` are None.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Path() accepts str as well as Path, so plain string paths work too.
        sql = Path(pth_query).read_text()
    return bq_client.query(sql).to_dataframe()

In [35]:
# Month-end snapshot date used by every date filter in the query below.
# Change it here only; it is interpolated into the SQL text.
SNAPSHOT_DATE = '2022-10-31'

# Query outline:
#   1. Telus_internet_customers - distinct customer ids holding an HSIC product
#      instance with status 'A' and customer type 'R' on the snapshot date.
#   2. Telus_SMHM_customers     - same filter for SMHM product instances coming
#      from source id 1001.
#   3. ADC_customer_base        - latest row per ADC customer_id updated on the
#      snapshot date for dealer 'TELUS Communications Inc.'.
#   4. Telus_SMHM_internet      - customer ids present in both (1) and (2).
#   5. merge_data               - ADC rows whose dealer_customer_id matches (4).
# Sorting inside the CTEs was dropped: it does not affect which rows the final
# select returns and only added work.
# The f-string is safe here because the SQL text contains no literal braces.
Telus_internet_Customer_Query = f'''
with Telus_internet_customers as (
    select distinct cust_bus_cust_id
    from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht`
    where DATE(prod_instnc_ts) = "{SNAPSHOT_DATE}"  # Snapshot of the last day of the month
      and pi_prod_instnc_stat_cd in ('A')
      and pi_prod_instnc_typ_cd = 'HSIC'
      and consldt_cust_typ_cd = 'R'
    QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY pi_prod_instnc_stat_ts DESC) = 1
),

Telus_SMHM_customers as (
    select distinct cust_bus_cust_id
    from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht`
    where DATE(prod_instnc_ts) = "{SNAPSHOT_DATE}"  # Snapshot of the last day of the month
      and pi_prod_instnc_typ_cd = 'SMHM'  # Service type
      and bus_prod_instnc_src_id = 1001  # BANs that are for home services
      and pi_prod_instnc_stat_cd in ('A')
      and consldt_cust_typ_cd = 'R'
    QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY pi_prod_instnc_stat_ts DESC) = 1
),

ADC_customer_base as (
    select customer_id, dealer_customer_id, dealer_name, account_type_name, customer_type_name
    from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details`
    where date(last_updt_ts) = '{SNAPSHOT_DATE}'
      and dealer_name = 'TELUS Communications Inc.'
    -- keep only the most recently updated row per ADC customer
    QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1
),

Telus_SMHM_internet as (
    select distinct (a.cust_bus_cust_id) as cust_bus_cust_id
    from Telus_SMHM_customers a
    inner join Telus_internet_customers b
      on a.cust_bus_cust_id = b.cust_bus_cust_id
),

merge_data as (
    select *
    from ADC_customer_base adc
    inner join Telus_SMHM_internet telus
      on telus.cust_bus_cust_id = adc.dealer_customer_id
)

select *, 1 as Telus_internet_flag from merge_data
'''

In [36]:
# Execute the query text held in Telus_internet_Customer_Query and keep the
# returned DataFrame for the inspection and upload cells that follow.
Telus_internet_DF = extract_bq_data(
    bq_client,
    sql=Telus_internet_Customer_Query,
)

In [37]:
# Print column names, non-null counts, dtypes and memory usage of the result.
Telus_internet_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 233706 entries, 0 to 233705
Data columns (total 7 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   customer_id          233706 non-null  Int64 
 1   dealer_customer_id   233706 non-null  object
 2   dealer_name          233706 non-null  object
 3   account_type_name    233706 non-null  object
 4   customer_type_name   233706 non-null  object
 5   cust_bus_cust_id     233706 non-null  object
 6   Telus_internet_flag  233706 non-null  Int64 
dtypes: Int64(2), object(5)
memory usage: 12.9+ MB


In [38]:
# Rows per cust_bus_cust_id; any count above 1 means that id appears on
# several rows of the result (the recorded output shows up to 19).
Telus_internet_DF['cust_bus_cust_id'].value_counts()

59807994     19
78929393     16
100949016    12
103773257    10
95537824      7
             ..
102821651     1
102821690     1
102843703     1
102861272     1
99977130      1
Name: cust_bus_cust_id, Length: 230927, dtype: int64

In [39]:
# Rows per dealer_customer_id. The query joins on
# cust_bus_cust_id = dealer_customer_id, so these counts mirror the
# cust_bus_cust_id counts.
Telus_internet_DF['dealer_customer_id'].value_counts()

59807994     19
78929393     16
100949016    12
103773257    10
95537824      7
             ..
102821651     1
102821690     1
102843703     1
102861272     1
99977130      1
Name: dealer_customer_id, Length: 230927, dtype: int64

In [40]:
# WRITE_TRUNCATE replaces whatever the destination table already holds, so
# re-running this cell overwrites the table instead of appending to it.
config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

# Destination given as dataset.table.
Table_BQ = 'ADC_updated.Telus_internet_31Oct2022'

# load_table_from_dataframe() returns a load job, not the finished table.
bq_table_instance = bq_client.load_table_from_dataframe(
    Telus_internet_DF,
    Table_BQ,
    job_config=config,
)
# Wait for the load job to finish; result() raises if the job failed, which
# previously went unnoticed. The trailing ';' keeps the cell output empty.
bq_table_instance.result();