In [2]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root = the part of the working directory that precedes the first
# occurrence of the substring 'notebooks'. If the working directory does not
# contain 'notebooks', split() returns it whole and the root is the cwd itself.
# NOTE(review): confirm that fallback is acceptable when run from elsewhere.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package can be imported.
sys.path.insert(0, str(pth_project))
# read_text() opens and closes the file itself, so no file handle is left open
# (the previous `safe_load(pth_creds.open())` never closed it).
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services, using the project name stored in the local config
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Let pandas display up to 100 rows
pd.set_option('display.max_rows', 100)

In [3]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Parameters
    ----------
    bq_client
        Object exposing ``query(sql)``; the value it returns must expose
        ``to_dataframe()``.
    sql : str, optional
        Query text. Takes precedence over ``pth_query`` when both are given.
    pth_query : str or pathlib.Path, optional
        Path of a file holding the query text. Only read when ``sql`` is None.

    Returns
    -------
    The value of ``bq_client.query(sql).to_dataframe()``.

    Raises
    ------
    ValueError
        If both ``sql`` and ``pth_query`` are None.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Path() accepts both str and Path, so plain string paths work as well.
        sql = Path(pth_query).read_text()
    # Single execution point for both input modes.
    return bq_client.query(sql).to_dataframe()

In [16]:
# BigQuery SQL: average monthly billed amount (AVG_ARPU) per customer holding
# an 'SMHM' product instance.
#
# CTEs, in order:
#   period          - single row with the reporting window
#                     (start_date 2022-05-01, end_date 2022-10-31).
#   Telus_customers - rows of bq_prod_instnc_snpsht dated end_date, filtered to
#                     type 'SMHM', source id 1001, status 'A', customer type
#                     'R'; QUALIFY keeps one row per cust_bus_cust_id (the one
#                     with the latest pi_prod_instnc_stat_ts).
#   interim_data    - DISTINCT (cust_id, bill_year_month, rid_unit_chrg_amt)
#                     from bq_wln_inv_sum_view for bills inside the window,
#                     limited to customers that have an 'SMHM' snapshot row on
#                     end_date.
#   all_data_ARPU   - sum of rid_unit_chrg_amt per customer and bill month.
#   ARPU_table      - average of those monthly sums per customer.
#
# The final SELECT left-joins ARPU_table onto Telus_customers, so customers
# without billing rows are kept with NULL cust_id / AVG_ARPU. Because of
# `select *`, the join key is returned twice (cust_bus_cust_id and cust_id).
#
# NOTE(review): the DISTINCT in interim_data collapses rows of the same
# customer and month that carry the same rid_unit_chrg_amt, so two equal
# charges in one month are summed once - confirm this is intended.
# NOTE(review): the ORDER BY clauses inside the CTEs do not order the final
# result, which has no ORDER BY of its own.
Query='''



with period as (


  select DATE('2022-05-01') as start_date, DATE('2022-10-31') as end_date

)



, Telus_customers as
(
select bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,pi_cntrct_end_ts as contract_end_date
from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE DATE(prod_instnc_ts) = (select end_date from period) 
and pi_prod_instnc_typ_cd ='SMHM' #Serice type
and bus_prod_instnc_src_id = 1001 #BANs that are for home services
and pi_prod_instnc_stat_cd in ('A')
and  consldt_cust_typ_cd = 'R'
QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_bus_cust_id ORDER BY pi_prod_instnc_stat_ts DESC) = 1
order by cust_bus_cust_id
)



, interim_data as (
SELECT
  distinct SAFE_CAST(cust_id AS STRING) AS cust_id,
  EXTRACT(YEAR
  FROM
    bill.bill_dt)||'-'||LPAD(CAST(EXTRACT(MONTH
      FROM
        bill.bill_dt) AS STRING), 2, '0') AS bill_year_month,
  bill.rid_unit_chrg_amt
  FROM
  `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_inv_sum_view` bill
WHERE
  DATE(bill.bill_dt)>= (select start_date from period) and DATE(bill.bill_dt)<= (select end_date from period)
  AND EXISTS 
(
SELECT
  1
FROM
  `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` AS prod
WHERE
  prod.pi_prod_instnc_typ_cd IN ('SMHM')
  AND prod.cust_bus_cust_id = SAFE_CAST(bill.cust_id AS STRING)
  AND DATE(prod.prod_instnc_ts) = (select end_date from period))
)



,all_data_ARPU as (select cust_id, bill_year_month,sum(rid_unit_chrg_amt) as ARPU
from interim_data
group by cust_id, bill_year_month
order by cust_id,bill_year_month)


, ARPU_table as (


select cust_id, avg(ARPU) as AVG_ARPU 
from all_data_ARPU
group by cust_id
order by cust_id,AVG_ARPU desc


)

select * from Telus_customers a
left join ARPU_table b 
on a.cust_bus_cust_id=b.cust_id

'''


In [17]:
# Execute the ARPU query and keep the result as a DataFrame
SMHM_data = extract_bq_data(bq_client=bq_client, sql=Query)

In [18]:
# Summarise the extract: row count, per-column non-null counts, dtypes and memory usage
SMHM_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 394045 entries, 0 to 394044
Data columns (total 9 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   bacct_bus_bacct_num     394045 non-null  Int64 
 1   bacct_billg_acct_id     394045 non-null  Int64 
 2   cust_bus_cust_id        394045 non-null  object
 3   pi_prod_instnc_typ_cd   394045 non-null  object
 4   pi_prod_instnc_stat_ts  394045 non-null  object
 5   prod_instnc_ts          394045 non-null  object
 6   contract_end_date       394045 non-null  object
 7   cust_id                 372715 non-null  object
 8   AVG_ARPU                371934 non-null  object
dtypes: Int64(2), object(7)
memory usage: 27.8+ MB


In [19]:
# Percentage of rows whose AVG_ARPU is missing
100 * SMHM_data['AVG_ARPU'].isna().sum() / len(SMHM_data)

5.6112880508571354

In [20]:
# Preview the first five rows of the extract
SMHM_data.head(n=5)

Unnamed: 0,bacct_bus_bacct_num,bacct_billg_acct_id,cust_bus_cust_id,pi_prod_instnc_typ_cd,pi_prod_instnc_stat_ts,prod_instnc_ts,contract_end_date,cust_id,AVG_ARPU
0,604087014,100168468,100008388,SMHM,2022-06-18 00:00:00,2022-10-31 00:00:00+00:00,2027-06-18 00:00:00,100008388,17.82
1,604090423,100187398,100018095,SMHM,2020-10-23 00:00:00,2022-10-31 00:00:00+00:00,2023-10-24 00:00:00,100018095,51.333333333
2,604090701,100189260,100019879,SMHM,2022-03-26 00:00:00,2022-10-31 00:00:00+00:00,2025-03-26 00:00:00,100019879,43.0
3,604094054,100210452,100034805,SMHM,2020-02-29 00:00:00,2022-10-31 00:00:00+00:00,2023-02-28 00:00:00,100034805,53.333333333
4,604094450,100211506,100036481,SMHM,2020-11-09 00:00:00,2022-10-31 00:00:00+00:00,9999-12-31 00:00:00,100036481,54.888333333


In [21]:
# Write the extract to a CSV in the current working directory, without the index column
SMHM_data.to_csv(path_or_buf='SMHM_ARPU_Nov2022.csv', index=False)