In [None]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root = the text of the working directory that precedes the first
# occurrence of 'notebooks'; if that substring is absent, the working
# directory itself is used.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package below resolves.
sys.path.insert(0, str(pth_project))
# Read the YAML config inside a context manager so the file handle is closed
# as soon as parsing finishes instead of lingering until garbage collection.
with pth_creds.open() as f_config:
    d_project_config = safe_load(f_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path insert above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Let pandas display up to 100 rows before truncating.
pd.options.display.max_rows = 100

In [None]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Parameters
    ----------
    bq_client
        Object exposing ``query(sql)`` whose return value has ``to_dataframe()``.
    sql : str, optional
        Query text. Takes precedence over ``pth_query`` when both are given.
    pth_query : str or os.PathLike, optional
        Location of a file holding the query text; only read when ``sql`` is None.

    Returns
    -------
    The value of ``bq_client.query(sql).to_dataframe()``.

    Raises
    ------
    ValueError
        If neither ``sql`` nor ``pth_query`` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Path(...) accepts both Path objects and plain string paths.
        sql = Path(pth_query).read_text()
    return bq_client.query(sql).to_dataframe()

In [None]:
# Reporting window: bills dated PERIOD_START..PERIOD_END (inclusive) feed the
# ARPU figures, and the product-instance snapshot is read as of PERIOD_END.
# Both are ISO 'YYYY-MM-DD' strings interpolated into the SQL below.
PERIOD_START = '2022-05-01'
PERIOD_END = '2022-10-31'

# Result: every row of `Telus_customers` left-joined to the per-customer
# average of monthly billed amounts (AVG_ARPU); customers with no bills in
# the window get a NULL AVG_ARPU.
# The CTEs are deliberately unsorted: the final SELECT is unordered, so
# sorting inside them would add cost without changing the result.
Query = f'''
with period as (
  select DATE('{PERIOD_START}') as start_date, DATE('{PERIOD_END}') as end_date
)

-- SMHM product instances in the end_date snapshot, reduced to one row per
-- customer (the row with the most recent pi_prod_instnc_stat_ts).
, Telus_customers as (
  select
    bacct_bus_bacct_num,
    bacct_billg_acct_id,
    cust_bus_cust_id,
    pi_prod_instnc_typ_cd,
    pi_prod_instnc_stat_ts,
    prod_instnc_ts,
    pi_cntrct_end_ts as contract_end_date
  from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht`
  where DATE(prod_instnc_ts) = (select end_date from period)
    and pi_prod_instnc_typ_cd = 'SMHM'    -- service type
    and bus_prod_instnc_src_id = 1001     -- BANs that are for home services
    and pi_prod_instnc_stat_cd in ('A')
    and consldt_cust_typ_cd = 'R'
  qualify row_number() over (partition by cust_bus_cust_id order by pi_prod_instnc_stat_ts desc) = 1
)

-- Distinct (customer, bill month, charge amount) rows billed inside the period
-- for customers holding an SMHM product instance in the end_date snapshot.
-- NOTE(review): DISTINCT also merges separate charges of equal amount within
-- the same customer-month; confirm that this is intended.
, interim_data as (
  select distinct
    SAFE_CAST(cust_id AS STRING) as cust_id,
    EXTRACT(YEAR FROM bill.bill_dt) || '-' || LPAD(CAST(EXTRACT(MONTH FROM bill.bill_dt) AS STRING), 2, '0') as bill_year_month,
    bill.rid_unit_chrg_amt
  from `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_inv_sum_view` bill
  where DATE(bill.bill_dt) >= (select start_date from period)
    and DATE(bill.bill_dt) <= (select end_date from period)
    and exists (
      select 1
      from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` as prod
      where prod.pi_prod_instnc_typ_cd in ('SMHM')
        and prod.cust_bus_cust_id = SAFE_CAST(bill.cust_id AS STRING)
        and DATE(prod.prod_instnc_ts) = (select end_date from period)
    )
)

-- Total charge per customer and bill month.
, all_data_ARPU as (
  select cust_id, bill_year_month, sum(rid_unit_chrg_amt) as ARPU
  from interim_data
  group by cust_id, bill_year_month
)

-- Mean of the monthly totals per customer.
, ARPU_table as (
  select cust_id, avg(ARPU) as AVG_ARPU
  from all_data_ARPU
  group by cust_id
)

select *
from Telus_customers a
left join ARPU_table b
  on a.cust_bus_cust_id = b.cust_id
'''


In [None]:
# Execute the query text held in `Query` and keep the result as a DataFrame.
SMHM_data = extract_bq_data(bq_client, sql=Query)

In [None]:
# Summarise the extract: column dtypes, non-null counts and memory usage.
SMHM_data.info()

In [None]:
# Percentage of rows whose AVG_ARPU is missing.
SMHM_data['AVG_ARPU'].isna().sum() * 100 / len(SMHM_data)

In [None]:
# Preview the first five rows of the extract.
SMHM_data.head()

In [None]:
# Persist the extract without the index column; the path is relative, so the
# file is written to the current working directory.
SMHM_data.to_csv('SMHM_ARPU_Nov2022.csv', index=False)