
-- Reporting cut-off: str_dt holds it as text, ref_dt is the same day as a DATE.
-- ref_dt is what the latest_bill CTE further down compares bill dates against.
declare str_dt string
    default '2023-11-30';
declare ref_dt date
    default parse_date('%Y-%m-%d', str_dt);

create or replace table `divg-groovyhoon-pr-d2eab4.promo_expiry_analysis_q4.latest_bill_discount_dec_2023` as

-- bill_chrg_tmp: one row per (bill_dt, bill_doc_id) with unit/net charge and
-- discount totals, plus the same totals split by service resource type code
-- into the hs_ / tv_ / tn_ / rid_ buckets and an oth_ catch-all.
-- Only summary rows at level 'D' or 'SD' are included.
with bill_chrg_tmp as (
    select
        bill_dt,
        bill_doc_id,

        -- Overall totals; NULL amounts count as 0.
        sum(coalesce(unit_chrg_amt, 0)) as tot_unit_chrg_amt,
        sum(coalesce(disc_amt, 0)) as tot_disc_amt,

        -- Unit charge by service resource type bucket.
        sum(case when srvc_resrc_typ_cd in ('HSIC', 'HSSN') then unit_chrg_amt else 0 end) as hs_unit_chrg_amt,
        sum(case when srvc_resrc_typ_cd in ('TTV', 'STV') then unit_chrg_amt else 0 end) as tv_unit_chrg_amt,
        sum(case when srvc_resrc_typ_cd in ('SING', 'TN', 'CFCO', 'HPBX', 'TRAN') then unit_chrg_amt else 0 end) as tn_unit_chrg_amt,
        sum(case when srvc_resrc_typ_cd in ('CPE', 'DIIC', 'SMHM', 'NAAS') then unit_chrg_amt else 0 end) as rid_unit_chrg_amt,
        -- Everything outside the four buckets. A NULL type code makes NOT IN
        -- evaluate to NULL, so it is picked up by the explicit IS NULL branch.
        sum(
            case
                when srvc_resrc_typ_cd not in ('HSIC', 'HSSN', 'TTV', 'STV', 'SING', 'TN', 'CFCO', 'HPBX', 'TRAN', 'CPE', 'DIIC', 'SMHM', 'NAAS')
                    then unit_chrg_amt
                when srvc_resrc_typ_cd is null
                    then coalesce(unit_chrg_amt, 0)
                else 0
            end
        ) as oth_unit_chrg_amt,

        -- Net charge: same layout as the unit charge columns above.
        sum(coalesce(net_chrg_amt, 0)) as tot_net_chrg_amt,
        sum(case when srvc_resrc_typ_cd in ('HSIC', 'HSSN') then net_chrg_amt else 0 end) as hs_net_chrg_amt,
        sum(case when srvc_resrc_typ_cd in ('TTV', 'STV') then net_chrg_amt else 0 end) as tv_net_chrg_amt,
        sum(case when srvc_resrc_typ_cd in ('SING', 'TN', 'CFCO', 'HPBX', 'TRAN') then net_chrg_amt else 0 end) as tn_net_chrg_amt,
        sum(case when srvc_resrc_typ_cd in ('CPE', 'DIIC', 'SMHM', 'NAAS') then net_chrg_amt else 0 end) as rid_net_chrg_amt,
        sum(
            case
                when srvc_resrc_typ_cd not in ('HSIC', 'HSSN', 'TTV', 'STV', 'SING', 'TN', 'CFCO', 'HPBX', 'TRAN', 'CPE', 'DIIC', 'SMHM', 'NAAS')
                    then net_chrg_amt
                when srvc_resrc_typ_cd is null
                    then coalesce(net_chrg_amt, 0)
                else 0
            end
        ) as oth_net_chrg_amt
    from `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_chrg_sec_sum`
    where bill_itm_sum_lvl_cd in ('D', 'SD')
    group by
        bill_dt,
        bill_doc_id
),
	
  -- bq_join_view: billing-account and consolidated-customer attributes taken
  -- from the product instance view, with the bacct_ prefix stripped from the
  -- account-level type/subtype/status columns. cust_rnk numbers the rows of each
  -- billing account by status timestamp, newest first, so cust_rnk = 1 selects a
  -- single row per account (ties on the timestamp are broken arbitrarily).
  bq_join_view as (
    select
        bacct_bus_bacct_num as bus_bacct_num,
        bacct_bacct_typ_cd as bacct_typ_cd,
        bacct_bacct_typ_txt as bacct_typ_txt,
        bacct_bacct_typ_fr_txt as bacct_typ_fr_txt,
        bacct_bacct_subtyp_cd as bacct_subtyp_cd,
        bacct_bacct_subtyp_txt as bacct_subtyp_txt,
        bacct_bacct_subtyp_fr_txt as bacct_subtyp_fr_txt,
        bacct_brand_id,
        bacct_brand_nm,
        bacct_billg_cycl_cd,
        bacct_billg_mthd_cd,
        bacct_kb_billg_cycl_cd,
        bacct_bacct_stat_cd as bacct_stat_cd,
        cast(bacct_bacct_stat_ts as timestamp) as bacct_stat_ts,
        bacct_gl_seg_cd,
        bacct_gl_subseg_cd,
        bacct_ebill_ind,
        consldt_cust_pref_lang_cd,
        consldt_cust_typ_cd,
        consldt_cust_typ_txt,
        consldt_cust_typ_fr_txt,
        consldt_cust_subtyp_cd,
        consldt_cust_subtyp_txt,
        consldt_cust_subtyp_fr_txt,
        row_number() over (
            partition by bacct_bus_bacct_num
            order by bacct_bacct_stat_ts desc
        ) as cust_rnk
    from `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_view`
  ),

  -- prorate_by_bill: summed pre-tax charge of bill section 'MP' per bill document and date.
  prorate_by_bill as (
    select
        bill_doc_id as bill_document_id,
        bill_dt,
        sum(pretax_chrg_amt) as tot_prorate_chrg_amt
    from `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_si_sec_sum`
    where bill_sect_cd = 'MP'
    group by
        bill_doc_id,
        bill_dt
  ),

  -- adjmt_by_bill: summed adjustment amount of rows flagged sum_ind = '1' per bill document and date.
  adjmt_by_bill as (
    select
        bill_doc_id as bill_document_id,
        bill_dt,
        sum(adj_amt) as tot_adjmt_amt
    from `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_adjmt_dtl`
    where sum_ind = '1'
    group by
        bill_doc_id,
        bill_dt
  ),

  -- bq_wln_ban_inv_sum_view: one row per bill document from bq_wln_document_dtl,
  -- enriched with the rank-1 account attributes, prorate and adjustment totals,
  -- and the per-bill charge roll-up. Every enrichment is a LEFT JOIN on a source
  -- that is already unique per join key, so no bill document is dropped or
  -- multiplied by the joins.
  bq_wln_ban_inv_sum_view as (
    select
        bd.bill_doc_id,
        bd.cust_id,
        bd.billg_acct_num,
        bd.bill_dt,
        bd.last_bill_due_amt,
        bd.bal_fwd_amt,
        bd.tot_inv_amt,
        bd.tot_tax_inv_amt,
        bd.tot_due_amt,
        chrg.tot_disc_amt,
        -- No matching adjustment rows means a zero adjustment, not NULL.
        coalesce(bad.tot_adjmt_amt, 0) as tot_adjmt_amt,
        chrg.tot_unit_chrg_amt,
        chrg.tot_net_chrg_amt,
        -- Same zero default for bills without an 'MP' section.
        coalesce(bse.tot_prorate_chrg_amt, 0) as tot_prorate_chrg_amt,
        chrg.hs_unit_chrg_amt,
        chrg.tv_unit_chrg_amt,
        chrg.tn_unit_chrg_amt,
        chrg.rid_unit_chrg_amt,
        chrg.oth_unit_chrg_amt,
        chrg.hs_net_chrg_amt,
        chrg.tv_net_chrg_amt,
        chrg.tn_net_chrg_amt,
        chrg.rid_net_chrg_amt,
        chrg.oth_net_chrg_amt,
        cust.bacct_typ_cd,
        cust.bacct_typ_txt,
        cust.bacct_typ_fr_txt,
        cust.bacct_subtyp_cd,
        cust.bacct_subtyp_txt,
        cust.bacct_subtyp_fr_txt,
        cust.bacct_brand_id,
        cust.bacct_brand_nm,
        cust.bacct_billg_cycl_cd,
        cust.bacct_billg_mthd_cd,
        cust.bacct_kb_billg_cycl_cd,
        cust.bacct_stat_cd,
        cust.bacct_stat_ts,
        cust.bacct_gl_seg_cd,
        cust.bacct_gl_subseg_cd,
        cust.bacct_ebill_ind,
        cust.consldt_cust_pref_lang_cd,
        cust.consldt_cust_typ_cd,
        cust.consldt_cust_typ_txt,
        cust.consldt_cust_typ_fr_txt,
        cust.consldt_cust_subtyp_cd,
        cust.consldt_cust_subtyp_txt,
        cust.consldt_cust_subtyp_fr_txt
    from `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_document_dtl` as bd
    -- The rank filter sits in ON (not WHERE) so bills without account data survive.
    left join bq_join_view as cust
        on bd.billg_acct_num = cust.bus_bacct_num
        and cust.cust_rnk = 1
    left join prorate_by_bill as bse
        on bd.bill_doc_id = bse.bill_document_id
        and bd.bill_dt = bse.bill_dt
    left join adjmt_by_bill as bad
        on bd.bill_doc_id = bad.bill_document_id
        and bd.bill_dt = bad.bill_dt
    left join bill_chrg_tmp as chrg
        on bd.bill_doc_id = chrg.bill_doc_id
        and bd.bill_dt = chrg.bill_dt
  ),


-- latest_bill: most recent bill date per billing account inside the one-year
-- window that ends on ref_dt (start exclusive, end inclusive).
latest_bill as (
    select
        inv.billg_acct_num as ban,
        max(inv.bill_dt) as max_bill_dt
    from bq_wln_ban_inv_sum_view as inv
    where inv.bill_dt > date_sub(ref_dt, interval 1 year)
      and inv.bill_dt <= ref_dt
    group by inv.billg_acct_num
),

-- r1: discount charge lines (chrg_typ_cd = 'DSC', summary level 'D') on each
-- account's latest bill, with the discount coverage window resolved per line.
--
-- Start/end dates come from bq_wln_discount_chrg_dtl when it has them; otherwise
-- they are parsed out of the bill item display name:
--   * "effective Mon DD, YYYY to Mon DD, YYYY" -> first date = start, second = end
--   * a name containing "discount end"         -> first date in the name = end
--
-- The regex quantifiers are single-braced ({3}, {2}, {4}). Doubled braces are
-- only correct if this text is first run through Python str.format; executed
-- as-is, RE2 does not read "{{3}}" as a repetition of the character class, so
-- the display-name fallback would never match.
-- safe.parse_date yields NULL instead of aborting the whole statement when a
-- matched token is not a real date (e.g. three letters that are not a month).
r1 as (
    select
        bill.ban,
        bill.max_bill_dt as latest_bill_dt,
        doc.bill_doc_id,
        doc.tot_inv_amt as bill_tot_inv_amt,
        doc.tot_tax_inv_amt as bill_tot_tax_amt,
        dtl.bill_chrg_dtl_id,
        dtl.bill_chrg_dtl_typ_cd,
        dtl.chrg_rev_cd,
        dtl.srvc_resrc_typ_cd,
        dtl.bill_itm_dsply_nm,
        case
            when disc.disc_prd_cvrg_start_dt is not null
                then date(disc.disc_prd_cvrg_start_dt)
            when regexp_contains(
                    dtl.bill_itm_dsply_nm,
                    r'effective [a-zA-Z]{3} [0-9]{2}, [0-9]{4} to [a-zA-Z]{3} [0-9]{2}, [0-9]{4}'
                 )
                -- occurrence 1 = the date right after "effective"
                then safe.parse_date(
                    "%b %d, %Y",
                    regexp_extract(dtl.bill_itm_dsply_nm, r'[a-zA-Z]{3} [0-9]{2}, [0-9]{4}', 1, 1)
                )
            else null
        end as discount_start_date,
        case
            when disc.disc_prd_cvrg_end_dt is not null
                then date(disc.disc_prd_cvrg_end_dt)
            when regexp_contains(
                    dtl.bill_itm_dsply_nm,
                    r'effective [a-zA-Z]{3} [0-9]{2}, [0-9]{4} to [a-zA-Z]{3} [0-9]{2}, [0-9]{4}'
                 )
                -- occurrence 2 = the date after "to"
                then safe.parse_date(
                    "%b %d, %Y",
                    regexp_extract(dtl.bill_itm_dsply_nm, r'[a-zA-Z]{3} [0-9]{2}, [0-9]{4}', 1, 2)
                )
            when dtl.bill_itm_dsply_nm like '%discount end%'
                then safe.parse_date(
                    "%b %d, %Y",
                    regexp_extract(dtl.bill_itm_dsply_nm, r'[a-zA-Z]{3} [0-9]{2}, [0-9]{4}', 1, 1)
                )
            else null
        end as discount_end_date,
        dtl.chrg_typ_cd,
        dtl.net_chrg_amt
    from latest_bill as bill
    inner join bq_wln_ban_inv_sum_view as doc
        on doc.billg_acct_num = bill.ban
        and doc.bill_dt = bill.max_bill_dt
    inner join `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_chrg_dtl` as dtl
        on dtl.bill_doc_id = doc.bill_doc_id
    -- Optional: charge lines without a structured discount record are kept.
    left join `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_discount_chrg_dtl` as disc
        on disc.bill_chrg_dtl_id = dtl.bill_chrg_dtl_id
    where dtl.bill_itm_sum_lvl_cd = 'D'
      -- Qualified with dtl: this is the same column the select list returns.
      and dtl.chrg_typ_cd = 'DSC'
)

-- Materialise every r1 row. Lines whose start and end dates are both NULL are
-- kept on purpose; the disabled filter was
--   coalesce(discount_start_date, discount_end_date) is not null
select
    ban,
    latest_bill_dt,
    bill_doc_id,
    bill_tot_inv_amt,
    bill_tot_tax_amt,
    bill_chrg_dtl_id,
    bill_chrg_dtl_typ_cd,
    chrg_rev_cd,
    srvc_resrc_typ_cd,
    bill_itm_dsply_nm,
    discount_start_date,
    discount_end_date,
    chrg_typ_cd,
    net_chrg_amt
from r1
;


-- latest_bill_discount_dec_2023, second pass.
-- Rebuilds the table in place (it is both the source and the target), keeping
-- only accounts that also appear in discount_dec_2023 and replacing missing
-- discount start/end dates with the sentinel 9999-12-31.
--
-- The join is used purely as a filter: b contributes no columns, so an account
-- listed more than once in b would repeat every a-row. SELECT DISTINCT removes
-- those repeats; it is equivalent to grouping by all 14 output columns.
-- The sentinel is a typed DATE literal so COALESCE does not depend on implicit
-- string-to-date coercion.
create or replace table `divg-groovyhoon-pr-d2eab4.promo_expiry_analysis_q4.latest_bill_discount_dec_2023` as

select distinct
    a.ban,
    a.latest_bill_dt,
    a.bill_doc_id,
    a.bill_tot_inv_amt,
    a.bill_tot_tax_amt,
    a.bill_chrg_dtl_id,
    a.bill_chrg_dtl_typ_cd,
    a.chrg_rev_cd,
    a.srvc_resrc_typ_cd,
    a.bill_itm_dsply_nm,
    coalesce(a.discount_start_date, date '9999-12-31') as discount_start_date,
    coalesce(a.discount_end_date, date '9999-12-31') as discount_end_date,
    a.chrg_typ_cd,
    a.net_chrg_amt
from `divg-groovyhoon-pr-d2eab4.promo_expiry_analysis_q4.latest_bill_discount_dec_2023` as a
inner join `divg-groovyhoon-pr-d2eab4.promo_expiry_analysis_q4.discount_dec_2023` as b
    -- Both sides are cast to string so the comparison works whatever type each table stores ban as.
    on cast(a.ban as string) = cast(b.ban as string)
;




In [None]:
import os
import re
import google
from datetime import datetime
from google.oauth2 import credentials
from google.oauth2 import service_account
from google.oauth2.service_account import Credentials
import pandas as pd 
import numpy as np
from google.cloud import bigquery
from google.cloud import storage
import gc
import time

#`divg-josh-pr-d1cc3a.campaign_performance_analysis.contract_expiry_and_discounts` 

# Fully-qualified location of the BigQuery table to export.
project_id = 'divg-team-v03-pr-de558a'
dataset_id = 'hs_nba_capture_rate'
tbl_nm = 'nba_ffh_model_scores_new_dl'

# Destination object in GCS; the file is named after the exported table.
save_data_path = f'gs://divg-groovyhoon-pr-d2eab4-default/downloads/{tbl_nm}.csv.gz'

def get_gcp_bqclient(project_id, use_local_credential=True):
    """Create a BigQuery client bound to ``project_id``.

    Args:
        project_id: GCP project the client runs queries in.
        use_local_credential: When True (the default), authenticate with the
            access token printed by ``gcloud auth print-access-token``. When
            False, let ``bigquery.Client`` resolve credentials by itself.

    Returns:
        A ``bigquery.Client`` instance.

    Raises:
        RuntimeError: If a local credential was requested but gcloud printed
            no token (gcloud missing from PATH, or no active login).
    """
    if not use_local_credential:
        # No need to shell out to gcloud when the token is not going to be used.
        return bigquery.Client(project=project_id)

    # The context manager closes the pipe; strip() drops the trailing newline
    # gcloud prints after the token.
    with os.popen('gcloud auth print-access-token') as pipe:
        token = pipe.read().strip()
    if not token:
        raise RuntimeError(
            "'gcloud auth print-access-token' returned no token; "
            "run 'gcloud auth login' or pass use_local_credential=False"
        )

    creds = google.oauth2.credentials.Credentials(token)
    # Build the client once, directly with the token credentials, instead of
    # first constructing a default-credential client that is then thrown away
    # (and that fails on machines without application default credentials).
    return bigquery.Client(project=project_id, credentials=creds)

# Export the configured table: read all of it into a DataFrame, then write it
# to save_data_path as a gzip-compressed CSV.
client = get_gcp_bqclient(project_id)

data_set = f"{project_id}.{dataset_id}.{tbl_nm}"
sql = f'''SELECT * FROM `{data_set}`'''

# reset_index() turns the frame's index into a leading "index" column, so the
# CSV carries a row number even though to_csv is told not to write the index.
df_export = client.query(sql).to_dataframe().reset_index()
df_export.to_csv(save_data_path, index=False, compression='gzip')

print(f"{data_set} saved in {save_data_path}")


In [None]:
# Dates embedded in bill item display names look like "Nov 30, 2023".
_DISPLAY_DATE_RE = re.compile(r'[a-zA-Z]{3} [0-9]{2}, [0-9]{4}')
# "effective <date> to <date>": the second date is the discount end.
_EFFECTIVE_RANGE_RE = re.compile(
    r'effective [a-zA-Z]{3} [0-9]{2}, [0-9]{4} to [a-zA-Z]{3} [0-9]{2}, [0-9]{4}'
)


def left(s, amount):
    """Return the first ``amount`` characters of ``s``."""
    return s[:amount]


def _resolve_discount_end(item, fallback):
    """Work out the discount end date for one exported row.

    Args:
        item: The row's bill item display name; may be missing (None/NaN).
        fallback: The row's ``discount_end_date`` value, used when the display
            name carries no usable date. Only the first 10 characters of its
            string form are parsed, as ``YYYY-MM-DD``.

    Returns:
        A ``datetime.date``, or None when neither the display name nor the
        fallback yields a date.

    Raises:
        ValueError: If a matched token is not a real date (for example three
            letters that are not a month abbreviation).
    """
    if isinstance(item, str):
        found = _DISPLAY_DATE_RE.findall(item)
        # "discount end" wording: the first date in the name is the end date.
        if 'discount end' in item and found:
            return datetime.strptime(found[0], "%b %d, %Y").date()
        # A range always contains two dates, so found[1] exists here.
        if _EFFECTIVE_RANGE_RE.search(item):
            return datetime.strptime(found[1], "%b %d, %Y").date()
    if pd.isna(fallback):
        return None
    return datetime.strptime(left(str(fallback), 10), "%Y-%m-%d").date()


bill_itm_dsply_nm = df_export['bill_itm_dsply_nm'].to_list()
discount_end_date = df_export['discount_end_date'].to_list()
discount_end_dt = [
    _resolve_discount_end(item, fallback)
    for item, fallback in zip(bill_itm_dsply_nm, discount_end_date)
]

# Kept as a standalone frame in case later cells reference it.
df_discount_end_dt = pd.DataFrame(discount_end_dt, columns=['df_discount_end_dt'])

# Assign positionally from the list; the values are already in row order.
df_export['df_discount_end_dt'] = discount_end_dt

# drop=True renumbers the rows without adding another index column to the
# export; a plain reset_index() here adds "level_0" and raises ValueError when
# the cell is run a second time.
df_export = df_export.reset_index(drop=True)
# Compression is spelled out to match the first export to this .csv.gz path.
df_export.to_csv(save_data_path, index=False, compression='gzip')

print(f"{data_set} saved in {save_data_path}")


In [None]:
from datetime import datetime

# Spot check: take the display name 5000 rows from the end of the export, pull
# the first "Mon DD, YYYY" token out of it and print it as a date.
sample_item = df_export['bill_itm_dsply_nm'].to_list()[-5000]
dt = re.search(r'[a-zA-Z]{3} [0-9]{2}, [0-9]{4}', sample_item)[0]

parsed_dt = datetime.strptime(dt, "%b %d, %Y").date()
print(parsed_dt)
