-- Builds pik_tv.hsia_discounts: for each billing account's latest bill dated
-- between 2023-09-01 and 2023-09-30, the charge-detail lines with a net charge
-- of -20 whose display name contains "unlimited", plus the discount coverage
-- period taken from the discount table or, failing that, parsed from the
-- display name.
--
-- NOTE(review): the regex quantifiers are written with single braces ({3}),
-- which is what BigQuery (RE2) needs when this statement is run as-is. If this
-- text is ever rendered through Python str.format, the braces must be doubled.
CREATE OR REPLACE TABLE pik_tv.hsia_discounts AS

WITH latest_bill AS (
    -- Most recent bill date per billing account inside the date window.
    SELECT
        billg_acct_num AS ban,
        MAX(bill_dt) AS max_bill_dt
    FROM `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_ban_inv_sum_view`
    WHERE bill_dt <= "2023-09-30"
        AND bill_dt >= "2023-09-01"
    GROUP BY billg_acct_num
)

SELECT
    bill.ban,
    bill.max_bill_dt AS latest_bill_dt,
    doc.bill_doc_id,
    doc.tot_inv_amt AS bill_tot_inv_amt,
    doc.tot_tax_inv_amt AS bill_tot_tax_amt,
    dtl.bill_chrg_dtl_id,
    dtl.bill_chrg_dtl_typ_cd,
    dtl.chrg_rev_cd,
    dtl.srvc_resrc_typ_cd,
    dtl.bill_itm_dsply_nm,
    -- Start date: prefer the discount table; otherwise take the first date of
    -- an "effective <Mon DD, YYYY> to <Mon DD, YYYY>" phrase in the display name.
    -- SAFE.PARSE_DATE yields NULL instead of failing the whole statement when
    -- the matched text is not a real calendar date.
    CASE
        WHEN disc.disc_prd_cvrg_start_dt IS NOT NULL
            THEN DATE(disc.disc_prd_cvrg_start_dt)
        WHEN REGEXP_CONTAINS(dtl.bill_itm_dsply_nm, r'effective [a-zA-Z]{3} [0-9]{2}, [0-9]{4} to [a-zA-Z]{3} [0-9]{2}, [0-9]{4}')
            THEN SAFE.PARSE_DATE("%b %d, %Y", REGEXP_EXTRACT(dtl.bill_itm_dsply_nm, r'[a-zA-Z]{3} [0-9]{2}, [0-9]{4}', 1, 1))
        ELSE NULL
    END AS discount_start_date,
    -- End date: prefer the discount table; otherwise the second date of the
    -- "effective ... to ..." phrase; otherwise the first date found in a
    -- display name containing "discount end".
    CASE
        WHEN disc.disc_prd_cvrg_end_dt IS NOT NULL
            THEN DATE(disc.disc_prd_cvrg_end_dt)
        WHEN REGEXP_CONTAINS(dtl.bill_itm_dsply_nm, r'effective [a-zA-Z]{3} [0-9]{2}, [0-9]{4} to [a-zA-Z]{3} [0-9]{2}, [0-9]{4}')
            THEN SAFE.PARSE_DATE("%b %d, %Y", REGEXP_EXTRACT(dtl.bill_itm_dsply_nm, r'[a-zA-Z]{3} [0-9]{2}, [0-9]{4}', 1, 2))
        WHEN dtl.bill_itm_dsply_nm LIKE '%discount end%'
            THEN SAFE.PARSE_DATE("%b %d, %Y", REGEXP_EXTRACT(dtl.bill_itm_dsply_nm, r'[a-zA-Z]{3} [0-9]{2}, [0-9]{4}', 1, 1))
        ELSE NULL
    END AS discount_end_date,
    dtl.chrg_typ_cd,
    dtl.net_chrg_amt
FROM latest_bill AS bill
-- Bill document(s) issued on the account's latest bill date.
INNER JOIN `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_ban_inv_sum_view` AS doc
    ON doc.billg_acct_num = bill.ban
    AND doc.bill_dt = bill.max_bill_dt
INNER JOIN `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_chrg_dtl` AS dtl
    ON dtl.bill_doc_id = doc.bill_doc_id
-- LEFT JOIN: charge lines without a discount row are kept and fall back to the
-- display-name parsing above.
LEFT JOIN `cio-datahub-enterprise-pr-183a.ent_cust_bill.bq_wln_discount_chrg_dtl` AS disc
    ON disc.bill_chrg_dtl_id = dtl.bill_chrg_dtl_id
-- NOTE(review): 'D' presumably selects detail-level lines -- confirm.
WHERE dtl.bill_itm_sum_lvl_cd = 'D'
    AND dtl.net_chrg_amt = -20
    AND CONTAINS_SUBSTR(LOWER(dtl.bill_itm_dsply_nm), 'unlimited')



In [None]:
import os
import re
import google
from google.oauth2 import credentials
from google.oauth2 import service_account
from google.oauth2.service_account import Credentials
import pandas as pd 
import numpy as np
from google.cloud import bigquery
from google.cloud import storage
import gc
import time
from datetime import datetime

#`divg-josh-pr-d1cc3a.campaign_performance_analysis.contract_expiry_and_discounts` 

# BigQuery coordinates (table, dataset, project) of the table exported below.
tbl_nm = 'hsia_discounts'
dataset_id = 'pik_tv'
project_id = 'divg-josh-pr-d1cc3a'

# gs:// destination for the exported CSV; the table name fills the `{}` slot.
save_data_path = str.format('gs://divg-josh-pr-d1cc3a-default/downloads/{}_parsed.csv', tbl_nm)

def get_gcp_bqclient(project_id, use_local_credential=True):
    """Create a BigQuery client for ``project_id``.

    Args:
        project_id: Project passed to ``bigquery.Client``.
        use_local_credential: When true (default), authenticate with the access
            token printed by ``gcloud auth print-access-token``. When false,
            build the client without explicit credentials and let the library
            resolve them itself.

    Returns:
        A ``bigquery.Client`` instance.

    Raises:
        RuntimeError: If a local credential was requested but ``gcloud``
            produced no token (not installed, or not logged in).
    """
    if not use_local_credential:
        # No token needed: skip the gcloud subprocess entirely.
        return bigquery.Client(project=project_id)

    # The context manager closes the pipe once the token has been read.
    with os.popen('gcloud auth print-access-token') as pipe:
        # strip() also removes a trailing "\r\n", not just a single "\n".
        token = pipe.read().strip()

    if not token:
        raise RuntimeError(
            "'gcloud auth print-access-token' returned no token; "
            "check that gcloud is installed and authenticated"
        )

    user_credentials = google.oauth2.credentials.Credentials(token)
    # Build exactly one client; no default-credential client is created first.
    return bigquery.Client(project=project_id, credentials=user_credentials)

# BigQuery client for `project_id`, built by the helper defined above.
client = get_gcp_bqclient(project_id)

# Fully-qualified `project.dataset.table` name, back-ticked inside the query text.
data_set = f"{project_id}.{dataset_id}.{tbl_nm}"
sql = '''SELECT * FROM `{data_set}`'''.format(data_set=data_set)

# Run the query, then pull the whole result set into a pandas DataFrame.
query_job = client.query(sql)
df_export = query_job.to_dataframe()

# Final expression of the cell: the first rows, as a quick look at the data.
df_export.head()


In [None]:
# Pull the display-name and end-date columns out of the frame as plain lists,
# and start with an empty list for the re-parsed end dates.
_source_cols = ('bill_itm_dsply_nm', 'discount_end_date')
bill_itm_dsply_nm, discount_end_date = (df_export[col].tolist() for col in _source_cols)
discount_end_dt = list()

def left(s, amount):
    """Return ``s[:amount]``: the leading slice of ``s`` that ends before index ``amount``."""
    leading = slice(None, amount)
    return s[leading]

# Dates appear in the display name as e.g. "Sep 30, 2023". Compiled once here
# instead of being re-parsed twice per row.
_DATE_RE = re.compile(r'[a-zA-Z]{3} [0-9]{2}, [0-9]{4}')
_DATE_RANGE_RE = re.compile(
    r'effective [a-zA-Z]{3} [0-9]{2}, [0-9]{4} to [a-zA-Z]{3} [0-9]{2}, [0-9]{4}'
)


def _parse_discount_end(item):
    """Return the discount end date found in one display name, or "" if none.

    Precedence:
      1. the name contains 'discount end' and at least one date -> first date;
      2. the name contains 'effective <date> to <date>' -> second date found;
      3. otherwise "" (also used for non-string values such as None/NaN).
    """
    if not isinstance(item, str):
        return ""
    found = _DATE_RE.findall(item)
    if 'discount end' in item and found:
        raw = found[0]
    elif _DATE_RANGE_RE.search(item):
        raw = found[1]
    else:
        # discount_end_dt.append(datetime.strptime(left(str(discount_end_date[idx]), 10), "%Y-%m-%d").date())
        return ""
    return datetime.strptime(raw, "%b %d, %Y").date()


discount_end_dt = [_parse_discount_end(item) for item in bill_itm_dsply_nm]

df_discount_end_dt = pd.DataFrame(discount_end_dt, columns=['df_discount_end_dt'])

# Assign the list itself: it is already in row order, so the new column does
# not depend on df_export's index lining up with a fresh 0..n-1 index.
df_export['df_discount_end_dt'] = discount_end_dt

# reset_index() materialises the row index as an "index" column in the CSV.
# Guarded so that re-running this cell does not add a "level_0" column (and
# then fail on the following run because "level_0" already exists).
if 'index' not in df_export.columns:
    df_export = df_export.reset_index()
df_export.to_csv(save_data_path, index=False)

print(f"{data_set} saved in {save_data_path}")


In [None]:
from datetime import datetime

# Spot-check the date parsing on one sample row: the 5000th row from the end
# when the export has that many rows, otherwise the first row.
sample_names = df_export['bill_itm_dsply_nm'].to_list()

if sample_names:
    sample_item = sample_names[-min(5000, len(sample_names))]
    # str() keeps re.search from raising on a non-string value (None/NaN).
    sample_match = re.search(r'[a-zA-Z]{3} [0-9]{2}, [0-9]{4}', str(sample_item))
    if sample_match:
        dt = sample_match[0]
        print(datetime.strptime(dt, "%b %d, %Y").date())
    else:
        print(f"no date found in sample item: {sample_item!r}")
else:
    print("df_export has no rows; nothing to spot-check")
