In [22]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# The project root is whatever precedes the first 'notebooks' substring in the
# current working directory (the whole cwd if that substring is absent).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package below resolves.
sys.path.insert(0, str(pth_project))
# read_text() opens, reads and closes the config file in one call, so no file
# handle is left open for the lifetime of the kernel.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path tweak above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Notebook display options: show up to 100 rows when rendering a DataFrame
pd.options.display.max_rows = 100

In [25]:
# Inspect the schema of <project>.hsmsd_3c_rpt_dataset.bq_rpt_chnl_order_ffh_dtl

bq_client = connect_bq_services(d_project_config['gcp-project-name'])

project_id = 'bi-srv-hsmsd-3c-pr-ca2cd4'
dataset_name = 'hsmsd_3c_rpt_dataset'
table_name = 'bq_rpt_chnl_order_ffh_dtl'

# Construct the fully-qualified table ID once and reuse it for the lookup
table_id = f"{project_id}.{dataset_name}.{table_name}"

# Get the table
table = bq_client.get_table(table_id)

# Extract schema details: one entry per column, in schema order
column_names = [field.name for field in table.schema]
column_types = [field.field_type for field in table.schema]
# Columns without a description get a readable placeholder instead of None/''
column_descriptions = [field.description or 'No description' for field in table.schema]

# Create a pandas DataFrame with one row per column of the BigQuery table
schema_df = pd.DataFrame({
    'Column Name': column_names,
    'Data Type': column_types,
    'Description': column_descriptions
})


In [26]:
# Preview the first rows of schema_df (column name, data type, description)
schema_df.head()

Unnamed: 0,Column Name,Data Type,Description
0,acquired_from,STRING,Source: Service acquired from security company...
1,action_type,STRING,Derived: Order business item action (what is r...
2,activation_dt,DATE,Source: Date when product was activated (MT)
3,address,STRING,Derived: Service address
4,available_delivery_method,STRING,Source: available_delivery_method from bq_rdb_...


In [9]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Args:
        bq_client: Object exposing ``query(sql)`` whose return value has a
            ``to_dataframe()`` method.
        sql: Query text. When given, it is used and ``pth_query`` is ignored.
        pth_query: Object with a ``read_text()`` method returning the query
            text; only read when ``sql`` is None.

    Returns:
        Whatever ``to_dataframe()`` returns for the executed query.

    Raises:
        ValueError: If both ``sql`` and ``pth_query`` are None.
    """
    # Guard first: nothing is sent to the client when no query source is given.
    if sql is None and pth_query is None:
        raise ValueError('`sql` or `pth_query` should be set')

    # An explicit SQL string wins; the file is only touched as a fallback.
    query_text = sql if sql is not None else pth_query.read_text()
    return bq_client.query(query_text).to_dataframe()

In [10]:
# Load the input CSV (read relative to the notebook's working directory)
BAN_list_DF = pd.read_csv(
    'vendor_Channl_rep_required.csv'
)

In [11]:
# Preview the first rows of the loaded CSV
BAN_list_DF.head()

Unnamed: 0,BAN
0,124460494
1,124479535
2,124759271
3,124837557
4,125008141


In [12]:
# WRITE_TRUNCATE replaces the destination table's contents on every run, so
# re-running this cell does not append the same rows again.
config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
# config._properties['timePartitioning'] = {'field': 'Month_Year'}

Table_BQ = 'pras-pr-223186.pras_pr_dataset.Achint_BAN_Feb2024'

# load_table_from_dataframe only starts the load and hands back the job object.
bq_table_instance = bq_client.load_table_from_dataframe(
    BAN_list_DF,
    Table_BQ,
    job_config=config,
)
# Wait for the load to finish before any later cell queries the table;
# result() also raises if the job failed, instead of failing silently.
bq_table_instance.result()

In [15]:
# SQL that enriches the uploaded BAN list with order details.
#   original     - earliest processed 'Add' timestamp per SMHM product instance
#   details      - all processed 'Add' order items for SMHM products
#   channel_refs - channel org text taken from the row with the highest
#                  chnl_org_key per chnl_org_id
#   interim_data - details restricted to the earliest 'Add' per product
#                  instance, with channel text attached and rows tagged
#                  'UNKNOWN' removed
# The final select left-joins the BAN table to interim_data, so every uploaded
# BAN is kept even when it has no matching order.
# NOTE: `select *` returns BAN from both sides of the join; the second copy
# appears as `BAN_1` in the resulting DataFrame.
# The CTE has no sort clause: sorting inside a CTE does not determine the row
# order of the final joined result, so it would only add work.
Query = '''
with original as (
    /* Fetch original order date for the product instance */
    select
        min(dly_ord_itm_actvy_ts) as first_add,
        bus_prod_instnc_id
    from `cio-datahub-enterprise-pr-183a.ent_cust_ord_actvy.bq_dly_wln_ord_item_actvy`
    where ord_act_typ_cd = 'Add'
      and prod_typ_cd in ('SMHM')
      and ord_act_stat_cd = 'Processed'
      and bus_prod_instnc_id is not null
    group by bus_prod_instnc_id
)

, details as (
    select
        dly_ord_itm_actvy_ts,
        bus_billg_acct_num,
        bus_prod_instnc_id,
        prod_nm,
        munic_nm,
        prov_state_cd,
        bi_chnl_tag_cd,
        chnl_org_id,
        prod_typ_cd,
        src_sls_rep_cd,
        src_typ_cd,
        src_usr_chnl_txt,
        txn_sub_typ_txt,
        SLS_ACTVY_TXT
    from `cio-datahub-enterprise-pr-183a.ent_cust_ord_actvy.bq_dly_wln_ord_item_actvy`
    where ord_act_typ_cd = 'Add'
      and prod_typ_cd in ('SMHM')
      and ord_act_stat_cd = 'Processed'
)

/* Gather channel details */
, channel_refs as (
    with latest_update as (
        select
            chnl_org_id,
            max(chnl_org_key) as latest_key
        from `cio-datahub-enterprise-pr-183a.ent_sls_chnl.bq_channel_org_dim`
        group by chnl_org_id
    )
    select distinct
        t1.chnl_org_id,
        chnl_org_txt
    from `cio-datahub-enterprise-pr-183a.ent_sls_chnl.bq_channel_org_dim` t1
    inner join latest_update
        on latest_key = chnl_org_key
)

, interim_data as (
    select
        date(details.dly_ord_itm_actvy_ts) as Order_date,
        details.bus_billg_acct_num as BAN,
        details.bus_prod_instnc_id as Prod_Instnc_ID,
        details.prod_nm as Prod_nm,
        details.munic_nm,
        details.prov_state_cd,
        details.bi_chnl_tag_cd as Sales_Channel_tag,
        details.chnl_org_id,
        channel_refs.chnl_org_txt,
        details.prod_typ_cd as Prod_Type,
        details.src_sls_rep_cd as Sales_Agent_ID,
        details.src_typ_cd,
        details.src_usr_chnl_txt,
        details.txn_sub_typ_txt,
        details.SLS_ACTVY_TXT
    from details
    inner join original /* Inner join them to prevent duplicates */
        on original.bus_prod_instnc_id = details.bus_prod_instnc_id
        and original.first_add = details.dly_ord_itm_actvy_ts
    left join channel_refs /* left joining to get channel dealer names */
        on details.chnl_org_id = channel_refs.chnl_org_id
    where bi_chnl_tag_cd <> 'UNKNOWN'
)

select *
from `pras-pr-223186.pras_pr_dataset.Achint_BAN_Feb2024` a
left join interim_data b
    on a.BAN = b.BAN
'''

In [16]:
# Execute the SQL held in Query and collect the result as a DataFrame
BAN_sales_DF = extract_bq_data(
    bq_client,
    sql=Query,
)

In [17]:
# Column dtypes and non-null counts of the query result
BAN_sales_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18271 entries, 0 to 18270
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   BAN                18271 non-null  Int64 
 1   Order_date         15137 non-null  dbdate
 2   BAN_1              15137 non-null  Int64 
 3   Prod_Instnc_ID     15137 non-null  object
 4   Prod_nm            15137 non-null  object
 5   munic_nm           15137 non-null  object
 6   prov_state_cd      15137 non-null  object
 7   Sales_Channel_tag  15137 non-null  object
 8   chnl_org_id        15137 non-null  Int64 
 9   chnl_org_txt       15137 non-null  object
 10  Prod_Type          15137 non-null  object
 11  Sales_Agent_ID     15137 non-null  object
 12  src_typ_cd         15137 non-null  object
 13  src_usr_chnl_txt   15137 non-null  object
 14  txn_sub_typ_txt    14832 non-null  object
 15  SLS_ACTVY_TXT      14832 non-null  object
dtypes: Int64(3), dbdate(1), object(12)
memor

In [19]:
# Percentage of missing values per column: null count / row count * 100
(
    BAN_sales_DF
    .isna()
    .sum()
    .div(BAN_sales_DF.shape[0])
    .mul(100)
)

BAN                   0.000000
Order_date           17.152865
BAN_1                17.152865
Prod_Instnc_ID       17.152865
Prod_nm              17.152865
munic_nm             17.152865
prov_state_cd        17.152865
Sales_Channel_tag    17.152865
chnl_org_id          17.152865
chnl_org_txt         17.152865
Prod_Type            17.152865
Sales_Agent_ID       17.152865
src_typ_cd           17.152865
src_usr_chnl_txt     17.152865
txn_sub_typ_txt      18.822177
SLS_ACTVY_TXT        18.822177
dtype: float64

In [20]:
# Frequency of each Sales_Agent_ID value in the query result
BAN_sales_DF['Sales_Agent_ID'].value_counts()

Sales_Agent_ID
UKN        10443
digital       83
t904090       76
x223076       26
t889962       21
           ...  
x272845        1
x260768        1
x269813        1
x255920        1
x223612        1
Name: count, Length: 2040, dtype: int64

In [21]:
# Write the enriched result to CSV, without the DataFrame index column
BAN_sales_DF.to_csv(
    'BAN_Channel_rep.csv',
    index=False,
)