In [4]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
sys.path.insert(0, str(pth_project))
d_project_config = safe_load(pth_creds.open())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [5]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    if sql is not None:
        df = bq_client.query(sql).to_dataframe()
    elif pth_query is not None:
        sql = pth_query.read_text()
        df = bq_client.query(sql).to_dataframe()
    else:
        raise ValueError('`sql` or `pth_query` should be set')  
    return df

In [6]:
Sales_order_Query='''


with original
as 
(SELECT 
min(dly_ord_itm_actvy_ts) as first_add,
bus_prod_instnc_id
 FROM `cio-datahub-enterprise-pr-183a.ent_cust_ord_actvy.bq_dly_wln_ord_item_actvy` WHERE ord_act_typ_cd = 'Add' and prod_typ_cd in ('SMHM') and ord_act_stat_cd = 'Processed' and bus_prod_instnc_id is not null
 --and date(dly_ord_itm_actvy_ts) >= '2023-10-01' and date(dly_ord_itm_actvy_ts) < '2024-01-01'

 group by bus_prod_instnc_id) /* Fetch original order date for the product instance */



,BAN_list as (

SELECT CAST(BAN as STRING) as BAN_data
 --FROM `pras-pr-223186.pras_pr_dataset.Outbound_Robin` 
 FROM `pras-pr-223186.pras_pr_dataset.Outbound_Robin_08Mar2024`

)



,details as
(
SELECT 
dly_ord_itm_actvy_ts,
bus_billg_acct_num,
bus_prod_instnc_id,
prod_nm,
munic_nm,
prov_state_cd,
bi_chnl_tag_cd,
chnl_org_id,
prod_typ_cd,
src_sls_rep_cd,
src_typ_cd,
src_usr_chnl_txt,
txn_sub_typ_txt,
SLS_ACTVY_TXT
 FROM `cio-datahub-enterprise-pr-183a.ent_cust_ord_actvy.bq_dly_wln_ord_item_actvy` WHERE ord_act_typ_cd = 'Add' 
 --and prod_typ_cd in ('SMHM','HSIC','SING','TTV') 
 and prod_typ_cd in ('SMHM')
 and ord_act_stat_cd = 'Processed'
 --and date(dly_ord_itm_actvy_ts) >= '2023-10-01' and date(dly_ord_itm_actvy_ts) < '2024-01-01' 
 
 )
 
 
 
  /* Gather channel details */

,channel_refs as (


with
latest_update as (
SELECT
chnl_org_id,
max(chnl_org_key) as latest_key FROM `cio-datahub-enterprise-pr-183a.ent_sls_chnl.bq_channel_org_dim`
group by chnl_org_id

)

select distinct
t1.chnl_org_id,
chnl_org_txt from `cio-datahub-enterprise-pr-183a.ent_sls_chnl.bq_channel_org_dim` t1 inner join latest_update on latest_key = chnl_org_key

)

,Sales_Order_DF as (

select
CAST(details.bus_billg_acct_num as STRING) as BAN_sales_order,
date(details.dly_ord_itm_actvy_ts) as Order_date,
details.bus_billg_acct_num as BAN,
cast (details.bus_prod_instnc_id as Integer) as Prod_Instnc_ID,
details.prod_nm as SMHM_Plan_Name,
details.munic_nm as Municpality_Name,
details.prov_state_cd as State_Code,
details.bi_chnl_tag_cd as Sales_Channel_tag,
details.chnl_org_id,
channel_refs.chnl_org_txt as  Sales_Dealer_Name,
details.prod_typ_cd as Prod_Type,
details.src_sls_rep_cd as Sales_Agent_ID,
details.src_typ_cd,
details.src_usr_chnl_txt,
details.txn_sub_typ_txt,
details.SLS_ACTVY_TXT
from details inner join original on original.bus_prod_instnc_id = details.bus_prod_instnc_id and original.first_add = details.dly_ord_itm_actvy_ts  /* Inner join them to prevent duplicates */
 left join channel_refs on details.chnl_org_id = channel_refs.chnl_org_id /* left joining to get channel dealer names */
 where bi_chnl_tag_cd <> 'UNKNOWN' 
 order by details.dly_ord_itm_actvy_ts
)


,Sales_order_data_rpt as (



  SELECT
 bill_account_number  AS BAN_sales,
  reporting_dt AS order_created_date,
  activation_dt AS activation_date,
  platform,
  product_family,
  channel_group,
  sales_agent_id,
  current_sales_agent_id,
initial_sales_agent_id,
  sales_agent_nm,
  product_nm as Plan_name_1
FROM
  `bi-srv-hsmsd-3c-pr-ca2cd4.hsmsd_3c_rpt_dataset.bq_rpt_chnl_order_ffh_dtl_view`
WHERE
  is_gross_new_order = 1
  AND is_initial_order = 1
  AND product_family IN ('SMHM')

) 


select * 
from BAN_list a
left join Sales_Order_DF b
on a.BAN_data=b.BAN_sales_order 
left join Sales_order_data_rpt rpt
on a.BAN_data=rpt.BAN_sales

'''

In [7]:
DF_sales_Order=extract_bq_data(bq_client, sql=Sales_order_Query)

In [9]:
DF_sales_Order.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1546 entries, 0 to 1545
Data columns (total 28 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   BAN_data                1546 non-null   object
 1   BAN_sales_order         1463 non-null   object
 2   Order_date              1463 non-null   dbdate
 3   BAN                     1463 non-null   Int64 
 4   Prod_Instnc_ID          1463 non-null   Int64 
 5   SMHM_Plan_Name          1463 non-null   object
 6   Municpality_Name        1463 non-null   object
 7   State_Code              1463 non-null   object
 8   Sales_Channel_tag       1463 non-null   object
 9   chnl_org_id             1463 non-null   Int64 
 10  Sales_Dealer_Name       1463 non-null   object
 11  Prod_Type               1463 non-null   object
 12  Sales_Agent_ID          1463 non-null   object
 13  src_typ_cd              1463 non-null   object
 14  src_usr_chnl_txt        1463 non-null   object
 15  txn_

In [10]:
DF_sales_Order['Sales_Agent_ID'].value_counts()

Sales_Agent_ID
UKN        958
x220444     15
digital     13
x239242     12
x243263     10
          ... 
x275566      1
x179906      1
x270541      1
x270532      1
x260660      1
Name: count, Length: 270, dtype: int64

In [11]:
DF_sales_Order['Sales_Agent_ID'].replace('UKN', np.nan, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  DF_sales_Order['Sales_Agent_ID'].replace('UKN', np.nan, inplace=True)


In [12]:
# Columns in order of priority to fill the nulls
replacement_cols = ['Sales_Agent_ID', 'sales_agent_id_1', 'current_sales_agent_id','initial_sales_agent_id','sales_agent_nm']

# Create a new column 'E' that starts as all NaN
DF_sales_Order['Sales_Agent_Final'] = pd.NA

# Iteratively fill 'E' using 'fillna()' from columns A, B, and C
for col in replacement_cols:
    DF_sales_Order['Sales_Agent_Final'] = DF_sales_Order['Sales_Agent_Final'].fillna(DF_sales_Order[col])

DF_sales_Order['Sales_Agent_Final'].value_counts()

Sales_Agent_Final
x220444    15
J4HL       13
digital    13
KBPP       12
x239242    12
           ..
LRTW        1
x275314     1
LN45        1
X232814     1
LRTZ        1
Name: count, Length: 709, dtype: int64

In [13]:
DF_sales_Order['SMHM_Plan_Name'].value_counts()

SMHM_Plan_Name
Control Plus Video                                    504
Smart Automation Plus                                 401
Smart Camera                                          203
Secure Plus Video                                      87
Acquired Home Security with no ADC                     58
Acquired Home Security with ADC                        49
Secure                                                 42
Acquired Home Security: Non ADC, Unmonitored           41
ADT Home Security with ADC                             21
ADT Home Security with no ADC                          20
Acquired Home Security with ADC, Externally monito     18
Control                                                 8
LivingWell Companion Home - Cellular                    5
Medical Alert Pendant                                   3
Smart Automation                                        1
Smart Automation Plus (Automation Hub)                  1
Security Monitoring                                     1

In [14]:
DF_sales_Order['SMHM_Plan_Name_Final'] = DF_sales_Order['SMHM_Plan_Name'].fillna(DF_sales_Order['Plan_name_1'])
DF_sales_Order['SMHM_Plan_Name_Final'].value_counts()

SMHM_Plan_Name_Final
Control Plus Video                                    541
Smart Automation Plus                                 426
Smart Camera                                          212
Secure Plus Video                                      92
Acquired Home Security with no ADC                     58
Acquired Home Security with ADC                        49
Secure                                                 44
Acquired Home Security: Non ADC, Unmonitored           41
ADT Home Security with ADC                             21
ADT Home Security with no ADC                          20
Acquired Home Security with ADC, Externally monito     18
Control                                                 8
LivingWell Companion Home - Cellular                    5
Medical Alert Pendant                                   3
Smart Automation                                        1
Smart Automation Plus (Automation Hub)                  1
Security Monitoring                                

In [15]:
DF_sales_Order['Sales_Channel_tag'].value_counts()

Sales_Channel_tag
D2C                454
CORP_STORE         301
SYSTEM             205
CARE-WLN           201
CSS-MOB            151
CSS                 89
WEB                 25
DEALER              24
OTHER                6
NRS_REF              6
NATIONAL RETAIL      1
Name: count, dtype: int64

In [16]:
DF_sales_Order['SMHM_Channel_Final'] = DF_sales_Order['Sales_Channel_tag'].fillna(DF_sales_Order['channel_group'])
DF_sales_Order['SMHM_Channel_Final'].value_counts()

SMHM_Channel_Final
D2C                490
CORP_STORE         301
SYSTEM             205
CARE-WLN           201
CSS-MOB            151
CSS                 95
DEALER              26
WEB                 25
CORP STORES         24
CLIENT CARE         10
OTHER                6
NRS_REF              6
NATIONAL RETAIL      1
Name: count, dtype: int64

In [17]:
DF_sales_Order.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1546 entries, 0 to 1545
Data columns (total 31 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   BAN_data                1546 non-null   object
 1   BAN_sales_order         1463 non-null   object
 2   Order_date              1463 non-null   dbdate
 3   BAN                     1463 non-null   Int64 
 4   Prod_Instnc_ID          1463 non-null   Int64 
 5   SMHM_Plan_Name          1463 non-null   object
 6   Municpality_Name        1463 non-null   object
 7   State_Code              1463 non-null   object
 8   Sales_Channel_tag       1463 non-null   object
 9   chnl_org_id             1463 non-null   Int64 
 10  Sales_Dealer_Name       1463 non-null   object
 11  Prod_Type               1463 non-null   object
 12  Sales_Agent_ID          505 non-null    object
 13  src_typ_cd              1463 non-null   object
 14  src_usr_chnl_txt        1463 non-null   object
 15  txn_

In [18]:
DF_sales_Order.to_csv('Outbound_Robin_with_Sales_08Mar2024.csv',index=False)