# * Adhoc : Target

## Parameter

In [168]:
# import os
# import glob
import configparser
import datetime as dt
import pandas as pd
import numpy as np
# import xlrd
import oracledb
import re

# Connection settings live in an INI file outside the notebook so that no
# credentials are hard-coded here.
config_file = '../../my_config.ini'
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; an empty list therefore means "config not found".
# Fail here with a clear message instead of a KeyError on the first section lookup.
if not config.read(config_file):
    raise FileNotFoundError(f'Config file not found or unreadable : {config_file}')

# TDMDBPR connection settings
TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

# AKPIPRD connection settings (used by the load step)
AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

# Run date, as a date object and as a compact YYYYMMDD string
curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')
curr_dt

datetime.date(2024, 6, 20)

## ETL Process...

### Step 1 : Import Data Source

In [169]:
''' Rawdata '''

''' Target_Sales_DTAC_Y2024 '''
src_file = '../../data/interim/Target_Sales_DTAC_Y2024.xlsx'
src_sheet = 'Rawdata'

# Workbook column names -> names used by the area / period joins further down.
src_column_map = {
    'AREA_CD': 'H_AREA_KEY',
    'DATA_MONTH': 'MONTH_SHORT',
    'DATA_YEAR': 'TM_KEY_YR',
    'METRIC_VALUES': 'MTH_VALUE',
}
src_df = (
    pd.read_excel(src_file, sheet_name=src_sheet, index_col=None)
    .rename(columns=src_column_map)
)

src_row_count, src_col_count = src_df.shape
print(f'\nsrc_df : {src_row_count} rows, {src_col_count} columns')
src_df.tail(3)


src_df : 17292 rows, 11 columns


Unnamed: 0,DATA_DATE,MONTH_SHORT,TM_KEY_YR,METRIC_CD,METRIC_NAME,MTH_VALUE,COMP_CD,VERSION,H_AREA_KEY,ARE_DESC,AREA_TYPE
17289,20240523,MAY,2024,DB0R000100,Total Revenue : DTAC,92373340.0,DTAC,T,84Z,SURAT THANI,H
17290,20240523,MAY,2024,DB0R000100,Total Revenue : DTAC,70892380.0,DTAC,T,93X,"TRANG, SATUN, PHATTHALUNG",H
17291,20240523,MAY,2024,DB0R000100,Total Revenue : DTAC,,DTAC,T,,True corp,H


In [170]:
''' Master Data '''


''' DIM_TIME '''
# Time dimension; joined to the source on (TM_KEY_YR, MONTH_SHORT) in the merge step.
dt_file = '../CFW/data/dim_time.csv'
dt_cols = ['TM_KEY_YR', 'MONTH_SHORT', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'DAYS_IN_MONTH']
dt_df = pd.read_csv(dt_file, usecols=dt_cols)
# Upper-case the month abbreviation so it matches the source's MONTH_SHORT values (e.g. 'MAY').
# The vectorised .str accessor also leaves missing values alone instead of raising.
dt_df['MONTH_SHORT'] = dt_df['MONTH_SHORT'].str.upper()


''' DIM_MOOC_AREA '''
mooc_file = '../CFW/data/dim_mooc_area.csv'
mooc_cols = ['ZONE_TYPE', 'ORGID_G', 'TDS_SGMD', 'ORGID_R', 'TDS_RGM_CODE', 'ORGID_H', 'HOP_HINT', 'TDS_PROVINCE', 'PROVINCE_ENG', 'PROVINCE_TH', 'ORGID_HH', 'D_CLUSTER', 'CCAATT', 'REMARK']
mooc_df = pd.read_csv(mooc_file, usecols=mooc_cols)


def _to_h_area_key(orgid_h):
    """Build the H-level join key for a single ORGID_H value.

    Values containing at least one ASCII letter are returned unchanged; all
    other values are converted with ``int()`` (so a zero-padded code such as
    '04' becomes 4).

    Raises:
        ValueError: if the value has no letter and is not a valid integer literal.
    """
    return orgid_h if re.search(r'[a-z]', orgid_h, re.I) else int(orgid_h)


# Create H level
is_real_hop = (mooc_df['REMARK'] != 'Dummy') & (mooc_df['HOP_HINT'] != 'True Corp')
mooc_h_df = (
    mooc_df
    .loc[is_real_hop, ['ZONE_TYPE', 'ORGID_G', 'TDS_SGMD', 'ORGID_H', 'HOP_HINT']]
    .drop_duplicates()
    # A row without ORGID_H cannot be keyed (and would make the regex helper raise
    # TypeError); this also removes fully empty rows.
    .dropna(subset=['ORGID_H'])
    .assign(H_AREA_KEY=lambda hop_df: hop_df['ORGID_H'].apply(_to_h_area_key))
)

In [171]:
''' Example DataFrame '''

# Spot-check of the H-level master data: show the last few areas whose ORGID_H
# starts with '0', next to the H_AREA_KEY derived from them.
# (Swap in src_df / dt_df / mooc_df / mooc_h_df with .tail(3) to preview the other frames.)
orgid_starts_with_zero = mooc_h_df['ORGID_H'].str.startswith('0')
mooc_h_df.loc[orgid_starts_with_zero].tail(3)

Unnamed: 0,ZONE_TYPE,ORGID_G,TDS_SGMD,ORGID_H,HOP_HINT,H_AREA_KEY
310,BMA,GX3,Retail Management & Regional Management 3 (East),4,"SMP : Mueang Samut Prakan, Phra Pradaeng, Phra...",4
312,BMA,GX3,Retail Management & Regional Management 3 (East),3,"SMP : Bang Bo, Bang Sao Thong, Bang Phli",3
320,BMA,GX1,Deputy CGO & Regional Management 1 (BMA-West),5,"NTB : Pak Kret, Bang Bua Thong, Sai Noi",5


### Step 2 : Aggregate Data

In [172]:
'''
    DB1R000900	Prepaid Inflow M1 : DTAC
    DB1S000101	Prepaid Gross Adds : DTAC
    DB2R000500	Postpaid Inflow M1 : DTAC
    DB2S000100	Postpaid Gross Adds : DTAC
'''

''' Filter '''
# Two conditions on METRIC_CD, both required:
#   1. the code starts with one of the four metric families listed above;
#   2. the code ends in a digit, or in a digit followed by a two-letter suffix AA..AK.
metric_cd = src_df['METRIC_CD']
in_target_family = metric_cd.str.contains('^DB1R000900|^DB1S000101|^DB2R000500|^DB2S000100')
has_allowed_ending = metric_cd.str.contains('[0-9]$|[0-9]A[A-K]$')
raw_df = src_df.loc[in_target_family & has_allowed_ending]

''' Data Test '''
# Ad-hoc narrowing filters for debugging; enable one at a time when needed.
# raw_df = raw_df[raw_df['METRIC_CD'].str.contains('DB1R000900|DB1S000101|DB2R000500|DB2S000100')]
# raw_df = raw_df.loc[raw_df['MONTH_SHORT']=='MAY']
# raw_df = raw_df.loc[raw_df['METRIC_CD']=='DB1S000101']
# raw_df = raw_df.loc[raw_df['H_AREA_KEY'].isna()]

# Renumber rows 0..n-1 after filtering.
raw_df = raw_df.reset_index(drop=True)
raw_df

Unnamed: 0,DATA_DATE,MONTH_SHORT,TM_KEY_YR,METRIC_CD,METRIC_NAME,MTH_VALUE,COMP_CD,VERSION,H_AREA_KEY,ARE_DESC,AREA_TYPE
0,20240523,JAN,2024,DB1R000900,Prepaid Inflow M1 : DTAC,4194780.0,DTAC,T,50,"BKK : Bang Khun Thian, Chom Thong, Bang Bon",H
1,20240523,JAN,2024,DB1R000900,Prepaid Inflow M1 : DTAC,397642.0,DTAC,T,16,"BKK : Bangkok Yai, Bangkok Noi, Bang Phlat",H
2,20240523,JAN,2024,DB1R000900,Prepaid Inflow M1 : DTAC,3269671.0,DTAC,T,40,"BKK : Taling Chan, Phasi Charoen, Thawi Wattha...",H
3,20240523,JAN,2024,DB1R000900,Prepaid Inflow M1 : DTAC,961776.0,DTAC,T,49,"BKK : Thon Buri, Khlong San, Rat Burana, Thung...",H
4,20240523,JAN,2024,DB1R000900,Prepaid Inflow M1 : DTAC,2145895.0,DTAC,T,202,"NTB : Bang Kruai, Bang Yai",H
...,...,...,...,...,...,...,...,...,...,...,...
15835,20240523,MAY,2024,DB2S000100AK,Postpaid Gross Adds : DTAC : Wholesales,,DTAC,T,86X,"RANONG, CHUMPHON",H
15836,20240523,MAY,2024,DB2S000100AK,Postpaid Gross Adds : DTAC : Wholesales,,DTAC,T,90Z,SONGKHLA,H
15837,20240523,MAY,2024,DB2S000100AK,Postpaid Gross Adds : DTAC : Wholesales,,DTAC,T,84Z,SURAT THANI,H
15838,20240523,MAY,2024,DB2S000100AK,Postpaid Gross Adds : DTAC : Wholesales,,DTAC,T,93X,"TRANG, SATUN, PHATTHALUNG",H


In [173]:
''' Merge Data '''

# Join Area
merge_df1 = pd.merge(raw_df, mooc_h_df, how='left', on='H_AREA_KEY')
# A left join against a lookup must not add rows. Extra rows mean H_AREA_KEY is
# duplicated in mooc_h_df, and every duplicated target would then be summed twice
# in the aggregates.
if len(merge_df1) != len(raw_df):
    raise ValueError(
        f'Area join changed the row count ({len(raw_df)} -> {len(merge_df1)}) : '
        'H_AREA_KEY is not unique in mooc_h_df'
    )

# Join Period
# One source row per month is matched to every dim_time row of that year/month,
# so the row count is expected to grow here.
merge_df2 = pd.merge(merge_df1, dt_df, how='left', on=['TM_KEY_YR', 'MONTH_SHORT'])
# Rows without a dim_time match have no TM_KEY_DAY / DAYS_IN_MONTH; groupby would
# silently drop them from every aggregate level, so stop instead.
no_period_match = merge_df2['DAYS_IN_MONTH'].isna()
if no_period_match.any():
    missing_periods = (
        merge_df2.loc[no_period_match, ['TM_KEY_YR', 'MONTH_SHORT']]
        .drop_duplicates()
        .to_dict('records')
    )
    raise ValueError(f'No dim_time rows for period(s) : {missing_periods}')

# Prep Aggregate
# NOTE: prep_agg_df is the same object as merge_df2, so DAY_VALUE is added to both names.
prep_agg_df = merge_df2
# Spread the monthly value evenly over the days of its month.
prep_agg_df['DAY_VALUE'] = prep_agg_df['MTH_VALUE'] / prep_agg_df['DAYS_IN_MONTH']
prep_agg_df.tail(3)

Unnamed: 0,DATA_DATE,MONTH_SHORT,TM_KEY_YR,METRIC_CD,METRIC_NAME,MTH_VALUE,COMP_CD,VERSION,H_AREA_KEY,ARE_DESC,...,ZONE_TYPE,ORGID_G,TDS_SGMD,ORGID_H,HOP_HINT,TM_KEY_DAY,DAYS_IN_MONTH,TRUE_TM_KEY_WK,TM_KEY_MTH,DAY_VALUE
481533,20240523,MAY,2024,DB2S000100AK,Postpaid Gross Adds : DTAC : Wholesales,,DTAC,T,,True corp,...,,,,,,20240529,31,2024022,202405,
481534,20240523,MAY,2024,DB2S000100AK,Postpaid Gross Adds : DTAC : Wholesales,,DTAC,T,,True corp,...,,,,,,20240530,31,2024022,202405,
481535,20240523,MAY,2024,DB2S000100AK,Postpaid Gross Adds : DTAC : Wholesales,,DTAC,T,,True corp,...,,,,,,20240531,31,2024022,202405,


In [174]:
''' Aggregate P, G, H level '''


# Output column order; FREQUENCY and REMARK are appended after the levels are concatenated.
agg_cols = ['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'DAY_VALUE', 'MTH_VALUE'] # , 'FREQUENCY', 'REMARK'

# Grouping keys shared by every area level.
agg_base_keys = ['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION']


def _aggregate_level(detail_df, area_no, area_type, area_cols=None, area_consts=None):
    """Sum MTH_VALUE and DAY_VALUE for one area level.

    Args:
        detail_df: frame holding ``agg_base_keys``, MTH_VALUE, DAY_VALUE and any
            columns named in ``area_cols``.
        area_no: value written to AREA_NO.
        area_type: value written to AREA_TYPE.
        area_cols: mapping ``{source column: output column}`` of extra grouping
            columns (e.g. ``{'ORGID_G': 'AREA_CD', 'TDS_SGMD': 'AREA_NAME'}``).
        area_consts: mapping ``{output column: constant}`` for levels whose area
            code/name are fixed values rather than grouping columns.

    Returns:
        A new DataFrame with exactly the columns in ``agg_cols``, in that order.

    Notes:
        groupby's default ``dropna=True`` excludes rows whose grouping columns are
        NaN, so rows that found no match in the area join do not appear in levels
        grouped by area columns. ``sum`` skips NaN, and a group containing only
        NaN values sums to 0.
    """
    area_cols = area_cols or {}
    level_df = (
        detail_df
        .groupby(agg_base_keys + list(area_cols))
        .agg({'MTH_VALUE': 'sum', 'DAY_VALUE': 'sum'})
        .reset_index()
        .rename(columns=area_cols)
        .assign(AREA_NO=area_no, AREA_TYPE=area_type, **(area_consts or {}))
    )
    return level_df.loc[:, agg_cols]


# P : Nationwide (no area grouping)
agg_p_df = _aggregate_level(prep_agg_df, 1, 'P', area_consts={'AREA_CD': 'P', 'AREA_NAME': 'Nationwide'})

# G : grouped by ORGID_G / TDS_SGMD
agg_g_df = _aggregate_level(prep_agg_df, 2, 'G', area_cols={'ORGID_G': 'AREA_CD', 'TDS_SGMD': 'AREA_NAME'})

# H : grouped by ORGID_H / HOP_HINT
agg_h_df = _aggregate_level(prep_agg_df, 3, 'H', area_cols={'ORGID_H': 'AREA_CD', 'HOP_HINT': 'AREA_NAME'})

# Concat DataFrame
last_agg_all_df = pd.concat([agg_p_df, agg_g_df, agg_h_df], ignore_index=True)
last_agg_all_df['FREQUENCY'] = 'Daily'
last_agg_all_df['REMARK'] = 'Sales target DTAC Y2024(Jan-May) : as of 23-May-24(K.Voraphon)'
last_agg_all_df

# last_agg_all_df.loc[last_agg_all_df['TM_KEY_DAY']==20240501]

Unnamed: 0,TM_KEY_YR,TM_KEY_MTH,TRUE_TM_KEY_WK,TM_KEY_DAY,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,DAY_VALUE,MTH_VALUE,FREQUENCY,REMARK
0,2024,202401,2024001,20240101,DB1R000900,Prepaid Inflow M1 : DTAC,DTAC,T,1,P,P,Nationwide,9.105346e+06,2.822657e+08,Daily,Sales target DTAC Y2024(Jan-May) : as of 23-Ma...
1,2024,202401,2024001,20240101,DB1R000900AA,Prepaid Inflow M1 : DTAC : Account Executive,DTAC,T,1,P,P,Nationwide,0.000000e+00,0.000000e+00,Daily,Sales target DTAC Y2024(Jan-May) : as of 23-Ma...
2,2024,202401,2024001,20240101,DB1R000900AB,Prepaid Inflow M1 : DTAC : B2B,DTAC,T,1,P,P,Nationwide,0.000000e+00,0.000000e+00,Daily,Sales target DTAC Y2024(Jan-May) : as of 23-Ma...
3,2024,202401,2024001,20240101,DB1R000900AC,Prepaid Inflow M1 : DTAC : Branded Retail,DTAC,T,1,P,P,Nationwide,1.301623e+06,4.035030e+07,Daily,Sales target DTAC Y2024(Jan-May) : as of 23-Ma...
4,2024,202401,2024001,20240101,DB1R000900AD,Prepaid Inflow M1 : DTAC : Contact Center,DTAC,T,1,P,P,Nationwide,3.345988e+05,1.037256e+07,Daily,Sales target DTAC Y2024(Jan-May) : as of 23-Ma...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
539899,2024,202405,2024022,20240531,DB2S000100AK,Postpaid Gross Adds : DTAC : Wholesales,DTAC,T,3,H,84Z,SURAT THANI,0.000000e+00,0.000000e+00,Daily,Sales target DTAC Y2024(Jan-May) : as of 23-Ma...
539900,2024,202405,2024022,20240531,DB2S000100AK,Postpaid Gross Adds : DTAC : Wholesales,DTAC,T,3,H,86X,"RANONG, CHUMPHON",0.000000e+00,0.000000e+00,Daily,Sales target DTAC Y2024(Jan-May) : as of 23-Ma...
539901,2024,202405,2024022,20240531,DB2S000100AK,Postpaid Gross Adds : DTAC : Wholesales,DTAC,T,3,H,90Z,SONGKHLA,0.000000e+00,0.000000e+00,Daily,Sales target DTAC Y2024(Jan-May) : as of 23-Ma...
539902,2024,202405,2024022,20240531,DB2S000100AK,Postpaid Gross Adds : DTAC : Wholesales,DTAC,T,3,H,93X,"TRANG, SATUN, PHATTHALUNG",0.000000e+00,0.000000e+00,Daily,Sales target DTAC Y2024(Jan-May) : as of 23-Ma...


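### Step 3 : Insert into "ADHOC_VINSIGHT_DATA"
    Delete -> Insert : delete the existing rows for the same month range and metric codes, then insert the new aggregate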

In [175]:
''' Test '''

# Optional debugging aid: uncomment the two lines below to keep only the
# AREA_TYPE == 'P' rows of last_agg_all_df before the load cells run.
# Leave them commented for a full load.
# last_agg_all_df = last_agg_all_df.loc[last_agg_all_df['AREA_TYPE']=='P']
# last_agg_all_df

' Test '

In [176]:
''' Input Parameter '''

# Without rows there is no month range or metric list to delete; stop with a
# clear message rather than failing inside min()/max().
if last_agg_all_df.empty:
    raise ValueError('last_agg_all_df is empty : nothing to load, DELETE statement not built')

# Scope of the refresh: every month and metric code present in the new data.
month_list = last_agg_all_df['TM_KEY_MTH'].drop_duplicates().tolist()
mt_cd_list = last_agg_all_df['METRIC_CD'].drop_duplicates().tolist()
mt_cd_list = tuple(mt_cd_list)

v_param = dict(mth_start=min(month_list), mth_end=max(month_list), metric_cd=mt_cd_list)
v_target_schema = 'AUTOKPI'
v_target_table = 'ADHOC_VINSIGHT_DATA'

# Render the IN-list explicitly instead of interpolating the tuple's repr:
#   - a one-element tuple prints as ('X',), which is not valid SQL;
#   - repr() switches to double quotes when a value contains a single quote.
# Each value is single-quoted, with embedded single quotes doubled.
metric_cd_sql = ', '.join("'" + str(cd).replace("'", "''") + "'" for cd in v_param['metric_cd'])

query_delete = f"""
    DELETE {v_target_schema}.{v_target_table}
    WHERE TM_KEY_MTH BETWEEN {v_param['mth_start']} AND {v_param['mth_end']}
    AND METRIC_CD IN ({metric_cd_sql})
"""

print(f"\nParameter...\n\n   -> TM_KEY_MTH BETWEEN {v_param['mth_start']} AND {v_param['mth_end']}\n   -> METRIC_CD IN {v_param['metric_cd']}")
print(f'\nDataFrame...\n\n   -> last_agg_all_df : {last_agg_all_df.shape[0]} rows, {last_agg_all_df.shape[1]} columns')
print(f'\nquery_delete...\n{query_delete}')


Parameter...

   -> TM_KEY_MTH BETWEEN 202401 AND 202405
   -> METRIC_CD IN ('DB1R000900', 'DB1R000900AA', 'DB1R000900AB', 'DB1R000900AC', 'DB1R000900AD', 'DB1R000900AE', 'DB1R000900AF', 'DB1R000900AG', 'DB1R000900AH', 'DB1R000900AI', 'DB1R000900AJ', 'DB1R000900AK', 'DB1S000101', 'DB1S000101AA', 'DB1S000101AB', 'DB1S000101AC', 'DB1S000101AD', 'DB1S000101AE', 'DB1S000101AF', 'DB1S000101AG', 'DB1S000101AH', 'DB1S000101AI', 'DB1S000101AJ', 'DB1S000101AK', 'DB2R000500', 'DB2R000500AA', 'DB2R000500AB', 'DB2R000500AC', 'DB2R000500AD', 'DB2R000500AE', 'DB2R000500AF', 'DB2R000500AG', 'DB2R000500AH', 'DB2R000500AI', 'DB2R000500AJ', 'DB2R000500AK', 'DB2S000100', 'DB2S000100AA', 'DB2S000100AB', 'DB2S000100AC', 'DB2S000100AD', 'DB2S000100AE', 'DB2S000100AF', 'DB2S000100AG', 'DB2S000100AH', 'DB2S000100AI', 'DB2S000100AJ', 'DB2S000100AK')

DataFrame...

   -> last_agg_all_df : 539904 rows, 16 columns

query_delete...

    DELETE AUTOKPI.ADHOC_VINSIGHT_DATA 
    WHERE TM_KEY_MTH BETWEEN 202401 AND 2

In [177]:
''' Load Data '''

job_start_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
print(f'\nJob Start... {job_start_datetime}')


# Create rows from DataFrame
# Select the columns explicitly so the positional binds (:1..:16) always line up
# with the INSERT column list, whatever the DataFrame's own column order is.
insert_cols = ['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'DAY_VALUE', 'MTH_VALUE', 'FREQUENCY', 'REMARK']
rows = list(last_agg_all_df[insert_cols].itertuples(index=False, name=None))

# Insert into the same schema-qualified table that query_delete targets, so the
# delete and the insert cannot end up on different objects.
insert_binds = ','.join(f':{pos}' for pos in range(1, len(insert_cols) + 1))
query_insert = f"""
        INSERT INTO {v_target_schema}.{v_target_table}
        ({', '.join(insert_cols)})
        VALUES ({insert_binds})
        """


# Connect : AKPIPRD
# Credentials are passed as keyword arguments rather than packed into a
# 'user/password@...' string, which breaks when the password contains '@' or '/'.
dsn = f'{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}'
conn = oracledb.connect(user=AKPIPRD_user, password=AKPIPRD_pwd, dsn=dsn)
print(f'\n{AKPIPRD_db} : Connected')
print(f'\nProcessing...')


job_ok = False
try:
    with conn.cursor() as cur:
        # Delete
        cur.execute(query_delete)
        print(f'\n   -> DELETE : "ADHOC_VINSIGHT_DATA" : Done !')

        # Insert
        cur.executemany(query_insert, rows)
        print(f'\n   -> INSERT : "ADHOC_VINSIGHT_DATA" : Done !')

    # Delete and insert are committed together, only after both succeeded.
    conn.commit()
    job_ok = True


except oracledb.DatabaseError as e:
    # Nothing has been committed at this point; the connection is closed below
    # without a commit (python-oracledb documents that as discarding the
    # uncommitted work -- TODO confirm for the installed version).
    print(f'\nError with Oracle : {e}')


finally:
    conn.close()
    print(f'\n{AKPIPRD_db} : Disconnected')
    # Report the real outcome instead of always claiming success.
    if job_ok:
        print(f'\nJob Done !!!')
    else:
        print(f'\nJob Failed !!! (nothing committed)')



Job Start... 2024-06-20, 11:38:29



AKPIPRD : Connected

Processing...

   -> DELETE : "ADHOC_VINSIGHT_DATA" : Done !

   -> INSERT : "ADHOC_VINSIGHT_DATA" : Done !

AKPIPRD : Disconnected

Job Done !!!
