# * Adhoc : DTAC Target

## Parameters

In [1]:
import os
import glob
import configparser
import datetime as dt
import pandas as pd
import numpy as np
import xlrd
import oracledb

In [2]:
# Load database connection settings from the shared INI file.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result is turned into an explicit
# error here instead of a confusing KeyError on the first section lookup below.
config_path = '../../my_config.ini'
config = configparser.ConfigParser()
if not config.read(config_path):
    raise FileNotFoundError(f'Config file not found or unreadable: {config_path}')

# Connection settings read from the [TDMDBPR] section
TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

# Connection settings read from the [AKPIPRD] section (used by the ETL steps below)
AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

# Run date, as a date object and as a YYYYMMDD string
curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')
curr_dt

datetime.date(2024, 6, 7)

## ETL Process

### Step 1 : Import Data Source (Excel)

In [3]:
# Source workbook. A raw string is used so the Windows backslashes are taken
# literally; in a normal string '\R', '\P', '\J', ... are invalid escape
# sequences (SyntaxWarning on Python 3.12+). The resulting path is unchanged.
# src_file = 'C:/Ruz/Pentaho/Jobs/Input/Target/ADHOC_RAW_DTAC_SALE_TARGET_AREA.xlsx'
src_file = r'C:\Ruz\Pentaho\Jobs\Input\Target\ADHOC_RAW_DTAC_SALE_TARGET_AREA.xlsx'

# cols = ['USE_FLAG', 'TM_KEY_MTH', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'METRIC_VALUE', 'COMP_CD', 'ORGID_P', 'ZONE_TYPE', 'ORGID_G', 'TDS_SGMD', 'ORGID_H', 'HOP_HINT', 'ORGID_HH', 'D_CLUSTER', 'PROVINCE_ENG', 'REMARK']

# Read columns N:AD of the 'DTAC_Sales_Target' sheet, skipping the first two rows.
src_df = pd.read_excel(src_file, sheet_name='DTAC_Sales_Target', skiprows=2, usecols='N:AD', index_col=None)

# Keep only the rows flagged for use.
src_df = src_df[src_df['USE_FLAG'] == 'Y']

# Turn NaN into None so missing cells are bound as NULL when the rows are
# inserted later. The dict form is used because the scalar form
# replace(np.nan, None) has not behaved consistently across pandas versions.
src_df = src_df.replace({np.nan: None})
src_df.head(3)

Unnamed: 0,USE_FLAG,TM_KEY_MTH,TM_KEY_DAY,METRIC_CD,METRIC_NAME,METRIC_VALUE,COMP_CD,ORGID_P,ZONE_TYPE,ORGID_G,TDS_SGMD,ORGID_H,HOP_HINT,ORGID_HH,D_CLUSTER,PROVINCE_ENG,REMARK
0,Y,202401,,DB1R000900,Prepaid Inflow M1 : DTAC,4194780.0,DTAC,P,BMA,GX1,Deputy CGO & Regional Management 1 (BMA-West),50,"BKK : Bang Khun Thian, Chom Thong, Bang Bon",,,,DTAC Sales target Jan-May 2024(Ref : K.Voraphon)
1,Y,202401,,DB1R000900,Prepaid Inflow M1 : DTAC,397642.0,DTAC,P,BMA,GX1,Deputy CGO & Regional Management 1 (BMA-West),16,"BKK : Bangkok Yai, Bangkok Noi, Bang Phlat",,,,DTAC Sales target Jan-May 2024(Ref : K.Voraphon)
2,Y,202401,,DB1R000900,Prepaid Inflow M1 : DTAC,3269671.0,DTAC,P,BMA,GX1,Deputy CGO & Regional Management 1 (BMA-West),40,"BKK : Taling Chan, Phasi Charoen, Thawi Wattha...",,,,DTAC Sales target Jan-May 2024(Ref : K.Voraphon)


In [4]:
# Exploration helpers kept for reference:
#   null count per column -> src_df.apply(lambda x: sum(x.isnull()), axis=0)
#   column names          -> src_df.columns.ravel()
#   metric names          -> src_df['METRIC_NAME'].tolist()

# One row per distinct (METRIC_CD, METRIC_NAME) pair, ordered by METRIC_CD.
(
    src_df
    .loc[:, ['METRIC_CD', 'METRIC_NAME']]
    .drop_duplicates()
    .sort_values(by='METRIC_CD')
    .reset_index(drop=True)
)

Unnamed: 0,METRIC_CD,METRIC_NAME
0,DB1R000900,Prepaid Inflow M1 : DTAC
1,DB1R000900AA,Prepaid Inflow M1 : DTAC : Account Executive
2,DB1R000900AB,Prepaid Inflow M1 : DTAC : B2B
3,DB1R000900AC,Prepaid Inflow M1 : DTAC : Branded Retail
4,DB1R000900AD,Prepaid Inflow M1 : DTAC : Contact Center
5,DB1R000900AE,Prepaid Inflow M1 : DTAC : Direct Sales
6,DB1R000900AF,Prepaid Inflow M1 : DTAC : Key Account
7,DB1R000900AG,Prepaid Inflow M1 : DTAC : Modern Trade
8,DB1R000900AH,Prepaid Inflow M1 : DTAC : Others
9,DB1R000900AI,Prepaid Inflow M1 : DTAC : Own Digital


In [6]:
# src_df.dtypes
# Summary statistics (count, mean, std, min, quartiles, max) for src_df.
src_df.describe()

Unnamed: 0,TM_KEY_MTH,METRIC_VALUE
count,7920.0,7920.0
mean,202403.0,403705.5
std,1.414303,2669309.0
min,202401.0,0.0
25%,202402.0,0.0
50%,202403.0,0.0
75%,202404.0,91050.0
max,202405.0,83874540.0


### Step 2 : ADHOC_RAW_DTAC_SALE_TARGET_AREA

In [24]:
# AKPIPRD
# Reload the staging table AUTOKPI.ADHOC_RAW_DTAC_SALE_TARGET_AREA with the
# rows of src_df: clear the table, bulk-insert, commit.

# Columns are selected by name so the positional binds (:1 .. :17) always line
# up with the INSERT column list, whatever order the sheet delivers them in.
raw_cols = ['USE_FLAG', 'TM_KEY_MTH', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'METRIC_VALUE',
            'COMP_CD', 'ORGID_P', 'ZONE_TYPE', 'ORGID_G', 'TDS_SGMD', 'ORGID_H', 'HOP_HINT',
            'ORGID_HH', 'D_CLUSTER', 'PROVINCE_ENG', 'REMARK']
rows = [tuple(x) for x in src_df[raw_cols].values]

# Credentials are passed as keyword arguments rather than embedded in the
# connect string, so a password containing '@' or '/' cannot break parsing.
dsn = f'{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}'
conn = oracledb.connect(user=AKPIPRD_user, password=AKPIPRD_pwd, dsn=dsn)
print(f'{AKPIPRD_db} : Connected')

cur = conn.cursor()
# v_date = (curr_dt, )

try:
    print('\nProcessing...')

    # Clear the staging table with DELETE instead of TRUNCATE: TRUNCATE is DDL
    # and commits implicitly, so a failed INSERT would leave the table empty
    # with nothing to roll back. DELETE stays inside the transaction.
    cur.execute("DELETE FROM AUTOKPI.ADHOC_RAW_DTAC_SALE_TARGET_AREA")

    # Insert (schema-qualified, consistent with the statement above)
    cur.executemany(
        "INSERT INTO AUTOKPI.ADHOC_RAW_DTAC_SALE_TARGET_AREA "
        "(USE_FLAG, TM_KEY_MTH, TM_KEY_DAY, METRIC_CD, METRIC_NAME, METRIC_VALUE, COMP_CD, ORGID_P, ZONE_TYPE, "
        "ORGID_G, TDS_SGMD, ORGID_H, HOP_HINT, ORGID_HH, D_CLUSTER, PROVINCE_ENG, REMARK) "
        "VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14,:15,:16,:17)",
        rows,
    )
    conn.commit()

    print(f'\n"ADHOC_RAW_DTAC_SALE_TARGET_AREA" : Done !!!\n\n{src_df.shape[0]} rows, {src_df.shape[1]} columns')

except oracledb.DatabaseError as e:
    # Undo the DELETE / partial INSERT, report, and stop the notebook so the
    # next step does not run against a staging table that was not reloaded.
    conn.rollback()
    print(f'\nError with Oracle : {e}')
    raise

finally:
    cur.close()
    conn.close()
    print(f'\n{AKPIPRD_db} : Disconnected')

AKPIPRD : Connected

Processing...

"ADHOC_RAW_DTAC_SALE_TARGET_AREA" : Done !!!

7920 rows, 17 columns

AKPIPRD : Disconnected


### Step 3 : ADHOC_DTAC_TARGET_AREA_DAILY

In [25]:
# AKPIPRD
# Refresh AUTOKPI.ADHOC_DTAC_TARGET_AREA_DAILY from the raw staging table:
#   1. delete target rows whose (TM_KEY_MTH, METRIC_CD) exists in the staging table
#   2. re-insert them, aggregated at three area levels (P, G, H) and joined to
#      AUTOKPI.DIM_PERIOD for TM_KEY_YR / DAYS_IN_MONTH
# Both statements run in one transaction and are committed together.

# Credentials are passed as keyword arguments rather than embedded in the
# connect string, so a password containing '@' or '/' cannot break parsing.
dsn = f'{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}'
conn = oracledb.connect(user=AKPIPRD_user, password=AKPIPRD_pwd, dsn=dsn)
print(f'{AKPIPRD_db} : Connected')
cur = conn.cursor()


try:
    print('\nProcessing...')

    # Delete
    cur.execute("""
        DELETE AUTOKPI.ADHOC_DTAC_TARGET_AREA_DAILY TGT
                
        WHERE EXISTS (
            SELECT 1 FROM AUTOKPI.ADHOC_RAW_DTAC_SALE_TARGET_AREA SRC
            WHERE TGT.TM_KEY_MTH = SRC.TM_KEY_MTH 
            AND TGT.METRIC_CD = SRC.METRIC_CD )
    """)
    print(f'\nDELETE : ADHOC_DTAC_TARGET_AREA_DAILY : Done !!!')

    # Insert
    # Notes on the statement below:
    #  * TM_KEY_MTH is taken from the staging side (T), not from the LEFT JOINed
    #    DIM_PERIOD side (P). With P.TM_KEY_MTH, a month missing from DIM_PERIOD
    #    would be stored with a NULL key that the DELETE above can never match.
    #  * The former "NOLOCK" after the table name was dropped: in Oracle it is
    #    not a hint, only a table alias, so removing it does not change the query.
    cur.execute("""
        INSERT INTO AUTOKPI.ADHOC_DTAC_TARGET_AREA_DAILY (TM_KEY_YR, TM_KEY_MTH, TRUE_TM_KEY_WK, TM_KEY_DAY, DAYS_IN_MONTH, PERIODFLAG, PRODUCT_GRP, METRIC_CD, METRIC_NAME, METRIC_GRP, COMP_CD, VERSION, METRIC_VALUE, AREA_NO, AREA_TYPE, AREA_CD, AREA_NAME, FREQUENCY, REMARK)

        WITH W_RAW_DTAC_SALE_TARGET_FIXED AS 
        (
            SELECT TM_KEY_MTH, TM_KEY_DAY, METRIC_CD, METRIC_NAME, COMP_CD, METRIC_VALUE, AREA_NO, AREA_TYPE, AREA_CD, AREA_NAME, REMARK--, LOAD_DATE
            FROM (
                -->> P : Nationwide
                SELECT TM_KEY_MTH, TM_KEY_DAY, METRIC_CD, METRIC_NAME, COMP_CD, SUM(METRIC_VALUE) AS METRIC_VALUE
                    , 1 AS AREA_NO, 'P' AS AREA_TYPE, 'P' AS AREA_CD, 'Nationwide' AS AREA_NAME
                    , REMARK--, MAX(LOAD_DATE) AS LOAD_DATE
                FROM AUTOKPI.ADHOC_RAW_DTAC_SALE_TARGET_AREA
                WHERE USE_FLAG = 'Y'
                GROUP BY TM_KEY_MTH, TM_KEY_DAY, METRIC_CD, METRIC_NAME, COMP_CD, REMARK
                
                UNION ALL 
                
                -->> G : 8 Region
                SELECT TM_KEY_MTH, TM_KEY_DAY, METRIC_CD, METRIC_NAME, COMP_CD, SUM(METRIC_VALUE) AS METRIC_VALUE
                    , 2 AS AREA_NO, 'G' AS AREA_TYPE, ORGID_G AS AREA_CD, TDS_SGMD AS AREA_NAME
                    , REMARK--, MAX(LOAD_DATE) AS LOAD_DATE
                FROM AUTOKPI.ADHOC_RAW_DTAC_SALE_TARGET_AREA
                WHERE USE_FLAG = 'Y' AND ORGID_P IS NOT NULL
                GROUP BY TM_KEY_MTH, TM_KEY_DAY, METRIC_CD, METRIC_NAME, COMP_CD, ORGID_G, TDS_SGMD, REMARK
                
                UNION ALL 
                
                -->> H : 65 HOP_HINT
                SELECT TM_KEY_MTH, TM_KEY_DAY, METRIC_CD, METRIC_NAME, COMP_CD, SUM(METRIC_VALUE) AS METRIC_VALUE
                    , 3 AS AREA_NO, 'H' AS AREA_TYPE, ORGID_H AS AREA_CD, HOP_HINT AS AREA_NAME
                    , REMARK--, MAX(LOAD_DATE) AS LOAD_DATE
                FROM AUTOKPI.ADHOC_RAW_DTAC_SALE_TARGET_AREA
                WHERE USE_FLAG = 'Y' AND ORGID_P IS NOT NULL
                GROUP BY TM_KEY_MTH, TM_KEY_DAY, METRIC_CD, METRIC_NAME, COMP_CD, ORGID_H, HOP_HINT, REMARK
            )
        ) -->> W_RAW_DTAC_SALE_TARGET_FIXED
        -----------------------------------------------------------------------------------------------------------------------

        , W_ADHOC_DTAC_SALE_TARGET_AREA_MONTHLY AS 
        (
            SELECT P.TM_KEY_YR, T.TM_KEY_MTH, NULL AS TRUE_TM_KEY_WK, NULL AS TM_KEY_DAY, P.DAYS_IN_MONTH, NULL AS PERIODFLAG
                , CASE  WHEN T.METRIC_NAME LIKE '%Prepaid%' THEN 'Prepaid' 
                        WHEN T.METRIC_NAME LIKE '%Postpaid%' THEN 'Postpaid' 
                        WHEN T.METRIC_NAME LIKE '%TOL%' THEN 'TOL' 
                        WHEN T.METRIC_NAME LIKE '%TVS%' THEN 'TVS' 
                        END PRODUCT_GRP
                , T.METRIC_CD, T.METRIC_NAME
                , 'Sales' AS METRIC_GRP
                , T.COMP_CD, 'T' AS VERSION
                , T.METRIC_VALUE, T.AREA_NO, T.AREA_TYPE, T.AREA_CD, T.AREA_NAME
                , 'MTH' AS FREQUENCY
                , T.REMARK--, T.LOAD_DATE
                
            FROM W_RAW_DTAC_SALE_TARGET_FIXED T
            
            LEFT JOIN (SELECT DISTINCT TM_KEY_YR, TM_KEY_MTH, DAYS_IN_MONTH FROM AUTOKPI.DIM_PERIOD) P 
                ON T.TM_KEY_MTH = P.TM_KEY_MTH
        ) -->> W_ADHOC_DTAC_SALE_TARGET_AREA_MONTHLY
        -----------------------------------------------------------------------------------------------------------------------
                
        -->> Output
                
        SELECT TM_KEY_YR, TM_KEY_MTH, TRUE_TM_KEY_WK, TM_KEY_DAY, DAYS_IN_MONTH, PERIODFLAG, PRODUCT_GRP, METRIC_CD, METRIC_NAME, METRIC_GRP, COMP_CD, VERSION, METRIC_VALUE, AREA_NO, AREA_TYPE, AREA_CD, AREA_NAME, FREQUENCY, REMARK
        FROM W_ADHOC_DTAC_SALE_TARGET_AREA_MONTHLY
        ORDER BY TM_KEY_MTH, METRIC_GRP, PRODUCT_GRP, METRIC_CD, AREA_NO, AREA_TYPE 
    """)

    # DELETE and INSERT become visible together.
    conn.commit()
    print(f'\nINSERT : ADHOC_DTAC_TARGET_AREA_DAILY : Done !!!')
    # print(f'\n"ADHOC_DTAC_TARGET_AREA_DAILY" : Done !!!\n\n{src_df.shape[0]} rows, {src_df.shape[1]} columns')


except oracledb.DatabaseError as e:
    # Undo the DELETE if the INSERT failed, report, and stop the notebook
    # instead of continuing as if the refresh had succeeded.
    conn.rollback()
    print(f'\nError with Oracle : {e}')
    raise


finally:
    cur.close()
    conn.close()
    print(f'\n{AKPIPRD_db} : Disconnected')

AKPIPRD : Connected

Processing...

DELETE : ADHOC_DTAC_TARGET_AREA_DAILY : Done !!!

INSERT : ADHOC_DTAC_TARGET_AREA_DAILY : Done !!!

AKPIPRD : Disconnected


In [None]:
# --- Raw data ---
# NOTE(review): `op_file_name` is not assigned in any cell visible in this
# notebook, and this cell has no execution count - confirm where the name is
# supposed to come from before relying on Restart & Run All.
data_src = f"../data/raw/{op_file_name}.csv"

# Columns kept from the raw extract.
cols = [
    'TM_KEY_DAY', 'TM_KEY_WK', 'TM_KEY_MTH', 'TM_KEY_QTR', 'TM_KEY_YR',
    'CENTER', 'METRIC_GRP', 'PRODUCT_GRP', 'COMP_CD', 'METRIC_CD', 'METRIC_NAME',
    'ACTUAL_AS_OF', 'AGG_TYPE', 'RR_IND', 'GRY_IND', 'UOM', 'PERIOD',
    'ACTUAL_SNAP', 'ACTUAL_AGG', 'TARGET_SNAP', 'TARGET_AGG',
    'BASELINE_SNAP', 'BASELINE_AGG', 'PPN_TM',
]

# --- Filters ---
# Read the CSV, keep the selected columns, then keep year 2024 only.
# (Optional month filter, currently disabled: df['TM_KEY_MTH'] >= 202401)
df = (
    pd.read_csv(data_src, low_memory=False)
    .loc[:, cols]
    .loc[lambda frame: frame['TM_KEY_YR'] == 2024]
)

df.tail()

# Disabled follow-up steps kept for reference:
# ''' Fix Metric CD list '''
# ''' Fix Agg Column Name(Total Revenue) '''
# bg_col_list = ['POST_DTAC', 'POST_TMH', 'PRE_DTAC', 'PRE_TMH', 'TOL', 'TVS']

In [8]:
# AKPIPRD
# Rows of src_df for day 20240501 and metric VIN00020, ordered for side-by-side comparison.
# NOTE(review): the src_df built in Step 1 shows no AREA_NO / AREA_CD columns,
# so this cell appears to expect a different src_df - confirm before re-running.
# Alternative filter: src_df[src_df['TM_KEY_DAY']==20240601].reset_index(drop=True)
(
    src_df
    .loc[lambda frame: (frame['TM_KEY_DAY'] == 20240501) & (frame['METRIC_CD'] == 'VIN00020')]
    .sort_values(by=['TM_KEY_DAY', 'METRIC_CD', 'AREA_NO', 'AREA_CD'])
    .reset_index(drop=True)
)

Unnamed: 0,TM_KEY_DAY,METRIC_CD,METRIC_NAME,METRIC_VALUE,COMP_CD,VERSION,AREA_NO,AREA_CD,AREA_DESC,AREA_TYPE,LOAD_DATE,REMARK,END_MTH_IND
0,20240501,VIN00020,Broadband Subs Share : TOL,36.52,True,A,1,P,Nationwide,P,2024-06-04 14:14:53.206419,,31
1,20240501,VIN00020,Broadband Subs Share : TOL,48.82,True,A,2,BMA,BMA,Z,2024-06-04 14:14:53.206419,,31
2,20240501,VIN00020,Broadband Subs Share : TOL,30.38,True,A,2,UPC,UPC,Z,2024-06-04 14:14:53.206419,,31
3,20240501,VIN00020,Broadband Subs Share : TOL,32.59,True,A,3,EAST,East,Z,2024-06-04 14:14:53.206419,,31
4,20240501,VIN00020,Broadband Subs Share : TOL,50.31,True,A,4,10,Bangkok,Z,2024-06-04 14:14:53.206419,,31
5,20240501,VIN00020,Broadband Subs Share : TOL,47.47,True,A,4,11,Samut Prakan,Z,2024-06-04 14:14:53.206419,,31
6,20240501,VIN00020,Broadband Subs Share : TOL,46.3,True,A,4,12,Nonthaburi,Z,2024-06-04 14:14:53.206419,,31
7,20240501,VIN00020,Broadband Subs Share : TOL,45.35,True,A,4,13,Pathum Thani,Z,2024-06-04 14:14:53.206419,,31
8,20240501,VIN00020,Broadband Subs Share : TOL,47.87,True,A,5,GX1,Deputy CGO & Regional Management 1 (BMA-West),G,2024-06-04 14:14:53.206419,,31
9,20240501,VIN00020,Broadband Subs Share : TOL,50.05,True,A,5,GX2,Regional Management 2 (BMA-East),G,2024-06-04 14:14:53.206419,,31


In [9]:
# TDMDBPR
# Rows of df for day 20240501 and metric VIN00020, ordered for side-by-side comparison.
# NOTE(review): AREA_NO / AREA_CD are not in the `cols` list that df is reduced
# to when it is loaded - confirm which df this cell is meant to run against.
# Alternative filter: df[df['TM_KEY_DAY']==20240501].reset_index(drop=True)
(
    df
    .loc[lambda frame: (frame['TM_KEY_DAY'] == 20240501) & (frame['METRIC_CD'] == 'VIN00020')]
    .sort_values(by=['TM_KEY_DAY', 'METRIC_CD', 'AREA_NO', 'AREA_CD'])
    .reset_index(drop=True)
)

Unnamed: 0,TM_KEY_DAY,METRIC_CD,METRIC_NAME,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,ACTUAL_SNAP,ACTUAL_AGG,PPN_TM
0,20240501,VIN00020,Broadband Subs Share : TOL,1,P,P,Nationwide,36.52,36.52,2024-06-06 10:34:07
1,20240501,VIN00020,Broadband Subs Share : TOL,2,Z,BMA,BMA,48.82,48.82,2024-06-06 10:34:07
2,20240501,VIN00020,Broadband Subs Share : TOL,2,Z,UPC,UPC,30.38,30.38,2024-06-06 10:34:07
3,20240501,VIN00020,Broadband Subs Share : TOL,3,Z,EAST,EAST,32.59,32.59,2024-06-06 10:34:07
4,20240501,VIN00020,Broadband Subs Share : TOL,4,Z,10,Bangkok,50.31,50.31,2024-06-06 10:34:07
5,20240501,VIN00020,Broadband Subs Share : TOL,4,Z,11,Samut Prakan,47.47,47.47,2024-06-06 10:34:07
6,20240501,VIN00020,Broadband Subs Share : TOL,4,Z,12,Nonthaburi,46.3,46.3,2024-06-06 10:34:07
7,20240501,VIN00020,Broadband Subs Share : TOL,4,Z,13,Pathum Thani,45.35,45.35,2024-06-06 10:34:07
8,20240501,VIN00020,Broadband Subs Share : TOL,5,G,GX1,Regional Management 1 (BMA : West),47.87,47.87,2024-06-06 10:34:07
9,20240501,VIN00020,Broadband Subs Share : TOL,5,G,GX2,Regional Management 2 (BMA : East),50.05,50.05,2024-06-06 10:34:07


In [10]:
# All rows of df for metric VIN00020 with AREA_CD 'P', ordered by day.
# NOTE(review): AREA_NO / AREA_CD are not in the `cols` list that df is reduced
# to when it is loaded - confirm which df this cell is meant to run against.
(
    df
    .loc[lambda frame: (frame['METRIC_CD'] == 'VIN00020') & (frame['AREA_CD'] == 'P')]
    .sort_values(by=['TM_KEY_DAY', 'METRIC_CD', 'AREA_NO', 'AREA_CD'])
    .reset_index(drop=True)
)

Unnamed: 0,TM_KEY_DAY,METRIC_CD,METRIC_NAME,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,ACTUAL_SNAP,ACTUAL_AGG,PPN_TM
0,20240101,VIN00020,Broadband Subs Share : TOL,1,P,P,Nationwide,36.49,36.49,2024-06-06 10:34:07
1,20240201,VIN00020,Broadband Subs Share : TOL,1,P,P,Nationwide,36.5,36.5,2024-06-06 10:34:07
2,20240301,VIN00020,Broadband Subs Share : TOL,1,P,P,Nationwide,36.52,36.52,2024-06-06 10:34:07
3,20240401,VIN00020,Broadband Subs Share : TOL,1,P,P,Nationwide,36.49,36.49,2024-06-06 10:34:07
4,20240501,VIN00020,Broadband Subs Share : TOL,1,P,P,Nationwide,36.52,36.52,2024-06-06 10:34:07
5,20240601,VIN00020,Broadband Subs Share : TOL,1,P,P,Nationwide,36.52,36.52,2024-06-06 10:34:07


In [11]:
# df.to_csv(f'{op_raw_file}', index=False, encoding='utf-8')

# print(f'\n"{op_raw_file}" is generated')