# * Interim : 2025 Target Sales DTAC
    Waiting for CDS development

## Parameter

In [1]:
import configparser
import datetime as dt
import pandas as pd
import numpy as np
import oracledb
import re

# Load connection settings from the shared INI file.
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed, so an empty result is turned into an explicit error here
# instead of surfacing later as a confusing KeyError on the section lookups below.
config_path = '../../my_config.ini'
config = configparser.ConfigParser()
if not config.read(config_path):
    raise FileNotFoundError(f'Config file not found or unreadable: {config_path}')

# TDMDBPR connection settings
TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

# AKPIPRD connection settings (used by the load step)
AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

# Run date, as a date object and as a YYYYMMDD string
curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')
curr_dt

datetime.date(2025, 4, 28)

## ETL Process...

### Step 1 : Import Data Source

In [4]:
''' Rawdata '''

# Source workbook : Target_Sales_DTAC_Y2025
src_file = '../../data/interim/Sales/Target_Sales_DTAC_Y2025.xlsx'
src_sheet = 'Rawdata'
src_col = ['DATA_DATE', 'DATA_MONTH', 'DATA_YEAR', 'METRIC_VALUES', 'COMP_CD', 'VERSION', 'METRIC_CD', 'METRIC_NAME', 'AREA_TYPE', 'AREA_CD', 'AREA_DESC', 'CHANNEL_CD']

# Workbook header -> column name used by the rest of the notebook
src_rename = {'AREA_CD': 'AREA_KEY', 'DATA_MONTH': 'MONTH_SHORT', 'DATA_YEAR': 'TM_KEY_YR', 'METRIC_VALUES': 'MTH_VALUE'}
# Final column order
src_order = ['DATA_DATE', 'MONTH_SHORT', 'TM_KEY_YR', 'COMP_CD', 'VERSION', 'METRIC_CD', 'METRIC_NAME', 'AREA_TYPE', 'AREA_KEY', 'AREA_DESC', 'CHANNEL_CD', 'MTH_VALUE']

src_df = (
    pd.read_excel(src_file, sheet_name=src_sheet, usecols=src_col, index_col=None)
    .rename(columns=src_rename)
)
# Upper-case the month abbreviation (dt_df gets the same treatment before the period join)
src_df['MONTH_SHORT'] = src_df['MONTH_SHORT'].map(lambda month: month.upper())
src_df = src_df[src_order]

print(f'\nsrc_df : {src_df.shape[0]} rows, {src_df.shape[1]} columns')
src_df.tail(3)


src_df : 21728 rows, 12 columns


Unnamed: 0,DATA_DATE,MONTH_SHORT,TM_KEY_YR,COMP_CD,VERSION,METRIC_CD,METRIC_NAME,AREA_TYPE,AREA_KEY,AREA_DESC,CHANNEL_CD,MTH_VALUE
21725,202504,APR,2025,DTAC,T,DB1S000101AGB,Prepaid Gross Adds : DTAC : Modern Trade Non-S...,HH,906111,TRANG,GB,
21726,202504,APR,2025,DTAC,T,DB1S000101AGB,Prepaid Gross Adds : DTAC : Modern Trade Non-S...,HH,906112,YALA,GB,
21727,202504,APR,2025,DTAC,T,DB1S000101AGB,Prepaid Gross Adds : DTAC : Modern Trade Non-S...,True corp,True corp,True corp,GB,18000.0


In [5]:
''' Master Data '''

# DIM_TIME
dt_file = '../CFW/data/dim_time.csv'
dt_cols = ['TM_KEY_YR', 'MONTH_SHORT', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'DAYS_IN_MONTH']
dt_df = pd.read_csv(dt_file, usecols=dt_cols)
# Upper-case the month abbreviation so it compares equal to src_df['MONTH_SHORT']
dt_df['MONTH_SHORT'] = dt_df['MONTH_SHORT'].map(lambda month: month.upper())

# DIM_MOOC_AREA
mooc_file = '../CFW/data/dim_mooc_area.csv'
mooc_cols = ['ZONE_TYPE', 'ORGID_G', 'TDS_SGMD', 'ORGID_R', 'TDS_RGM_CODE', 'ORGID_H', 'HOP_HINT', 'TDS_PROVINCE', 'PROVINCE_ENG', 'PROVINCE_TH', 'ORGID_HH', 'D_CLUSTER', 'CCAATT', 'REMARK']
mooc_df = pd.read_csv(mooc_file, usecols=mooc_cols)
# Exclude rows flagged 'Dummy' and the 'True Corp' entry
is_dummy = mooc_df['REMARK'] == 'Dummy'
is_true_corp = mooc_df['HOP_HINT'] == 'True Corp'
mooc_df = mooc_df.loc[~(is_dummy | is_true_corp)]

# Create HH level : distinct hierarchy rows, minus rows that are entirely empty
hh_cols = ['ZONE_TYPE', 'ORGID_G', 'TDS_SGMD', 'ORGID_H', 'HOP_HINT', 'ORGID_HH', 'D_CLUSTER']
mooc_hh_df = mooc_df[hh_cols].drop_duplicates().dropna(how='all')
# String join key derived from the numeric ORGID_HH
mooc_hh_df['AREA_KEY'] = mooc_hh_df['ORGID_HH'].astype(int).astype(str)

In [6]:
''' Example DataFrame '''

# Scratch cell for eyeballing the frames loaded in Step 1.
# Only the un-commented expression is displayed; swap the comment marker to inspect another frame.
src_df.tail(3)
# dt_df.tail(3)
# mooc_df.tail(3)
# NOTE(review): no `mooc_h_df` is created in the cells visible here (the HH-level frame
# is named `mooc_hh_df`), so the two probes below would raise NameError if un-commented as written.
# mooc_h_df.tail(3)
# mooc_h_df.loc[mooc_h_df['ORGID_H'].str.contains('^0')].tail(3)

# Per-metric profile: summed MTH_VALUE plus the number of distinct area types / area keys.
# tmp_df = src_df.groupby(['VERSION', 'COMP_CD', 'METRIC_CD', 'METRIC_NAME']).agg({'MTH_VALUE': 'sum', 'AREA_TYPE': 'nunique', 'AREA_KEY': 'nunique'}).reset_index()
# tmp_df

Unnamed: 0,DATA_DATE,MONTH_SHORT,TM_KEY_YR,COMP_CD,VERSION,METRIC_CD,METRIC_NAME,AREA_TYPE,AREA_KEY,AREA_DESC,CHANNEL_CD,MTH_VALUE
21725,202504,APR,2025,DTAC,T,DB1S000101AGB,Prepaid Gross Adds : DTAC : Modern Trade Non-S...,HH,906111,TRANG,GB,
21726,202504,APR,2025,DTAC,T,DB1S000101AGB,Prepaid Gross Adds : DTAC : Modern Trade Non-S...,HH,906112,YALA,GB,
21727,202504,APR,2025,DTAC,T,DB1S000101AGB,Prepaid Gross Adds : DTAC : Modern Trade Non-S...,True corp,True corp,True corp,GB,18000.0


### Step 2 : Aggregate Data

In [7]:
''' ALL Channel '''


# Source metric code -> (ALL-channel metric code, ALL-channel metric name)
_ALL_CHANNEL_METRICS = {
    'DB1R000900': ('DB1R000900CS', 'Prepaid Inflow M1 : DTAC'),
    'DB1S000101': ('DB1S000101CS', 'Prepaid Gross Adds : DTAC'),
    'DB2R000500': ('DB2R010500CS', 'Postpaid Inflow M1 B2C : DTAC'),
    'DB2S000100': ('DB2S010100CS', 'Postpaid Gross Adds B2C : DTAC'),
}


def all_chn_metric(v_cd, v_name):
    """Translate a source metric into its ALL-channel code and name.

    Args:
        v_cd: Source metric code; compared for exact equality with the known codes.
        v_name: Source metric name; only used when the code is not recognised.

    Returns:
        A 2-tuple ``(metric_cd, metric_name)``: the mapped pair for a known code,
        otherwise the inputs unchanged.
    """
    return _ALL_CHANNEL_METRICS.get(v_cd, (v_cd, v_name))


''' Filter '''
# Keep only the four channel-less base metrics.
# .copy() makes the result an independent frame, so the column assignments below
# do not act on a slice of src_df (avoids pandas' SettingWithCopyWarning).
all_chn_codes = ['DB1R000900', 'DB1S000101', 'DB2R000500', 'DB2S000100']
all_chn_df = src_df.loc[src_df['METRIC_CD'].isin(all_chn_codes)].copy()

''' Modify '''
# Translate every (code, name) pair once with a plain comprehension instead of a
# row-wise DataFrame.apply that builds one pd.Series per row: it is faster and,
# unlike the apply form, also works when the filter above returns no rows.
all_chn_pairs = [all_chn_metric(cd, name) for cd, name in zip(all_chn_df['METRIC_CD'], all_chn_df['METRIC_NAME'])]
all_chn_df['METRIC_CD'] = [pair[0] for pair in all_chn_pairs]
all_chn_df['METRIC_NAME'] = [pair[1] for pair in all_chn_pairs]

''' Data Test '''
# Optional debug filters : un-comment one to narrow the frame while investigating.
# all_chn_df = all_chn_df.loc[all_chn_df['MONTH_SHORT'].isin(['FEB','MAR'])]
# all_chn_df = all_chn_df.loc[all_chn_df['MONTH_SHORT']=='JAN']
# all_chn_df = all_chn_df.loc[all_chn_df['METRIC_CD']=='DB1S000101CS']
# all_chn_df = all_chn_df.loc[all_chn_df['AREA_KEY']=='902033']
# all_chn_df = all_chn_df.loc[all_chn_df['AREA_KEY'].isna()]

''' Sort '''
# Renumber rows 0..n-1 after filtering (row order itself is unchanged)
all_chn_df = all_chn_df.reset_index(drop=True)

# ''' Test : Sum ALL & Sort '''
# test_all_chn_df = all_chn_df.groupby(['DATA_DATE', 'MONTH_SHORT', 'TM_KEY_YR', 'COMP_CD', 'VERSION', 'METRIC_CD', 'METRIC_NAME', 'AREA_TYPE', 'CHANNEL_CD'])\
#     .agg({'MTH_VALUE': 'sum'}).sort_values(by=['MONTH_SHORT', 'METRIC_CD', 'AREA_TYPE']).reset_index()
# test_all_chn_df

print(f'\nall_chn_df : {all_chn_df.shape[0]} rows, {all_chn_df.shape[1]} columns')
all_chn_df.tail(3)


all_chn_df : 1552 rows, 12 columns


Unnamed: 0,DATA_DATE,MONTH_SHORT,TM_KEY_YR,COMP_CD,VERSION,METRIC_CD,METRIC_NAME,AREA_TYPE,AREA_KEY,AREA_DESC,CHANNEL_CD,MTH_VALUE
1549,202504,APR,2025,DTAC,T,DB2S010100CS,Postpaid Gross Adds B2C : DTAC,HH,906111,TRANG,ALL,119.618574
1550,202504,APR,2025,DTAC,T,DB2S010100CS,Postpaid Gross Adds B2C : DTAC,HH,906112,YALA,ALL,21.770401
1551,202504,APR,2025,DTAC,T,DB2S010100CS,Postpaid Gross Adds B2C : DTAC,True corp,True corp,True corp,ALL,31976.981157


In [8]:
''' GEO Channel

    AA  : Account Executive
    AC  : Branded Retail
    AE  : Direct Sales
    AGA : Modern Trade Synergy
    AJ  : Retail Sales
    AK  : Wholesales
'''


# (pattern searched in the source code, (GEO metric code, GEO metric name)); first match wins
_GEO_CHANNEL_METRICS = (
    ('DB1R000900', ('DB1R000900CG', 'Prepaid Inflow M1 : DTAC - GEO Channel')),
    ('DB1S000101', ('DB1S000101CG', 'Prepaid Gross Adds : DTAC - GEO Channel')),
    ('DB2R000500', ('DB2R010500CG', 'Postpaid Inflow M1 B2C : DTAC - GEO Channel')),
    ('DB2S000100', ('DB2S010100CG', 'Postpaid Gross Adds B2C : DTAC - GEO Channel')),
)


def geo_chn_metric(v_cd, v_name):
    """Translate a channel-level source metric into its GEO-channel code and name.

    Args:
        v_cd: Source metric code; each known base code is looked for anywhere
            inside it with ``re.search`` (the match is not anchored).
        v_name: Source metric name; only used when no base code is found.

    Returns:
        A 2-tuple ``(metric_cd, metric_name)``: the GEO pair of the first base code
        found in ``v_cd``, otherwise the inputs unchanged.
    """
    for base_pattern, geo_pair in _GEO_CHANNEL_METRICS:
        if re.search(base_pattern, v_cd):
            return geo_pair
    return v_cd, v_name


''' Filter '''
# Keep the four base metrics, restricted to the channel suffixes AC, AE, AGA and AJ.
# NOTE(review): AA and AK appear in the channel legend above and in the commented-out
# variant below, but are excluded by the active filter; confirm this is intended.
# geo_chn_df = geo_chn_df.loc[geo_chn_df['METRIC_CD'].str.contains('^DB1R000900|^DB1S000101|^DB2R000500|^DB2S000100') & geo_chn_df['METRIC_CD'].str.contains('AA$|AC$|AE$|AGA$|AJ$|AK$')]
is_base_metric = src_df['METRIC_CD'].str.contains('^DB1R000900|^DB1S000101|^DB2R000500|^DB2S000100')
is_geo_suffix = src_df['METRIC_CD'].str.contains('AC$|AE$|AGA$|AJ$')
# .copy() makes the result an independent frame, so the column assignments below
# do not act on a slice of src_df (avoids pandas' SettingWithCopyWarning).
geo_chn_df = src_df.loc[is_base_metric & is_geo_suffix].copy()

''' Modify '''
# geo_chn_df['METRIC_CD(TMP)'] = geo_chn_df['METRIC_CD']
# geo_chn_df['METRIC_NAME(TMP)'] = geo_chn_df['METRIC_NAME']
# geo_chn_df['CHANNEL_CD(TMP)'] = geo_chn_df['CHANNEL_CD']
# Translate every (code, name) pair once with a plain comprehension instead of a
# row-wise DataFrame.apply that builds one pd.Series per row: it is faster and,
# unlike the apply form, also works when the filter above returns no rows.
geo_chn_pairs = [geo_chn_metric(cd, name) for cd, name in zip(geo_chn_df['METRIC_CD'], geo_chn_df['METRIC_NAME'])]
geo_chn_df['METRIC_CD'] = [pair[0] for pair in geo_chn_pairs]
geo_chn_df['METRIC_NAME'] = [pair[1] for pair in geo_chn_pairs]
# All selected channels collapse into one synthetic channel code
geo_chn_df['CHANNEL_CD'] = 'GEO'

''' Data Test '''
# Optional debug filters : un-comment one to narrow the frame while investigating.
# geo_chn_df = geo_chn_df.loc[geo_chn_df['MONTH_SHORT']=='JAN']
# geo_chn_df = geo_chn_df.loc[geo_chn_df['METRIC_CD']=='DB1S000101CG']
# geo_chn_df = geo_chn_df.loc[geo_chn_df['AREA_KEY']=='902033']

''' Sum GEO & Sort '''
# Add up the individual channels per period / metric / area.
# groupby sorts by the key columns and, with its default dropna=True, drops rows
# that have a missing value in any key column.
geo_group_keys = ['DATA_DATE', 'MONTH_SHORT', 'TM_KEY_YR', 'COMP_CD', 'VERSION', 'METRIC_CD', 'METRIC_NAME', 'AREA_TYPE', 'AREA_KEY', 'AREA_DESC', 'CHANNEL_CD']
geo_chn_df = geo_chn_df.groupby(geo_group_keys).agg({'MTH_VALUE': 'sum'}).reset_index()

# ''' Test : Sum GEO & Sort '''
# test_geo_chn_df = geo_chn_df.groupby(['DATA_DATE', 'MONTH_SHORT', 'TM_KEY_YR', 'COMP_CD', 'VERSION', 'METRIC_CD', 'METRIC_NAME', 'AREA_TYPE', 'CHANNEL_CD'])\
#     .agg({'MTH_VALUE': 'sum'}).sort_values(by=['MONTH_SHORT', 'METRIC_CD', 'AREA_TYPE']).reset_index()
# test_geo_chn_df

print(f'\ngeo_chn_df : {geo_chn_df.shape[0]} rows, {geo_chn_df.shape[1]} columns')
geo_chn_df.tail(3)


geo_chn_df : 1552 rows, 12 columns


Unnamed: 0,DATA_DATE,MONTH_SHORT,TM_KEY_YR,COMP_CD,VERSION,METRIC_CD,METRIC_NAME,AREA_TYPE,AREA_KEY,AREA_DESC,CHANNEL_CD,MTH_VALUE
1549,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,HH,910097,UBON RATCHATHANI,GEO,719.4908
1550,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,HH,910098,YASOTHON,GEO,0.0
1551,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,True corp,True corp,True corp,GEO,0.0


In [12]:
''' Concat ALL & GEO'''

# Stack the ALL-channel rows on top of the GEO-channel rows.
# Each frame keeps its own row labels, so labels repeat in the result.
channel_frames = [all_chn_df, geo_chn_df]
raw_df = pd.concat(channel_frames)
raw_df.tail(3)

Unnamed: 0,DATA_DATE,MONTH_SHORT,TM_KEY_YR,COMP_CD,VERSION,METRIC_CD,METRIC_NAME,AREA_TYPE,AREA_KEY,AREA_DESC,CHANNEL_CD,MTH_VALUE
1549,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,HH,910097,UBON RATCHATHANI,GEO,719.4908
1550,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,HH,910098,YASOTHON,GEO,0.0
1551,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,True corp,True corp,True corp,GEO,0.0


In [13]:
''' Join Area '''

# 'True corp' rows carry no HH area key and are passed through without hierarchy columns
is_corp = raw_df['AREA_TYPE'] == 'True corp'
is_hh = raw_df['AREA_TYPE'] == 'HH'
corp_df = raw_df.loc[is_corp]

# Attach the area hierarchy to the HH rows.
# validate='many_to_one' makes pandas raise MergeError if AREA_KEY is duplicated in
# mooc_hh_df; without it a duplicated master key would silently multiply target rows
# and inflate every sum computed afterwards.
merge_hh_df = pd.merge(raw_df.loc[is_hh], mooc_hh_df, how='left', on='AREA_KEY', validate='many_to_one')

# ORGID_HH has no missing values in mooc_hh_df (it is cast to int there), so a missing
# value after the left join means the AREA_KEY was not found in the master data.
unmatched_keys = merge_hh_df.loc[merge_hh_df['ORGID_HH'].isna(), 'AREA_KEY'].drop_duplicates().tolist()
if unmatched_keys:
    print(f'\nWARNING : {len(unmatched_keys)} HH AREA_KEY not found in mooc_hh_df : {unmatched_keys[:10]}')

merge_area_df = pd.concat([corp_df, merge_hh_df])
# Label fixed: this line reports merge_area_df (it previously printed "prep_agg_df")
print(f'\nmerge_area_df : {merge_area_df.shape[0]} rows, {merge_area_df.shape[1]} columns')
merge_area_df.tail(3)


prep_agg_df : 3104 rows, 19 columns


Unnamed: 0,DATA_DATE,MONTH_SHORT,TM_KEY_YR,COMP_CD,VERSION,METRIC_CD,METRIC_NAME,AREA_TYPE,AREA_KEY,AREA_DESC,CHANNEL_CD,MTH_VALUE,ZONE_TYPE,ORGID_G,TDS_SGMD,ORGID_H,HOP_HINT,ORGID_HH,D_CLUSTER
3069,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,HH,910096,SURIN,GEO,140.470482,UPC,GX6,Regional Management 6 (Northeast 2),33X,"SURIN, SI SA KET",910096.0,SURIN
3070,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,HH,910097,UBON RATCHATHANI,GEO,719.4908,UPC,GX6,Regional Management 6 (Northeast 2),34Z,UBON RATCHATHANI,910097.0,UBON RATCHATHANI
3071,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,HH,910098,YASOTHON,GEO,0.0,UPC,GX6,Regional Management 6 (Northeast 2),37X,"YASOTHON, AMNAT CHAROEN",910098.0,YASOTHON


In [14]:
''' Join Period '''

# Attach every dt_df row of the same year / month to each target row
# (presumably one dt_df row per calendar day; confirm against dim_time.csv).
period_keys = ['TM_KEY_YR', 'MONTH_SHORT']
merge_period_df = merge_area_df.merge(dt_df, how='left', on=period_keys)

# Prep Aggregate : spread the monthly value evenly over the days of the month
merge_period_df['DAY_VALUE'] = merge_period_df['MTH_VALUE'].div(merge_period_df['DAYS_IN_MONTH'])
# prep_agg_df is another name for the same frame object, not a copy
prep_agg_df = merge_period_df
print(f'\nprep_agg_df : {prep_agg_df.shape[0]} rows, {prep_agg_df.shape[1]} columns')
prep_agg_df.tail(3)


prep_agg_df : 93120 rows, 24 columns


Unnamed: 0,DATA_DATE,MONTH_SHORT,TM_KEY_YR,COMP_CD,VERSION,METRIC_CD,METRIC_NAME,AREA_TYPE,AREA_KEY,AREA_DESC,...,TDS_SGMD,ORGID_H,HOP_HINT,ORGID_HH,D_CLUSTER,TM_KEY_DAY,DAYS_IN_MONTH,TRUE_TM_KEY_WK,TM_KEY_MTH,DAY_VALUE
93117,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,HH,910098,YASOTHON,...,Regional Management 6 (Northeast 2),37X,"YASOTHON, AMNAT CHAROEN",910098.0,YASOTHON,20250226,28,2025009,202502,0.0
93118,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,HH,910098,YASOTHON,...,Regional Management 6 (Northeast 2),37X,"YASOTHON, AMNAT CHAROEN",910098.0,YASOTHON,20250227,28,2025009,202502,0.0
93119,20250221,FEB,2025,DTAC,T,DB2S010100CG,Postpaid Gross Adds B2C : DTAC - GEO Channel,HH,910098,YASOTHON,...,Regional Management 6 (Northeast 2),37X,"YASOTHON, AMNAT CHAROEN",910098.0,YASOTHON,20250228,28,2025009,202502,0.0


In [23]:
''' Aggregate P, G, H, HH '''

# Output column order shared by every area level
agg_cols = ['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'DAY_VALUE', 'MTH_VALUE']
# Grouping keys common to every area level (period + metric identity)
agg_base_keys = ['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION']


def _aggregate_area_level(df, area_no, area_type, cd_col=None, name_col=None, fixed_cd=None, fixed_name=None, cd_formatter=None):
    """Sum MTH_VALUE / DAY_VALUE for one area level and shape the result like ``agg_cols``.

    Args:
        df: Frame holding ``agg_base_keys``, ``MTH_VALUE``, ``DAY_VALUE`` and, when
            given, the ``cd_col`` / ``name_col`` columns.
        area_no: Value written to ``AREA_NO``.
        area_type: Value written to ``AREA_TYPE``.
        cd_col: Column that becomes ``AREA_CD``; ``None`` means no area grouping.
        name_col: Column that becomes ``AREA_NAME``; required when ``cd_col`` is given.
        fixed_cd: Constant ``AREA_CD`` used when ``cd_col`` is ``None``.
        fixed_name: Constant ``AREA_NAME`` used when ``cd_col`` is ``None``.
        cd_formatter: Optional callable applied to the ``AREA_CD`` Series.

    Returns:
        A DataFrame with exactly the columns in ``agg_cols``, in that order.
    """
    area_keys = [] if cd_col is None else [cd_col, name_col]
    # groupby's default dropna=True skips rows with a missing value in any grouping key
    level_df = df.groupby(agg_base_keys + area_keys).agg({'MTH_VALUE': 'sum', 'DAY_VALUE': 'sum'}).reset_index()
    level_df['AREA_NO'] = area_no
    level_df['AREA_TYPE'] = area_type
    if cd_col is None:
        level_df['AREA_CD'] = fixed_cd
        level_df['AREA_NAME'] = fixed_name
    else:
        level_df = level_df.rename(columns={cd_col: 'AREA_CD', name_col: 'AREA_NAME'})
        if cd_formatter is not None:
            level_df['AREA_CD'] = cd_formatter(level_df['AREA_CD'])
    return level_df.loc[:, agg_cols]


# NOTE(review): P groups on period / metric keys only, so it also counts rows whose
# ORGID_* columns are missing, while G / H / HH drop those rows (groupby dropna=True).
# Confirm that P is meant to include them.

# P : Nationwide
agg_p_df = _aggregate_area_level(prep_agg_df, 1, 'P', fixed_cd='P', fixed_name='Nationwide')

# G : Region
agg_g_df = _aggregate_area_level(prep_agg_df, 2, 'G', cd_col='ORGID_G', name_col='TDS_SGMD')

# H : HOP_HINT
agg_h_df = _aggregate_area_level(prep_agg_df, 3, 'H', cd_col='ORGID_H', name_col='HOP_HINT')

# HH : D_CLUSTER (ORGID_HH is cast to int and then to str before it becomes AREA_CD)
agg_hh_df = _aggregate_area_level(prep_agg_df, 4, 'HH', cd_col='ORGID_HH', name_col='D_CLUSTER',
                                  cd_formatter=lambda codes: codes.astype(int).astype(str))

# Concat DataFrame
last_agg_all_df = pd.concat([agg_p_df, agg_g_df, agg_h_df, agg_hh_df], ignore_index=True)
last_agg_all_df['AGG_TYPE'] = 'S'
last_agg_all_df['FREQUENCY'] = 'Daily'
last_agg_all_df['REMARK'] = 'Allocate from 96 Cluster (HH level)'
# last_agg_all_df['REMARK'] = 'HH Level 96 Cluster'
# last_agg_all_df['REMARK'] = last_agg_all_df['TM_KEY_MTH'].apply(lambda x: 'H Level 64 Province' if x>=202401 and x<=202403 else 'HH Level 96 Cluster')

# Sort
last_agg_all_df = last_agg_all_df.sort_values(by=['TM_KEY_DAY', 'METRIC_CD', 'AREA_NO', 'AREA_CD']).reset_index(drop=True)

print(f'\nlast_agg_all_df : {last_agg_all_df.shape[0]} rows, {last_agg_all_df.shape[1]} columns')
last_agg_all_df.tail(3)


last_agg_all_df : 162240 rows, 17 columns


Unnamed: 0,TM_KEY_YR,TM_KEY_MTH,TRUE_TM_KEY_WK,TM_KEY_DAY,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,DAY_VALUE,MTH_VALUE,AGG_TYPE,FREQUENCY,REMARK
162237,2025,202504,2025018,20250430,DB2S010100CS,Postpaid Gross Adds B2C : DTAC,DTAC,T,4,HH,910096,SURIN,4.685977,140.579313,S,Daily,Allocate from 96 Cluster (HH level)
162238,2025,202504,2025018,20250430,DB2S010100CS,Postpaid Gross Adds B2C : DTAC,DTAC,T,4,HH,910097,UBON RATCHATHANI,24.240838,727.225127,S,Daily,Allocate from 96 Cluster (HH level)
162239,2025,202504,2025018,20250430,DB2S010100CS,Postpaid Gross Adds B2C : DTAC,DTAC,T,4,HH,910098,YASOTHON,0.0,0.0,S,Daily,Allocate from 96 Cluster (HH level)


In [24]:
''' Filter Data '''

# Keep only the months to be reloaded (both bounds inclusive)
in_load_window = last_agg_all_df['TM_KEY_MTH'].between(202503, 202504)
last_agg_all_df = last_agg_all_df.loc[in_load_window]
# last_agg_all_df = last_agg_all_df.loc[last_agg_all_df['TM_KEY_MTH']==202504]

last_agg_all_df#.tail(3)

Unnamed: 0,TM_KEY_YR,TM_KEY_MTH,TRUE_TM_KEY_WK,TM_KEY_DAY,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,DAY_VALUE,MTH_VALUE,AGG_TYPE,FREQUENCY,REMARK
79768,2025,202503,2025009,20250301,DB1R000900CG,Prepaid Inflow M1 : DTAC - GEO Channel,DTAC,T,1,P,P,Nationwide,6.409673e+06,1.986999e+08,S,Daily,Allocate from 96 Cluster (HH level)
79769,2025,202503,2025009,20250301,DB1R000900CG,Prepaid Inflow M1 : DTAC - GEO Channel,DTAC,T,2,G,GX1,Deputy CGO & Regional Management 1 (BMA-West),5.555767e+05,1.722288e+07,S,Daily,Allocate from 96 Cluster (HH level)
79770,2025,202503,2025009,20250301,DB1R000900CG,Prepaid Inflow M1 : DTAC - GEO Channel,DTAC,T,2,G,GX2,Regional Management 2 (BMA-East),8.463129e+05,2.623570e+07,S,Daily,Allocate from 96 Cluster (HH level)
79771,2025,202503,2025009,20250301,DB1R000900CG,Prepaid Inflow M1 : DTAC - GEO Channel,DTAC,T,2,G,GX3,Retail Management & Regional Management 3 (East),1.439032e+06,4.460998e+07,S,Daily,Allocate from 96 Cluster (HH level)
79772,2025,202503,2025009,20250301,DB1R000900CG,Prepaid Inflow M1 : DTAC - GEO Channel,DTAC,T,2,G,GX4,Regional Management 4 (North),7.112099e+05,2.204751e+07,S,Daily,Allocate from 96 Cluster (HH level)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162235,2025,202504,2025018,20250430,DB2S010100CS,Postpaid Gross Adds B2C : DTAC,DTAC,T,4,HH,910094,ROI ET,1.932510e+00,5.797531e+01,S,Daily,Allocate from 96 Cluster (HH level)
162236,2025,202504,2025018,20250430,DB2S010100CS,Postpaid Gross Adds B2C : DTAC,DTAC,T,4,HH,910095,SI SA KET,6.872557e+00,2.061767e+02,S,Daily,Allocate from 96 Cluster (HH level)
162237,2025,202504,2025018,20250430,DB2S010100CS,Postpaid Gross Adds B2C : DTAC,DTAC,T,4,HH,910096,SURIN,4.685977e+00,1.405793e+02,S,Daily,Allocate from 96 Cluster (HH level)
162238,2025,202504,2025018,20250430,DB2S010100CS,Postpaid Gross Adds B2C : DTAC,DTAC,T,4,HH,910097,UBON RATCHATHANI,2.424084e+01,7.272251e+02,S,Daily,Allocate from 96 Cluster (HH level)


In [26]:
# Disabled sanity check, kept for manual use. When un-commented it takes one month of
# last_agg_all_df, sums DAY_VALUE per month / metric / area type, and formats the sum
# with thousands separators so the area levels can be compared by eye.
# ''' Testing '''

# test_df = last_agg_all_df.copy()
# # test_df = test_df.loc[test_df['TM_KEY_DAY']==20250101]
# test_df = test_df.loc[test_df['TM_KEY_MTH']==202504]
# # test_df = test_df.loc[test_df['AREA_CD']=='P']
# # print(f'\n{test_df.to_string(max_cols=100)}')

# ''' Sum & Sort '''
# sum_test_df = test_df.groupby(['TM_KEY_MTH', 'METRIC_CD', 'METRIC_NAME', 'AREA_TYPE']).agg({'DAY_VALUE': 'sum'}).sort_values(by=['TM_KEY_MTH', 'METRIC_CD', 'AREA_TYPE']).reset_index()

# mod_col_list = sum_test_df.iloc[:, 4:5].columns.tolist()
# for col in mod_col_list:
#     sum_test_df[col] = sum_test_df[col].apply(lambda x: format(x, ',.0f'))

# sum_test_df

### Step 3 : Insert to "INTERIM_VINSIGHT_DATA"
    Delete -> Insert

In [27]:
''' Input Parameter '''

# Fail early with a clear message: with no rows, min()/max() below would raise an
# opaque "empty sequence" error and the IN-list would render as invalid SQL "()".
if last_agg_all_df.empty:
    raise ValueError('last_agg_all_df is empty : nothing to delete / insert')

# Create list
month_list = last_agg_all_df['TM_KEY_MTH'].drop_duplicates().tolist()
mt_cd_list = last_agg_all_df['METRIC_CD'].drop_duplicates().tolist()

# Render the metric codes as a SQL IN-list : ('A', 'B', ...).
# Built explicitly rather than via tuple/list repr, so one code needs no special case
# (a 1-tuple repr ends in a trailing comma) and an embedded single quote is doubled
# per SQL string-literal rules instead of breaking the statement.
metric_cd_sql = '(' + ', '.join("'" + str(cd).replace("'", "''") + "'" for cd in mt_cd_list) + ')'

# Create Param
# v_param = dict(mth_start=202406, mth_end=202408, metric_cd=mt_cd_list)
v_param = dict(mth_start=min(month_list), mth_end=max(month_list), metric_cd=metric_cd_sql)
v_target_schema = 'AUTOKPI'
v_target_table = 'INTERIM_VINSIGHT_DATA'

# Removes the month range x metric set that the load step re-inserts.
# NOTE(review): the values are inlined because the load step runs this text as-is;
# bind variables would be safer if both steps are changed together.
query_delete = f"""
    DELETE {v_target_schema}.{v_target_table} 
    WHERE TM_KEY_MTH BETWEEN {v_param['mth_start']} AND {v_param['mth_end']} 
    AND METRIC_CD IN {v_param['metric_cd']}
"""

print(f"\nParameter...\n\n   -> TM_KEY_MTH BETWEEN {v_param['mth_start']} AND {v_param['mth_end']}\n   -> METRIC_CD IN {v_param['metric_cd']}")
print(f'\nDataFrame...\n\n   -> last_agg_all_df : {last_agg_all_df.shape[0]} rows, {last_agg_all_df.shape[1]} columns') 
print(f'\nquery_delete...\n{query_delete}')


Parameter...

   -> TM_KEY_MTH BETWEEN 202503 AND 202504
   -> METRIC_CD IN ('DB1R000900CG', 'DB1R000900CS', 'DB1S000101CG', 'DB1S000101CS', 'DB2R010500CG', 'DB2R010500CS', 'DB2S010100CG', 'DB2S010100CS')

DataFrame...

   -> last_agg_all_df : 82472 rows, 17 columns

query_delete...

    DELETE AUTOKPI.INTERIM_VINSIGHT_DATA 
    WHERE TM_KEY_MTH BETWEEN 202503 AND 202504 
    AND METRIC_CD IN ('DB1R000900CG', 'DB1R000900CS', 'DB1S000101CG', 'DB1S000101CS', 'DB2R010500CG', 'DB2R010500CS', 'DB2S010100CG', 'DB2S010100CS')



In [28]:
''' Load Data '''

job_start_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
print(f'\nJob Start... {job_start_datetime}')


# Target columns, in bind order. Selecting them by name ties each row tuple to the
# INSERT column list instead of relying on the DataFrame's current column order.
insert_cols = ['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION',
               'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'DAY_VALUE', 'MTH_VALUE', 'AGG_TYPE', 'FREQUENCY', 'REMARK']

# Create rows from DataFrame
rows = [tuple(record) for record in last_agg_all_df[insert_cols].values]

# The INSERT is schema-qualified like the DELETE, so both statements hit the same table
# (it previously named the bare table, which resolves against the connecting user's schema).
col_list = ', '.join(insert_cols)
bind_list = ','.join(f':{pos}' for pos in range(1, len(insert_cols) + 1))
query_insert = f"""
    INSERT INTO {v_target_schema}.{v_target_table}
    ({col_list})
    VALUES ({bind_list})
"""


# Connect : AKPIPRD
# Credentials are passed as separate arguments rather than embedded in one
# "user/pwd@host" string, so a password containing '/' or '@' cannot corrupt it.
dsn = f'{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}'
conn = oracledb.connect(user=AKPIPRD_user, password=AKPIPRD_pwd, dsn=dsn)
print(f'\n{AKPIPRD_db} : Connected')
print(f'\nProcessing...')

job_ok = False
try:
    with conn.cursor() as cur:
        # # Truncate
        # cur.execute(f"TRUNCATE TABLE {v_target_schema}.{v_target_table}")
        # print(f'\n   -> TRUNCATE : "{v_target_table}" : Done !')

        # Delete
        cur.execute(query_delete)
        print(f'\n   -> DELETE : "{v_target_table}" : Done !')

        # Insert
        cur.executemany(query_insert, rows)
        print(f'\n   -> INSERT : "{v_target_table}" : Done !')

    # DELETE and INSERT are committed together, so a failed insert never leaves the table emptied
    conn.commit()
    job_ok = True


except oracledb.DatabaseError as e:
    # Undo the DELETE explicitly instead of relying on connection close
    conn.rollback()
    print(f'\nError with Oracle : {e}')


finally:
    conn.close()
    print(f'\n{AKPIPRD_db} : Disconnected')
    # Report the real outcome; "Job Done" used to be printed even after an error
    print('\nJob Done !!!' if job_ok else '\nJob Failed !!!')


Job Start... 2025-03-25, 15:34:34

AKPIPRD : Connected

Processing...

   -> DELETE : "INTERIM_VINSIGHT_DATA" : Done !

   -> INSERT : "INTERIM_VINSIGHT_DATA" : Done !

AKPIPRD : Disconnected

Job Done !!!
