# * Interim : 2025 Target Revenue TRUE
    96 Areas

In [1]:
import configparser
import datetime as dt
import pandas as pd
import numpy as np
import oracledb
import re

config = configparser.ConfigParser()
config.read('../../my_config.ini')
config.sections()

TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')
curr_dt

datetime.date(2025, 11, 12)

## ETL Process...

### Step 1 : Import Data Source

In [2]:
''' Rawdata '''

# Target_Revenue_TRUE_Y2025
src_file = '../../data/interim/Revenue/Target_Revenue_TRUE_Y2025.xlsx'
src_df_cols = ['CUST_TYPE', 'TM_KEY_MTH', 'METRIC_CD', 'METRIC_NAME', 'METRIC_VALUE', 'COMP_CD', 'VERSION', 'AREA_TYPE', 'AREA_CD', 'AREA_DESC']

src_b2c_sheet = 'B2C Rawdata'
src_b2c_df = pd.read_excel(src_file, sheet_name=src_b2c_sheet, index_col=None) 
src_b2c_df = src_b2c_df.loc[src_b2c_df['AREA_TYPE']=="HH"]
src_b2c_df['CUST_TYPE'] = 'B2C'
src_b2c_df = src_b2c_df[src_df_cols]

src_b2b_sheet = 'B2B Rawdata'
src_b2b_df = pd.read_excel(src_file, sheet_name=src_b2b_sheet, index_col=None) 
src_b2b_df['CUST_TYPE'] = 'B2B'
src_b2b_df = src_b2b_df[src_df_cols]

src_df = pd.concat([src_b2c_df, src_b2b_df])
src_df.rename(columns={'AREA_CD': 'AREA_KEY', 'METRIC_VALUE': 'TARGET_MTH'}, inplace=True)
src_df['COMP_CD'] = 'TRUE' #src_df['COMP_CD'].astype(str)
# src_df = src_df.replace(np.nan, None)
src_df = src_df.reset_index(drop=True)

print(f'\nsrc_df : {src_df.shape[0]} rows, {src_df.shape[1]} columns')
src_df#.tail(3)


src_df : 4235 rows, 10 columns


Unnamed: 0,CUST_TYPE,TM_KEY_MTH,METRIC_CD,METRIC_NAME,TARGET_MTH,COMP_CD,VERSION,AREA_TYPE,AREA_KEY,AREA_DESC
0,B2C,202501,TB1R000100,Prepaid Revenue : TMH,5.710683e+06,TRUE,T,HH,902033,ANG THONG
1,B2C,202501,TB1R000100,Prepaid Revenue : TMH,1.081782e+07,TRUE,T,HH,902034,CHAI NAT
2,B2C,202501,TB1R000100,Prepaid Revenue : TMH,1.663773e+07,TRUE,T,HH,902035,KANCHANABURI
3,B2C,202501,TB1R000100,Prepaid Revenue : TMH,1.987740e+07,TRUE,T,HH,902036,LOP BURI
4,B2C,202501,TB1R000100,Prepaid Revenue : TMH,3.576186e+07,TRUE,T,HH,902037,NAKHON PATHOM
...,...,...,...,...,...,...,...,...,...,...
4230,B2B,202507,TB2R020100,Postpaid Revenue B2B : TMH,3.182800e+08,TRUE,T,P,P,Nationwide
4231,B2B,202508,TB2R020100,Postpaid Revenue B2B : TMH,3.182800e+08,TRUE,T,P,P,Nationwide
4232,B2B,202509,TB2R020100,Postpaid Revenue B2B : TMH,3.182800e+08,TRUE,T,P,P,Nationwide
4233,B2B,202510,TB2R020100,Postpaid Revenue B2B : TMH,3.182800e+08,TRUE,T,P,P,Nationwide


In [3]:
''' Add Columns '''

def product_group(v_cd):
    cd = v_cd
    result = ''
    if re.search('B1', cd): result = 'Prepaid'
    elif re.search('B2', cd): result = 'Postpaid'
    elif re.search('B3', cd): result = 'TOL'
    elif re.search('B4', cd): result = 'TVS'
    else: result = 'Unknown' 
    return result

src_df['PRODUCT_GRP'] = src_df.apply(lambda x: product_group(x['METRIC_CD']), axis=1)
# src_df['FREQUENCY'] = np.where(src_df['PRODUCT_GRP']=='Prepaid', 'DAY', 'BILL')
src_df['FREQUENCY'] = np.where(src_df['PRODUCT_GRP']=='Prepaid', 'Daily', 'Bill Cycle')
src_df['TM_KEY_YR'] = src_df['TM_KEY_MTH'].apply(str).str[:4].astype(int)

src_df#.tail(3)

Unnamed: 0,CUST_TYPE,TM_KEY_MTH,METRIC_CD,METRIC_NAME,TARGET_MTH,COMP_CD,VERSION,AREA_TYPE,AREA_KEY,AREA_DESC,PRODUCT_GRP,FREQUENCY,TM_KEY_YR
0,B2C,202501,TB1R000100,Prepaid Revenue : TMH,5.710683e+06,TRUE,T,HH,902033,ANG THONG,Prepaid,Daily,2025
1,B2C,202501,TB1R000100,Prepaid Revenue : TMH,1.081782e+07,TRUE,T,HH,902034,CHAI NAT,Prepaid,Daily,2025
2,B2C,202501,TB1R000100,Prepaid Revenue : TMH,1.663773e+07,TRUE,T,HH,902035,KANCHANABURI,Prepaid,Daily,2025
3,B2C,202501,TB1R000100,Prepaid Revenue : TMH,1.987740e+07,TRUE,T,HH,902036,LOP BURI,Prepaid,Daily,2025
4,B2C,202501,TB1R000100,Prepaid Revenue : TMH,3.576186e+07,TRUE,T,HH,902037,NAKHON PATHOM,Prepaid,Daily,2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4230,B2B,202507,TB2R020100,Postpaid Revenue B2B : TMH,3.182800e+08,TRUE,T,P,P,Nationwide,Postpaid,Bill Cycle,2025
4231,B2B,202508,TB2R020100,Postpaid Revenue B2B : TMH,3.182800e+08,TRUE,T,P,P,Nationwide,Postpaid,Bill Cycle,2025
4232,B2B,202509,TB2R020100,Postpaid Revenue B2B : TMH,3.182800e+08,TRUE,T,P,P,Nationwide,Postpaid,Bill Cycle,2025
4233,B2B,202510,TB2R020100,Postpaid Revenue B2B : TMH,3.182800e+08,TRUE,T,P,P,Nationwide,Postpaid,Bill Cycle,2025


In [4]:
''' Master Data '''

# DIM_TIME
dt_file = '../CFW/data/dim_time.csv'
# dt_cols = ['TM_KEY_YR', 'MONTH_SHORT', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'DAYS_IN_MONTH']
dt_cols = ['TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'DAY_NO', 'DAYS_IN_MONTH', 'PERIODFLAG']
dt_df = pd.read_csv(dt_file, usecols=dt_cols)

# DIM_MOOC_AREA
mooc_file = '../CFW/data/dim_mooc_area.csv'
mooc_cols = ['ZONE_TYPE', 'TEAM_CODE', 'ORGID_G', 'TDS_SGMD', 'ORGID_R', 'TDS_RGM_CODE', 'ORGID_H', 'HOP_HINT', 'TDS_PROVINCE', 'PROVINCE_ENG', 'PROVINCE_TH', 'ORGID_HH', 'D_CLUSTER', 'CCAATT', 'REMARK']
mooc_df = pd.read_csv(mooc_file, usecols=mooc_cols)
mooc_df = mooc_df.loc[(mooc_df['REMARK']!='Dummy') & (mooc_df['TEAM_CODE']!='ไม่ระบุ') & (mooc_df['HOP_HINT']!='True Corp')]

# Create HH level
mooc_hh_df = mooc_df[['ZONE_TYPE', 'ORGID_G', 'TDS_SGMD', 'ORGID_H', 'HOP_HINT', 'ORGID_HH', 'D_CLUSTER']].drop_duplicates()
mooc_hh_df.dropna(how='all', inplace=True)
mooc_hh_df['AREA_KEY'] = mooc_hh_df['ORGID_HH'].astype(int).astype(str)

In [5]:
''' Portion Data '''

portion_file = '../CFW/data/revenue_portion.xlsx'

new_existing_sheet = 'New & Existing'
new_existing_df = pd.read_excel(portion_file, sheet_name=new_existing_sheet, index_col=None) 
new_existing_df = new_existing_df.loc[new_existing_df['TM_KEY_YR']==2025]
new_existing_cols = ['PRODUCT_GRP', 'TM_KEY_MTH', 'NEW', 'EXIST']
new_existing_df = new_existing_df[new_existing_cols]
# new_existing_df

bill_cycle_sheet = 'Bill Cycle'
bill_cycle_df = pd.read_excel(portion_file, sheet_name=bill_cycle_sheet, index_col=None) 
bill_cycle_df = bill_cycle_df.loc[bill_cycle_df['TM_KEY_YR']==2025]
bill_cycle_cols = ['METRIC_CD', 'TM_KEY_YR', 'BILLING_DAY', 'BILL_PORTION']
bill_cycle_df = bill_cycle_df[bill_cycle_cols]
# bill_cycle_df

# src_df_cols = ['TM_KEY_MTH', 'METRIC_CD', 'METRIC_NAME', 'METRIC_VALUE', 'COMP_CD', 'VERSION', 'AREA_TYPE', 'AREA_CD', 'AREA_DESC']
# src_df = src_df[src_df_cols]
# src_df.rename(columns={'AREA_CD': 'AREA_KEY', 'METRIC_VALUE': 'MTH_VALUE'}, inplace=True)

### Step 2 : Aggregate Data

In [7]:
# ''' Example DataFrame '''

# src_df.tail(3)
# dt_df.tail(3)
# mooc_df.tail(3)
# mooc_h_df.tail(3)
# mooc_h_df.loc[mooc_h_df['ORGID_H'].str.contains('^0')].tail(3)

In [7]:
''' Filter Rawdata

    TB1R000100	Prepaid Revenue : TMH
    TB2R010100	Postpaid Revenue B2C : TMH
    TB2R020100	Postpaid Revenue B2B : TMH
    TB3R000100	TOL Revenue
    TB4R000100	TVS Revenue
'''

''' Filter '''
raw_df = src_df.copy()
raw_df = raw_df.loc[raw_df['TM_KEY_MTH']==202511]
# raw_df = raw_df.loc[raw_df['TM_KEY_MTH']>=202510]
# raw_df = raw_df.loc[raw_df['METRIC_CD']=='TB1R000100'] #Prepaid Revenue : TMH
# raw_df = raw_df.loc[raw_df['TM_KEY_MTH'].isin([202501, 202502, 202503])]
# raw_df = raw_df.loc[raw_df['METRIC_CD'].isin(['TB1R000100', 'TB2R010100', 'TB3R000100', 'TB4R000100'])]

''' Data Test '''
# raw_df = raw_df.loc[raw_df['TM_KEY_MTH']==202504]
# raw_df = raw_df.loc[raw_df['METRIC_CD']=='TB1R000100']
# raw_df = raw_df.loc[raw_df['AREA_KEY']=='902033']
# raw_df = raw_df.loc[raw_df['AREA_KEY'].isna()]

raw_df = raw_df.reset_index(drop=True)
print(f'\nraw_df : {raw_df.shape[0]} rows, {raw_df.shape[1]} columns')
raw_df#.tail(3)


raw_df : 385 rows, 13 columns


Unnamed: 0,CUST_TYPE,TM_KEY_MTH,METRIC_CD,METRIC_NAME,TARGET_MTH,COMP_CD,VERSION,AREA_TYPE,AREA_KEY,AREA_DESC,PRODUCT_GRP,FREQUENCY,TM_KEY_YR
0,B2C,202511,TB1R000100,Prepaid Revenue : TMH,2.965001e+07,TRUE,T,HH,907030,"BKK : Bang Khen, Lat Phrao, Wang Thonglang",Prepaid,Daily,2025
1,B2C,202511,TB1R000100,Prepaid Revenue : TMH,2.027266e+07,TRUE,T,HH,907016,"BKK : Bang Sue, Chatuchak",Prepaid,Daily,2025
2,B2C,202511,TB1R000100,Prepaid Revenue : TMH,2.597717e+07,TRUE,T,HH,907017,"BKK : Don Mueang, Sai Mai, Lak Si",Prepaid,Daily,2025
3,B2C,202511,TB1R000100,Prepaid Revenue : TMH,4.181252e+07,TRUE,T,HH,907019,"BKK : Lat Krabang, Nong Chok, Khlong Sam Wa",Prepaid,Daily,2025
4,B2C,202511,TB1R000100,Prepaid Revenue : TMH,2.523325e+07,TRUE,T,HH,907020,"BKK : Min Buri, Khan Na Yao, Bueng Kum",Prepaid,Daily,2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...
380,B2C,202511,TB4R000100,TVS Revenue,3.564107e+06,TRUE,T,HH,906109,SONGKHLA,TVS,Bill Cycle,2025
381,B2C,202511,TB4R000100,TVS Revenue,4.869001e+06,TRUE,T,HH,906110,SURAT THANI,TVS,Bill Cycle,2025
382,B2C,202511,TB4R000100,TVS Revenue,6.916983e+05,TRUE,T,HH,906111,TRANG,TVS,Bill Cycle,2025
383,B2C,202511,TB4R000100,TVS Revenue,2.956843e+05,TRUE,T,HH,906112,YALA,TVS,Bill Cycle,2025


In [8]:
''' Join New & Existing '''

merge_new_existing_df = pd.merge(raw_df, new_existing_df, how='left', on=['PRODUCT_GRP', 'TM_KEY_MTH'])
merge_new_existing_df['TARGET_MTH_NEW'] = merge_new_existing_df['TARGET_MTH'] * merge_new_existing_df['NEW']
merge_new_existing_df['TARGET_MTH_EXIST'] = merge_new_existing_df['TARGET_MTH'] * merge_new_existing_df['EXIST']
merge_new_existing_df = merge_new_existing_df[['CUST_TYPE', 'TM_KEY_YR', 'TM_KEY_MTH', 'PRODUCT_GRP', 'FREQUENCY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_TYPE', 'AREA_KEY', 'AREA_DESC', 'TARGET_MTH', 'TARGET_MTH_NEW', 'TARGET_MTH_EXIST']]
merge_new_existing_df.tail(3)

Unnamed: 0,CUST_TYPE,TM_KEY_YR,TM_KEY_MTH,PRODUCT_GRP,FREQUENCY,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,AREA_TYPE,AREA_KEY,AREA_DESC,TARGET_MTH,TARGET_MTH_NEW,TARGET_MTH_EXIST
382,B2C,2025,202511,TVS,Bill Cycle,TB4R000100,TVS Revenue,True,T,HH,906111,TRANG,691698.3,4124.501,687569.8
383,B2C,2025,202511,TVS,Bill Cycle,TB4R000100,TVS Revenue,True,T,HH,906112,YALA,295684.3,1763.124,293919.4
384,B2B,2025,202511,Postpaid,Bill Cycle,TB2R020100,Postpaid Revenue B2B : TMH,True,T,P,P,Nationwide,318280000.0,53602000.0,263907500.0


In [9]:
''' Allocate Daily '''

day_df = merge_new_existing_df.loc[merge_new_existing_df['FREQUENCY']=='Daily']

day_df = pd.merge(day_df, dt_df, how='left', on='TM_KEY_MTH')

day_df['TARGET_DAY'] = day_df['TARGET_MTH'] / day_df['DAYS_IN_MONTH']
day_df['TARGET_DAY_NEW'] = day_df['TARGET_MTH_NEW'] / day_df['DAYS_IN_MONTH']
day_df['TARGET_DAY_EXIST'] = day_df['TARGET_MTH_EXIST'] / day_df['DAYS_IN_MONTH']

day_df = day_df[['CUST_TYPE', 'TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'DAYS_IN_MONTH', 'PERIODFLAG', 'PRODUCT_GRP', 'FREQUENCY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_TYPE', 'AREA_KEY', 'AREA_DESC', 'TARGET_MTH', 'TARGET_MTH_NEW', 'TARGET_MTH_EXIST', 'TARGET_DAY', 'TARGET_DAY_NEW', 'TARGET_DAY_EXIST']]
day_df.tail(3)

Unnamed: 0,CUST_TYPE,TM_KEY_YR,TM_KEY_MTH,TRUE_TM_KEY_WK,TM_KEY_DAY,DAYS_IN_MONTH,PERIODFLAG,PRODUCT_GRP,FREQUENCY,METRIC_CD,...,VERSION,AREA_TYPE,AREA_KEY,AREA_DESC,TARGET_MTH,TARGET_MTH_NEW,TARGET_MTH_EXIST,TARGET_DAY,TARGET_DAY_NEW,TARGET_DAY_EXIST
2877,B2C,2025,202511,2025048,20251128,30,N,Prepaid,Daily,TB1R000100,...,T,HH,906112,YALA,7993613.0,2367855.0,5625758.0,266453.782004,78928.506158,187525.275846
2878,B2C,2025,202511,2025048,20251129,30,N,Prepaid,Daily,TB1R000100,...,T,HH,906112,YALA,7993613.0,2367855.0,5625758.0,266453.782004,78928.506158,187525.275846
2879,B2C,2025,202511,2025048,20251130,30,EM,Prepaid,Daily,TB1R000100,...,T,HH,906112,YALA,7993613.0,2367855.0,5625758.0,266453.782004,78928.506158,187525.275846


In [10]:
''' Allocate Bill Cycle '''

bill_df = merge_new_existing_df.loc[merge_new_existing_df['FREQUENCY']=='Bill Cycle']
bill_df = pd.merge(bill_df, bill_cycle_df, how='left', on=['METRIC_CD', 'TM_KEY_YR'])

day_in_month_df = dt_df[['TM_KEY_MTH', 'DAYS_IN_MONTH']].drop_duplicates()
bill_df = pd.merge(bill_df, day_in_month_df, how='left', on='TM_KEY_MTH')
bill_df['BILLING_DAY'] = np.where(bill_df['BILLING_DAY']==1, bill_df['DAYS_IN_MONTH'], bill_df['BILLING_DAY'])

period_flag_df = dt_df[['TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'DAY_NO', 'PERIODFLAG']].drop_duplicates()
period_flag_df.rename(columns={'DAY_NO': 'BILLING_DAY'}, inplace=True)
bill_df = pd.merge(bill_df, period_flag_df, how='left', on=['TM_KEY_MTH', 'BILLING_DAY'])

bill_df['TARGET_DAY'] = bill_df['TARGET_MTH'] * bill_df['BILL_PORTION']
bill_df['TARGET_DAY_NEW'] = bill_df['TARGET_MTH_NEW'] * bill_df['BILL_PORTION']
bill_df['TARGET_DAY_EXIST'] = bill_df['TARGET_MTH_EXIST'] * bill_df['BILL_PORTION']

bill_df = bill_df[['CUST_TYPE', 'TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'DAYS_IN_MONTH', 'PERIODFLAG', 'PRODUCT_GRP', 'FREQUENCY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_TYPE', 'AREA_KEY', 'AREA_DESC', 'TARGET_MTH', 'TARGET_MTH_NEW', 'TARGET_MTH_EXIST', 'TARGET_DAY', 'TARGET_DAY_NEW', 'TARGET_DAY_EXIST']]

''' Test '''
# bill_df = bill_df.loc[bill_df['PRODUCT_GRP']=='TOL']

bill_df.tail(3)

Unnamed: 0,CUST_TYPE,TM_KEY_YR,TM_KEY_MTH,TRUE_TM_KEY_WK,TM_KEY_DAY,DAYS_IN_MONTH,PERIODFLAG,PRODUCT_GRP,FREQUENCY,METRIC_CD,...,VERSION,AREA_TYPE,AREA_KEY,AREA_DESC,TARGET_MTH,TARGET_MTH_NEW,TARGET_MTH_EXIST,TARGET_DAY,TARGET_DAY_NEW,TARGET_DAY_EXIST
2887,B2B,2025,202511,2025047,20251122,30,N,Postpaid,Bill Cycle,TB2R020100,...,T,P,P,Nationwide,318280000.0,53602000.0,263907500.0,15639960.0,2633949.0,12968150.0
2888,B2B,2025,202511,2025048,20251125,30,N,Postpaid,Bill Cycle,TB2R020100,...,T,P,P,Nationwide,318280000.0,53602000.0,263907500.0,35788670.0,6027223.0,29674810.0
2889,B2B,2025,202511,2025048,20251128,30,N,Postpaid,Bill Cycle,TB2R020100,...,T,P,P,Nationwide,318280000.0,53602000.0,263907500.0,44267960.0,7455231.0,36705560.0


In [11]:
''' Concat Day & Bill '''

day_and_bill_df = pd.concat([day_df, bill_df])

''' Test '''
# day_and_bill_df = day_and_bill_df.loc[day_and_bill_df['PRODUCT_GRP']=='Postpaid']
# day_and_bill_df = day_and_bill_df.loc[day_and_bill_df['AREA_KEY'].isin(['P', '902033'])]

print(f'day_and_bill_df : {day_and_bill_df.shape[0]} rows, {day_and_bill_df.shape[1]} columns')
day_and_bill_df.tail(3)

day_and_bill_df : 5770 rows, 22 columns


Unnamed: 0,CUST_TYPE,TM_KEY_YR,TM_KEY_MTH,TRUE_TM_KEY_WK,TM_KEY_DAY,DAYS_IN_MONTH,PERIODFLAG,PRODUCT_GRP,FREQUENCY,METRIC_CD,...,VERSION,AREA_TYPE,AREA_KEY,AREA_DESC,TARGET_MTH,TARGET_MTH_NEW,TARGET_MTH_EXIST,TARGET_DAY,TARGET_DAY_NEW,TARGET_DAY_EXIST
2887,B2B,2025,202511,2025047,20251122,30,N,Postpaid,Bill Cycle,TB2R020100,...,T,P,P,Nationwide,318280000.0,53602000.0,263907500.0,15639960.0,2633949.0,12968150.0
2888,B2B,2025,202511,2025048,20251125,30,N,Postpaid,Bill Cycle,TB2R020100,...,T,P,P,Nationwide,318280000.0,53602000.0,263907500.0,35788670.0,6027223.0,29674810.0
2889,B2B,2025,202511,2025048,20251128,30,N,Postpaid,Bill Cycle,TB2R020100,...,T,P,P,Nationwide,318280000.0,53602000.0,263907500.0,44267960.0,7455231.0,36705560.0


In [12]:
''' Join Area '''

merge_hh_df = pd.merge(day_and_bill_df, mooc_hh_df, how='left', on='AREA_KEY')
# merge_hh_df = pd.merge(day_and_bill_df.loc[day_and_bill_df['AREA_TYPE']=='HH'], mooc_hh_df, how='left', on='AREA_KEY')

''' Test '''
# merge_hh_df = merge_hh_df.loc[merge_hh_df['PRODUCT_GRP']=='TOL']
# merge_hh_df = merge_hh_df.loc[merge_hh_df['AREA_KEY'].isin(['P', '902033'])]
# merge_hh_df = merge_hh_df.loc[merge_hh_df['TM_KEY_DAY']==20250302]

merge_hh_df = merge_hh_df[['CUST_TYPE', 'TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'DAYS_IN_MONTH', 'PERIODFLAG', 'PRODUCT_GRP', 'FREQUENCY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_TYPE', 'AREA_KEY', 'AREA_DESC', 'ZONE_TYPE', 'ORGID_G', 'TDS_SGMD', 'ORGID_H', 'HOP_HINT', 'ORGID_HH', 'D_CLUSTER', 'TARGET_MTH', 'TARGET_MTH_NEW', 'TARGET_MTH_EXIST', 'TARGET_DAY', 'TARGET_DAY_NEW', 'TARGET_DAY_EXIST']]
print(f'merge_hh_df : {merge_hh_df.shape[0]} rows, {merge_hh_df.shape[1]} columns')
merge_hh_df.tail(3)

merge_hh_df : 5770 rows, 29 columns


Unnamed: 0,CUST_TYPE,TM_KEY_YR,TM_KEY_MTH,TRUE_TM_KEY_WK,TM_KEY_DAY,DAYS_IN_MONTH,PERIODFLAG,PRODUCT_GRP,FREQUENCY,METRIC_CD,...,ORGID_H,HOP_HINT,ORGID_HH,D_CLUSTER,TARGET_MTH,TARGET_MTH_NEW,TARGET_MTH_EXIST,TARGET_DAY,TARGET_DAY_NEW,TARGET_DAY_EXIST
5767,B2B,2025,202511,2025047,20251122,30,N,Postpaid,Bill Cycle,TB2R020100,...,,,,,318280000.0,53602000.0,263907500.0,15639960.0,2633949.0,12968150.0
5768,B2B,2025,202511,2025048,20251125,30,N,Postpaid,Bill Cycle,TB2R020100,...,,,,,318280000.0,53602000.0,263907500.0,35788670.0,6027223.0,29674810.0
5769,B2B,2025,202511,2025048,20251128,30,N,Postpaid,Bill Cycle,TB2R020100,...,,,,,318280000.0,53602000.0,263907500.0,44267960.0,7455231.0,36705560.0


In [13]:
''' Aggregate P, G, H, HH '''

# agg_cols = ['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'DAY_VALUE', 'MTH_VALUE'] # , 'FREQUENCY', 'REMARK'
agg_cols = ['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'FREQUENCY', 'TARGET_MTH', 'TARGET_MTH_NEW', 'TARGET_MTH_EXIST', 'TARGET_DAY', 'TARGET_DAY_NEW', 'TARGET_DAY_EXIST']

# P : Nationwide
agg_p_df = merge_hh_df.groupby(['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'FREQUENCY']).agg({'TARGET_MTH': 'sum', 'TARGET_MTH_NEW': 'sum', 'TARGET_MTH_EXIST': 'sum', 'TARGET_DAY': 'sum', 'TARGET_DAY_NEW': 'sum', 'TARGET_DAY_EXIST': 'sum'}).reset_index()
agg_p_df['AREA_NO'] = 1
agg_p_df['AREA_TYPE'] = 'P'
agg_p_df['AREA_CD'] = 'P'
agg_p_df['AREA_NAME'] = 'Nationwide'
agg_p_df = agg_p_df.loc[:, agg_cols]
# agg_p_df[agg_p_df['TM_KEY_DAY']==20240501]

# G : Region
agg_g_df = merge_hh_df.groupby(['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'FREQUENCY', 'ORGID_G', 'TDS_SGMD']).agg({'TARGET_MTH': 'sum', 'TARGET_MTH_NEW': 'sum', 'TARGET_MTH_EXIST': 'sum', 'TARGET_DAY': 'sum', 'TARGET_DAY_NEW': 'sum', 'TARGET_DAY_EXIST': 'sum'}).reset_index()
agg_g_df['AREA_NO'] = 2
agg_g_df['AREA_TYPE'] = 'G'
agg_g_df.rename(columns={'ORGID_G': 'AREA_CD'}, inplace=True)
agg_g_df.rename(columns={'TDS_SGMD': 'AREA_NAME'}, inplace=True)
agg_g_df = agg_g_df.loc[:, agg_cols]
# agg_g_df[agg_g_df['TM_KEY_DAY']==20240501]

# H : HOP_HINT
agg_h_df = merge_hh_df.groupby(['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'FREQUENCY', 'ORGID_H', 'HOP_HINT']).agg({'TARGET_MTH': 'sum', 'TARGET_MTH_NEW': 'sum', 'TARGET_MTH_EXIST': 'sum', 'TARGET_DAY': 'sum', 'TARGET_DAY_NEW': 'sum', 'TARGET_DAY_EXIST': 'sum'}).reset_index()
agg_h_df['AREA_NO'] = 3
agg_h_df['AREA_TYPE'] = 'H'
agg_h_df.rename(columns={'ORGID_H': 'AREA_CD'}, inplace=True)
agg_h_df.rename(columns={'HOP_HINT': 'AREA_NAME'}, inplace=True)
agg_h_df = agg_h_df.loc[:, agg_cols]
# agg_h_df[agg_h_df['TM_KEY_DAY']==20240501]

# HH : D_CLUSTER
agg_hh_df = merge_hh_df.groupby(['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'FREQUENCY', 'ORGID_HH', 'D_CLUSTER']).agg({'TARGET_MTH': 'sum', 'TARGET_MTH_NEW': 'sum', 'TARGET_MTH_EXIST': 'sum', 'TARGET_DAY': 'sum', 'TARGET_DAY_NEW': 'sum', 'TARGET_DAY_EXIST': 'sum'}).reset_index()
agg_hh_df['AREA_NO'] = 4
agg_hh_df['AREA_TYPE'] = 'HH'
agg_hh_df['ORGID_HH'] = agg_hh_df['ORGID_HH'].astype(int).astype(str)
agg_hh_df.rename(columns={'ORGID_HH': 'AREA_CD'}, inplace=True)
agg_hh_df.rename(columns={'D_CLUSTER': 'AREA_NAME'}, inplace=True)
agg_hh_df = agg_hh_df.loc[:, agg_cols]
# agg_hh_df[agg_hh_df['TM_KEY_DAY']==20240601]

# Concat DataFrame
agg_all_area_df = pd.concat([agg_p_df, agg_g_df, agg_h_df, agg_hh_df], ignore_index=True)
# agg_all_area_df['AGG_TYPE'] = 'S'
# agg_all_area_df['FREQUENCY'] = np.where(agg_all_area_df['FREQUENCY']=='DAY', 'Daily', 'Bill Cycle')
# agg_all_area_df['REMARK'] = 'Allocate from 96 Cluster (HH level)'
# # agg_all_area_df['REMARK'] = agg_all_area_df['TM_KEY_MTH'].apply(lambda x: 'H Level 64 Province' if x>=202401 and x<=202403 else 'HH Level 96 Cluster')
# agg_all_area_df = agg_all_area_df[['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'TARGET_DAY', 'TARGET_DAY_NEW', 'TARGET_DAY_EXIST', 'TARGET_MTH', 'TARGET_MTH_NEW', 'TARGET_MTH_EXIST', 'AGG_TYPE', 'FREQUENCY', 'REMARK']]

print(f'agg_all_area_df : {agg_all_area_df.shape[0]} rows, {agg_all_area_df.shape[1]} columns')
# agg_all_area_df.loc[agg_all_area_df['TM_KEY_DAY']==20240601]
agg_all_area_df.tail(3)

agg_all_area_df : 10150 rows, 19 columns


Unnamed: 0,TM_KEY_YR,TM_KEY_MTH,TRUE_TM_KEY_WK,TM_KEY_DAY,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,FREQUENCY,TARGET_MTH,TARGET_MTH_NEW,TARGET_MTH_EXIST,TARGET_DAY,TARGET_DAY_NEW,TARGET_DAY_EXIST
10147,2025,202511,2025048,20251130,TB4R000100,TVS Revenue,True,T,4,HH,910096,SURIN,Bill Cycle,1280847.0,7637.514618,1273202.0,420974.623009,2510.213583,418462.005072
10148,2025,202511,2025048,20251130,TB4R000100,TVS Revenue,True,T,4,HH,910097,UBON RATCHATHANI,Bill Cycle,1868074.0,11139.066384,1856924.0,613977.780208,3661.064774,610313.208761
10149,2025,202511,2025048,20251130,TB4R000100,TVS Revenue,True,T,4,HH,910098,YASOTHON,Bill Cycle,456687.5,2723.16427,453961.7,150098.966674,895.019425,149203.089974


In [14]:
''' Aggregate TB2R000100 : Postpaid Revenue : TMH (Nationwide Only)'''

post_revenue_tmh_df = agg_all_area_df.loc[(agg_all_area_df['METRIC_CD'].isin(['TB2R010100', 'TB2R020100'])) & (agg_all_area_df['AREA_CD']=='P')]
post_revenue_tmh_df = post_revenue_tmh_df\
    .groupby(['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'COMP_CD', 'VERSION', 'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'FREQUENCY'])\
        .agg({'TARGET_MTH': 'sum', 'TARGET_MTH_NEW': 'sum', 'TARGET_MTH_EXIST': 'sum', 'TARGET_DAY': 'sum', 'TARGET_DAY_NEW': 'sum', 'TARGET_DAY_EXIST': 'sum'}).reset_index()
post_revenue_tmh_df['METRIC_CD'] = 'TB2R000100'
post_revenue_tmh_df['METRIC_NAME'] = 'Postpaid Revenue : TMH'
post_revenue_tmh_df = post_revenue_tmh_df[['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'FREQUENCY', 'TARGET_MTH', 'TARGET_MTH_NEW', 'TARGET_MTH_EXIST', 'TARGET_DAY', 'TARGET_DAY_NEW', 'TARGET_DAY_EXIST']]

''' Test '''
# post_revenue_tmh_df = post_revenue_tmh_df.loc[post_revenue_tmh_df['PRODUCT_GRP']=='TOL']
# post_revenue_tmh_df = post_revenue_tmh_df.loc[post_revenue_tmh_df['AREA_KEY'].isin(['P', '902033'])]
# post_revenue_tmh_df = post_revenue_tmh_df.loc[post_revenue_tmh_df['TM_KEY_MTH']==202503]
# post_revenue_tmh_df = post_revenue_tmh_df.loc[post_revenue_tmh_df['TM_KEY_DAY']==20250302]
# post_revenue_tmh_df = post_revenue_tmh_df.loc[post_revenue_tmh_df['AREA_TYPE']=='P']

print(f'post_revenue_tmh_df : {post_revenue_tmh_df.shape[0]} rows, {post_revenue_tmh_df.shape[1]} columns')
post_revenue_tmh_df.tail(3)

post_revenue_tmh_df : 10 rows, 19 columns


Unnamed: 0,TM_KEY_YR,TM_KEY_MTH,TRUE_TM_KEY_WK,TM_KEY_DAY,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,FREQUENCY,TARGET_MTH,TARGET_MTH_NEW,TARGET_MTH_EXIST,TARGET_DAY,TARGET_DAY_NEW,TARGET_DAY_EXIST
7,2025,202511,2025048,20251125,TB2R000100,Postpaid Revenue : TMH,True,T,1,P,P,Nationwide,Bill Cycle,3737211000.0,629389200.0,3098775000.0,373745800.0,62943080.0,309898000.0
8,2025,202511,2025048,20251128,TB2R000100,Postpaid Revenue : TMH,True,T,1,P,P,Nationwide,Bill Cycle,3737211000.0,629389200.0,3098775000.0,402091900.0,67716890.0,333401700.0
9,2025,202511,2025048,20251130,TB2R000100,Postpaid Revenue : TMH,True,T,1,P,P,Nationwide,Bill Cycle,3737211000.0,629389200.0,3098775000.0,25972120.0,4374003.0,21535250.0


In [15]:
''' Prepairing Latest Results '''

latest_concat_df = pd.concat([agg_all_area_df, post_revenue_tmh_df], ignore_index=True)
latest_concat_df.rename(columns={'TARGET_DAY': 'DAY_VALUE', 'TARGET_MTH': 'MTH_VALUE'}, inplace=True)
latest_concat_df['AGG_TYPE'] = 'S'
latest_concat_df['REMARK'] = 'Allocate from 96 Cluster (HH level)'

latest_results_df = latest_concat_df[['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'DAY_VALUE', 'MTH_VALUE', 'AGG_TYPE', 'FREQUENCY', 'REMARK']]
latest_results_df.tail(3)

Unnamed: 0,TM_KEY_YR,TM_KEY_MTH,TRUE_TM_KEY_WK,TM_KEY_DAY,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,DAY_VALUE,MTH_VALUE,AGG_TYPE,FREQUENCY,REMARK
10157,2025,202511,2025048,20251125,TB2R000100,Postpaid Revenue : TMH,True,T,1,P,P,Nationwide,373745800.0,3737211000.0,S,Bill Cycle,Allocate from 96 Cluster (HH level)
10158,2025,202511,2025048,20251128,TB2R000100,Postpaid Revenue : TMH,True,T,1,P,P,Nationwide,402091900.0,3737211000.0,S,Bill Cycle,Allocate from 96 Cluster (HH level)
10159,2025,202511,2025048,20251130,TB2R000100,Postpaid Revenue : TMH,True,T,1,P,P,Nationwide,25972120.0,3737211000.0,S,Bill Cycle,Allocate from 96 Cluster (HH level)


In [16]:
sample_daily_df = latest_results_df.loc[latest_results_df['TM_KEY_DAY']==20251102]

sample_daily_df = sample_daily_df\
    .groupby(['TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'FREQUENCY', 'METRIC_CD', 'METRIC_NAME', 'AREA_TYPE'])\
        .agg({'MTH_VALUE': 'sum'}).reset_index()

mod_col_list = sample_daily_df.iloc[:, 8:].columns.tolist()
for col in mod_col_list:
    sample_daily_df[col] = sample_daily_df[col].apply(lambda x: format(x, ',.0f'))

sample_daily_df

Unnamed: 0,TM_KEY_YR,TM_KEY_MTH,TRUE_TM_KEY_WK,TM_KEY_DAY,FREQUENCY,METRIC_CD,METRIC_NAME,AREA_TYPE,MTH_VALUE
0,2025,202511,2025044,20251102,Bill Cycle,TB2R000100,Postpaid Revenue : TMH,P,3737211385
1,2025,202511,2025044,20251102,Bill Cycle,TB2R010100,Postpaid Revenue B2C : TMH,G,3418931385
2,2025,202511,2025044,20251102,Bill Cycle,TB2R010100,Postpaid Revenue B2C : TMH,H,3418931385
3,2025,202511,2025044,20251102,Bill Cycle,TB2R010100,Postpaid Revenue B2C : TMH,HH,3418931385
4,2025,202511,2025044,20251102,Bill Cycle,TB2R010100,Postpaid Revenue B2C : TMH,P,3418931385
5,2025,202511,2025044,20251102,Bill Cycle,TB2R020100,Postpaid Revenue B2B : TMH,P,318280000
6,2025,202511,2025044,20251102,Bill Cycle,TB3R000100,TOL Revenue,G,1674162284
7,2025,202511,2025044,20251102,Bill Cycle,TB3R000100,TOL Revenue,H,1674162284
8,2025,202511,2025044,20251102,Bill Cycle,TB3R000100,TOL Revenue,HH,1674162284
9,2025,202511,2025044,20251102,Bill Cycle,TB3R000100,TOL Revenue,P,1674162284


In [None]:
# main_kpi_df = merge_hh_df
# main_kpi_df = main_kpi_df[['CUST_TYPE', 'TM_KEY_YR', 'TM_KEY_MTH', 'TRUE_TM_KEY_WK', 'TM_KEY_DAY', 'DAYS_IN_MONTH', 'PERIODFLAG', 'PRODUCT_GRP', 'FREQUENCY', 'METRIC_CD', 'METRIC_NAME', 'COMP_CD', 'VERSION', 'AREA_TYPE', 'AREA_KEY', 'AREA_DESC', 'TARGET_MTH', 'TARGET_DAY']]

# main_kpi_df.tail(3)

### Step 3 : Insert to "INTERIM_VINSIGHT_DATA"
    Delete -> Insert

In [17]:
''' Input Parameter '''

# Create list
month_list = latest_results_df['TM_KEY_MTH'].drop_duplicates().tolist()
mt_cd_list = latest_results_df['METRIC_CD'].drop_duplicates().tolist()

if len(mt_cd_list) == 1:
    mt_cd_list = str(mt_cd_list).replace(r'[', '(').replace(r']', ')')
else:
    mt_cd_list = tuple(mt_cd_list)

# Create Param
# v_param = dict(mth_start=202406, mth_end=202408, metric_cd=mt_cd_list)
v_param = dict(mth_start=min(month_list), mth_end=max(month_list), metric_cd=mt_cd_list)
v_target_schema = 'AUTOKPI'
v_target_table = 'INTERIM_VINSIGHT_DATA'

# query_delete = f"DELETE {v_target_schema}.{v_target_table} WHERE TM_KEY_MTH BETWEEN {v_param['mth_start']} AND {v_param['mth_end']} AND METRIC_CD IN {v_param['metric_cd']}"
query_delete = f"""
    DELETE {v_target_schema}.{v_target_table} 
    WHERE VERSION = 'T'
    AND METRIC_CD IN {v_param['metric_cd']}
    AND TM_KEY_MTH BETWEEN {v_param['mth_start']} AND {v_param['mth_end']} 
"""

print(f"\nParameter...\n\n   -> TM_KEY_MTH BETWEEN {v_param['mth_start']} AND {v_param['mth_end']}\n   -> METRIC_CD IN {v_param['metric_cd']}")
print(f'\nDataFrame...\n\n   -> latest_results_df : {latest_results_df.shape[0]} rows, {latest_results_df.shape[1]} columns') 
print(f'\nquery_delete...\n{query_delete}')


Parameter...

   -> TM_KEY_MTH BETWEEN 202511 AND 202511
   -> METRIC_CD IN ('TB1R000100', 'TB2R010100', 'TB2R020100', 'TB3R000100', 'TB4R000100', 'TB2R000100')

DataFrame...

   -> latest_results_df : 10160 rows, 17 columns

query_delete...

    DELETE AUTOKPI.INTERIM_VINSIGHT_DATA 
    WHERE VERSION = 'T'
    AND METRIC_CD IN ('TB1R000100', 'TB2R010100', 'TB2R020100', 'TB3R000100', 'TB4R000100', 'TB2R000100')
    AND TM_KEY_MTH BETWEEN 202511 AND 202511 



In [18]:
''' DELETE -> INSERT '''

job_start_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
print(f'\nJob Start... {job_start_datetime}')


# Create rows from DataFrame
rows = [tuple(x) for x in latest_results_df.values]


# Connect : AKPIPRD
dsn = f'{AKPIPRD_user}/{AKPIPRD_pwd}@{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}'
conn = oracledb.connect(dsn)
print(f'\n{AKPIPRD_db} : Connected')
cur = conn.cursor()
print(f'\nProcessing...')


try:
    # # Truncate
    # cur.execute(f"TRUNCATE TABLE {v_target_schema}.{v_target_table}")
    # print(f'\n   -> TRUNCATE : "{v_target_table}" : Done !')

    # Delete
    cur.execute(query_delete)
    print(f'\n   -> DELETE : "{v_target_table}" : Done !')
    
    # Insert
    cur.executemany(f"""
        INSERT INTO {v_target_table} 
        (TM_KEY_YR, TM_KEY_MTH, TRUE_TM_KEY_WK, TM_KEY_DAY, METRIC_CD, METRIC_NAME, COMP_CD, VERSION, AREA_NO, AREA_TYPE, AREA_CD, AREA_NAME, DAY_VALUE, MTH_VALUE, AGG_TYPE, FREQUENCY, REMARK) 
        VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14,:15,:16,:17)
        """, rows)
    print(f'\n   -> INSERT : "{v_target_table}" : Done !')

    cur.close()
    conn.commit()


except oracledb.DatabaseError as e:
    print(f'\nError with Oracle : {e}')


finally:
    conn.close()
    print(f'\n{AKPIPRD_db} : Disconnected')
    print(f'\nJob Done !!!')



Job Start... 2025-11-12, 15:35:10

AKPIPRD : Connected

Processing...

   -> DELETE : "INTERIM_VINSIGHT_DATA" : Done !

   -> INSERT : "INTERIM_VINSIGHT_DATA" : Done !

AKPIPRD : Disconnected

Job Done !!!


### Step 4 : Check Result "INTERIM_VINSIGHT_DATA"

In [19]:
''' Create Result DataFrame '''

# Connect : AKPIPRD
tgt_dsn = f'{AKPIPRD_user}/{AKPIPRD_pwd}@{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}'
tgt_conn = oracledb.connect(tgt_dsn)
tgt_cur = tgt_conn.cursor()


try:
    # Get : Result Data Summary
    tgt_cur.execute("""
        SELECT TM_KEY_MTH, METRIC_CD, METRIC_NAME, COMP_CD, VERSION
            , SUM(CASE WHEN AREA_TYPE = 'P' THEN DAY_VALUE END) AS P
            , SUM(CASE WHEN AREA_TYPE = 'G' THEN DAY_VALUE END) AS G
            , SUM(CASE WHEN AREA_TYPE = 'H' THEN DAY_VALUE END) AS H
            , SUM(CASE WHEN AREA_TYPE = 'HH' THEN DAY_VALUE END) AS HH
            , MAX(LOAD_DATE) LOAD_DATE
        FROM AUTOKPI.INTERIM_VINSIGHT_DATA
        WHERE VERSION = 'T'
        AND METRIC_CD IN (
            'TB1R000100' --Prepaid Revenue : TMH
            , 'TB1R000101' --Prepaid New Revenue : TMH (2024 only)
            , 'TB1R000102' --Prepaid Existing Revenue : TMH (2024 only)
            , 'TB2R000100' --Postpaid Revenue : TMH
            , 'TB2R000101' --Postpaid New Revenue : TMH (2024 only)
            , 'TB2R000102' --Postpaid Existing Revenue : TMH (2024 only)
            , 'TB2R010100' --Postpaid Revenue B2C : TMH
            , 'TB2R020100' --Postpaid Revenue B2B : TMH
            , 'TB3R000100' --TOL Revenue
            , 'TB3R000101' --TOL New Revenue (2024 only)
            , 'TB3R000102' --TOL Existing Revenue (2024 only)
            , 'TB4R000100' --TVS Revenue
            , 'TB4R000101' --TVS New Revenue (2024 only)
            , 'TB4R000102' --TVS Existing Revenue (2024 only)
            )
        AND TM_KEY_MTH >= 202501
        GROUP BY TM_KEY_MTH, METRIC_CD, METRIC_NAME, COMP_CD, VERSION
        --ORDER BY TM_KEY_MTH, METRIC_CD
    """)
    rows = tgt_cur.fetchall()
    print(f'\nGet : Fact Summary...')
    chk_result_df = pd.DataFrame.from_records(rows, columns=[x[0] for x in tgt_cur.description])
    print(f'\n   -> chk_result_df : {chk_result_df.shape[0]} rows, {chk_result_df.shape[1]} columns') 
    
    # # Display
    # tmp_result_df = chk_result_df.copy()
    # # tmp_result_df = tmp_result_df.replace(np.nan, None)
    # # tmp_result_df.iloc[:, 4:18] = tmp_result_df.iloc[:, 4:18].fillna(0)
    # mod_col_list = tmp_result_df.iloc[:, 5:9].columns.tolist()
    # for col in mod_col_list:
    #     tmp_result_df[col] = tmp_result_df[col].apply(lambda x: format(x, ',.2f') if re.search('%', col) else format(x, ',.0f'))
    # print(f'\n{tmp_result_df.to_string(max_cols=10)}') #max_rows=1000

    tgt_cur.close()


except oracledb.DatabaseError as e:
    print(f'\nError with Oracle : {e}')


finally:
    tgt_conn.close()


Get : Fact Summary...

   -> chk_result_df : 66 rows, 10 columns


In [20]:
m4_df = chk_result_df.loc[chk_result_df['TM_KEY_MTH']==202504].copy()
# m4_df = m4_df.replace(np.nan, None)
m4_df.iloc[:, 5:9] = m4_df.iloc[:, 5:9].fillna(0)
mod_col_list = m4_df.iloc[:, 5:9].columns.tolist()
for col in mod_col_list:
    m4_df[col] = m4_df[col].apply(lambda x: format(x, ',.2f') if re.search('%', col) else format(x, ',.0f'))
m4_df = m4_df.sort_values(by=['TM_KEY_MTH', 'METRIC_CD']).reset_index(drop=True)
m4_df

Unnamed: 0,TM_KEY_MTH,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,P,G,H,HH,LOAD_DATE
0,202504,TB1R000100,Prepaid Revenue : TMH,True,T,2488191889,2488191889,2488191889,2488191889,2025-04-28 15:46:42.603195
1,202504,TB2R000100,Postpaid Revenue : TMH,True,T,3676555973,0,0,0,2025-04-28 15:46:42.603195
2,202504,TB2R010100,Postpaid Revenue B2C : TMH,True,T,3358275973,3358275973,3358275973,3358275973,2025-04-28 15:46:42.603195
3,202504,TB2R020100,Postpaid Revenue B2B : TMH,True,T,318280000,0,0,0,2025-04-28 15:46:42.603195
4,202504,TB3R000100,TOL Revenue,True,T,1672918006,1672918006,1672918006,1672918006,2025-04-28 15:46:42.603195
5,202504,TB4R000100,TVS Revenue,True,T,379499818,379499818,379499818,379499818,2025-04-28 15:46:42.603195


In [21]:
m5_df = chk_result_df.loc[chk_result_df['TM_KEY_MTH']==202505].copy()
m5_df.iloc[:, 5:9] = m5_df.iloc[:, 5:9].fillna(0)
mod_col_list = m5_df.iloc[:, 5:9].columns.tolist()
for col in mod_col_list:
    m5_df[col] = m5_df[col].apply(lambda x: format(x, ',.2f') if re.search('%', col) else format(x, ',.0f'))
m5_df = m5_df.sort_values(by=['TM_KEY_MTH', 'METRIC_CD']).reset_index(drop=True)
m5_df

Unnamed: 0,TM_KEY_MTH,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,P,G,H,HH,LOAD_DATE
0,202505,TB1R000100,Prepaid Revenue : TMH,True,T,2611860019,2611860019,2611860019,2611860019,2025-07-02 15:35:25.142416
1,202505,TB2R000100,Postpaid Revenue : TMH,True,T,3691470486,0,0,0,2025-07-02 15:35:25.142416
2,202505,TB2R010100,Postpaid Revenue B2C : TMH,True,T,3373190486,3373190486,3373190486,3373190486,2025-07-02 15:35:25.142416
3,202505,TB2R020100,Postpaid Revenue B2B : TMH,True,T,318280000,0,0,0,2025-07-02 15:35:25.142416
4,202505,TB3R000100,TOL Revenue,True,T,1692989226,1692989226,1692989226,1692989226,2025-07-02 15:35:25.142416
5,202505,TB4R000100,TVS Revenue,True,T,377894818,377894818,377894818,377894818,2025-07-02 15:35:25.142416


In [22]:
m6_df = chk_result_df.loc[chk_result_df['TM_KEY_MTH']==202506].copy()
m6_df.iloc[:, 5:9] = m6_df.iloc[:, 5:9].fillna(0)
mod_col_list = m6_df.iloc[:, 5:9].columns.tolist()
for col in mod_col_list:
    m6_df[col] = m6_df[col].apply(lambda x: format(x, ',.2f') if re.search('%', col) else format(x, ',.0f'))
m6_df = m6_df.sort_values(by=['TM_KEY_MTH', 'METRIC_CD']).reset_index(drop=True)
m6_df

Unnamed: 0,TM_KEY_MTH,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,P,G,H,HH,LOAD_DATE
0,202506,TB1R000100,Prepaid Revenue : TMH,True,T,2549834893,2549834893,2549834893,2549834893,2025-06-26 12:35:37.716382
1,202506,TB2R000100,Postpaid Revenue : TMH,True,T,3714466435,0,0,0,2025-06-26 12:35:37.716382
2,202506,TB2R010100,Postpaid Revenue B2C : TMH,True,T,3396186435,3396186435,3396186435,3396186435,2025-06-26 12:35:37.716382
3,202506,TB2R020100,Postpaid Revenue B2B : TMH,True,T,318280000,0,0,0,2025-06-26 12:35:37.716382
4,202506,TB3R000100,TOL Revenue,True,T,1714420575,1714420575,1714420575,1714420575,2025-06-26 12:35:37.716382
5,202506,TB4R000100,TVS Revenue,True,T,341143818,341143818,341143818,341143818,2025-06-26 12:35:37.716382


In [23]:
m7_df = chk_result_df.loc[chk_result_df['TM_KEY_MTH']==202507].copy()
m7_df.iloc[:, 5:9] = m7_df.iloc[:, 5:9].fillna(0)
mod_col_list = m7_df.iloc[:, 5:9].columns.tolist()
for col in mod_col_list:
    m7_df[col] = m7_df[col].apply(lambda x: format(x, ',.2f') if re.search('%', col) else format(x, ',.0f'))
m7_df = m7_df.sort_values(by=['TM_KEY_MTH', 'METRIC_CD']).reset_index(drop=True)
m7_df

Unnamed: 0,TM_KEY_MTH,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,P,G,H,HH,LOAD_DATE
0,202507,TB1R000100,Prepaid Revenue : TMH,True,T,2546815318,2546815318,2546815318,2546815318,2025-07-21 16:22:26.994998
1,202507,TB2R000100,Postpaid Revenue : TMH,True,T,3695365948,0,0,0,2025-07-21 16:22:26.994998
2,202507,TB2R010100,Postpaid Revenue B2C : TMH,True,T,3377085948,3377085948,3377085948,3377085948,2025-07-21 16:22:26.994998
3,202507,TB2R020100,Postpaid Revenue B2B : TMH,True,T,318280000,0,0,0,2025-07-21 16:22:26.994998
4,202507,TB3R000100,TOL Revenue,True,T,1669133811,1669133811,1669133811,1669133811,2025-07-21 16:22:26.994998
5,202507,TB4R000100,TVS Revenue,True,T,315329930,315329930,315329930,315329930,2025-07-21 16:22:26.994998


In [24]:
m8_df = chk_result_df.loc[chk_result_df['TM_KEY_MTH']==202508].copy()
m8_df.iloc[:, 5:9] = m8_df.iloc[:, 5:9].fillna(0)
mod_col_list = m8_df.iloc[:, 5:9].columns.tolist()
for col in mod_col_list:
    m8_df[col] = m8_df[col].apply(lambda x: format(x, ',.2f') if re.search('%', col) else format(x, ',.0f'))
m8_df = m8_df.sort_values(by=['TM_KEY_MTH', 'METRIC_CD']).reset_index(drop=True)
m8_df

Unnamed: 0,TM_KEY_MTH,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,P,G,H,HH,LOAD_DATE
0,202508,TB1R000100,Prepaid Revenue : TMH,True,T,2550314352,2550314352,2550314352,2550314352,2025-08-15 17:02:57.092400
1,202508,TB2R000100,Postpaid Revenue : TMH,True,T,3748084085,0,0,0,2025-08-15 17:02:57.092400
2,202508,TB2R010100,Postpaid Revenue B2C : TMH,True,T,3429804085,3429804085,3429804085,3429804085,2025-08-15 17:02:57.092400
3,202508,TB2R020100,Postpaid Revenue B2B : TMH,True,T,318280000,0,0,0,2025-08-15 17:02:57.092400
4,202508,TB3R000100,TOL Revenue,True,T,1673968948,1673968948,1673968948,1673968948,2025-08-15 17:02:57.092400
5,202508,TB4R000100,TVS Revenue,True,T,307105486,307105486,307105486,307105486,2025-08-15 17:02:57.092400


In [25]:
m9_df = chk_result_df.loc[chk_result_df['TM_KEY_MTH']==202509].copy()
m9_df.iloc[:, 5:9] = m9_df.iloc[:, 5:9].fillna(0)
mod_col_list = m9_df.iloc[:, 5:9].columns.tolist()
for col in mod_col_list:
    m9_df[col] = m9_df[col].apply(lambda x: format(x, ',.2f') if re.search('%', col) else format(x, ',.0f'))
m9_df = m9_df.sort_values(by=['TM_KEY_MTH', 'METRIC_CD']).reset_index(drop=True)
m9_df

Unnamed: 0,TM_KEY_MTH,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,P,G,H,HH,LOAD_DATE
0,202509,TB1R000100,Prepaid Revenue : TMH,True,T,2507258899,2507258899,2507258899,2507258899,2025-09-23 13:03:59.129913
1,202509,TB2R000100,Postpaid Revenue : TMH,True,T,3803262307,0,0,0,2025-09-23 13:03:59.129913
2,202509,TB2R010100,Postpaid Revenue B2C : TMH,True,T,3484982307,3484982307,3484982307,3484982307,2025-09-23 13:03:59.129913
3,202509,TB2R020100,Postpaid Revenue B2B : TMH,True,T,318280000,0,0,0,2025-09-23 13:03:59.129913
4,202509,TB3R000100,TOL Revenue,True,T,1681091962,1681091962,1681091962,1681091962,2025-09-23 13:03:59.129913
5,202509,TB4R000100,TVS Revenue,True,T,298896279,298896279,298896279,298896279,2025-09-23 13:03:59.129913


In [31]:
m10_df = chk_result_df.loc[chk_result_df['TM_KEY_MTH']==202510].copy()
m10_df.iloc[:, 5:9] = m10_df.iloc[:, 5:9].fillna(0)
mod_col_list = m10_df.iloc[:, 5:9].columns.tolist()
for col in mod_col_list:
    m10_df[col] = m10_df[col].apply(lambda x: format(x, ',.2f') if re.search('%', col) else format(x, ',.0f'))
m10_df = m10_df.sort_values(by=['TM_KEY_MTH', 'METRIC_CD']).reset_index(drop=True)
m10_df

Unnamed: 0,TM_KEY_MTH,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,P,G,H,HH,LOAD_DATE
0,202510,TB1R000100,Prepaid Revenue : TMH,True,T,2583946012,2583946012,2583946012,2583946012,2025-11-11 15:10:29.697172
1,202510,TB2R000100,Postpaid Revenue : TMH,True,T,3700778140,0,0,0,2025-11-11 15:10:29.697172
2,202510,TB2R010100,Postpaid Revenue B2C : TMH,True,T,3382498140,3382498140,3382498140,3382498140,2025-11-11 15:10:29.697172
3,202510,TB2R020100,Postpaid Revenue B2B : TMH,True,T,318280000,0,0,0,2025-11-11 15:10:29.697172
4,202510,TB3R000100,TOL Revenue,True,T,1660875019,1660875019,1660875019,1660875019,2025-11-11 15:10:29.697172
5,202510,TB4R000100,TVS Revenue,True,T,298797598,298797598,298797598,298797598,2025-11-11 15:10:29.697172


In [20]:
m11_df = chk_result_df.loc[chk_result_df['TM_KEY_MTH']==202511].copy()
m11_df.iloc[:, 5:9] = m11_df.iloc[:, 5:9].fillna(0)
mod_col_list = m11_df.iloc[:, 5:9].columns.tolist()
for col in mod_col_list:
    m11_df[col] = m11_df[col].apply(lambda x: format(x, ',.2f') if re.search('%', col) else format(x, ',.0f'))
m11_df = m11_df.sort_values(by=['TM_KEY_MTH', 'METRIC_CD']).reset_index(drop=True)
m11_df

Unnamed: 0,TM_KEY_MTH,METRIC_CD,METRIC_NAME,COMP_CD,VERSION,P,G,H,HH,LOAD_DATE
0,202511,TB1R000100,Prepaid Revenue : TMH,True,T,2578208663,2578208663,2578208663,2578208663,2025-11-12 15:35:11.193765
1,202511,TB2R000100,Postpaid Revenue : TMH,True,T,3737211385,0,0,0,2025-11-12 15:35:11.193765
2,202511,TB2R010100,Postpaid Revenue B2C : TMH,True,T,3418931385,3418931385,3418931385,3418931385,2025-11-12 15:35:11.193765
3,202511,TB2R020100,Postpaid Revenue B2B : TMH,True,T,318280000,0,0,0,2025-11-12 15:35:11.193765
4,202511,TB3R000100,TOL Revenue,True,T,1674162284,1674162284,1674162284,1674162284,2025-11-12 15:35:11.193765
5,202511,TB4R000100,TVS Revenue,True,T,306494227,306494227,306494227,306494227,2025-11-12 15:35:11.193765
