# * VINSIGHT : Data Monitoring

## Parameter

In [1]:
import configparser
import datetime as dt
import pandas as pd
import numpy as np
import xlrd
import oracledb
import re
import FN_Monitoring as fn

# Load the database connection settings from the shared INI file.
CONFIG_PATH = '../../my_config.ini'

config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed; fail here with a clear message rather than with a
# KeyError on the first section lookup below.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f'Config file not found or unreadable: {CONFIG_PATH}')

# TDMDBPR connection settings
TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

# AKPIPRD connection settings
AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

# Run date; the YYYYMMDD string is used as a suffix of the output file names.
curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')

In [2]:
# Input parameter

# Output directory and date-stamped base names (without extension) of the export files.
op_dir = 'data'
op_monthly_file = 'vis_monthly_' + str_curr_dt
op_daily_file = 'vis_daily_' + str_curr_dt
op_ccaa_ccaatt_file = 'vis_ccaa_ccaatt_' + str_curr_dt

# Values interpolated into the W_PARAM CTE of the import queries.
v_year, v_month_start, v_month_end = 0, 0, 0
v_date = 20240101

# Echo the settings in two groups separated by one blank line.
_output_settings = (
    ('op_dir', op_dir),
    ('op_monthly_file', op_monthly_file),
    ('op_daily_file', op_daily_file),
    ('op_ccaa_ccaatt_file', op_ccaa_ccaatt_file),
)
_query_settings = (
    ('v_year', v_year),
    ('v_month_start', v_month_start),
    ('v_month_end', v_month_end),
    ('v_date', v_date),
)

print('\nParameter input...\n')
for _label, _value in _output_settings:
    print(f'   -> {_label}: {_value}')
print()
for _label, _value in _query_settings:
    print(f'   -> {_label}: {_value}')


Parameter input...

   -> op_dir: data
   -> op_monthly_file: vis_monthly_20250617
   -> op_daily_file: vis_daily_20250617
   -> op_ccaa_ccaatt_file: vis_ccaa_ccaatt_20250617

   -> v_year: 0
   -> v_month_start: 0
   -> v_month_end: 0
   -> v_date: 20240101


## Aggregate : Timestamp

In [3]:
# Execute AGG_PERF_NEWCO
# Summarises GEOSPCAPPO.AGG_PERF_NEWCO per PPN_TM / AREA_TYPE: distinct metric
# count, row count, min/max TM_KEY_DAY and the latest ACTUAL_AS_OF.


# Input parameter
v_start_date = 20240101
print(f'\nParameter input...')
print(f'   -> v_start_date: {v_start_date}')

curr_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
print(f'\nData as of {curr_datetime}')


# Connect : TDMDBPR
# Credentials are passed separately from the Easy Connect string so that a
# password containing '@' or '/' cannot be mis-parsed as part of the address.
src_dsn = f'{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
src_conn = oracledb.connect(user=TDMDBPR_user, password=TDMDBPR_pwd, dsn=src_dsn)
query = (f"""
    SELECT /*+PARALLEL(8)*/ PPN_TM, AREA_TYPE
        , COUNT(DISTINCT METRIC_CD) CNT_METRIC, COUNT(1) ROW_CNT
        , MIN(TM_KEY_DAY) MIN_DT, MAX(TM_KEY_DAY) MAX_DT, MAX(ACTUAL_AS_OF) LAST_ACTUAL
    FROM GEOSPCAPPO.AGG_PERF_NEWCO
    WHERE CENTER IN ('Revenue', 'Sales', 'Profitability')
    AND AREA_TYPE NOT IN ('CCAA', 'CCAATT')
    AND TM_KEY_DAY >= {v_start_date} 
    GROUP BY PPN_TM, AREA_TYPE
    ORDER BY PPN_TM, AREA_TYPE
""")


try:
    # The cursor context manager closes the cursor on both the success and
    # the error path.
    with src_conn.cursor() as src_cur:
        # Get : Timestamp
        src_cur.execute(query)
        rows = src_cur.fetchall()
        print(f'\nCurrent Timestamp...')
        # Column names come from the cursor metadata (first item of each description entry).
        chk_src_df = pd.DataFrame.from_records(rows, columns=[x[0] for x in src_cur.description])

    # Display
    vis_timestamp_df = chk_src_df.copy()
    print(f'\n{vis_timestamp_df.to_string(max_cols=7)}') #max_rows=1000


except oracledb.DatabaseError as e:
    # Monitoring-only cell: report the database error and carry on.
    print(f'\nError with Oracle : {e}')


finally:
    src_conn.close()


Parameter input...
   -> v_start_date: 20240101

Data as of 2025-06-17, 10:25:11

Current Timestamp...

                PPN_TM AREA_TYPE  CNT_METRIC   ROW_CNT    MIN_DT    MAX_DT  LAST_ACTUAL
0  2025-06-16 22:53:52         C         529    281428  20240101  20250615     20250615
1  2025-06-16 22:53:52         G         356   1515136  20240101  20250615     20250615
2  2025-06-16 22:53:52         P         385    204820  20240101  20250615     20250615
3  2025-06-16 22:53:52         Z          29    107996  20240101  20250615     20250615
4  2025-06-16 22:54:28         C           4      2128  20240101  20250615     20250615
5  2025-06-16 22:54:28         G           4     17024  20240101  20250615     20250615
6  2025-06-16 22:54:28         P           4      2128  20240101  20250615     20250615
7  2025-06-16 23:46:17         H         355  12087040  20240101  20250615     20250615
8  2025-06-16 23:49:52         H           4    136192  20240101  20250615     20250615
9  2025-06-17 0

In [4]:
# Execute AGG_PERF_NEWCO_CCAATT
# Summarises GEOSPCAPPO.AGG_PERF_NEWCO_CCAATT per AREA_TYPE: latest PPN_TM,
# row count, min/max TM_KEY_DAY, latest ACTUAL_AS_OF and distinct metric count.


# Input parameter
curr_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
v_start_date = 20250401
print(f'\nParameter input...\n')
print(f'   -> Data as of {curr_datetime}')
print(f'   -> v_start_date: {v_start_date}')


# Connect : TDMDBPR
# Credentials are passed separately from the Easy Connect string so that a
# password containing '@' or '/' cannot be mis-parsed as part of the address.
src_dsn = f'{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
src_conn = oracledb.connect(user=TDMDBPR_user, password=TDMDBPR_pwd, dsn=src_dsn)
query = (f"""
    SELECT /*+PARALLEL(8)*/ AREA_TYPE
        , MAX(PPN_TM) PPN_TM
        , COUNT(1) ROW_CNT, MIN(TM_KEY_DAY) MIN_DT, MAX(TM_KEY_DAY) MAX_DT, MAX(ACTUAL_AS_OF) LAST_ACTUAL
        , COUNT(DISTINCT METRIC_CD) CNT_METRIC
    FROM GEOSPCAPPO.AGG_PERF_NEWCO_CCAATT 
    WHERE TM_KEY_DAY >= {v_start_date} 
    GROUP BY AREA_TYPE
    ORDER BY 1
""")


try:
    # The cursor context manager closes the cursor on both the success and
    # the error path.
    with src_conn.cursor() as src_cur:
        # Get : Timestamp
        src_cur.execute(query)
        rows = src_cur.fetchall()
        print(f'\nCurrent Timestamp...')
        # Column names come from the cursor metadata (first item of each description entry).
        chk_src_df = pd.DataFrame.from_records(rows, columns=[x[0] for x in src_cur.description])

    # Display
    vis_timestamp_df = chk_src_df.copy()
    print(f'\n{vis_timestamp_df.to_string(max_cols=7)}') #max_rows=1000


except oracledb.DatabaseError as e:
    # Monitoring-only cell: report the database error and carry on.
    print(f'\nError with Oracle : {e}')


finally:
    src_conn.close()


Parameter input...

   -> Data as of 2025-06-17, 11:04:58
   -> v_start_date: 20250401

Current Timestamp...

  AREA_TYPE              PPN_TM    ROW_CNT    MIN_DT    MAX_DT  LAST_ACTUAL  CNT_METRIC
0      CCAA 2025-06-15 04:44:55   17982784  20250401  20250615     20250614         257
1    CCAATT 2025-06-15 04:34:51  126583028  20250401  20250615     20250614         226


## Import : Snap Data

### Area : C, P, G, H, HH
    Source -> GEOSPCAPPO.AGG_PERF_NEWCO
    Output -> tmp_vis_monthly_YYYYMMDD.csv

In [5]:
# Execute AGG_PERF_NEWCO
# Builds `src_df`: one row per month / metric with the actual and target values
# pivoted into one column per area type (C, P, G, H, HH).


# Connect : TDMDBPR
# Credentials are passed separately from the Easy Connect string so that a
# password containing '@' or '/' cannot be mis-parsed as part of the address.
src_dsn = f'{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
src_conn = oracledb.connect(user=TDMDBPR_user, password=TDMDBPR_pwd, dsn=src_dsn)
print(f'\n{TDMDBPR_db} : Connected')


# Query outline:
#   W_PARAM         - the notebook parameters exposed as a one-row CTE
#   W_VINSIGHT_SNAP - rows filtered by center, metric-code suffix, area type and start day
#   MTH_SNAP        - per row, the value that feeds the monthly figure: the snap value
#                     when AGG_TYPE = 'S', otherwise the month aggregate taken only from
#                     the latest TM_KEY_DAY of that metric and month
#   outer SELECT    - sums those values per area type into *_ACTUAL / *_TARGET columns
query = (f"""
    /*** Import data from "AGG_PERF_NEWCO" ***/
    -----------------------------------------------------------------------------------------------------------------------

    WITH W_PARAM AS 
    (
        SELECT {v_year} AS V_YR
            , {v_month_start} AS V_MTH_START
            , {v_month_end} AS V_MTH_END
            , {v_date} AS V_DT_START 
        FROM DUAL
    )
    -----------------------------------------------------------------------------------------------------------------------

    , W_VINSIGHT_SNAP AS 
    (
        SELECT TM_KEY_YR, TM_KEY_MTH, TM_KEY_DAY
            , CENTER, PRODUCT_GRP, COMP_CD, METRIC_GRP, METRIC_CD, METRIC_NAME
            , ACTUAL_AS_OF, AGG_TYPE--, RR_IND, GRY_IND, UOM
            , AREA_TYPE, AREA_CD, AREA_NAME
            , ACTUAL_SNAP, TARGET_SNAP, ACTUAL_AGG_MTH, TARGET_AGG_MTH
            , PPN_TM
        
        FROM GEOSPCAPPO.AGG_PERF_NEWCO 
        
        WHERE CENTER IN ('Revenue', 'Sales', 'Profitability')
        AND NOT REGEXP_LIKE(METRIC_CD, '[0-9]C$|[0-9]H$|[0-9]MCOM$') --|[0-9]CORP$|[0-9]GEO$|[0-9]A[A-K]$
        AND AREA_TYPE IN ('C', 'P', 'G', 'H', 'HH')
        AND TM_KEY_DAY >= (SELECT V_DT_START FROM W_PARAM)
    )
    -----------------------------------------------------------------------------------------------------------------------

    /*** Monthly Snap ***/

    SELECT /*+ PARALLEL(8) */ 
        TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, AGG_TYPE--, UOM
        , CAST(SUM(CASE WHEN AREA_TYPE = 'C' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS C_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'P' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS P_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'G' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS G_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'H' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS H_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'HH' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS HH_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'C' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS C_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'P' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS P_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'G' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS G_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'H' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS H_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'HH' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS HH_TARGET
        , MAX(ACTUAL_AS_OF) ACTUAL_AS_OF, MIN(TM_KEY_DAY) MIN_DAY, MAX(TM_KEY_DAY) MAX_DAY, MAX(PPN_TM) PPN_TM
        
    FROM (
        SELECT TM_KEY_YR, TM_KEY_MTH, TM_KEY_DAY
            , CENTER, PRODUCT_GRP, COMP_CD, METRIC_GRP, METRIC_CD, METRIC_NAME, ACTUAL_AS_OF, AGG_TYPE, AREA_TYPE, AREA_CD, AREA_NAME
            , CASE WHEN AGG_TYPE = 'S' THEN ACTUAL_SNAP 
                    ELSE (CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_MTH) THEN ACTUAL_AGG_MTH END)
                    END ACTUAL_TMP
            , CASE WHEN AGG_TYPE = 'S' THEN TARGET_SNAP 
                    ELSE (CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_MTH) THEN TARGET_AGG_MTH END)
                    END TARGET_TMP
            , ACTUAL_SNAP, TARGET_SNAP, ACTUAL_AGG_MTH, TARGET_AGG_MTH, PPN_TM
            --, CASE WHEN REGEXP_LIKE(METRIC_CD, '[0-9]A[A-K]$') THEN SUBSTR(METRIC_CD,-2) ELSE 'ALL' END CHANNEL_CD
        FROM W_VINSIGHT_SNAP
    ) MTH_SNAP

    GROUP BY TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, AGG_TYPE
    --ORDER BY TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD
""")


try:
    execute_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
    print(f'\n   -> Execute query... {execute_datetime}')

    # Create Dataframe
    # The cursor context manager closes the cursor on both the success and
    # the error path.
    with src_conn.cursor() as src_cur:
        src_cur.execute(query)
        rows = src_cur.fetchall()
        # Column names come from the cursor metadata (first item of each description entry).
        src_df = pd.DataFrame.from_records(rows, columns=[x[0] for x in src_cur.description])
    print(f'\n   -> DataFrame : {src_df.shape[0]} rows, {src_df.shape[1]} columns')


except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')
    # Re-raise: the next cell reads `src_df`, so continuing after a failed query
    # would either hit a NameError or silently export a stale frame left over
    # from an earlier run.
    raise


finally:
    src_conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')


TDMDBPR : Connected

   -> Execute query... 2025-06-17, 11:28:49

   -> DataFrame : 7524 rows, 22 columns

TDMDBPR : Disconnected


In [6]:
# Add columns & Output file
# Derives MY_GROUP plus three Y/N flag columns from METRIC_CD / METRIC_NAME and
# writes the enriched frame to a CSV file.

pbh_df = src_df.copy()

# MY_GROUP: result of fn.my_metric_group for the stringified group, code and name of each row.
pbh_df['MY_GROUP'] = pbh_df.apply(lambda x: fn.my_metric_group(str(x['METRIC_GRP']), str(x['METRIC_CD']), str(x['METRIC_NAME'])), axis=1)

metric_cd = pbh_df['METRIC_CD'].str
metric_name = pbh_df['METRIC_NAME'].str

# Every contains() call passes na=False so a null code or name counts as
# "no match" explicitly instead of propagating NaN into the boolean logic.

# MAIN_FLAG = 'N' when the code ends in <digit>C / <digit>H / <digit>MCOM / <digit>CUS
# or the name mentions New/Existing Revenue; 'Y' otherwise.
is_sub_metric = (
    metric_cd.contains('[0-9]C$|[0-9]H$|[0-9]MCOM$|[0-9]CUS$', na=False)
    | metric_name.contains('New Revenue|Existing Revenue', na=False)
)
pbh_df['MAIN_FLAG'] = np.where(is_sub_metric, 'N', 'Y')

# GEO_FLAG = 'Y' when the code ends in GEO or the name ends in "(Geo)".
# Raw string: '\(' in a normal string literal is an invalid escape sequence.
is_geo = (
    metric_cd.contains('GEO$', na=False)
    | metric_name.contains(r'\(Geo\)$', na=False)
)
pbh_df['GEO_FLAG'] = np.where(is_geo, 'Y', 'N')

# CDS_FLAG = 'Y' when the code starts with one of the prefixes below AND ends
# in a digit or in <digit>A[A-K].
cds_prefixes = [
    'TB1R000109', 'TB1S000109', 'TB1R000106', 'TB1S000106',
    'TB1S000102', 'TB1S000103', 'TB1S000104', 'TB0R00010002',
    'TB2S000100', 'TB1S000101', 'TB3S000100', 'TB3S000101',
    'TB3S000102', 'TB3S000103', 'TB3S000900', 'TB3S000901',
    'TB3S000902', 'TB4S000100', 'TB4S000104', 'TB4S001300',
    'TB0R00010001', 'TB2R000500', 'TB1R000900', 'TB3R000600',
    'TB3R000601', 'TB3R000602', 'TB4R001000', 'TB1R001000',
    'TB4S001400', 'TB4R001700', 'TB2S010100', 'TB2S020100',
]
# Non-capturing group: same matches as '^A|^B|...' without creating a capture group.
cds_prefix_pattern = '^(?:' + '|'.join(cds_prefixes) + ')'
is_cds = (
    metric_cd.contains(cds_prefix_pattern, na=False)
    & metric_cd.contains('[0-9]$|[0-9]A[A-K]$', na=False)
)
pbh_df['CDS_FLAG'] = np.where(is_cds, 'Y', 'N')

# Generate CSV file
pbh_df.to_csv(f'{op_dir}/tmp_{op_monthly_file}.csv', index=False, encoding='utf-8')
print(f'\n   -> Generate "tmp_{op_monthly_file}.csv" successfully')

# pbh_df.tail(3)


   -> Generate "tmp_vis_monthly_20250617.csv" successfully


### Area : CCAA, CCAATT
    Source -> GEOSPCAPPO.AGG_PERF_NEWCO_CCAATT
    Output -> tmp_vis_ccaa_ccaatt_YYYYMMDD.csv

In [7]:
# Execute AGG_PERF_NEWCO_CCAATT
# Builds `sub_src_df`: one row per metric with the summed ACTUAL_SNAP of the
# CCAA and CCAATT area types side by side, then writes it to a CSV file.


# Connect : TDMDBPR
# Credentials are passed separately from the Easy Connect string so that a
# password containing '@' or '/' cannot be mis-parsed as part of the address.
src_dsn = f'{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
src_conn = oracledb.connect(user=TDMDBPR_user, password=TDMDBPR_pwd, dsn=src_dsn)
print(f'\n{TDMDBPR_db} : Connected')


query = (f"""
    /*** Import data from "AGG_PERF_NEWCO_CCAATT" ***/
    -----------------------------------------------------------------------------------------------------------------------

    WITH W_PARAM AS 
    (
        SELECT {v_year} AS V_YR
            , {v_month_start} AS V_MTH_START
            , {v_month_end} AS V_MTH_END
            , {v_date} AS V_DT_START 
        FROM DUAL
    )
    -----------------------------------------------------------------------------------------------------------------------

    /*** CCAA, CCAATT Summary ***/

    SELECT /*+PARALLEL(8)*/ 
        METRIC_GRP, METRIC_NAME_GROUP, METRIC_CD, METRIC_NAME 
        , SUM(CASE WHEN AREA_TYPE = 'CCAA' THEN ACTUAL_SNAP END) CCAA
        , SUM(CASE WHEN AREA_TYPE = 'CCAATT' THEN ACTUAL_SNAP END) CCAATT
        , MIN(TM_KEY_DAY) MIN_DT, MAX(TM_KEY_DAY) MAX_DT, MAX(ACTUAL_AS_OF) LAST_ACTUAL
        , COUNT(1) ROW_CNT, MAX(PPN_TM) PPN_TM

    FROM GEOSPCAPPO.AGG_PERF_NEWCO_CCAATT 

    WHERE CENTER IN ('Revenue', 'Sales', 'Profitability')
    AND TM_KEY_DAY >= (SELECT V_DT_START FROM W_PARAM)
    
    GROUP BY METRIC_GRP, METRIC_NAME_GROUP, METRIC_CD, METRIC_NAME
    --ORDER BY METRIC_GRP, METRIC_NAME_GROUP, METRIC_CD 
""")


try:
    execute_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
    print(f'\n   -> Execute query... {execute_datetime}')

    # Create Dataframe
    # The cursor context manager closes the cursor on both the success and
    # the error path.
    with src_conn.cursor() as src_cur:
        src_cur.execute(query)
        rows = src_cur.fetchall()
        # Column names come from the cursor metadata (first item of each description entry).
        sub_src_df = pd.DataFrame.from_records(rows, columns=[x[0] for x in src_cur.description])

    # Sorting is done client-side; the ORDER BY in the query is commented out.
    sub_src_df = sub_src_df.sort_values(by=['METRIC_GRP', 'METRIC_CD']).reset_index(drop=True)
    print(f'\n   -> DataFrame : {sub_src_df.shape[0]} rows, {sub_src_df.shape[1]} columns')

    # Generate CSV file
    sub_src_df.to_csv(f'{op_dir}/tmp_{op_ccaa_ccaatt_file}.csv', index=False, encoding='utf-8')
    print(f'\n   -> Generate "tmp_{op_ccaa_ccaatt_file}.csv" successfully')


except oracledb.DatabaseError as e:
    # The export sits inside the try block, so no file is written when the query fails.
    print(f'Error with Oracle : {e}')


finally:
    src_conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')


TDMDBPR : Connected

   -> Execute query... 2025-06-16, 10:32:07

   -> DataFrame : 222 rows, 11 columns

   -> Generate "tmp_vis_ccaa_ccaatt_20250616.csv" successfully

TDMDBPR : Disconnected


## Generate Output file

In [8]:
# # to Excel file

# op_dir = 'temp'
# op_file = 'VINSIGHT Data Monitoring.xlsx'

# df.to_excel(f'{op_dir}/{op_file}', sheet_name='Data', index=False)
# print(f'\n  -> Generate "{op_file}" successfully')

In [9]:
# # to CSV file

# op_dir = 'temp'
# op_file = 'VINSIGHT Data Monitoring.csv'

# df.to_csv(f'{op_dir}/{op_file}', index=False, encoding='utf-8')
# print(f'\n  -> Generate "{op_file}" successfully')