# * VINSIGHT : Data Monitoring

## Parameter

In [1]:
import configparser
import datetime as dt
import pandas as pd
import numpy as np
import xlrd
import oracledb
import re
import FN_Monitoring as fn

config = configparser.ConfigParser()
config.read('../../my_config.ini')
config.sections()

TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')

In [2]:
# Input parameter

op_dir = 'data'
op_monthly_file = f'monthly_snap_{str_curr_dt}'
op_daily_file = f'daily_snap_{str_curr_dt}'

v_year = 2024
v_month_start = 0
v_month_end = 0
v_date = 0

# print(f"\nParameter input...\n\n   -> op_dir: {op_dir}\n   -> op_monthly_file: {op_monthly_file}\n   -> op_daily_file: {op_daily_file}")
print(f'\nParameter input...\n')
print(f'   -> op_dir: {op_dir}')
print(f'   -> op_monthly_file: {op_monthly_file}')
print(f'   -> op_daily_file: {op_daily_file}')
print(f'\n   -> v_year: {v_year}')
print(f'   -> v_month_start: {v_month_start}')
print(f'   -> v_month_end: {v_month_end}')
print(f'   -> v_date: {v_date}')


Parameter input...

   -> op_dir: data
   -> op_monthly_file: monthly_snap_20240925
   -> op_daily_file: daily_snap_20240925

   -> v_year: 2024
   -> v_month_start: 0
   -> v_month_end: 0
   -> v_date: 0


## Import : PowerBI Data
    DB -> Output file (daily_snap_YYYYMMDD.csv)

    Source : GEOSPCAPPO.AGG_PERF_NEWCO

In [3]:
# Connect : TDMDBPR
src_dsn = f'{TDMDBPR_user}/{TDMDBPR_pwd}@{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
src_conn = oracledb.connect(src_dsn)
print(f'\n{TDMDBPR_db} : Connected')
src_cur = src_conn.cursor()


query = (f"""
    /*** Import data from "AGG_PERF_NEWCO" ***/
    -----------------------------------------------------------------------------------------------------------------------

    WITH W_PARAM AS 
    (
        SELECT {v_year} AS V_YR
            , {v_month_start} AS V_MTH_START
            , {v_month_end}V_MTH_END
            , {v_date} AS V_DT 
        FROM DUAL
    )
    -----------------------------------------------------------------------------------------------------------------------

    , W_VINSIGHT_SNAP AS 
    (
        SELECT TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, TM_KEY_WK, TM_KEY_DAY
            , CENTER, PRODUCT_GRP, COMP_CD, METRIC_GRP, METRIC_CD, METRIC_NAME--, SEQ
            , ACTUAL_AS_OF, AGG_TYPE--, RR_IND, GRY_IND, UOM
            , AREA_TYPE, AREA_CD, AREA_NAME
            , ACTUAL_SNAP, TARGET_SNAP, ACTUAL_AGG_MTH, TARGET_AGG_MTH
            , WOW, MOM, QOQ, YOY--, WOW_PERCENT, MOM_PERCENT, QOQ_PERCENT, YOY_PERCENT
            , WTD_YOY, MTD_YOY, QTD_YOY, YTD_YOY--, WTD_YOY_PERCENT, MTD_YOY_PERCENT, QTD_YOY_PERCENT, YTD_YOY_PERCENT
            , PPN_TM
        
        FROM GEOSPCAPPO.AGG_PERF_NEWCO 
        
        WHERE ( METRIC_CD IN ('B2R000100', 'DB2R000100', 'TB2R000100', 'B2R010100', 'DB2R010100', 'TB2R010100', 'B1R000100', 'DB1R000100', 'TB1R000100', 'TB3R000100', 'TB4R000100', 'TB4R000103', 'TB4R000104', 'TB4R000105', 'TB4R000106' --Revenue
            , 'B2S000500', 'B2S010500', 'DB2S000500', 'DB2S010500', 'TB2S000500', 'TB2S010500', 'B1S000500', 'B1S000600', 'DB1S000500', 'DB1S000600', 'TB1S000500', 'TB1S000600', 'TB3S000500', 'TB4S000500') --Subs
        OR (REGEXP_LIKE(METRIC_CD, 'B2R000500|B2S000100|B1R000900|B1S000101|TB3R000600|TB3S000100|TB3R000601|TB3S000102|TB4R001000|TB4S000100|TB4S000101|TB4S000102|TB4S000103|TB4S000104') 
            AND REGEXP_LIKE(METRIC_CD, '[0-9]|A[A-K]$')) )
    --    WHERE METRIC_CD IN ('B2R000100', 'DB2R000100', 'TB2R000100', 'B2R010100', 'DB2R010100', 'TB2R010100', 'B1R000100', 'DB1R000100', 'TB1R000100', 'TB3R000100', 'TB4R000100', 'TB4R000103', 'TB4R000104', 'TB4R000105', 'TB4R000106' --Revenue
    --  		, 'B2S000500', 'B2S010500', 'DB2S000500', 'DB2S010500', 'TB2S000500', 'TB2S010500', 'B1S000500', 'B1S000600', 'DB1S000500', 'DB1S000600', 'TB1S000500', 'TB1S000600', 'TB3S000500', 'TB4S000500' --Subs
    --    	, 'B2R000500', 'DB2R000500', 'TB2R000500', 'B1R000900', 'DB1R000900', 'TB1R000900', 'TB3R000600', 'TB3R000601', 'TB4R001000' --Sales : Inflow M1
    --    	, 'B2S000100', 'DB2S000100', 'TB2S000100', 'B1S000101', 'DB1S000101', 'TB1S000101', 'TB3S000100', 'TB3S000101', 'TB3S000102', 'TB4S000100', 'TB4S000101', 'TB4S000102', 'TB4S000103', 'TB4S000104') --Sales : Gross Adds
        AND AREA_TYPE IN ('C', 'P', 'G')
        AND TM_KEY_YR >= (SELECT V_YR FROM W_PARAM)
    ) -->> W_VINSIGHT_SNAP
    -----------------------------------------------------------------------------------------------------------------------

    /*** Monthly Snap ***/

    SELECT *
    FROM (
        SELECT TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, TM_KEY_WK, TM_KEY_DAY
            , CENTER, PRODUCT_GRP, COMP_CD, METRIC_GRP, METRIC_CD, METRIC_NAME, ACTUAL_AS_OF, AGG_TYPE, AREA_TYPE, AREA_CD, AREA_NAME
            , CASE 	WHEN REGEXP_LIKE(METRIC_CD, '[0-9]A[A-K]$') THEN SUBSTR(METRIC_CD,-2) ELSE 'ALL' END CHANNEL_CD
            , CASE 	WHEN AGG_TYPE = 'S' THEN ACTUAL_SNAP 
                    ELSE (CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_MTH) THEN ACTUAL_AGG_MTH END)
                    END ACTUAL_MTH
            , CASE 	WHEN AGG_TYPE = 'S' THEN TARGET_SNAP 
                    ELSE (CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_MTH) THEN TARGET_AGG_MTH END)
                    END TARGET_MTH
            , ACTUAL_SNAP, TARGET_SNAP--, ACTUAL_AGG_MTH, TARGET_AGG_MTH
            , WOW, MOM, QOQ, YOY
            , WTD_YOY, MTD_YOY, QTD_YOY, YTD_YOY
            , PPN_TM
            , CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_WK) THEN 'Y' END END_WK_FLAG
            , CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_MTH) THEN 'Y' END END_MTH_FLAG
            , CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_QTR) THEN 'Y' END END_Q_FLAG
            , CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_YR) THEN 'Y' END END_YR_FLAG
        FROM W_VINSIGHT_SNAP
    ) TMP
    --ORDER BY TM_KEY_DAY, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD
""")


try:
    execute_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
    print(f'\n   -> Execute query... {execute_datetime}')
    
    # Create Dataframe
    src_cur.execute(query)
    rows = src_cur.fetchall()
    src_df = pd.DataFrame.from_records(rows, columns=[x[0] for x in src_cur.description])
    print(f'\n   -> DataFrame : {src_df.shape[0]} rows, {src_df.shape[1]} columns')

    # Generate CSV file
    src_df.to_csv(f'{op_dir}/{op_daily_file}.csv', index=False, encoding='utf-8')
    print(f'\n   -> Generate "{op_daily_file}.csv" successfully')


except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')


finally:
    src_cur.close()
    src_conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')


TDMDBPR : Connected

   -> Execute query... 2024-09-25, 15:28:27

   -> DataFrame : 527424 rows, 34 columns

   -> Generate "daily_snap_20240925.csv" successfully

TDMDBPR : Disconnected


## Import : Monthly Snap Data
    DB -> Output file (monthly_snap_YYYYMMDD.csv)

    Source : GEOSPCAPPO.AGG_PERF_NEWCO

In [3]:
# Connect : TDMDBPR
src_dsn = f'{TDMDBPR_user}/{TDMDBPR_pwd}@{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
src_conn = oracledb.connect(src_dsn)
print(f'\n{TDMDBPR_db} : Connected')
src_cur = src_conn.cursor()


query = (f"""
    /*** Import data from "AGG_PERF_NEWCO" ***/
    -----------------------------------------------------------------------------------------------------------------------

    WITH W_PARAM AS 
    (
        SELECT {v_year} AS V_YR
            , {v_month_start} AS V_MTH_START
            , {v_month_end}V_MTH_END
            , {v_date} AS V_DT 
        FROM DUAL
    )
    -----------------------------------------------------------------------------------------------------------------------

    , W_VINSIGHT_SNAP AS 
    (
        SELECT TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, TM_KEY_WK, TM_KEY_DAY
            , CENTER, PRODUCT_GRP, COMP_CD, METRIC_GRP, METRIC_CD, METRIC_NAME, SEQ
            , ACTUAL_AS_OF, AGG_TYPE, RR_IND, GRY_IND, UOM
            , AREA_TYPE, AREA_CD, AREA_NAME
            , ACTUAL_SNAP, TARGET_SNAP, ACTUAL_AGG_MTH, TARGET_AGG_MTH
            , PPN_TM
        
        FROM GEOSPCAPPO.AGG_PERF_NEWCO 
        
        WHERE CENTER IN ('Revenue', 'Sales', 'Profitability')
        AND NOT REGEXP_LIKE(METRIC_CD, '[0-9]C$|[0-9]H$|[0-9]MCOM$') --|[0-9]CORP$|[0-9]GEO$|[0-9]A[A-K]$
    	AND TM_KEY_YR >= (SELECT V_YR FROM W_PARAM)
        --AND TM_KEY_MTH >= (SELECT V_MTH_END FROM W_PARAM)
        --AND TM_KEY_MTH BETWEEN (SELECT V_MTH_START FROM W_PARAM) AND (SELECT V_MTH_END FROM W_PARAM)
    )
    -----------------------------------------------------------------------------------------------------------------------

    /*** Monthly Snap ***/

    SELECT TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, CHANNEL_CD, AGG_TYPE, UOM
        , CAST(SUM(CASE WHEN AREA_TYPE = 'C' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS C_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'P' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS P_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'G' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS G_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'H' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS H_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'HH' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS HH_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'CCAA' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS CCAA_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'CCAATT' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS CCAATT_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'C' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS C_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'P' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS P_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'G' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS G_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'H' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS H_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'HH' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS HH_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'CCAA' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS CCAA_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'CCAATT' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS CCAATT_TARGET
        , MAX(ACTUAL_AS_OF) ACTUAL_AS_OF, MIN(TM_KEY_DAY) MIN_DAY, MAX(TM_KEY_DAY) MAX_DAY, MAX(PPN_TM) PPN_TM
        
    FROM (
        SELECT TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, TM_KEY_WK, TM_KEY_DAY
            , CENTER, PRODUCT_GRP, COMP_CD, METRIC_GRP, METRIC_CD, METRIC_NAME, SEQ, ACTUAL_AS_OF, AGG_TYPE, RR_IND, GRY_IND, UOM, AREA_TYPE, AREA_CD, AREA_NAME
            , CASE 	WHEN AGG_TYPE = 'S' THEN ACTUAL_SNAP 
                    ELSE (CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_MTH) THEN ACTUAL_AGG_MTH END)
                    END ACTUAL_TMP
            , CASE 	WHEN AGG_TYPE = 'S' THEN TARGET_SNAP 
                    ELSE (CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_MTH) THEN TARGET_AGG_MTH END)
                    END TARGET_TMP
            , ACTUAL_SNAP, TARGET_SNAP, ACTUAL_AGG_MTH, TARGET_AGG_MTH, PPN_TM
            , CASE WHEN REGEXP_LIKE(METRIC_CD, '[0-9]A[A-K]$') THEN SUBSTR(METRIC_CD,-2) ELSE 'ALL' END CHANNEL_CD
        FROM W_VINSIGHT_SNAP
    ) MTH_SNAP

    --WHERE CHANNEL_CD = 'ALL'
    GROUP BY TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, CHANNEL_CD, AGG_TYPE, UOM
    --ORDER BY TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD
""")


try:
    execute_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
    print(f'\n   -> Execute query... {execute_datetime}')
    
    # Create Dataframe
    src_cur.execute(query)
    rows = src_cur.fetchall()
    src_df = pd.DataFrame.from_records(rows, columns=[x[0] for x in src_cur.description])
    print(f'\n   -> DataFrame : {src_df.shape[0]} rows, {src_df.shape[1]} columns')

    # Generate CSV file
    src_df.to_csv(f'{op_dir}/{op_monthly_file}.csv', index=False, encoding='utf-8')
    print(f'\n   -> Generate "{op_monthly_file}.csv" successfully')

    # # Generate Excel file
    # src_df.to_excel(f'{op_dir}/{op_monthly_file}.xlsx', sheet_name='Data', index=False)
    # print(f'\n   -> Generate "{op_monthly_file}.xlsx" successfully')


except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')


finally:
    src_cur.close()
    src_conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')


TDMDBPR : Connected

   -> Execute query... 2024-09-25, 12:56:29


## Reconcile

### Import Rawdata

In [5]:
''' Import Rawdata '''

data_src = f'{op_dir}/{op_monthly_file}.csv'
raw_df = pd.read_csv(data_src, low_memory=False)
print(f'\nraw_df : {raw_df.shape[0]} rows, {raw_df.shape[1]} columns')
# raw_df.tail(3)


raw_df : 7506 rows, 28 columns


In [6]:
''' Add columns '''
tmp_df = raw_df
tmp_df['TMP_CD'] = tmp_df['METRIC_CD'].replace(r'AA$|AB$|AC$|AD$|AE$|AF$|AG$|AH$|AI$|AJ$|AK$', '', regex=True)
tmp_df['TMP_NAME'] = tmp_df['METRIC_NAME'].replace(r' : Account Executive| : B2B| : Branded Retail| : Contact Center| : Direct Sales| : Key Account| : Modern Trade| : Others| : Own Digital| : Retail Sales| : Wholesales', '', regex=True)
tmp_df['MY_GROUP'] = tmp_df.apply(lambda x: fn.my_metric_group(str(x['METRIC_GRP']), str(x['METRIC_CD']), str(x['METRIC_NAME'])), axis=1)
tmp_df['MAIN_FLAG'] = np.where((tmp_df['METRIC_CD'].str.contains('[0-9]C$|[0-9]H$|[0-9]MCOM$|[0-9]CUS$')) | (tmp_df['METRIC_NAME'].str.contains('New Revenue|Existing Revenue')), 'N', 'Y')
tmp_df['GEO_FLAG'] = np.where((tmp_df['METRIC_CD'].str.contains('GEO$')) | (tmp_df['METRIC_NAME'].str.contains('\(Geo\)$')), 'Y', 'N')
tmp_df['CDS_FLAG'] = np.where((tmp_df['METRIC_CD'].str.contains('^TB1R000109|^TB1S000109|^TB1R000106|^TB1S000106|^TB1S000102|^TB1S000103|^TB1S000104|^TB0R00010002|^TB2S000100|^TB1S000101|^TB3S000100|^TB3S000101|^TB3S000102|^TB3S000103|^TB3S000900|^TB3S000901|^TB3S000902|^TB4S000100|^TB4S000104|^TB4S001300|^TB0R00010001|^TB2R000500|^TB1R000900|^TB3R000600|^TB3R000601|^TB3R000602|^TB4R001000|^TB1R001000|^TB4S001400|^TB4R001700')) & (tmp_df['METRIC_CD'].str.contains('[0-9]$|[0-9]A[A-K]$')), 'Y', 'N')
# tmp_df['CDS_FLAG'] = np.where(tmp_df['METRIC_CD'].isin(['TB1R000109', 'TB1S000109', 'TB1R000106', 'TB1S000106', 'TB1S000102', 'TB1S000103', 'TB1S000104', 'TB0R00010002', 'TB2S000100', 'TB1S000101', 'TB3S000100', 'TB3S000101', 'TB3S000102', 'TB3S000103', 'TB3S000900', 'TB3S000901', 'TB3S000902', 'TB4S000100', 'TB4S000104', 'TB4S001300', 'TB0R00010001', 'TB2R000500', 'TB1R000900', 'TB3R000600', 'TB3R000601', 'TB3R000602', 'TB4R001000', 'TB1R001000', 'TB4S001400', 'TB4R001700']), 'Y', 'N')

''' Generate CSV file '''
tmp_df.to_csv(f'{op_dir}/tmp_{op_monthly_file}.csv', index=False, encoding='utf-8')
print(f'\n   -> Generate "tmp_{op_monthly_file}.csv" successfully')

tmp_df.tail(3)


   -> Generate "tmp_monthly_snap_20240924.csv" successfully


Unnamed: 0,TM_KEY_MTH,CENTER,METRIC_GRP,PRODUCT_GRP,COMP_CD,METRIC_CD,METRIC_NAME,CHANNEL_CD,AGG_TYPE,UOM,...,ACTUAL_AS_OF,MIN_DAY,MAX_DAY,PPN_TM,TMP_CD,TMP_NAME,MY_GROUP,MAIN_FLAG,GEO_FLAG,CDS_FLAG
7503,202401,Revenue,Market Share,Mobile,ALL,VIN00049,Mobile Subs Share (Subs) : AIS,ALL,N,,...,20240131.0,20240101,20240131,2024-09-24 03:47:20,VIN00049,Mobile Subs Share (Subs) : AIS,MB MKS(Subs),Y,N,N
7504,202407,Revenue,Market Share,Mobile,ALL,VIN00049,Mobile Subs Share (Subs) : AIS,ALL,N,,...,20240731.0,20240701,20240731,2024-09-24 03:47:20,VIN00049,Mobile Subs Share (Subs) : AIS,MB MKS(Subs),Y,N,N
7505,202406,Revenue,Market Share,Prepaid,DTAC,VIN00062,Prepaid Subs Share (Subs) : DTAC,ALL,N,,...,20240630.0,20240601,20240630,2024-09-24 03:47:20,VIN00062,Prepaid Subs Share (Subs) : DTAC,MB MKS(Subs),Y,N,N


### Sales Channel

In [6]:
''' Create Reconcile Data '''

rec_df = tmp_df

''' Filters '''
# ~
# my_str = '^Postpaid Revenue.*DTAC$'
# my_str = ' B2C| B2B|Geo'

# rec_df = rec_df.loc[rec_df['TM_KEY_MTH']==202406]
# rec_df = rec_df.loc[rec_df['CDS_FLAG']=='Y']
rec_df = rec_df.loc[rec_df['CHANNEL_CD']=='ALL']
# rec_df = rec_df.loc[rec_df['PRODUCT_GRP']=='Prepaid']
# rec_df = rec_df.loc[rec_df['COMP_CD']=='TRUE']
# rec_df = rec_df.loc[rec_df['MY_GROUP']=='']

''' Issue as 20240625 : Actual loss (G, H, HH) '''
rec_df = rec_df.loc[rec_df['METRIC_CD'].str.contains('^TB1R000106|^TB1R000109|^TB1S000106|^TB3R000601|^TB3R000602|^TB3S000102|^TB3S000103|^TB3S000901|^TB3S000902|^TB4S001300')]
rec_df = rec_df.loc[rec_df['METRIC_CD'].str.contains('[0-9]$|[0-9]A[A-K]$')]

# rec_df = rec_df.replace(np.nan, None)
rec_df = rec_df.fillna(0)
rec_df = rec_df.reset_index(drop=True)
# all_cols = ['TM_KEY_MTH', 'CENTER', 'METRIC_GRP', 'PRODUCT_GRP', 'COMP_CD', 'METRIC_CD', 'METRIC_NAME', 'CHANNEL_CD', 'AGG_TYPE', 'UOM', 'P_ACTUAL', 'G_ACTUAL', 'H_ACTUAL', 'HH_ACTUAL', 'P_TARGET', 'G_TARGET', 'H_TARGET', 'HH_TARGET', 'ACTUAL_AS_OF', 'MIN_DAY', 'MAX_DAY', 'PPN_TM']
actual_cols = ['TM_KEY_MTH', 'PRODUCT_GRP', 'COMP_CD', 'METRIC_CD', 'METRIC_NAME', 'CHANNEL_CD', 'AGG_TYPE', 'UOM', 'P_ACTUAL', 'G_ACTUAL', 'H_ACTUAL', 'HH_ACTUAL', 'P_TARGET', 'ACTUAL_AS_OF', 'MIN_DAY', 'MAX_DAY', 'PPN_TM']
rec_df = rec_df[actual_cols]
# rec_df.tail(3)

''' Display '''
tmp_rec_df = rec_df.copy()
mod_col_list = tmp_rec_df.iloc[:, 8:13].columns.tolist()
for col in mod_col_list:
    tmp_rec_df[col] = tmp_rec_df[col].apply(lambda x: format(x, ',.0f'))
tmp_rec_df.tail()

  rec_df = rec_df.fillna(0)


Unnamed: 0,TM_KEY_MTH,PRODUCT_GRP,COMP_CD,METRIC_CD,METRIC_NAME,CHANNEL_CD,AGG_TYPE,UOM,P_ACTUAL,G_ACTUAL,H_ACTUAL,HH_ACTUAL,P_TARGET,ACTUAL_AS_OF,MIN_DAY,MAX_DAY,PPN_TM
65,202407,TOL,True,TB3S000102,TOL Gross Adds - Connected (Consumer),ALL,S,0,28558,21423,21423,21255,0,20240717.0,20240701,20240718,2024-07-19 05:50:30
66,202407,TOL,True,TB3S000103,TOL Gross Adds - Connected (DataService),ALL,S,0,1476,28,28,28,0,20240717.0,20240701,20240718,2024-07-19 05:50:30
67,202407,TOL,True,TB3S000901,TOL Gross Adds - Connected (Install Location -...,ALL,S,0,28558,28555,28555,28555,0,20240717.0,20240701,20240718,2024-07-19 05:50:30
68,202407,TOL,True,TB3S000902,TOL Gross Adds - Connected (Install Location -...,ALL,S,0,1476,765,765,757,0,20240717.0,20240701,20240718,2024-07-19 05:50:30
69,202407,TVS,True,TB4S001300,TVS Gross Adds (Install Location),ALL,S,0,8,7,7,7,0,20240714.0,20240701,20240718,2024-07-19 05:50:30


### Temp data

In [23]:
''' Generate Temp files '''

''' GROUP list '''
# grp_list_df = tmp_df[['METRIC_GRP', 'PRODUCT_GRP']].drop_duplicates().reset_index(drop=True)
# grp_list_df.dropna(how='all')
# # grp_list_df.dropna(axis=1, how='all')
# # grp_list_df.dropna(subset=['PRODUCT_GRP'])
grp_list_df = tmp_df.groupby(['METRIC_GRP', 'PRODUCT_GRP']).agg({'METRIC_CD': 'nunique', 'MIN_DAY': 'min', 'MAX_DAY': 'max'}).reset_index()
grp_list_df.rename(columns={'METRIC_CD': 'CNT_METRIC'}, inplace=True)
grp_list_df.to_excel(f'temp/Metric_Grp_List_{str_curr_dt}.xlsx', sheet_name='Data', index=False)
print(f'\n   -> Generate "Metric_Grp_List_{str_curr_dt}.xlsx" successfully')

''' METRIC list '''
# metric_list_df = tmp_df[['METRIC_GRP', 'PRODUCT_GRP', 'COMP_CD', 'METRIC_CD', 'METRIC_NAME']].drop_duplicates().reset_index(drop=True)
# metric_list_df.dropna(how='all')
metric_list_df = tmp_df.groupby(['METRIC_GRP', 'PRODUCT_GRP', 'COMP_CD', 'TMP_CD', 'TMP_NAME', 'AGG_TYPE', 'UOM']).agg({'CHANNEL_CD': 'nunique', 'ACTUAL_AS_OF': 'max', 'MIN_DAY': 'min', 'MAX_DAY': 'max'}).reset_index()
metric_list_df.rename(columns={'CHANNEL_CD': 'CNT_CHANNEL', 'ACTUAL_AS_OF': 'LAST_ACTUAL'}, inplace=True)
metric_list_df.to_excel(f'temp/Metric_Cd_List_{str_curr_dt}.xlsx', sheet_name='Data', index=False)
print(f'\n   -> Generate "Metric_Cd_List_{str_curr_dt}.xlsx" successfully')



   -> Generate "Metric_Grp_List_20240625.xlsx" successfully

   -> Generate "Metric_Cd_List_20240625.xlsx" successfully


In [29]:
''' Create Reconcile Data '''

rec_df = tmp_df

''' Filters '''
# rec_df = rec_df.loc[rec_df['TM_KEY_MTH']==202406]
# rec_df = rec_df.loc[rec_df['VIEW_GRP']=='']
rec_df = rec_df.loc[rec_df['CDS_FLAG']=='Y']
# rec_df = rec_df.loc[rec_df['CHANNEL_CD']=='ALL']
# rec_df = rec_df.loc[rec_df['COMP_CD']=='TRUE']
# rec_df = rec_df.loc[rec_df['PRODUCT_GRP']=='TOL']

# my_str = 'Prepaid Topping|Prepaid Pay per Use'
# my_str = '^Postpaid Revenue.*DTAC$'
# my_str = 'Inflow M1|Gross Add'
# my_str2 = 'CORP'
# my_str3 = ' B2C| B2B|Geo'

# rec_df = rec_df[rec_df['METRIC_CD']=='DB1R000900']
# rec_df = rec_df.loc[rec_df['METRIC_NAME'].str.contains(my_str)]
# rec_df = rec_df.loc[~rec_df['METRIC_NAME'].str.contains(my_str2)]
# rec_df = rec_df.loc[~rec_df['METRIC_NAME'].str.contains(my_str3)]
# rec_df = rec_df.replace(np.nan, None)
# rec_df = rec_df.fillna(0)
rec_df = rec_df.reset_index(drop=True)
rec_df.tail(3)

Unnamed: 0,TM_KEY_MTH,CENTER,METRIC_GRP,PRODUCT_GRP,COMP_CD,METRIC_CD,METRIC_NAME,CHANNEL_CD,AGG_TYPE,UOM,...,H_TARGET,HH_TARGET,ACTUAL_AS_OF,MIN_DAY,MAX_DAY,PPN_TM,TMP_CD,TMP_NAME,VIEW_GRP,CDS_FLAG
1311,202406,Revenue,Sales,TVS,True,TB4S001400,TVS Now Gross Adds,ALL,S,subs,...,,,,20240601,20240624,2024-06-25 08:34:50,TB4S001400,TVS Now Gross Adds,Gross Adds,Y
1312,202406,Sales,Sales,TOL,True,TB3R000600,TOL Inflow M1 - Connected,ALL,S,baht,...,18658767.34,18548566.08,20240623.0,20240601,20240624,2024-06-25 08:34:50,TB3R000600,TOL Inflow M1 - Connected,Inflow M1,Y
1313,202406,Sales,Sales,TVS,True,TB4R001000,TVS Inflow M1,ALL,S,baht,...,,,20240622.0,20240601,20240624,2024-06-25 08:34:50,TB4R001000,TVS Inflow M1,Inflow M1,Y


In [30]:
''' Aggregate '''

# Yearly
# agg_df = rec_df.groupby(['TM_KEY_YR', 'COMP_CD', 'PRODUCT_GRP', 'METRIC_CD', 'METRIC_NAME', 'CHANNEL_CD', 'UOM']).agg({'P_ACTUAL': 'sum', 'P_TARGET': 'sum', 'PPN_TM': 'max'}).reset_index()
# agg_df = rec_df.groupby(['TM_KEY_YR', 'COMP_CD', 'PRODUCT_GRP', 'METRIC_CD', 'METRIC_NAME', 'CHANNEL_CD', 'UOM']).agg({'P_ACTUAL': 'sum', 'G_ACTUAL': 'sum', 'H_ACTUAL': 'sum', 'HH_ACTUAL': 'sum', 'PPN_TM': 'max'}).reset_index()
# agg_df = rec_df.groupby(['TM_KEY_YR', 'COMP_CD', 'PRODUCT_GRP', 'METRIC_CD', 'METRIC_NAME', 'CHANNEL_CD', 'UOM']).agg({'P_ACTUAL': 'sum', 'G_ACTUAL': 'sum', 'H_ACTUAL': 'sum', 'HH_ACTUAL': 'sum', 'P_TARGET': 'sum', 'G_TARGET': 'sum', 'H_TARGET': 'sum', 'HH_TARGET': 'sum', 'PPN_TM': 'max'}).reset_index()

# Monthly
agg_df = rec_df.groupby(['TM_KEY_MTH', 'COMP_CD', 'PRODUCT_GRP', 'METRIC_CD', 'METRIC_NAME', 'CHANNEL_CD', 'UOM']).agg({'P_ACTUAL': 'sum', 'G_ACTUAL': 'sum', 'H_ACTUAL': 'sum', 'HH_ACTUAL': 'sum', 'P_TARGET': 'sum', 'G_TARGET': 'sum', 'H_TARGET': 'sum', 'HH_TARGET': 'sum', 'PPN_TM': 'max'}).reset_index()


''' Create Temp File '''
# agg_df.to_excel(f'temp/Temp.xlsx', sheet_name='Data', index=False)
# print(f'\n -> Generate "Temp.xlsx" successfully')

agg_df.tail()

Unnamed: 0,TM_KEY_MTH,COMP_CD,PRODUCT_GRP,METRIC_CD,METRIC_NAME,CHANNEL_CD,UOM,P_ACTUAL,G_ACTUAL,H_ACTUAL,HH_ACTUAL,P_TARGET,G_TARGET,H_TARGET,HH_TARGET,PPN_TM
1309,202406,True,TVS,TB4S000100AH,TVS Gross Adds : Others,AH,subs,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-06-25 08:34:50
1310,202406,True,TVS,TB4S000100AJ,TVS Gross Adds : Retail Sales,AJ,subs,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-06-25 08:34:50
1311,202406,True,TVS,TB4S000104,TVS Gross Adds : TVS Now,ALL,subs,617.0,595.0,595.0,595.0,0.0,0.0,0.0,0.0,2024-06-25 08:34:50
1312,202406,True,TVS,TB4S001300,TVS Gross Adds (Install Location),ALL,subs,20.0,0.0,0.0,19.0,0.0,0.0,0.0,0.0,2024-06-25 08:34:50
1313,202406,True,TVS,TB4S001400,TVS Now Gross Adds,ALL,subs,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2024-06-25 08:34:50


In [31]:
''' Pivot Table '''

# # Actual & Target
# pv_target = pd.pivot_table(x, values=['P_ACTUAL', 'P_TARGET'], index=['COMP_CD', 'TMP_CD', 'TMP_NAME'], columns='CHANNEL_CD', aggfunc='sum', fill_value=0)

# Actual
pv_actual = pd.pivot_table(tmp_df, values='P_ACTUAL', index=['COMP_CD', 'TMP_CD', 'TMP_NAME'], columns='CHANNEL_CD', aggfunc='sum', fill_value=0)
pv_actual['VERION'] = 'A'

# Target
pv_target = pd.pivot_table(tmp_df, values='P_TARGET', index=['COMP_CD', 'TMP_CD', 'TMP_NAME'], columns='CHANNEL_CD', aggfunc='sum', fill_value=0)
pv_target['VERION'] = 'T'

# Concat Dataframe
pd.concat([pv_actual, pv_target]).reset_index()

CHANNEL_CD,COMP_CD,TMP_CD,TMP_NAME,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK,ALL,VERION
0,ALL,B0R000100,Total Revenue,0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,7.163789e+10,A
1,ALL,B0R00010001,Total Inflow M1,3013211.55,69382947.26,3.525488e+08,1.953428e+08,1.619986e+08,1.235131e+08,1.281055e+08,48302015.95,11446012.23,7.533273e+08,24737758.22,1.871505e+09,A
2,ALL,B0R00010001CORP,Total Inflow M1,0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,1.871505e+09,A
3,ALL,B0R00010002,Total Gross Adds,2453.00,151523.00,1.191808e+06,6.195680e+05,8.572620e+05,2.407030e+05,1.031605e+06,192643.00,19302.00,5.434834e+06,1675200.00,1.141646e+07,A
4,ALL,B0R0001002,%Revenue Growth (YTD YoY),0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,3.629000e+01,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
971,TRUE,VIN00042,Revenue (Corporate),0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,2.400000e+01,T
972,TRUE,VIN00043,Profitability (Corporate),0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,2.400000e+01,T
973,TRUE,VIN00050,Mobile Subs Share (Subs) : TMH,0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.000000e+00,T
974,TRUE,VIN00061,Prepaid Subs Share (Subs) : TMH,0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.000000e+00,T


## Generate Output file

In [45]:
# # to Excel file

# op_dir = 'temp'
# op_file = 'VINSIGHT Data Monitoring.xlsx'

# df.to_excel(f'{op_dir}/{op_file}', sheet_name='Data', index=False)
# print(f'\n  -> Generate "{op_file}" successfully')


 -> Generate "Metric_List.xlsx" successfully


In [44]:
# # to CSV file

# op_dir = 'temp'
# op_file = 'VINSIGHT Data Monitoring.csv'

# df.to_csv(f'{op_dir}/{op_file}', index=False, encoding='utf-8')
# print(f'\n  -> Generate "{op_file}" successfully')


 -> Generate "Metric_List.csv" successfully
