# * VINSIGHT : Data Monitoring
    Process required -> "ETL-AGG_PERF_NEWCO_SNAP"

## Parameter

In [3]:
import os
import glob
import configparser
import datetime as dt
import pandas as pd
import numpy as np
import xlrd
import oracledb
import re

# Load the database connection settings from the shared INI file.
config_path = '../../my_config.ini'
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means the settings were not loaded.
# Fail here with a clear message instead of a KeyError on the lookups below.
if not config.read(config_path):
    raise FileNotFoundError(f'Config file not found or unreadable: {config_path}')

# TDMDBPR connection settings
TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

# AKPIPRD connection settings
AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

# Run date; the YYYYMMDD form is used as a suffix in output file names.
curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')

# Input parameter
op_dir = 'data'
op_file = f'agg_perf_newco_snap_{str_curr_dt}'

## Create DataFrame
    DB -> CSV file

In [5]:
# Connect : AKPIPRD
# Credentials are passed as separate arguments instead of being embedded in a
# "user/password@host:port/service" string: a password containing "/" or "@"
# can then no longer break the connect-string parsing, and the secret is not
# kept inside the `dsn` variable.
dsn = f'{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}'
conn = oracledb.connect(user=AKPIPRD_user, password=AKPIPRD_pwd, dsn=dsn)
print(f'\n{AKPIPRD_db} : Connected\n\nProcessing...')
cur = conn.cursor()

# Monthly extraction query over AUTOKPI.AGG_PERF_NEWCO_SNAP.
#
# Inner subquery (TMP_MTH), per source row:
#   - ACTUAL_TMP / TARGET_TMP: when AGG_TYPE = 'S' the *_SNAP value is used on
#     every row; otherwise the *_AGG value is kept only on the row whose
#     TM_KEY_DAY is the latest one for that (METRIC_CD, TM_KEY_MTH), else NULL.
#   - CHANNEL_CD: the last two characters of METRIC_CD when the code matches
#     '[0-9]A[A-K]$', otherwise 'ALL'.
#
# Outer query: one row per month / metric / channel (see GROUP BY), summing
# ACTUAL_TMP and TARGET_TMP separately for AREA_TYPE 'P', 'G', 'H' and 'HH',
# plus MAX/MIN of ACTUAL_AS_OF, TM_KEY_DAY, PPN_TM and LOAD_DATE.
#
# NOTE(review): NOLOCK is a SQL Server table hint; in this Oracle query it is
# presumably parsed as a plain table alias and has no locking effect — confirm
# and consider removing it.
query = """
    -->> Actual Monthly

    SELECT TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, CHANNEL_CD, AGG_TYPE, UOM
    
        , CAST(SUM(CASE WHEN AREA_TYPE = 'P' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS P_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'G' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS G_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'H' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS H_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'HH' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS HH_ACTUAL

        , CAST(SUM(CASE WHEN AREA_TYPE = 'P' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS P_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'G' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS G_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'H' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS H_TARGET
        , CAST(SUM(CASE WHEN AREA_TYPE = 'HH' THEN TARGET_TMP END) AS DECIMAL(18,2)) AS HH_TARGET

        , MAX(ACTUAL_AS_OF) ACTUAL_AS_OF, MIN(TM_KEY_DAY) MIN_DAY, MAX(TM_KEY_DAY) MAX_DAY, MAX(PPN_TM) PPN_TM, MAX(LOAD_DATE) LOAD_DATE
        
    FROM (
        SELECT TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, TM_KEY_WK, TM_KEY_DAY
            , CENTER, PRODUCT_GRP, COMP_CD, METRIC_GRP, METRIC_CD, METRIC_NAME, SEQ, ACTUAL_AS_OF, AGG_TYPE, RR_IND, GRY_IND, UOM, AREA_TYPE, AREA_CD, AREA_NAME
            , CASE 	WHEN AGG_TYPE = 'S' THEN ACTUAL_SNAP 
                    ELSE (CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_MTH) THEN ACTUAL_AGG END)
                    END ACTUAL_TMP
            , CASE 	WHEN AGG_TYPE = 'S' THEN TARGET_SNAP 
                    ELSE (CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_MTH) THEN TARGET_AGG END)
                    END TARGET_TMP
            , ACTUAL_SNAP, TARGET_SNAP, BASELINE_SNAP, ACTUAL_AGG, TARGET_AGG, BASELINE_AGG, PPN_TM, LOAD_DATE
            , CASE WHEN REGEXP_LIKE(METRIC_CD, '[0-9]A[A-K]$') THEN SUBSTR(METRIC_CD,-2) ELSE 'ALL' END CHANNEL_CD
        FROM AUTOKPI.AGG_PERF_NEWCO_SNAP NOLOCK
    ) TMP_MTH

    --WHERE CHANNEL_CD = 'ALL'
    GROUP BY TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, CHANNEL_CD, AGG_TYPE, UOM
    ORDER BY TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD
"""


# Run the query, load the result into `df` and write it to CSV.
# The cursor and connection are always released in the `finally` clause.
try:
    # Create Dataframe
    execute_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
    print(f'\n   -> Execute query... {execute_datetime}')
    cur.execute(query)
    rows = cur.fetchall()
    # Column names come from the cursor metadata (first item of each description entry).
    df = pd.DataFrame.from_records(rows, columns=[x[0] for x in cur.description])
    print(f'\n   -> DataFrame : {df.shape[0]} rows, {df.shape[1]} columns')

    # Generate CSV file
    # to_csv() does not create missing directories, so make sure the target exists.
    os.makedirs(op_dir, exist_ok=True)
    df.to_csv(f'{op_dir}/{op_file}.csv', index=False, encoding='utf-8')
    print(f'\n   -> Generate "{op_file}.csv" successfully')

    # # Generate Excel file
    # df.to_excel(f'{op_dir}/{op_file}.xlsx', sheet_name='Data', index=False)
    # print(f'\n   -> Generate "{op_file}.xlsx" successfully')


except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')
    # Re-raise so the notebook stops here: otherwise later cells would silently
    # run against a `df` / CSV left over from a previous execution.
    raise


finally:
    cur.close()
    conn.close()
    print(f'\n{AKPIPRD_db} : Disconnected')



AKPIPRD : Connected

Processing...

   -> Execute query... 2024-06-19, 21:13:51

   -> DataFrame : 7198 rows, 23 columns

   -> Generate "agg_perf_newco_snap_20240619.csv" successfully

AKPIPRD : Disconnected


In [4]:
# Peek at the last extracted row as a quick sanity check of the result set.
df.iloc[-1:]

Unnamed: 0,TM_KEY_MTH,CENTER,METRIC_GRP,PRODUCT_GRP,COMP_CD,METRIC_CD,METRIC_NAME,CHANNEL_CD,AGG_TYPE,UOM,...,HH_ACTUAL,P_TARGET,G_TARGET,H_TARGET,HH_TARGET,ACTUAL_AS_OF,MIN_DAY,MAX_DAY,PPN_TM,LOAD_DATE
7197,202406,Sales,Subs,TOL,True,TB3S000700GEO,TOL %NAD 30DPDB2 (Due Date),ALL,N,%,...,,,,,,,20240601,20240616,2024-06-18 02:01:34,2024-06-18 10:28:47.459960


## Reconcile

In [6]:
# Raw data: reload the CSV snapshot from disk so the reconcile steps work on
# exactly what was exported.
data_src = f'{op_dir}/{op_file}.csv'
raw_df = pd.read_csv(data_src, low_memory=False)

n_rows, n_cols = raw_df.shape
print(f'\nraw_df : {n_rows} rows, {n_cols} columns')


raw_df : 7198 rows, 23 columns


In [7]:
# Create Temp DataFrame
# Work on a copy: a plain `tmp_df = raw_df` only aliases the frame, so the
# column assignments below would also add TMP_CD / TMP_NAME to raw_df and make
# the "raw" data depend on how often this cell has been run.
tmp_df = raw_df.copy()

# Add columns
# TMP_CD: METRIC_CD with a trailing AA..AK suffix removed.
# NOTE(review): the extraction SQL only derives a channel when the suffix
# follows a digit ('[0-9]A[A-K]$'); this pattern strips AA..AK after any
# character — confirm no 'ALL'-channel METRIC_CD ends in AA..AK.
tmp_df['TMP_CD'] = tmp_df['METRIC_CD'].replace(r'AA$|AB$|AC$|AD$|AE$|AF$|AG$|AH$|AI$|AJ$|AK$', '', regex=True)
# TMP_NAME: METRIC_NAME with any of the listed " : <channel label>" fragments removed.
tmp_df['TMP_NAME'] = tmp_df['METRIC_NAME'].replace(r' : Account Executive| : B2B| : Branded Retail| : Contact Center| : Direct Sales| : Key Account| : Modern Trade| : Others| : Own Digital| : Retail Sales| : Wholesales', '', regex=True)
tmp_df.tail(3)

Unnamed: 0,TM_KEY_MTH,CENTER,METRIC_GRP,PRODUCT_GRP,COMP_CD,METRIC_CD,METRIC_NAME,CHANNEL_CD,AGG_TYPE,UOM,...,G_TARGET,H_TARGET,HH_TARGET,ACTUAL_AS_OF,MIN_DAY,MAX_DAY,PPN_TM,LOAD_DATE,TMP_CD,TMP_NAME
7195,202406,Sales,Sales,TVS,True,TB4R001600,TVS GA ARPU,ALL,N,baht,...,,,,20240616.0,20240601,20240616,2024-06-18 02:01:34,2024-06-18 10:28:47.459960,TB4R001600,TVS GA ARPU
7196,202406,Sales,Subs,Postpaid,True,TB2S000700GEO,Postpaid %NAD 30DPDB2 (Due Date) : TMH,ALL,N,%,...,,,,,20240601,20240616,2024-06-18 02:01:34,2024-06-18 10:28:47.459960,TB2S000700GEO,Postpaid %NAD 30DPDB2 (Due Date) : TMH
7197,202406,Sales,Subs,TOL,True,TB3S000700GEO,TOL %NAD 30DPDB2 (Due Date),ALL,N,%,...,,,,,20240601,20240616,2024-06-18 02:01:34,2024-06-18 10:28:47.459960,TB3S000700GEO,TOL %NAD 30DPDB2 (Due Date)


In [8]:
# Generate Temp files
# to_excel() does not create missing directories, so make sure 'temp' exists.
os.makedirs('temp', exist_ok=True)

# GROUP list: one row per (METRIC_GRP, PRODUCT_GRP) with the number of distinct
# METRIC_CD values and the overall MIN_DAY / MAX_DAY.
# NOTE(review): groupby() drops rows whose key columns are NaN by default
# (dropna=True) — confirm that excluding such rows is intended.
grp_list_df = (
    tmp_df
    .groupby(['METRIC_GRP', 'PRODUCT_GRP'])
    .agg({'METRIC_CD': 'nunique', 'MIN_DAY': 'min', 'MAX_DAY': 'max'})
    .reset_index()
    .rename(columns={'METRIC_CD': 'CNT_METRIC'})
)
grp_list_df.to_excel(f'temp/Metric_Grp_List_{str_curr_dt}.xlsx', sheet_name='Data', index=False)
print(f'\n   -> Generate "Metric_Grp_List_{str_curr_dt}.xlsx" successfully')

# METRIC list: one row per base metric (TMP_CD / TMP_NAME) with the number of
# distinct CHANNEL_CD values, the latest ACTUAL_AS_OF and the day range.
metric_list_df = (
    tmp_df
    .groupby(['METRIC_GRP', 'PRODUCT_GRP', 'COMP_CD', 'TMP_CD', 'TMP_NAME', 'AGG_TYPE', 'UOM'])
    .agg({'CHANNEL_CD': 'nunique', 'ACTUAL_AS_OF': 'max', 'MIN_DAY': 'min', 'MAX_DAY': 'max'})
    .reset_index()
    .rename(columns={'CHANNEL_CD': 'CNT_CHANNEL', 'ACTUAL_AS_OF': 'LAST_ACTUAL'})
)
metric_list_df.to_excel(f'temp/Metric_Cd_List_{str_curr_dt}.xlsx', sheet_name='Data', index=False)
print(f'\n   -> Generate "Metric_Cd_List_{str_curr_dt}.xlsx" successfully')



   -> Generate "Metric_Grp_List_20240619.xlsx" successfully

   -> Generate "Metric_Cd_List_20240619.xlsx" successfully


In [13]:
# Create Reconcile Data
# Scope of the reconcile: one month, the all-channel rows, one metric code.
target_month = 202406
target_channel = 'ALL'
target_metric = 'DB1R000900'

# Alternative selection by METRIC_NAME pattern instead of METRIC_CD, e.g.
#   'Prepaid Topping|Prepaid Pay per Use', '^Postpaid Revenue.*DTAC$', 'Prepaid Inflow M1'
#   applied with tmp_df['METRIC_NAME'].str.contains(<pattern>)

# Filters
in_scope = (
    (tmp_df['TM_KEY_MTH'] == target_month)
    & (tmp_df['CHANNEL_CD'] == target_channel)
    & (tmp_df['METRIC_CD'] == target_metric)
)
rec_df = tmp_df.loc[in_scope].reset_index(drop=True)
rec_df.tail(3)

Unnamed: 0,TM_KEY_MTH,CENTER,METRIC_GRP,PRODUCT_GRP,COMP_CD,METRIC_CD,METRIC_NAME,CHANNEL_CD,AGG_TYPE,UOM,...,G_TARGET,H_TARGET,HH_TARGET,ACTUAL_AS_OF,MIN_DAY,MAX_DAY,PPN_TM,LOAD_DATE,TMP_CD,TMP_NAME
0,202406,Revenue,Sales,Prepaid,DTAC,DB1R000900,Prepaid Inflow M1 : DTAC,ALL,S,baht,...,,,,20240615.0,20240601,20240616,2024-06-18 02:01:34,2024-06-18 10:28:47.459960,DB1R000900,Prepaid Inflow M1 : DTAC


In [14]:
# Aggregate
# Collapse the reconcile rows to one line per metric / channel: total actual
# and target at 'P' level plus the most recent LOAD_DATE.
group_keys = ['COMP_CD', 'METRIC_CD', 'METRIC_NAME', 'CHANNEL_CD', 'UOM']
agg_df = (
    rec_df
    .groupby(group_keys)
    .agg(
        P_ACTUAL=('P_ACTUAL', 'sum'),
        P_TARGET=('P_TARGET', 'sum'),
        LOAD_DATE=('LOAD_DATE', 'max'),
    )
    .reset_index()
)

agg_df

Unnamed: 0,COMP_CD,METRIC_CD,METRIC_NAME,CHANNEL_CD,UOM,P_ACTUAL,P_TARGET,LOAD_DATE
0,DTAC,DB1R000900,Prepaid Inflow M1 : DTAC,ALL,baht,65831995.85,0.0,2024-06-18 10:28:47.459960


In [15]:
# Pivot Table
def _channel_pivot(frame, value_col, version):
    """Pivot `value_col` into one column per CHANNEL_CD and tag the rows.

    Values are summed per (COMP_CD, TMP_CD, TMP_NAME); combinations with no
    data are filled with 0. The returned frame carries `version` in a
    'VERSION' column.
    """
    pivoted = pd.pivot_table(
        frame,
        values=value_col,
        index=['COMP_CD', 'TMP_CD', 'TMP_NAME'],
        columns='CHANNEL_CD',
        aggfunc='sum',
        fill_value=0,
    )
    # Column name was previously misspelled as 'VERION'.
    pivoted['VERSION'] = version
    return pivoted


# NOTE(review): the pivots are built from tmp_df, which is not filtered by
# month and TM_KEY_MTH is not part of the index, so the sums span every month
# in the extract — confirm this is intended (rec_df holds the filtered scope).

# Actual
pv_actual = _channel_pivot(tmp_df, 'P_ACTUAL', 'A')

# Target
pv_target = _channel_pivot(tmp_df, 'P_TARGET', 'T')

# Concat Dataframe
pd.concat([pv_actual, pv_target]).reset_index()

CHANNEL_CD,COMP_CD,TMP_CD,TMP_NAME,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK,ALL,VERION
0,ALL,B0R000100,Total Revenue,0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.00,0.0,0.000000e+00,0.00,6.848565e+10,A
1,ALL,B0R00010001,Total Inflow M1,3105597.92,67084011.33,4.417356e+08,1.867376e+08,2.457503e+08,1.194497e+08,235899686.0,47259716.33,22814798.9,8.684782e+08,86315373.76,2.754285e+09,A
2,ALL,B0R00010001CORP,Total Inflow M1,0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.00,0.0,0.000000e+00,0.00,2.754285e+09,A
3,ALL,B0R00010002,Total Gross Adds,2517.00,141825.00,1.359625e+06,5.939450e+05,1.590761e+06,2.325690e+05,1563732.0,193765.00,30877.0,6.525672e+06,1975586.00,1.703563e+07,A
4,ALL,B0R0001002,%Revenue Growth (YTD YoY),0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.00,0.0,0.000000e+00,0.00,3.609000e+01,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
961,TRUE,VIN00042,Revenue (Corporate),0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.00,0.0,0.000000e+00,0.00,2.400000e+01,T
962,TRUE,VIN00043,Profitability (Corporate),0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.00,0.0,0.000000e+00,0.00,4.000000e+00,T
963,TRUE,VIN00050,Mobile Subs Share (Subs) : TMH,0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.00,0.0,0.000000e+00,0.00,0.000000e+00,T
964,TRUE,VIN00061,Prepaid Subs Share (Subs) : TMH,0.00,0.00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.00,0.0,0.000000e+00,0.00,0.000000e+00,T


## Generate Output file

In [45]:
# Disabled cell: every statement below is commented out, so running it does nothing.
# NOTE(review): if re-enabled, the op_dir / op_file assignments would overwrite
# the values set in the Parameter cell, which the extraction and reconcile
# cells read — consider using distinct names.
# # to Excel file

# op_dir = 'temp'
# op_file = 'VINSIGHT Data Monitoring.xlsx'

# df.to_excel(f'{op_dir}/{op_file}', sheet_name='Data', index=False)
# print(f'\n  -> Generate "{op_file}" successfully')


 -> Generate "Metric_List.xlsx" successfully


In [44]:
# Disabled cell: every statement below is commented out, so running it does nothing.
# NOTE(review): if re-enabled, the op_dir / op_file assignments would overwrite
# the values set in the Parameter cell, which the extraction and reconcile
# cells read — consider using distinct names.
# # to CSV file

# op_dir = 'temp'
# op_file = 'VINSIGHT Data Monitoring.csv'

# df.to_csv(f'{op_dir}/{op_file}', index=False, encoding='utf-8')
# print(f'\n  -> Generate "{op_file}" successfully')


 -> Generate "Metric_List.csv" successfully
