# * VINSIGHT : Data Monitoring
    Process required -> "ETL-AGG_PERF_NEWCO_SNAP"

## Parameter

In [1]:
import os
import glob
import configparser
import datetime as dt
import pandas as pd
import numpy as np
import xlrd
import oracledb

In [2]:
# Load database connection settings from the shared INI file.
config = configparser.ConfigParser()
config_path = '../my_config.ini'

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check that list so a missing file fails with a
# clear message instead of a KeyError on the first section lookup below.
if not config.read(config_path):
    raise FileNotFoundError(f'Config file not found or unreadable: {config_path}')

# TDMDBPR connection settings
TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

# AKPIPRD connection settings
AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

# Run date and its YYYYMMDD string form; the date is displayed as cell output.
curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')
curr_dt

datetime.date(2024, 6, 16)

## Create DataFrame
    from "AGG_PERF_NEWCO_SNAP"

In [4]:
# Create DataFrame
#
# Runs the monthly aggregation query against AKPIPRD and loads the result into `df`.

# NOTE(review): "NOLOCK" after the table name is SQL Server hint syntax; in
# Oracle it appears to act only as a table alias -- confirm and drop if so.
query = """
    -->> Actual Monthly

    SELECT TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, CHANNEL_CD, AGG_TYPE, UOM
        , CAST(SUM(CASE WHEN AREA_TYPE = 'P' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS P_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'G' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS G_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'H' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS H_ACTUAL
        , CAST(SUM(CASE WHEN AREA_TYPE = 'HH' THEN ACTUAL_TMP END) AS DECIMAL(18,2)) AS HH_ACTUAL
        , MAX(ACTUAL_AS_OF) ACTUAL_AS_OF, MIN(TM_KEY_DAY) MIN_DAY, MAX(TM_KEY_DAY) MAX_DAY, MAX(PPN_TM) PPN_TM, MAX(LOAD_DATE) LOAD_DATE
        
    FROM (
        SELECT TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, TM_KEY_WK, TM_KEY_DAY
            , CENTER, PRODUCT_GRP, COMP_CD, METRIC_GRP, METRIC_CD, METRIC_NAME, SEQ, ACTUAL_AS_OF, AGG_TYPE, RR_IND, GRY_IND, UOM, AREA_TYPE, AREA_CD, AREA_NAME
            , CASE 	WHEN AGG_TYPE = 'S' THEN ACTUAL_SNAP 
                    ELSE (CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_MTH) THEN ACTUAL_AGG END)
                    END ACTUAL_TMP
            , CASE 	WHEN AGG_TYPE = 'S' THEN TARGET_SNAP 
                    ELSE (CASE WHEN TM_KEY_DAY = MAX(TM_KEY_DAY) OVER(PARTITION BY METRIC_CD, TM_KEY_MTH) THEN TARGET_AGG END)
                    END TARGET_TMP
            , ACTUAL_SNAP, TARGET_SNAP, BASELINE_SNAP, ACTUAL_AGG, TARGET_AGG, BASELINE_AGG, PPN_TM, LOAD_DATE
            , CASE WHEN REGEXP_LIKE(METRIC_CD, '[0-9]A[A-K]$') THEN SUBSTR(METRIC_CD,-2) ELSE 'ALL' END CHANNEL_CD
        FROM AUTOKPI.AGG_PERF_NEWCO_SNAP NOLOCK
    ) TMP_MTH

    --WHERE CHANNEL_CD = 'ALL'
    GROUP BY TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, CHANNEL_CD, AGG_TYPE, UOM
    ORDER BY TM_KEY_MTH, CENTER, METRIC_GRP, PRODUCT_GRP, COMP_CD, METRIC_CD
"""

# Credentials are passed as separate arguments rather than embedded in the
# connect string, so a password containing '/' or '@' cannot break DSN parsing
# and the secret never ends up inside the `dsn` variable.
dsn = f'{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}'
print(f'\nConnecting... {AKPIPRD_db}')
conn = oracledb.connect(user=AKPIPRD_user, password=AKPIPRD_pwd, dsn=dsn)

try:
    # The cursor is opened inside the try so the connection is still closed by
    # the finally clause if cursor creation itself fails.
    with conn.cursor() as cur:
        execute_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
        print(f'\n  -> Execute query... {execute_datetime}')
        cur.execute(query)
        rows = cur.fetchall()
        # Column names come from the cursor metadata (first item of each description entry).
        df = pd.DataFrame.from_records(rows, columns=[x[0] for x in cur.description])
    print(f'\n  -> DataFrame : {df.shape[0]} rows, {df.shape[1]} columns')

except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')
    # Re-raise: every later cell needs `df`, and swallowing the error would
    # leave it undefined or silently stale from a previous run.
    raise

finally:
    conn.close()
    print(f'\n{AKPIPRD_db} : Disconnected')



Connecting... AKPIPRD

  -> Execute query... 2024-06-16, 16:01:52

  -> DataFrame : 7188 rows, 19 columns

AKPIPRD : Disconnected


In [5]:
# Raw data: show the last row of the query result as a quick sanity check.

df.iloc[-1:]

Unnamed: 0,TM_KEY_MTH,CENTER,METRIC_GRP,PRODUCT_GRP,COMP_CD,METRIC_CD,METRIC_NAME,CHANNEL_CD,AGG_TYPE,UOM,P_ACTUAL,G_ACTUAL,H_ACTUAL,HH_ACTUAL,ACTUAL_AS_OF,MIN_DAY,MAX_DAY,PPN_TM,LOAD_DATE
7187,202406,Sales,Subs,TOL,True,TB3S000700GEO,TOL %NAD 30DPDB2 (Due Date),ALL,N,%,,,,,,20240601,20240613,2024-06-14 11:01:19,2024-06-14 17:04:20.682192


## Reconcile

In [16]:
# Generate Temp files
#
# Writes the distinct metric-group and metric-code combinations found in `df`
# to Excel workbooks under temp/.

# Make sure the output folder exists before writing into it.
os.makedirs('temp', exist_ok=True)

# GROUP list
# dropna() returns a new frame, so it has to be part of the assignment chain;
# calling it as a standalone statement discards the result.
grp_list_df = (
    df[['METRIC_GRP', 'PRODUCT_GRP']]
    .drop_duplicates()
    .dropna(how='all')
    .reset_index(drop=True)
)
grp_list_df.to_excel('temp/Metric_Grp_List.xlsx', sheet_name='Data', index=False)
print('\n -> Generate "Metric_Grp_List.xlsx" successfully')

# METRIC list
metric_list_df = (
    df[['METRIC_GRP', 'PRODUCT_GRP', 'COMP_CD', 'METRIC_CD', 'METRIC_NAME']]
    .drop_duplicates()
    .dropna(how='all')
    .reset_index(drop=True)
)
metric_list_df.to_excel('temp/Metric_Cd_List.xlsx', sheet_name='Data', index=False)
print('\n -> Generate "Metric_Cd_List.xlsx" successfully')



 -> Generate "Metric_Grp_List.xlsx" successfully

 -> Generate "Metric_Cd_List.xlsx" successfully


In [18]:
# Create Reconcile Data

# Keep only rows with TM_KEY_MTH 202406 and CHANNEL_CD 'ALL', using a single
# combined mask, then renumber the index from 0.
rec_df = (
    df.loc[(df['TM_KEY_MTH'] == 202406) & (df['CHANNEL_CD'] == 'ALL')]
    .reset_index(drop=True)
)

rec_df.tail(1)

Unnamed: 0,TM_KEY_MTH,CENTER,METRIC_GRP,PRODUCT_GRP,COMP_CD,METRIC_CD,METRIC_NAME,CHANNEL_CD,AGG_TYPE,UOM,P_ACTUAL,G_ACTUAL,H_ACTUAL,HH_ACTUAL,ACTUAL_AS_OF,MIN_DAY,MAX_DAY,PPN_TM,LOAD_DATE
471,202406,Sales,Subs,TOL,True,TB3S000700GEO,TOL %NAD 30DPDB2 (Due Date),ALL,N,%,,,,,,20240601,20240613,2024-06-14 11:01:19,2024-06-14 17:04:20.682192


In [19]:
# Show the column dtypes of the reconcile frame.
# NOTE(review): the commented-out scratch query below references `src_df`,
# which is not defined in the visible part of this notebook -- confirm or remove.
# src_df[(src_df['TM_KEY_DAY']==20240501) & (src_df['METRIC_CD']=='VIN00020')].sort_values(['TM_KEY_DAY', 'METRIC_CD', 'AREA_NO', 'AREA_CD']).reset_index(drop=True)
rec_df.dtypes

TM_KEY_MTH               int64
CENTER                  object
METRIC_GRP              object
PRODUCT_GRP             object
COMP_CD                 object
METRIC_CD               object
METRIC_NAME             object
CHANNEL_CD              object
AGG_TYPE                object
UOM                     object
P_ACTUAL               float64
G_ACTUAL               float64
H_ACTUAL               float64
HH_ACTUAL              float64
ACTUAL_AS_OF           float64
MIN_DAY                  int64
MAX_DAY                  int64
PPN_TM          datetime64[ns]
LOAD_DATE       datetime64[ns]
dtype: object

## Generate Output file

In [45]:
# Disabled export cell: every statement below is commented out, so running this
# cell writes nothing. The saved output under it names "Metric_List.xlsx",
# which does not match the file name used here.
# # to Excel file

# op_dir = 'temp'
# op_file = 'VINSIGHT Data Monitoring.xlsx'

# df.to_excel(f'{op_dir}/{op_file}', sheet_name='Data', index=False)
# print(f'\n -> Generate "{op_file}" successfully')


 -> Generate "Metric_List.xlsx" successfully


In [44]:
# Disabled export cell: every statement below is commented out, so running this
# cell writes nothing. The saved output under it names "Metric_List.csv",
# which does not match the file name used here.
# # to CSV file

# op_dir = 'temp'
# op_file = 'VINSIGHT Data Monitoring.csv'

# df.to_csv(f'{op_dir}/{op_file}', index=False, encoding='utf-8')
# print(f'\n -> Generate "{op_file}" successfully')


 -> Generate "Metric_List.csv" successfully
