# * Adhoc : Sales Performance

## Parameter

In [1]:
import os
import glob
import configparser
import datetime as dt
import pandas as pd
import numpy as np
import xlrd
import oracledb
# from sqlalchemy import create_engine

In [2]:
config = configparser.ConfigParser()
config.read('../../my_config.ini')
config.sections()

TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')
curr_dt

datetime.date(2024, 6, 8)

## ETL Process

### Step 1 : ADHOC_RAW_SALE_PERFORMANCE_MTH

In [3]:
# Data Source

print(f'\nProcessing...')

query = """
    /*** Sales Performance Report ***/
    -----------------------------------------------------------------------------------------------------------------------

    WITH W_SALE_TXN AS 
    (
        SELECT TM_KEY_DAY, TM_KEY_WK, TM_KEY_MTH, TM_KEY_QTR, TM_KEY_YR, CENTER, PRODUCT_GRP, COMP_CD, METRIC_GRP, METRIC_NAME_GROUP, METRIC_CD, METRIC_NAME, AREA_TYPE, AREA_CD, AREA_NAME
            , ACTUAL_AS_OF, AGG_TYPE, RR_IND, GRY_IND, UOM, PERIOD, ACTUAL_SNAP, ACTUAL_AGG, TARGET_SNAP, TARGET_AGG--, BASELINE_SNAP, BASELINE_AGG, ACH_SNAP, ACH_AGG, GAP_SNAP, GAP_AGG
            , WOW, WOW_PERCENT, MOM, MOM_PERCENT, QOQ, QOQ_PERCENT, YOY, YOY_PERCENT--, RR, RR_ACH, WTD, MTD, QTD, YTD
            , PPN_TM
        FROM GEOSPCAPPO.AGG_PERF_NEWCO NOLOCK
        WHERE METRIC_GRP = 'Sales'
        AND REGEXP_LIKE(METRIC_CD, 'DB1R000900|DB2R000500|TB1R000900|TB2R000500')
        AND REGEXP_LIKE(METRIC_CD, '[0-9]$|A[A-K]$')
        AND AREA_TYPE IN ('P', 'G', 'H')
        AND TM_KEY_YR >= 2023
        AND SUBSTR(TM_KEY_MTH,5,2) IN (01, 02, 03, 04)
    )
    -----------------------------------------------------------------------------------------------------------------------

    , W_SALE_ACTUAL_MONTHLY AS
    (
        SELECT TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME
            , CASE WHEN AREA_TYPE = 'P' THEN 1 WHEN AREA_TYPE = 'G' THEN 2 WHEN AREA_TYPE = 'H' THEN 3 WHEN AREA_TYPE = 'HH' THEN 4 WHEN AREA_TYPE = 'Z' THEN 5 ELSE 0 END AREA_NO
            , AREA_TYPE, AREA_CD, AREA_NAME
            , SUM(ACTUAL_SNAP) AS ACTUAL_SNAP, SUM(TARGET_SNAP) AS TARGET_SNAP, MAX(PPN_TM) AS PPN_TM
        FROM W_SALE_TXN
        GROUP BY TM_KEY_MTH, TM_KEY_QTR, TM_KEY_YR, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, AREA_TYPE, AREA_CD, AREA_NAME
    ) 
    -----------------------------------------------------------------------------------------------------------------------

    -->> Output

    SELECT *
    FROM W_SALE_ACTUAL_MONTHLY
    --WHERE NOT REGEXP_LIKE(METRIC_CD, 'A[A-K]$') AND AREA_TYPE = 'P'
    ORDER BY TM_KEY_MTH, PRODUCT_GRP, COMP_CD, METRIC_CD, AREA_TYPE, AREA_CD
"""

try:
    # TDMDBPR
    dsn = f'{TDMDBPR_user}/{TDMDBPR_pwd}@{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
    conn = oracledb.connect(dsn)
    print(f'\n{TDMDBPR_db} : Connected')
    cur = conn.cursor()

    # Create Rawdata
    cur.execute(query)
    df = pd.DataFrame(cur.fetchall(), columns=['TM_KEY_YR', 'TM_KEY_QTR', 'TM_KEY_MTH', 'PRODUCT_GRP', 'COMP_CD', 'METRIC_CD', 'METRIC_NAME', 'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'ACTUAL_SNAP', 'TARGET_SNAP', 'PPN_TM'])
    cur.close()
    df = df.replace(np.nan, None)
    # df = df.astype('float')
    df = df.astype(dtype={'TM_KEY_YR':'float', 'TM_KEY_QTR':'float', 'TM_KEY_MTH':'float', 'AREA_NO':'float'})
    # df = df[['TM_KEY_YR', 'TM_KEY_QTR', 'TM_KEY_MTH']]
    rows = [tuple(x) for x in df.values]
    print(f'\n -> CREATE : Rawdata : Done !')


    # AKPIPRD
    op_dsn = f'{AKPIPRD_user}/{AKPIPRD_pwd}@{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}'
    op_conn = oracledb.connect(op_dsn)
    print(f'\n{AKPIPRD_db} : Connected')
    op_cur = op_conn.cursor()

    # Truncate
    op_cur.execute("TRUNCATE TABLE AUTOKPI.ADHOC_RAW_SALE_PERFORMANCE_MTH")
    print(f'\n -> TRUNCATE : "ADHOC_RAW_SALE_PERFORMANCE_MTH" : Done !')

    # Insert
    op_cur.executemany("INSERT INTO ADHOC_RAW_SALE_PERFORMANCE_MTH\
                    (TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, AREA_NO, AREA_TYPE, AREA_CD, AREA_NAME, ACTUAL_SNAP, TARGET_SNAP, PPN_TM)\
                    VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14)", rows)
    op_cur.close()
    op_conn.commit()
    # print(f'\n -> INSERT : "ADHOC_RAW_SALE_PERFORMANCE_MTH" : Done !')
    print(f'\n"-> INSERT : "ADHOC_RAW_SALE_PERFORMANCE_MTH" : Done !\n\n{df.shape[0]} rows, {df.shape[1]} columns')


except oracledb.DatabaseError as e:
    print(f'\nError with Oracle : {e}')


finally:
    conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')
    op_conn.close()
    print(f'\n{AKPIPRD_db} : Disconnected')
    print(f'\nJob Done !!!')


Processing...

TDMDBPR : Connected

 -> CREATE : Rawdata : Done !

AKPIPRD : Connected

 -> TRUNCATE : "ADHOC_RAW_SALE_PERFORMANCE_MTH" : Done !

"-> INSERT : "ADHOC_RAW_SALE_PERFORMANCE_MTH" : Done !

14262 rows, 14 columns

TDMDBPR : Disconnected

AKPIPRD : Disconnected

Job Done !!!


### Test : Generate Rawdata (CSV)

In [4]:
# TDMDBPR

dsn = f'{TDMDBPR_user}/{TDMDBPR_pwd}@{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
conn = oracledb.connect(dsn)
print(f'{TDMDBPR_db} : Connected')
cur = conn.cursor()
rawdata_dir = 'rawdata'
rawdata_file = 'sales_performance_actual.csv'

query = """
    /*** Sales Growth Strategic report (Mar-Apr24) ***/
    -----------------------------------------------------------------------------------------------------------------------

    WITH W_SALE_TXN AS 
    (
        SELECT TM_KEY_DAY, TM_KEY_WK, TM_KEY_MTH, TM_KEY_QTR, TM_KEY_YR, CENTER, PRODUCT_GRP, COMP_CD, METRIC_GRP, METRIC_NAME_GROUP, METRIC_CD, METRIC_NAME, AREA_TYPE, AREA_CD, AREA_NAME
            , ACTUAL_AS_OF, AGG_TYPE, RR_IND, GRY_IND, UOM, PERIOD, ACTUAL_SNAP, ACTUAL_AGG, TARGET_SNAP, TARGET_AGG--, BASELINE_SNAP, BASELINE_AGG, ACH_SNAP, ACH_AGG, GAP_SNAP, GAP_AGG
            , WOW, WOW_PERCENT, MOM, MOM_PERCENT, QOQ, QOQ_PERCENT, YOY, YOY_PERCENT--, RR, RR_ACH, WTD, MTD, QTD, YTD
            , PPN_TM
        FROM GEOSPCAPPO.AGG_PERF_NEWCO NOLOCK
        WHERE METRIC_GRP = 'Sales'
        AND REGEXP_LIKE(METRIC_CD, 'DB1R000900|DB2R000500|TB1R000900|TB2R000500')
        AND NOT REGEXP_LIKE(METRIC_CD, 'C$|H$|MCOM$|CORP$|GEO$') --A[A-K]$
        AND AREA_TYPE IN ('P', 'G', 'H')
        AND TM_KEY_YR >= 2023
        AND SUBSTR(TM_KEY_MTH,5,2) IN (02, 03, 04)
    )
    -----------------------------------------------------------------------------------------------------------------------

    , W_SALE_ACTUAL_MONTHLY AS
    (
        SELECT TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME
            , CASE WHEN AREA_TYPE = 'P' THEN 1 WHEN AREA_TYPE = 'G' THEN 2 WHEN AREA_TYPE = 'H' THEN 3 WHEN AREA_TYPE = 'HH' THEN 4 WHEN AREA_TYPE = 'Z' THEN 5 ELSE 0 END AREA_NO
            , AREA_TYPE, AREA_CD, AREA_NAME
            , SUM(ACTUAL_SNAP) AS ACTUAL_SNAP, SUM(TARGET_SNAP) AS TARGET_SNAP, MAX(PPN_TM) AS PPN_TM
        FROM W_SALE_TXN
        GROUP BY TM_KEY_MTH, TM_KEY_QTR, TM_KEY_YR, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, AREA_TYPE, AREA_CD, AREA_NAME
    ) 
    -----------------------------------------------------------------------------------------------------------------------

    -->> Output

    SELECT *
    FROM W_SALE_ACTUAL_MONTHLY
    WHERE NOT REGEXP_LIKE(METRIC_CD, 'A[A-K]$')
    AND AREA_TYPE = 'P'
    ORDER BY TM_KEY_MTH, PRODUCT_GRP, COMP_CD, METRIC_CD, AREA_TYPE, AREA_CD
"""

try:
    print(f'\nProcessing...')
    cur.execute(query)
    df = pd.DataFrame(cur.fetchall(), columns=['TM_KEY_YR', 'TM_KEY_QTR', 'TM_KEY_MTH', 'PRODUCT_GRP', 'COMP_CD', 'METRIC_CD', 'METRIC_NAME', 'AREA_NO', 'AREA_TYPE', 'AREA_CD', 'AREA_NAME', 'ACTUAL_SNAP', 'TARGET_SNAP', 'PPN_TM'])
    # print(f'\n{df}')
    df.to_csv(f'{rawdata_dir}/{rawdata_file}', index=False, encoding='utf-8')
    print(f'\n"{rawdata_file}" is generated')

except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')

finally:
    cur.close()
    conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')

TDMDBPR : Connected

Processing...

"sales_performance_actual.csv" is generated

TDMDBPR : Disconnected


In [5]:
# Source File
src_file = 'C:\Ruz\MyProject\Code\Jupyter\ETL\Adhoc\rawdata\sales_performance_actual.csv'
src_df = pd.read_csv(src_file)
src_df = src_df.replace(np.nan, None)
src_df.head(3)
rows = [tuple(x) for x in src_df.values]


# AKPIPRD
op_dsn = f'{AKPIPRD_user}/{AKPIPRD_pwd}@{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}'
op_conn = oracledb.connect(op_dsn)
print(f'\n{AKPIPRD_db} : Connected')
op_cur = op_conn.cursor()

# Truncate
# op_cur.execute("TRUNCATE TABLE AUTOKPI.ADHOC_RAW_SALE_PERFORMANCE_MTH")
print(f'\n -> TRUNCATE : "ADHOC_RAW_SALE_PERFORMANCE_MTH"')

# Insert
# op_cur.executemany("INSERT INTO ADHOC_RAW_SALE_PERFORMANCE_MTH (TM_KEY_YR) VALUES (:1)", rows)
op_cur.executemany("INSERT INTO ADHOC_RAW_SALE_PERFORMANCE_MTH (TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH) VALUES (:1,:2,:3)", rows)

# op_cur.executemany("INSERT INTO ADHOC_RAW_SALE_PERFORMANCE_MTH (TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, PRODUCT_GRP, COMP_CD, METRIC_CD, METRIC_NAME, AREA_NO, AREA_TYPE, AREA_CD, AREA_NAME, ACTUAL_SNAP, TARGET_SNAP, PPN_TM)\
#                 VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14)", rows)

print(f'\n -> INSERT : "ADHOC_RAW_SALE_PERFORMANCE_MTH" : Done !')

op_conn.commit()
op_cur.close()
op_conn.close()
print(f'\n{AKPIPRD_db} : Disconnected')

OSError: [Errno 22] Invalid argument: 'C:\\Ruz\\MyProject\\Code\\Jupyter\\ETL\\Adhoc\rawdata\\sales_performance_actual.csv'