# Import Aggregated Performance (Agg Perf) Data

In [1]:
import getpass
import os

import cx_Oracle
import numpy as np
import pandas as pd

## 6 Dimension Summary

In [2]:
# Pull the weekly "Agg Perf" extract from the TDMDBPR Oracle database into `df`.
#
# Credentials are deliberately NOT stored in the notebook. They are read from the
# environment variables TDM_DB_USER / TDM_DB_PASSWORD (and optionally TDM_DB_DSN);
# when a variable is missing the user is prompted instead, so nothing secret ends up
# in the saved .ipynb or in version control.
# NOTE(review): the password that used to be embedded in this cell must be treated
# as compromised and rotated.
db_user = os.environ.get('TDM_DB_USER') or input('TDMDBPR user: ')
db_password = os.environ.get('TDM_DB_PASSWORD') or getpass.getpass('TDMDBPR password: ')
db_dsn = os.environ.get('TDM_DB_DSN', 'TDM-scan.dc2.true.th:1521/TDMDBPR')

# Passing user/password/dsn separately (instead of one "user/pass@dsn" string) also
# avoids any parsing ambiguity when the password contains characters like '#', '/' or '@'.
conn = cx_Oracle.connect(user=db_user, password=db_password, dsn=db_dsn)
print('TDMDBPR : Connected')
cur = conn.cursor()

# Positional bind value for `:1` in the query below (the "as of" week key).
v_week = (2023021, )


try:
    print(f'\nExecute query as Week {v_week[0]}...')

    # Query outline:
    #   W_VAR        - exposes the bound week as AS_OF_WK.
    #   W_TMP_PERIOD - for every week up to AS_OF_WK, the first/last TM_KEY_DAY present.
    #   main SELECT  - keeps only rows dated on a week's last day with AREA_TYPE = 'P',
    #                  derives DIMENSION_KPI_FLAG / BG_FLAG from METRIC_CD, then sums
    #                  ACTUAL_AGG / TARGET_AGG and takes MAX(PPN_TM) per group.
    # NOTE(review): `NOLOCK` after the table name is not an Oracle hint; Oracle appears to
    # parse it as a table alias, so it has no locking effect here - confirm and drop.
    sqlTxt = """
        WITH W_VAR (AS_OF_WK) AS 
        ( 
            SELECT :1 AS AS_OF_WK FROM DUAL 
        ) 
        , W_TMP_PERIOD (TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, TM_KEY_WK, WK_DAY_START, WK_DAY_END) AS 
        ( 
            SELECT TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, TM_KEY_WK, MIN(TM_KEY_DAY) AS WK_DAY_START, MAX(TM_KEY_DAY) AS WK_DAY_END 
            FROM GEOSPCAPPO.AGG_PERFORMANCE_NEWCO NOLOCK 
            WHERE TM_KEY_WK <= (SELECT AS_OF_WK FROM W_VAR) 
            GROUP BY TM_KEY_YR, TM_KEY_QTR, TM_KEY_MTH, TM_KEY_WK 
        ) 
        SELECT * 
        FROM ( 
            SELECT TM_KEY_MTH, TM_KEY_WK, CENTER, METRIC_GRP, COMP_CD, PRODUCT_GRP, METRIC_CD, METRIC_NAME, DIMENSION_KPI_FLAG, BG_FLAG--, METRIC_NAME_GROUP 
                , AGG_TYPE, RR_IND, GRY_IND, UOM, PERIOD 
                , CAST(SUM(ACTUAL_AGG) AS DECIMAL(18,2)) AS ACTUAL_AGG 
                , CAST(SUM(TARGET_AGG) AS DECIMAL(18,2)) AS TARGET_AGG 
                , MAX(PPN_TM) AS PPN_TM 
            FROM ( 
                SELECT TM_KEY_MTH, TM_KEY_WK, TM_KEY_DAY, CENTER, METRIC_GRP, COMP_CD, PRODUCT_GRP, METRIC_CD, METRIC_NAME 
                    , CASE 	WHEN ( METRIC_CD IN ('DNEC10006', 'TNEC10002', 'TNEC20002') --NETWORK EXPERIENCE 
                                OR METRIC_CD IN ('B1R000100', 'B2R000100', 'TB3R000100', 'TB4R000100', 'TNSC00142') --BUSINESS GROWTH 
                                OR METRIC_CD IN ('DSER21101', 'DSER22101', 'TSER21101', 'TSER22101', 'TSER23101', 'DB2S000200', 'DB1S000200', 'TB2S000200', 'TB1S000200', 'TSER13100', 'TSER14100') --CUSTOMER RETENTION & EXPERIENCE 
                                OR METRIC_CD IN ('TNSC00011', 'TNSC00013', 'TNSC00087', 'TNSC00101', 'TNSC00144', 'TNSC00020', 'TNSC00130') --NEW S-CURVE 
                                --COST & PRODUCTIVITY (Unknown ?) 
                                OR METRIC_CD IN ('DIS10007', 'DIS10011', 'DIS10001') --DIGITALIZATION 
                            ) THEN 'Y' ELSE 'N' END DIMENSION_KPI_FLAG 
                    , CASE 	WHEN METRIC_CD IN ('B1R000100', 'B2R000100', 'TB3R000100', 'TB4R000100', 'TNSC00142') --BUSINESS GROWTH 
                            THEN 'Y' ELSE 'N' END BG_FLAG 
                    , METRIC_NAME_GROUP, PPN_TM, AGG_TYPE, RR_IND, GRY_IND, UOM, PERIOD, ACTUAL_AGG, TARGET_AGG 
                FROM GEOSPCAPPO.AGG_PERFORMANCE_NEWCO NOLOCK 
                WHERE TM_KEY_DAY IN (SELECT WK_DAY_END FROM W_TMP_PERIOD) 
                AND AREA_TYPE = 'P' 
            ) A 
            GROUP BY TM_KEY_MTH, TM_KEY_WK, CENTER, METRIC_GRP, COMP_CD, PRODUCT_GRP, METRIC_CD, METRIC_NAME, DIMENSION_KPI_FLAG, BG_FLAG, AGG_TYPE, RR_IND, GRY_IND, UOM, PERIOD 
        ) B 
        --ORDER BY TM_KEY_MTH, TM_KEY_WK, CENTER, METRIC_GRP, COMP_CD, PRODUCT_GRP, METRIC_CD 
    """

    # `v_week` is bound to :1 by position; the value never gets spliced into the SQL text.
    cur.execute(sqlTxt, v_week)

    print(f'\nDone !!!')

    rows = cur.fetchall()

    # Column names come from the cursor metadata (first item of each description entry).
    df = pd.DataFrame.from_records(rows, columns=[x[0] for x in cur.description])


except cx_Oracle.DatabaseError as e:
    print(f'Error with Oracle : {e}')
    # Re-raise so the notebook stops here. Swallowing the error would leave `df`
    # undefined - or, worse, left over from a previous run - and the export cell
    # would then write stale data under the new week's file name.
    raise


finally:
    # Always release the cursor and the connection, whether or not the query succeeded.
    cur.close()
    conn.close()
    print('\nTDMDBPR : Disconnected')

TDMDBPR : Connected

Execute query as Week 2023021...

Done !!!

TDMDBPR : Disconnected


In [3]:
# Sanity check: (row count, column count) of the frame built from the query result.
df.shape

(7168, 18)

In [4]:
# List the column labels, which were taken from the cursor description of the query.
df.columns

Index(['TM_KEY_MTH', 'TM_KEY_WK', 'CENTER', 'METRIC_GRP', 'COMP_CD',
       'PRODUCT_GRP', 'METRIC_CD', 'METRIC_NAME', 'DIMENSION_KPI_FLAG',
       'BG_FLAG', 'AGG_TYPE', 'RR_IND', 'GRY_IND', 'UOM', 'PERIOD',
       'ACTUAL_AGG', 'TARGET_AGG', 'PPN_TM'],
      dtype='object')

In [5]:
# Show the dtype pandas inferred for each column of the fetched rows.
df.dtypes

TM_KEY_MTH                     int64
TM_KEY_WK                      int64
CENTER                        object
METRIC_GRP                    object
COMP_CD                       object
PRODUCT_GRP                   object
METRIC_CD                     object
METRIC_NAME                   object
DIMENSION_KPI_FLAG            object
BG_FLAG                       object
AGG_TYPE                      object
RR_IND                         int64
GRY_IND                       object
UOM                           object
PERIOD                        object
ACTUAL_AGG                   float64
TARGET_AGG                   float64
PPN_TM                datetime64[ns]
dtype: object

In [6]:
# Summary statistics for the numeric columns; `count` reflects non-null values only.
df.describe()

Unnamed: 0,TM_KEY_MTH,TM_KEY_WK,RR_IND,ACTUAL_AGG,TARGET_AGG
count,7168.0,7168.0,7168.0,5620.0,1568.0
mean,202303.928571,2023015.0,0.277344,132500500.0,357500600.0
std,0.798651,3.642411,0.447719,667864900.0,1147750000.0
min,202303.0,2023009.0,0.0,-241026.0,-6650.71
25%,202303.0,2023012.0,0.0,0.0,3.0
50%,202304.0,2023014.0,0.0,0.91,4996.405
75%,202305.0,2023018.0,1.0,47923.75,24794420.0
max,202305.0,2023021.0,1.0,11960230000.0,12271550000.0


In [4]:
# Preview the first five rows of the extract.
df.head()

Unnamed: 0,TM_KEY_MTH,TM_KEY_WK,CENTER,METRIC_GRP,COMP_CD,PRODUCT_GRP,METRIC_CD,METRIC_NAME,DIMENSION_KPI_FLAG,BG_FLAG,AGG_TYPE,RR_IND,GRY_IND,UOM,PERIOD,ACTUAL_AGG,TARGET_AGG,PPN_TM
0,202303,2023009,Digitalization,Digitalization,True,All Services,DIS10015,% After-Sales Service via Digital Channels,N,N,N,0,GYR,%,M,13.96,20.0,2023-05-29 08:50:50
1,202303,2023012,Digitalization,Digitalization,True,All Services,DIS10015,% After-Sales Service via Digital Channels,N,N,N,0,GYR,%,M,15.55,20.0,2023-05-29 08:50:50
2,202304,2023013,Digitalization,Digitalization,True,All Services,DIS10015,% After-Sales Service via Digital Channels,N,N,N,0,GYR,%,M,14.2,20.0,2023-05-29 08:50:50
3,202304,2023014,Digitalization,Digitalization,True,All Services,DIS10015,% After-Sales Service via Digital Channels,N,N,N,0,GYR,%,M,14.15,20.0,2023-05-29 08:50:50
4,202304,2023014,Digitalization,Digitalization,True,Postpaid,DIS10017,% After-Sales Service via Digital Channels : T...,N,N,N,0,GYR,%,M,,,2023-05-29 08:50:50


## Generate Output file

In [5]:
# C:\Users\Narut4\coding\Jupyter\Work with Oracle.ipynb

# Write the extract to a CSV named after the queried week. The path is relative,
# so the file is created in the kernel's current working directory.
output_name = f'Raw-Agg Perf_Wk {v_week[0]}'
csv_file = output_name + '.csv'

# The DataFrame index is not written to the file.
df.to_csv(csv_file, encoding='utf-8', index=False)

print(f'\n"{csv_file}" is generated')


"Raw-Agg Perf_Wk 2023021.csv" is generated
