# Import Vinsight Data

In [1]:
import os
import glob
import configparser
import oracledb
import pandas as pd
import datetime as dt

## Parameters

In [2]:
# Load database credentials and connection details from the shared INI file.
config_path = '../my_config.ini'
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail here with a clear message instead of a
# confusing KeyError on the first section lookup below.
if not config.read(config_path):
    raise FileNotFoundError(f'Config file not found or unreadable: {config_path}')

# Connection settings read from the [TDMDBPR] section.
TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

# Connection settings read from the [AKPIPRD] section.
AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

In [3]:
# Run date (local time) and its compact YYYYMMDD string form.
curr_dt = dt.date.today()
str_curr_dt = f'{curr_dt:%Y%m%d}'
curr_dt

# raw_dir = f"../data/raw"
# op_file_name = f'Agg_Perf_Nationwide-{str_curr_dt}'
# op_raw_file = f'{raw_dir}/{op_file_name}.csv'
# op_raw_file

datetime.date(2024, 6, 6)

## AKPIPRD data

In [10]:
# AKPIPRD
# Pull the 'VIN%' metrics for area types 'P' and 'Z' from
# AUTOKPI.VW_ACTUAL_AREA_DAILY into `src_df`.

# NOTE(review): the single-string "user/password@host:port/service" form will
# presumably break if the password contains '@' or '/'. The keyword form is:
# conn = oracledb.connect(user=AKPIPRD_user, password=AKPIPRD_pwd, dsn=f'{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}')
dsn = f'{AKPIPRD_user}/{AKPIPRD_pwd}@{AKPIPRD_host}:{AKPIPRD_port}/{AKPIPRD_db}'

# Only used in the progress message below; the query itself has no bind
# variables, its date filter is hard-coded in the SQL text.
v_date = (curr_dt, )

# END_MTH_IND ranks rows per metric/area/month by TM_KEY_DAY descending; the
# filter on it is commented out, so the outer query keeps days ending in '01'.
query = """
    SELECT *
    FROM (
        SELECT TM_KEY_DAY, METRIC_CD, METRIC_NAME, METRIC_VALUE, COMP_CD, VERSION, AREA_NO, AREA_CD, AREA_DESC, AREA_TYPE, LOAD_DATE, REMARK
            , ROW_NUMBER() OVER(PARTITION BY METRIC_CD, AREA_CD, SUBSTR(TM_KEY_DAY,1,6) ORDER BY TM_KEY_DAY DESC) END_MTH_IND
        FROM AUTOKPI.VW_ACTUAL_AREA_DAILY 
        WHERE METRIC_CD LIKE 'VIN%'
        AND TM_KEY_DAY >= 20240101
        AND AREA_TYPE IN ('P', 'Z')
    ) TMP
    --WHERE END_MTH_IND = 1
    WHERE TM_KEY_DAY LIKE '%01'
    ORDER BY TM_KEY_DAY, METRIC_CD, AREA_NO, AREA_CD 
"""

conn = oracledb.connect(dsn)
print(f'{AKPIPRD_db} : Connected')

try:
    # The cursor is closed automatically when the with-block exits.
    with conn.cursor() as cur:
        print(f'\nExecute query as of {v_date[0]}...')
        cur.execute(query)
        rows = cur.fetchall()
        # cur.description holds one tuple per column; element 0 is the column name.
        src_df = pd.DataFrame.from_records(rows, columns=[col[0] for col in cur.description])
    print(f'\ndf : {src_df.shape[0]} rows, {src_df.shape[1]} columns\nDone !!!')

except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')
    # Re-raise so the notebook stops here: otherwise `src_df` is left undefined
    # (or stale from an earlier run) and later cells fail or use old data.
    raise

finally:
    conn.close()
    print(f'\n{AKPIPRD_db} : Disconnected')

AKPIPRD : Connected

Execute query as of 2024-06-06...

df : 288 rows, 13 columns
Done !!!

AKPIPRD : Disconnected


In [17]:
# Preview the rows whose TM_KEY_DAY is 20240601; reset_index() keeps each
# row's original position in an 'index' column.
src_df.loc[src_df['TM_KEY_DAY'].eq(20240601)].reset_index()

Unnamed: 0,index,TM_KEY_DAY,METRIC_CD,METRIC_NAME,METRIC_VALUE,COMP_CD,VERSION,AREA_NO,AREA_CD,AREA_DESC,AREA_TYPE,LOAD_DATE,REMARK,END_MTH_IND
0,240,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,46.61,True,A,1,P,Nationwide,P,2024-06-04 14:37:37.653334,,9
1,241,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,37.37,True,A,2,BMA,BMA,Z,2024-06-04 14:37:37.653334,,9
2,242,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,51.22,True,A,2,UPC,UPC,Z,2024-06-04 14:37:37.653334,,9
3,243,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,50.85,True,A,3,EAST,East,Z,2024-06-04 14:37:37.653334,,9
4,244,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,34.53,True,A,4,10,Bangkok,Z,2024-06-04 14:37:37.653334,,9
5,245,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,40.08,True,A,4,11,Samut Prakan,Z,2024-06-04 14:37:37.653334,,9
6,246,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,42.38,True,A,4,12,Nonthaburi,Z,2024-06-04 14:37:37.653334,,9
7,247,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,43.62,True,A,4,13,Pathum Thani,Z,2024-06-04 14:37:37.653334,,9
8,248,20240601,VIN00020,Broadband Subs Share : TOL,36.52,True,A,1,P,Nationwide,P,2024-06-04 14:38:20.788865,,9
9,249,20240601,VIN00020,Broadband Subs Share : TOL,48.82,True,A,2,BMA,BMA,Z,2024-06-04 14:38:20.788865,,9


## TDMDBPR data

In [12]:
# TDMDBPR
# Pull metrics VIN00019-VIN00024 for area types 'P' and 'Z' from
# GEOSPCAPPO.AGG_PERF_NEWCO into `df`.

# NOTE(review): the single-string "user/password@host:port/service" form will
# presumably break if the password contains '@' or '/'. The keyword form is:
# conn = oracledb.connect(user=TDMDBPR_user, password=TDMDBPR_pwd, dsn=f'{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}')
dsn = f'{TDMDBPR_user}/{TDMDBPR_pwd}@{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'

# Only used in the progress message below; the query itself has no bind
# variables, its date filter is hard-coded in the SQL text.
v_date = (curr_dt, )

# AREA_NO is derived in SQL from AREA_TYPE / AREA_CD and is used for ordering.
# NOTE(review): Oracle has no NOLOCK table hint (hints use /*+ ... */), so
# "NOLOCK" below is presumably parsed as a table alias - TODO confirm and remove.
query = """
    SELECT *
    FROM (
        SELECT TM_KEY_DAY, METRIC_CD, METRIC_NAME 
            , CASE 	WHEN AREA_TYPE = 'P' THEN 1
                    WHEN AREA_TYPE = 'Z' THEN (CASE WHEN AREA_CD IN ('BMA', 'UPC') THEN 2 WHEN AREA_CD = 'EAST' THEN 3 ELSE 4 END)
                    WHEN AREA_TYPE = 'G' THEN 5 
                    WHEN AREA_TYPE = 'H' THEN 6 
                    WHEN AREA_TYPE = 'HH' THEN 7
                    ELSE 0 END AREA_NO
            , AREA_TYPE, AREA_CD, AREA_NAME, ACTUAL_SNAP, ACTUAL_AGG, PPN_TM  
        FROM GEOSPCAPPO.AGG_PERF_NEWCO NOLOCK
        WHERE METRIC_CD IN ('VIN00019','VIN00020','VIN00021','VIN00022','VIN00023','VIN00024')
        AND TM_KEY_MTH >= 202401
        AND TM_KEY_DAY LIKE '%01'
        AND AREA_TYPE IN ('P', 'Z')
    )TMP
    ORDER BY TM_KEY_DAY, METRIC_CD, AREA_NO, AREA_CD 
"""

conn = oracledb.connect(dsn)
print(f'{TDMDBPR_db} : Connected')

try:
    # The cursor is closed automatically when the with-block exits.
    with conn.cursor() as cur:
        print(f'\nExecute query as of {v_date[0]}...')
        cur.execute(query)
        rows = cur.fetchall()
        # cur.description holds one tuple per column; element 0 is the column name.
        df = pd.DataFrame.from_records(rows, columns=[col[0] for col in cur.description])
    print(f'\ndf : {df.shape[0]} rows, {df.shape[1]} columns\nDone !!!')

except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')
    # Re-raise so the notebook stops here: otherwise `df` is left undefined
    # (or stale from an earlier run) and later cells fail or use old data.
    raise

finally:
    conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')

TDMDBPR : Connected

Execute query as of 2024-06-06...

df : 240 rows, 10 columns
Done !!!

TDMDBPR : Disconnected


In [19]:
# Preview the rows whose TM_KEY_DAY is 20240601; reset_index() keeps each
# row's original position in an 'index' column.
df.loc[df['TM_KEY_DAY'].eq(20240601)].reset_index()

Unnamed: 0,index,TM_KEY_DAY,METRIC_CD,METRIC_NAME,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,ACTUAL_SNAP,ACTUAL_AGG,PPN_TM
0,200,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,1,P,P,Nationwide,46.61,46.61,2024-06-06 01:12:45
1,201,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,2,Z,BMA,BMA,37.37,37.37,2024-06-06 01:12:45
2,202,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,2,Z,UPC,UPC,51.22,51.22,2024-06-06 01:12:45
3,203,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,3,Z,EAST,EAST,50.85,50.85,2024-06-06 01:12:45
4,204,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,4,Z,10,Bangkok,34.53,34.53,2024-06-06 01:12:45
5,205,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,4,Z,11,Samut Prakan,40.08,40.08,2024-06-06 01:12:45
6,206,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,4,Z,12,Nonthaburi,42.38,42.38,2024-06-06 01:12:45
7,207,20240601,VIN00019,Broadband Subs Share : AIS & 3BB,4,Z,13,Pathum Thani,43.62,43.62,2024-06-06 01:12:45
8,208,20240601,VIN00020,Broadband Subs Share : TOL,1,P,P,Nationwide,36.52,36.52,2024-06-06 01:12:45
9,209,20240601,VIN00020,Broadband Subs Share : TOL,2,Z,BMA,BMA,48.82,48.82,2024-06-06 01:12:45


## Generate Output file

In [6]:
# df.to_csv(f'{op_raw_file}', index=False, encoding='utf-8')

# print(f'\n"{op_raw_file}" is generated')


"../data/raw/Agg_Perf_Nationwide-20240605.csv" is generated
