# * Import : "CDSAPPO.DIM_TIME" to "dim_time.csv"
    WHERE TM_KEY_YR >= 2023

## Parameter

In [1]:
import os
import glob
import configparser
import datetime as dt
import pandas as pd
import numpy as np
import xlrd
import oracledb

# Load database connection settings from the shared INI file.
# The path is relative, so it is resolved against the notebook's working directory.
CONFIG_PATH = '../../my_config.ini'

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; an empty list means nothing was loaded. Fail
# here with a clear message instead of a later, opaque KeyError: 'TDMDBPR'.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f'Config file not found or unreadable: {CONFIG_PATH}')

# Source database (TDMDBPR) connection settings
TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

# AKPIPRD connection settings
AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

# Run date, as a date object and as a compact YYYYMMDD string
curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')
curr_dt

## ETL Process

### Step 1 : DIM_TIME
    DB -> CSV file

In [15]:
# Step 1 : run the first statement of SQL/Import-DIM_TIME.sql against TDMDBPR
# and write the result set to <op_dir>/<op_file> as CSV.
#
# Input : 0 for dummy parameter
# NOTE(review): yr_end=0 presumably means "no upper bound" inside the SQL —
# confirm against SQL/Import-DIM_TIME.sql.

op_dir = 'data'
op_file = 'dim_time.csv'
v_param = dict(yr_start=2023, yr_end=0)

job_start_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
print(f'\nJob Start... {job_start_datetime}')

# Only the first ';'-separated statement in the file is executed.
# The `with` block closes the file itself, so no explicit close() is needed.
with open('SQL/Import-DIM_TIME.sql', 'r') as sql_file:
    queries = sql_file.read().split(';')
query = queries[0].strip()
if not query:
    raise ValueError('SQL/Import-DIM_TIME.sql does not contain a query')

# Make sure the output folder exists before spending time on the query.
os.makedirs(op_dir, exist_ok=True)

# Connect : TDMDBPR
# Credentials are passed as separate arguments rather than spliced into a
# "user/pwd@host:port/db" string: that form breaks when the password contains
# '@' or '/', and it keeps the secret in a plain notebook variable.
src_dsn = f'{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
src_conn = oracledb.connect(user=TDMDBPR_user, password=TDMDBPR_pwd, dsn=src_dsn)
print(f'\n{TDMDBPR_db} : Connected')

job_ok = False  # flipped only after the CSV has been written

try:
    print(f"\nProcessing...\n\n   -> data as of (YR_Start: {v_param['yr_start']}, YR_End: {v_param['yr_end']})")

    # Create Dataframe (v_param supplies the named bind variables of the query)
    with src_conn.cursor() as src_cur:
        src_cur.execute(query, v_param)
        rows = src_cur.fetchall()
        # First item of each description entry is the column name.
        col_names = [x[0] for x in src_cur.description]
    src_df = pd.DataFrame.from_records(rows, columns=col_names)
    print(f'\n   -> src_df : {src_df.shape[0]} rows, {src_df.shape[1]} columns')

    # Generate CSV file
    src_df.to_csv(f'{op_dir}/{op_file}', index=False, encoding='utf-8')
    print(f'\n   -> Generate "{op_file}" successfully')
    job_ok = True

except oracledb.DatabaseError as e:
    print(f'\nError with Oracle : {e}')
    # Re-raise so "Run All" stops here instead of continuing with a missing
    # or stale src_df left over from an earlier run.
    raise

finally:
    src_conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')
    # Report the real outcome; previously "Job Done" was printed even on failure.
    print('\nJob Done !!!' if job_ok else '\nJob Failed !!!')



Job Start... 2024-06-17, 00:00:55

TDMDBPR : Connected

Processing...

   -> data as of (YR_Start: 2023, YR_End: 0)

   -> src_df : 1096 rows, 17 columns

   -> Generate "dim_time.csv" successfully

TDMDBPR : Disconnected

Job Done !!!


In [16]:
# Preview the last rows of the extracted frame as a quick sanity check.
src_df.tail()

Unnamed: 0,TM_KEY_DAY,DAY_NO,DATE_VALUE,DAY_SHORT,DAY_OF_WEEK,DAYS_IN_MONTH,TRUE_TM_KEY_WK,TRUE_WEEK,TM_KEY_WK,WEEK_YEAR,TM_KEY_MTH,MONTH_NO,TM_KEY_QTR,QUARTER_NO,TM_KEY_YR,PERIODFLAG,LOAD_DATE
1091,20251227,27,2025-12-27,Sat,6,31,2025052,52,2025052,52,202512,12,20254,4,2025,N,2024-06-17 00:01:19
1092,20251228,28,2025-12-28,Sun,7,31,2025052,52,2025053,53,202512,12,20254,4,2025,N,2024-06-17 00:01:19
1093,20251229,29,2025-12-29,Mon,1,31,2026001,1,2025053,53,202512,12,20254,4,2025,N,2024-06-17 00:01:19
1094,20251230,30,2025-12-30,Tue,2,31,2026001,1,2025053,53,202512,12,20254,4,2025,N,2024-06-17 00:01:19
1095,20251231,31,2025-12-31,Wed,3,31,2026001,1,2025053,53,202512,12,20254,4,2025,EMQY,2024-06-17 00:01:19


In [None]:
# src_df.dtypes
# df[['PRODUCT_GRP', 'COMP_CD', 'METRIC_CD', 'METRIC_NAME']].drop_duplicates()