# * Sales Data

## Parameter

In [2]:
import configparser
import datetime as dt
import os
import re

import numpy as np
import oracledb
import pandas as pd
import xlrd

# Load connection settings from the shared INI file.
# ConfigParser.read() returns the list of files it parsed and silently skips
# missing ones, so check the result: otherwise a wrong path only surfaces as
# an opaque KeyError on the first section lookup below.
config_path = '../../my_config.ini'
config = configparser.ConfigParser()
if not config.read(config_path):
    raise FileNotFoundError(f'Config file not found or unreadable: {config_path}')

# TDMDBPR database connection settings.
TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

# AKPIPRD database connection settings.
AKPIPRD_user = config['AKPIPRD']['username']
AKPIPRD_pwd = config['AKPIPRD']['password']
AKPIPRD_db = config['AKPIPRD']['db']
AKPIPRD_host = config['AKPIPRD']['host']
AKPIPRD_port = config['AKPIPRD']['port']

# Run date and its YYYYMMDD string form (used to stamp output file names).
curr_dt = dt.date.today()
str_curr_dt = curr_dt.strftime('%Y%m%d')

In [9]:
# Input parameters: output folder and base file name (without extension).
# The file name embeds the run date string built in the configuration cell.
op_dir = 'data'
op_file = f'dup_case_{str_curr_dt}'

# Echo the effective parameters so they are recorded in the cell output.
print('\nParameter input...\n')
print(f'   -> op_dir: {op_dir}', f'   -> op_file: {op_file}', sep='\n')


Parameter input...

   -> op_dir: data
   -> op_file: dup_case_20240711


## Import : Sales(TRUE) Data Source
    DB -> Output file

    Source : CDSAPPO.DIM_CORP_KPI

In [10]:
# Connect : TDMDBPR
# Credentials are passed as separate keyword arguments instead of being
# embedded in a "user/password@host:port/service" string, so a password that
# contains '@' or '/' cannot break connect-string parsing, and src_dsn no
# longer carries the secret.
src_dsn = f'{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
src_conn = oracledb.connect(user=TDMDBPR_user, password=TDMDBPR_pwd, dsn=src_dsn)
print(f'\n{TDMDBPR_db} : Connected')

# Duplicate check on CDSAPPO.DIM_CORP_KPI: returns every
# (year, product, metric, area type, area code) key that is associated with
# more than one distinct AREA_DESC, together with the number of descriptions.
# NOTE(review): NOLOCK is a SQL Server hint; Oracle presumably parses it here
# as a plain table alias -- confirm and consider removing it from the SQL.
query = """
    -->> Check Dup

    SELECT TM_KEY_YR, PRODUCT, METRIC_CD, METRIC_NAME, AREA_NO, AREA_TYPE, AREA_CD, COUNT(DISTINCT AREA_DESC) CNT_AREA_DESC
    FROM (
        SELECT SUBSTR(TM_KEY_DAY,1,4) TM_KEY_YR, SUBSTR(TM_KEY_DAY,1,6) TM_KEY_MTH, PRODUCT, METRIC_CD, METRIC_NAME 
            , CASE WHEN AREA_TYPE = 'P' THEN 1 WHEN AREA_TYPE = 'G' THEN 2 WHEN AREA_TYPE = 'H' THEN 3 WHEN AREA_TYPE = 'HH' THEN 4 ELSE 99 END AREA_NO 
            , AREA_TYPE, AREA_CD, AREA_DESC
            , COUNT(1) ROW_CNT
        FROM CDSAPPO.DIM_CORP_KPI NOLOCK
        GROUP BY SUBSTR(TM_KEY_DAY,1,4), SUBSTR(TM_KEY_DAY,1,6), PRODUCT, METRIC_CD, METRIC_NAME, AREA_TYPE, AREA_CD, AREA_DESC
    ) TMP
    GROUP BY TM_KEY_YR, PRODUCT, METRIC_CD, METRIC_NAME, AREA_NO, AREA_TYPE, AREA_CD
    HAVING COUNT(DISTINCT AREA_DESC) > 1
    ORDER BY 1,2,3,5,7
"""

# The cursor is created inside the try block so that a failure there still
# reaches the finally clause and the connection is closed.
src_cur = None
try:
    # Create the output folder up front: a missing folder would otherwise only
    # be discovered after the query has already been executed.
    os.makedirs(op_dir, exist_ok=True)

    src_cur = src_conn.cursor()

    execute_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
    print(f'\n   -> Execute query... {execute_datetime}')

    # Create Dataframe (column names are taken from the cursor description)
    src_cur.execute(query)
    rows = src_cur.fetchall()
    column_names = [col[0] for col in src_cur.description]
    src_df = pd.DataFrame.from_records(rows, columns=column_names)
    print(f'\n   -> DataFrame : {src_df.shape[0]} rows, {src_df.shape[1]} columns')

    # Generate CSV file
    src_df.to_csv(f'{op_dir}/{op_file}.csv', index=False, encoding='utf-8')
    print(f'\n   -> Generate "{op_file}.csv" successfully')

    # # Generate Excel file
    # src_df.to_excel(f'{op_dir}/{op_file}.xlsx', sheet_name='Data', index=False)
    # print(f'\n   -> Generate "{op_file}.xlsx" successfully')

except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')
    # Re-raise so the notebook stops here; otherwise later cells would carry
    # on with a missing or stale CSV from an earlier run.
    raise

finally:
    if src_cur is not None:
        src_cur.close()
    src_conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')


TDMDBPR : Connected

   -> Execute query... 2024-07-11, 15:06:28

   -> DataFrame : 657 rows, 8 columns

   -> Generate "dup_case_20240711.csv" successfully

TDMDBPR : Disconnected


## Reconcile

### Import Rawdata

In [16]:
# Import Rawdata
# Reload the duplicate-check result from the CSV produced by the import cell.
# The code columns are pinned to str so identifiers keep their leading zeros
# (e.g. AREA_CD '004'); without this, dtype inference would turn them into
# integers whenever an extract happens to contain only numeric-looking codes.
data_src = f'{op_dir}/{op_file}.csv'
raw_df = pd.read_csv(
    data_src,
    low_memory=False,
    dtype={'METRIC_CD': str, 'AREA_CD': str},
)
print(f'\nraw_df : {raw_df.shape[0]} rows, {raw_df.shape[1]} columns')
# raw_df.tail(3)


raw_df : 657 rows, 8 columns


In [24]:
# Filter: keep only TM_KEY_YR 2024 rows whose PRODUCT is TOL or TRUE_ONLINE,
# then renumber the index from 0.
# (To inspect another product instead, swap the product condition, e.g.
#  frame['PRODUCT'] == 'POSTPAID'.)
tmp_df = (
    raw_df
    .loc[lambda frame: (frame['TM_KEY_YR'] == 2024)
         & frame['PRODUCT'].isin(['TOL', 'TRUE_ONLINE'])]
    .reset_index(drop=True)
)

# Optional exports of the filtered view (disabled by default):
# tmp_df.to_excel(f'{op_dir}/tmp_{op_file}.xlsx', sheet_name='Data', index=False)
# print(f'\n   -> Generate "tmp_{op_file}.xlsx" successfully')
# tmp_df.to_csv(f'{op_dir}/tmp_{op_file}.csv', index=False, encoding='utf-8')
# print(f'\n   -> Generate "tmp_{op_file}.csv" successfully')

tmp_df  # .tail(3)

Unnamed: 0,TM_KEY_YR,PRODUCT,METRIC_CD,METRIC_NAME,AREA_NO,AREA_TYPE,AREA_CD,CNT_AREA_DESC
0,2024,TRUE_ONLINE,TB3R000600,TOL Inflow M1 - Connected,2,G,GX1,3
1,2024,TRUE_ONLINE,TB3R000600,TOL Inflow M1 - Connected,2,G,GX2,3
2,2024,TRUE_ONLINE,TB3R000600,TOL Inflow M1 - Connected,2,G,GX7,2
3,2024,TRUE_ONLINE,TB3R000600,TOL Inflow M1 - Connected,3,H,004,2
4,2024,TRUE_ONLINE,TB3R000600,TOL Inflow M1 - Connected,3,H,007,2
...,...,...,...,...,...,...,...,...
178,2024,TRUE_ONLINE,TB3S000900,TOL Gross Adds - Connected (Install Loc),99,R,Unidentified,4
179,2024,TRUE_ONLINE,TB3S000901,TOL Gross Adds - Connected (Install Loc - Cons...,2,G,Unidentified,3
180,2024,TRUE_ONLINE,TB3S000901,TOL Gross Adds - Connected (Install Loc - Cons...,99,R,Unidentified,3
181,2024,TRUE_ONLINE,TB3S000902,TOL Gross Adds - Connected (Install Loc - Data...,2,G,Unidentified,3
