# * Import Vinsight Data

## Parameter

In [1]:
import os
import glob
import configparser
import oracledb
import pandas as pd
import datetime as dt

In [2]:
config = configparser.ConfigParser()
config.read('../my_config.ini')
config.sections()

username = config['TDMDBPR']['username']
password = config['TDMDBPR']['password']
db = config['TDMDBPR']['db']
host = config['TDMDBPR']['host']
port = config['TDMDBPR']['port']

In [3]:
raw_dir = f"../data/raw"
curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')
op_file_name = f'Agg_Perf_Nationwide-{str_curr_dt}'
op_raw_file = f'{raw_dir}/{op_file_name}.csv'

op_raw_file

'../data/raw/Agg_Perf_Nationwide-20240610.csv'

## Query

In [4]:
# Import AGG_PERF_NEWCO : Sales & Revenue(Nationwide only)

print(f'\nJob Start...')

# TDMDBPR

# conn = oracledb.connect(user=username, password=password, dsn=f'{host}/{db}')
dsn = f'{username}/{password}@{host}:{port}/{db}'
conn = oracledb.connect(dsn)
print(f'\n{db} : Connected')
cur = conn.cursor()
v_date = (curr_dt, )

# query = """
#     SELECT TM_KEY_DAY, TM_KEY_WK, TM_KEY_MTH, TM_KEY_QTR, TM_KEY_YR, CENTER, PRODUCT_GRP, COMP_CD, METRIC_GRP, METRIC_NAME_GROUP, METRIC_CD, METRIC_NAME, AREA_TYPE, AREA_CD, AREA_NAME
#         , ACTUAL_AS_OF, AGG_TYPE, RR_IND, GRY_IND, UOM, PERIOD, ACTUAL_SNAP, ACTUAL_AGG, TARGET_SNAP, TARGET_AGG, BASELINE_SNAP, BASELINE_AGG, ACH_SNAP, ACH_AGG, GAP_SNAP, GAP_AGG
#         , WOW, WOW_PERCENT, MOM, MOM_PERCENT, QOQ, QOQ_PERCENT, YOY, YOY_PERCENT, RR, RR_ACH, WTD, MTD, QTD, YTD, PPN_TM
#     FROM GEOSPCAPPO.AGG_PERF_NEWCO NOLOCK
#     WHERE CENTER IN ('Revenue', 'Sales')
#     AND AREA_TYPE = 'P'
# """

with open('SQL/Import Vinsight Data.sql', 'r') as sql_file:
    queries = sql_file.read().split(';')
    query = queries[0].strip()
    sql_file.close()
    # print(query)

try:
    print(f'\n Processing...')
    print(f'\nExecute query as of {v_date[0]}...')
    cur.execute(query)
    rows = cur.fetchall()
    df = pd.DataFrame.from_records(rows, columns=[x[0] for x in cur.description])
    print(f'\ndf : {df.shape[0]} rows, {df.shape[1]} columns')


except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')


finally:
    cur.close()
    conn.close()
    print(f'\n{db} : Disconnected')
    print(f'\nJob Done !!!')


Job Start...
TDMDBPR : Connected

 Processing...

Execute query as of 2024-06-10...

df : 564431 rows, 46 columns

TDMDBPR : Disconnected

Job Done !!!


## Sample data

In [9]:
df.tail()
# df.columns
# df.dtypes
# df.describe()

Unnamed: 0,TM_KEY_DAY,TM_KEY_WK,TM_KEY_MTH,TM_KEY_QTR,TM_KEY_YR,CENTER,PRODUCT_GRP,COMP_CD,METRIC_GRP,METRIC_NAME_GROUP,...,QOQ_PERCENT,YOY,YOY_PERCENT,RR,RR_ACH,WTD,MTD,QTD,YTD,PPN_TM
556890,20240606,2024023,202406,20242,2024,Revenue,Postpaid,TRUE,Subs,Postpaid %NAD 30DPDB2,...,,,,,,,,,,2024-06-07 11:55:57
556891,20240606,2024023,202406,20242,2024,Revenue,,TRUE,Market Share,Broadband Subs Share : AIS Fibre,...,1.60272,,,,,,,,,2024-06-07 11:55:57
556892,20240606,2024023,202406,20242,2024,Revenue,,TRUE,Market Share,Postpaid Subs Share : TMH,...,1.795463,,,,,,,,,2024-06-07 11:55:57
556893,20240606,2024023,202406,20242,2024,Revenue,Postpaid,ALL,Subs,Postpaid %NAD 30DPDB2,...,,,,,,,,,,2024-06-07 11:55:57
556894,20240606,2024023,202406,20242,2024,Revenue,Postpaid,DTAC,Sales,Postpaid Inflow M1,...,-16.02265,,,,,,,,,2024-06-07 11:55:57


## Generate Output file

In [6]:
df.to_csv(f'{op_raw_file}', index=False, encoding='utf-8')

print(f'\n"{op_raw_file}" is generated')


"../data/raw/Agg_Perf_Nationwide-20240607.csv" is generated


In [7]:
os.chdir('../data/raw')
# cmd_cwd = os.getcwd()
# cmd_cwd

# cmd_rm = f'rm *.tar.gz'
# os.system(cmd_rm)
# cmd_rm

# Remove old files
old_files = glob.glob('Agg_Perf_Nationwide*.tar.gz')
for file in old_files:
    os.remove(file)

# Add tar.gz
cmd_tar = f'Tar -cvzf {op_file_name}.tar.gz {op_file_name}.csv'
os.system(cmd_tar)

0

### Manual Step

In [8]:
op_file_name

# cd data/raw
# Tar -cvzf Agg_Perf_Nationwide-20240604.tar.gz Agg_Perf_Nationwide-20240604.csv

'Agg_Perf_Nationwide-20240607'