In [1]:
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

# Load the environment variables from the .env file
load_dotenv('.env')

# Get the values of host, user, pswd, db, and schema from the environment variables
host = os.getenv('host')
user = os.getenv('user')
pswd = os.getenv('pswd')
db = os.getenv('db')
schema = os.getenv('schema')


# Use the values as needed
engine = create_engine(
    f"postgresql://{user}:{pswd}@{host}/{db}?options=-csearch_path%3D{schema}", echo=False)
conn = engine.connect()


In [2]:
import openpyxl as pyxl
import os
from os.path import join
from datetime import datetime as dt
import pandas as pd


def parseXlsx (fileName, dirPath="."):

    # =====
    # Date related info
    # =====

    year = int(fileName[16:20])
    quarter = int(fileName[6:7])
    quarter_expr = fileName[6:9]

    wb = pyxl.load_workbook(filename=join(dirPath, fileName))

    for ws in wb.worksheets:
        if "VCData" + str(quarter_expr) + "Quarter" + str(year) in ws.title:
            break
    
    # =====
    # Quarterly data
    # =====
        
    bvp = ws['B6'].value
    avp = ws['B8'].value
    pns = ws['B10'].value
    prp = ws['B12'].value
    tcq_wd = ws['B15'].value
    tcq_we = ws['B16'].value
    bvp_wd = ws['B19'].value
    bvp_we = ws['B20'].value
    tvq_wd = ws['B23'].value
    tvq_we = ws['B24'].value
    
    
    record_quarter = [year, quarter,bvp, avp, pns, prp, tcq_wd, tcq_we, bvp_wd, bvp_we, tvq_wd, tcq_we]
    


    
    # =====
    # Periodically data
    # =====

    record_period = []
    for c in range(12):
        col = 2 + c
        period = 1 + c

        for r in range(4):
            row_wd = 27 + r*4
            row_we = 28 + r*4
            # print(c, col, r, row_wd, row_we)
            tcq_wd = ws.cell(row_wd, col).value
            tcq_we = ws.cell(row_we, col).value
            # print(tcq_wd, tcq_we)
            
            record_period.append([year, quarter, period, tcq_wd, tcq_we])

        
            period += 12
    
    
    

    wb.close()
    return record_quarter, record_period


quarters = []
periods = []

for fn in [f for f in os.listdir(".") if f.startswith("VCData") and f.endswith(".xlsx")]:
    quarter, period = parseXlsx(fn)
    
    quarters.append(quarter)
    periods.extend(period)
    # break

quarter_col = ['Year', 'Quarter', 'BVP','AVP', 'PNS', 'PRP', 'TCQ_WD', 'TCQ_WE', 'BVP_WD', 'BVP_WE', 'TVQ_WD', 'TVQ_WE']
period_col = ['Year', 'Quarter', 'Period', 'TCQ_Weekday', 'TCQ_Weekend_PH']

quarters_df = pd.DataFrame(quarters, columns=quarter_col)
quarters_df.sort_values(by=['Year', 'Quarter'], inplace=True)
quarters_df.reset_index(drop=True, inplace=True)

period_df = pd.DataFrame(periods, columns=period_col)
period_df.sort_values(by=['Year', 'Quarter', 'Period'], inplace=True)
period_df.reset_index(drop=True, inplace=True)

In [3]:
quarters_df

Unnamed: 0,Year,Quarter,BVP,AVP,PNS,PRP,TCQ_WD,TCQ_WE,BVP_WD,BVP_WE,TVQ_WD,TVQ_WE
0,2023,3,184.9,184.9,49727910.0,208.92,22501010.0,22501010.0,22501010.0,22501010.0,0,22501010.0
1,2023,4,192.64,192.64,179792000.0,279.49,22501010.0,22501010.0,22501010.0,22501010.0,0,22501010.0
2,2024,1,214.48,214.44,15951110.0,222.67,21306540.0,21306540.0,18306540.0,18306540.0,3000000,21306540.0
3,2024,2,0.0,209.66,39182960.0,229.87,21306540.0,21306540.0,18306540.0,18306540.0,3000000,21306540.0


In [4]:
period_df.tail(5)

Unnamed: 0,Year,Quarter,Period,TCQ_Weekday,TCQ_Weekend_PH
187,2024,2,44,521795.957581,511745.260075
188,2024,2,45,509037.825497,503120.417756
189,2024,2,46,492624.774907,490075.053877
190,2024,2,47,471344.391703,474155.106133
191,2024,2,48,437836.736105,448734.605604


In [5]:
quarters_df.to_sql('VCData_Quarter', conn, if_exists='replace', index=False)
period_df.to_sql('VCData_Period', conn, if_exists='replace', index=False)

192

In [6]:
# Commit and close the connection
conn.close()