# Python Script to Handle & Ingest .xlsx to Local Postgres DB

### Install Dependencies

In [1]:
import subprocess
import os
import sys

# Install the requirements with the interpreter that is running this notebook.
# A bare `pip` found on PATH may belong to a different Python environment than
# the active kernel, in which case the packages would not be importable here.
install_result = subprocess.run(
    [sys.executable, '-m', 'pip', 'install', '-r', '../requirements.txt'],
    stdout=subprocess.DEVNULL,  # keep the cell output quiet on success
    stderr=subprocess.PIPE,     # captured so it can be shown if pip fails
    text=True,
)

# pip's output stays hidden on success, but a failed install is reported
# instead of being silently ignored.
if install_result.returncode != 0:
    print(f"pip install failed (exit code {install_result.returncode}):")
    print(install_result.stderr)

In [2]:
import sys

# Add the project root folder to sys.path.
# NOTE(review): presumably this is where `config` and `ingest_helpers` live;
# the path is machine-specific and must be adjusted on other machines.
PROJECT_ROOT = '/Users/naufalnashif/Desktop/RF-OJK/Mas Adit OJK/Data Profile & Riwayat Entitiy Terbaru'

# Re-running this cell must not keep appending the same entry.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [3]:
import os
import pandas as pd
from sqlalchemy import create_engine, text
from config import FOLDER_PATH, APPLICATION_FILES, SCHEMA_NAME, DB_CONFIG
from ingest_helpers import load_excel_sheets, standardize_columns

### Connect to DB

In [4]:
# DB Engine
from sqlalchemy.engine import URL

# Build the URL from its components instead of formatting a string, so that
# special characters in the credentials (e.g. '@', ':' or '/' in the password)
# are escaped and cannot corrupt the connection string.
db_url = URL.create(
    drivername="postgresql",
    username=DB_CONFIG['user'],
    password=DB_CONFIG['password'],
    host=DB_CONFIG['host'],
    port=DB_CONFIG['port'],
    database=DB_CONFIG['database'],
)
engine = create_engine(db_url)

#### Make a new schema

In [5]:
# Create the target schema; IF NOT EXISTS makes the statement a no-op when the
# schema is already there.
create_schema_stmt = text(f'CREATE SCHEMA IF NOT EXISTS "{SCHEMA_NAME}"')

with engine.connect() as connection:
    connection.execute(create_schema_stmt)
    # Commit the DDL before the connection is released.
    connection.commit()

### .xlsx > Sheets > Tables > Ingest to DB

In [7]:
# Target table prefixes, in the order the sheets are expected to appear in
# each workbook. Sheets are matched by position, not by sheet name.
SHEET_TABLE_KEYS = [
    'profil_entity',
    'riwayat_pendirian',
    'riwayat_direksi',
    'riwayat_komisaris',
    'riwayat_pemegang_saham',
    'riwayat_produk_aktivitas',
    'riwayat_dps',
]

for file_name in APPLICATION_FILES:
    # Strip only the file extension (a plain str.replace would also remove
    # '.xlsx' from the middle of a name and miss an upper-case extension).
    app_name = os.path.splitext(file_name)[0].lower()
    full_path = os.path.join(FOLDER_PATH, file_name)
    print(f"📄 Processing {file_name}")

    sheets = load_excel_sheets(full_path)
    sheet_names = list(sheets)  # computed once instead of once per sheet

    # A workbook with fewer sheets than expected used to raise IndexError
    # while building the mapping; report the gap and ingest what is there.
    if len(sheet_names) < len(SHEET_TABLE_KEYS):
        missing_keys = SHEET_TABLE_KEYS[len(sheet_names):]
        print(
            f"⚠️ {file_name} has only {len(sheet_names)} sheet(s); "
            f"skipping: {', '.join(missing_keys)}"
        )

    # zip() stops at the shorter sequence, so missing trailing sheets are
    # simply left out of the mapping.
    sheet_mapping = {
        sheet_key: sheets.get(sheet_name)
        for sheet_key, sheet_name in zip(SHEET_TABLE_KEYS, sheet_names)
    }

    for sheet_key, df in sheet_mapping.items():
        if df is None:
            continue

        df = standardize_columns(df)
        # Tag every row with its source application as the first column.
        df.insert(0, 'application', app_name)
        table_name = f"{sheet_key}_{app_name}".lower()

        # if_exists='replace' drops and recreates the table on every run.
        df.to_sql(
            table_name,
            engine,
            schema=SCHEMA_NAME,
            if_exists='replace',
            index=False
        )
        print(f"✅ Uploaded to table: {SCHEMA_NAME}.{table_name}")

📄 Processing APKAP.xlsx
✅ Uploaded to table: rfojk_python.profil_entity_apkap
✅ Uploaded to table: rfojk_python.riwayat_pendirian_apkap
✅ Uploaded to table: rfojk_python.riwayat_direksi_apkap
✅ Uploaded to table: rfojk_python.riwayat_komisaris_apkap
✅ Uploaded to table: rfojk_python.riwayat_pemegang_saham_apkap
✅ Uploaded to table: rfojk_python.riwayat_produk_aktivitas_apkap
✅ Uploaded to table: rfojk_python.riwayat_dps_apkap
📄 Processing SIPM.xlsx
✅ Uploaded to table: rfojk_python.profil_entity_sipm
✅ Uploaded to table: rfojk_python.riwayat_pendirian_sipm
✅ Uploaded to table: rfojk_python.riwayat_direksi_sipm
✅ Uploaded to table: rfojk_python.riwayat_komisaris_sipm
✅ Uploaded to table: rfojk_python.riwayat_pemegang_saham_sipm
✅ Uploaded to table: rfojk_python.riwayat_produk_aktivitas_sipm
✅ Uploaded to table: rfojk_python.riwayat_dps_sipm
📄 Processing SPRINT.xlsx
✅ Uploaded to table: rfojk_python.profil_entity_sprint
✅ Uploaded to table: rfojk_python.riwayat_pendirian_sprint
✅ Uploade

### Create Views & Data Marts

In [11]:
# Applications whose per-application tables are combined into the union views.
applications = ['APKAP', 'SIPM', 'SPRINT', 'DAPOK']

# NOTE(review): hard-coded here; looks like it should match SCHEMA_NAME from
# config — confirm before changing either one.
schema = 'rfojk_python'

# View name -> {"base_name": table prefix (str) or list of prefixes,
#               "columns": columns selected from every source table}.
# Built from (view, base, columns) triples so each view reads as one record.
# NOTE(review): "produteffectivedate" may be a misspelling of
# "producteffectivedate"; it is kept as is because it must match the column
# name in the ingested tables — verify against the data.
view_definitions = {
    view: {"base_name": base, "columns": selected}
    for view, base, selected in (
        (
            "vw_profil_entity_union",
            "profil_entity",
            [
                "application", "institutionprofileid", "institutionname",
                "idorigin", "npwp", "companyemail", "headofficeaddress",
                "webaddress",
            ],
        ),
        (
            "vw_riwayat_pendirian_union",
            "riwayat_pendirian",
            [
                "application", "institutionprofileid", "institutionname",
                "idorigin", "sector", "subsector", "subsubsector",
                "legalentity", "licensetype", "licensenumber", "licensedate",
                "statusljk",
            ],
        ),
        (
            "vw_riwayat_direksi_komisaris_union",
            ["riwayat_direksi", "riwayat_komisaris"],
            [
                "application", "institutionprofileid", "commissionername",
                "commissionernationality", "commissionernik",
                "commissionerpassport", "position", "officiateeffectivedate",
                "officiateenddate", "officiateinactivedate",
            ],
        ),
        (
            "vw_riwayat_pemegang_saham_union",
            "riwayat_pemegang_saham",
            [
                "application", "institutionprofileid", "individualownername",
                "individualownernik", "individualownernpwp", "ownershipvalue",
                "ownershippercentage",
            ],
        ),
        (
            "vw_riwayat_produk_aktivitas_union",
            "riwayat_produk_aktivitas",
            [
                "application", "institutionprofileid", "productname",
                "productdescription", "producttype", "letternumber",
                "letterdate", "produteffectivedate", "productstatus",
            ],
        ),
        (
            "vw_riwayat_dps_union",
            "riwayat_dps",
            [
                "application", "institutionprofileid", "position",
                "shariasupervisoryboardname",
                "shariasupervisoryboardnationality",
                "shariasupervisoryboardnik", "shariasupervisoryboardpassport",
                "officiateeffectivedate", "officiateenddate",
                "officiateinactivedate",
            ],
        ),
    )
}


def generate_column_list(columns, application):
    """Build the SELECT column list for one application's branch of a view.

    Args:
        columns: Iterable of column names. The name ``'application'`` is
            special: it is emitted as a string literal holding
            ``application`` instead of being read from the table.
        application: Application name used for that literal.

    Returns:
        A comma-separated SQL fragment. Every column other than
        ``'application'`` is lower-cased, double-quoted and cast to ``text``
        (presumably so all branches of the UNION have matching types).
        An empty ``columns`` yields an empty string.
    """
    # Double embedded single quotes so the name cannot terminate the SQL
    # string literal early.
    app_literal = str(application).replace("'", "''")

    def render(col):
        if col == 'application':
            return f"'{app_literal}' AS application"
        # Double embedded double quotes so the name stays one quoted identifier.
        identifier = col.lower().replace('"', '""')
        return f'"{identifier}"::text'

    return ', '.join(render(col) for col in columns)


# For every entry in view_definitions: create (or replace) a UNION view over
# the per-application tables, then copy each non-empty view into a "dm_" table.
for view_name, info in view_definitions.items():
    columns = info['columns']
    base_names = info['base_name']
    if isinstance(base_names, str):
        # A single base table is given as a plain string; wrap it in a list.
        base_names = [base_names]

    # One SELECT per (application, base table) pair, applications outermost.
    union_queries = [
        "SELECT {} FROM {}".format(
            generate_column_list(columns, app),
            f'{schema}."{base.lower()}_{app.lower()}"',
        )
        for app in applications
        for base in base_names
    ]

    # NOTE(review): UNION (not UNION ALL) makes the database drop duplicate
    # rows — confirm that de-duplication is intended here.
    union_sql = "\nUNION\n".join(union_queries)

    full_view_name = f'{schema}."{view_name}"'
    create_view_sql = (
        "\n"
        f"    CREATE OR REPLACE VIEW {full_view_name} AS\n"
        f"    {union_sql};\n"
        "    "
    )

    dm_table_name = view_name.replace("vw_", "dm_")
    full_dm_table = f'{schema}."{dm_table_name}"'
    create_dm_table_sql = (
        "\n"
        f"    DROP TABLE IF EXISTS {full_dm_table};\n"
        "\n"
        f"    CREATE TABLE {full_dm_table} AS\n"
        f"    SELECT * FROM {full_view_name};\n"
        "    "
    )

    try:
        # The view and its data-mart table are created inside one
        # engine.begin() block.
        with engine.begin() as conn:
            print(f"➡️ Creating view: {full_view_name}")
            conn.execute(text(create_view_sql))

            count_stmt = text(f"SELECT COUNT(*) FROM {full_view_name}")
            count = conn.execute(count_stmt).scalar()
            print(f"🧾 View has {count} rows")

            if count <= 0:
                # Nothing to materialise for an empty view.
                print("⚠️ Skipped table creation, view is empty")
            else:
                conn.execute(text(create_dm_table_sql))
                print(f"📄 Created table: {full_dm_table}")

    except Exception as exc:
        # Best effort: report the failure and move on to the next view.
        print(f"❌ Error: {exc}")


➡️ Creating view: rfojk_python."vw_profil_entity_union"
🧾 View has 13602 rows
📄 Created table: rfojk_python."dm_profil_entity_union"
➡️ Creating view: rfojk_python."vw_riwayat_pendirian_union"
🧾 View has 12701 rows
📄 Created table: rfojk_python."dm_riwayat_pendirian_union"
➡️ Creating view: rfojk_python."vw_riwayat_direksi_komisaris_union"
🧾 View has 16596 rows
📄 Created table: rfojk_python."dm_riwayat_direksi_komisaris_union"
➡️ Creating view: rfojk_python."vw_riwayat_pemegang_saham_union"
🧾 View has 16036 rows
📄 Created table: rfojk_python."dm_riwayat_pemegang_saham_union"
➡️ Creating view: rfojk_python."vw_riwayat_produk_aktivitas_union"
🧾 View has 19413 rows
📄 Created table: rfojk_python."dm_riwayat_produk_aktivitas_union"
➡️ Creating view: rfojk_python."vw_riwayat_dps_union"
🧾 View has 496 rows
📄 Created table: rfojk_python."dm_riwayat_dps_union"


### Left Join 