# Library

In [5]:
import os
import pandas as pd

# Path Setting

In [7]:
# Use the current working directory instead
base_path = os.getcwd()
data_source_dir = os.path.abspath(os.path.join(base_path, "..", "..", "A. Data Source", "A.2. SSKI (Bank Indonesia)"))

# Additional Function

In [12]:
def read_sheet_15a_or_16a(file_path):
    try:
        # Get all sheet names
        sheet_names = pd.ExcelFile(file_path).sheet_names

        # Select the appropriate sheet
        target_sheet = None
        if '15a' in sheet_names:
            target_sheet = '15a'
        elif '16a' in sheet_names:
            target_sheet = '16a'

        if target_sheet:
            df = pd.read_excel(file_path, sheet_name=target_sheet)
            return df
        else:
            print(f"No sheet '15a' or '16a' found in {file_path}")
            return None

    except Exception as e:
        print(f"Failed to read {file_path}: {e}")
        return None

In [29]:
def normalize_text(text):
    """Convert text to lowercase and remove all spaces."""
    return str(text).lower().replace(" ", "")

# def merge_rows(df, merge_list, col_index=1):
#     # Normalize the merge_list
#     normalized_list = [normalize_text(item) for item in merge_list]

#     rows_to_drop = []
#     for i in range(len(df) - 1):
#         current_val = normalize_text(df.iloc[i, col_index])

#         if current_val in normalized_list:
#             # Merge current row with the next row (column by column)
#             df.iloc[i] = df.iloc[i].astype(str) + " " + df.iloc[i + 1].astype(str)
#             rows_to_drop.append(i + 1)

#     df = df.drop(rows_to_drop).reset_index(drop=True)
#     return df

def merge_rows(df, merge_list, col_index=1):
    # Normalize the merge_list
    normalized_list = [normalize_text(item) for item in merge_list]

    rows_to_drop = []
    for i in range(len(df) - 1):
        current_val = normalize_text(df.iloc[i, col_index])

        if current_val in normalized_list:
            # Merge current row with the next row (column by column)
            for col in df.columns:
                val1 = df.at[i, col]
                val2 = df.at[i + 1, col]

                # Convert nulls to empty string, others to string
                str1 = "" if pd.isna(val1) else str(val1)
                str2 = "" if pd.isna(val2) else str(val2)

                # Merge with a space only if both are non-empty
                if str1 and str2:
                    merged = str1 + " " + str2
                else:
                    merged = str1 + str2  # One of them is empty

                df.at[i, col] = merged

            rows_to_drop.append(i + 1)

    df = df.drop(rows_to_drop).reset_index(drop=True)
    return df

# Main Code

In [27]:
data_dict = {}

# Loop through all child folders
for folder in os.listdir(data_source_dir):
    folder_path = os.path.join(data_source_dir, folder)
    
    # Only process if it's a directory
    if os.path.isdir(folder_path):
        for file in os.listdir(folder_path):
            if file.endswith('.xlsx'):
                file_path = os.path.join(folder_path, file)
                try:
                    print(f"Found excel file : {file}")
                    df = read_sheet_15a_or_16a(file_path)
                    key = os.path.splitext(file)[0]  # Get filename without extension
                    data_dict[key] = df
                except Exception as e:
                    print(f"Failed to read {file_path}: {e}")

Found excel file : SSKI_DESEMBER_2022.xlsx
Found excel file : SSKI_DESEMBER_2023.xlsx
Found excel file : SSKI_DESEMBER_2024.xlsx
Found excel file : SSKI_JUNI 2025.xlsx
Found excel file : SSKI_JUNI_2022.xlsx
Found excel file : SSKI_JUNI_2023.xlsx
Found excel file : SSKI_JUNI_2024.xlsx


In [28]:
data_dict_backup = data_dict.copy()

In [30]:
data_dict = data_dict_backup.copy()

merge_key_list = [
    "KOMPONEN"
]

for key in data_dict.keys() :
    df = data_dict[key]
    new_df = merge_rows(df, merge_key_list, 0)
    data_dict[key] = new_df

  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged
  df.at[i, col] = merged


In [31]:
data_dict['SSKI_DESEMBER_2022']

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22
0,TABEL 15a.INDIKATOR SEKTOR KORPORASI/,,,,,,,,,,...,,,,,,,,,,
1,TABLE 15a. INDICATORS OF CORPORATE SECTOR,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,KOMPONEN,,SATUAN/ UNIT,2015,2016,2017,2018,2019,2020,2020 Q1,...,2021.0,2021* Q1*,Q2*,Q3*,Q4*,2022 Q1**,Q2**,Q3**,COMPONENTS,
4,1,Jumlah Emiten Non Keuangan,Perusahaan/ Company,438,452,480,531,585,628,597,...,663.0,632,638,650,663,676,684,705,Number of Listed Non Financial Corporation,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
287,- Untuk indikator PMI hanya mencakup indust...,,,,,,,,,,...,,,,,,,,,- PMI only consist of manufaturing industry s...,
288,* Angka Sementara,,,,,,,,,,...,,,,,,,,,* Preliminary figures,
289,** Angka Sangat Sementara,,,,,,,,,,...,,,,,,,,,** Very preliminary figures,
290,,,,,,,,,,,...,,,,,,,,,,


In [24]:
for key in data_dict.keys() :
    print(key)

SSKI_DESEMBER_2022
SSKI_DESEMBER_2023
SSKI_DESEMBER_2024
SSKI_JUNI 2025
SSKI_JUNI_2022
SSKI_JUNI_2023
SSKI_JUNI_2024
