# Script untuk melakukan filter data yang diperlukan untuk publikasi ST2023 Tahap 2 Tiap Kecamatan di Kabupaten Sleman

## Import Library

In [1]:
import pathlib
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
from typing import List

## Define Kabupaten Yang Akan Dibuatkan Publikasi

In [2]:
# Regency (kabupaten) code compared against the 'kab' column when filtering;
# per the notebook title, the target is Kabupaten Sleman.
kabupaten = 3404

## Function untuk filter data yang diperlukan

In [3]:
# Filter one workbook down to the rows of the configured kabupaten
def filter_kab(path: pathlib.Path, output_dir: pathlib.Path) -> None:
    """Write a filtered copy of the workbook at ``path``.

    The first sheet whose name contains 'kec' and the first whose name
    contains 'desa' (case-insensitive) are read, reduced to the rows whose
    'kab' column equals the module-level ``kabupaten`` value, and written to
    ``output_dir / <parent folder name of path> / <file name of path>`` as
    sheets named 'Kecamatan' and 'Desa'.

    Args:
        path: Source Excel workbook.
        output_dir: Root directory for the filtered copies; missing
            directories are created.

    Returns:
        None. Any failure is caught and reported with ``print`` rather than
        raised, so one bad workbook does not abort a batch.
    """
    try:
        # The context manager releases the workbook handle even if a read
        # fails; otherwise the source file can stay locked.
        with pd.ExcelFile(path) as excel_file:
            sheet_names = excel_file.sheet_names

            # Find the sheets that include 'kec' and 'desa'
            nama_sheet_kec = next(
                (sheet for sheet in sheet_names if 'kec' in sheet.lower()), None
            )
            nama_sheet_desa = next(
                (sheet for sheet in sheet_names if 'desa' in sheet.lower()), None
            )

            # A bare next() would raise StopIteration, which prints as an
            # empty message; say explicitly what is missing instead.
            missing = [
                keyword
                for keyword, found in (('kec', nama_sheet_kec), ('desa', nama_sheet_desa))
                if found is None
            ]
            if missing:
                raise ValueError(
                    f'no sheet name contains {missing}; available sheets: {sheet_names}'
                )

            # Read the specified sheets
            df1 = pd.read_excel(excel_file, sheet_name=nama_sheet_kec)
            df2 = pd.read_excel(excel_file, sheet_name=nama_sheet_desa)

        # Keep only the rows belonging to the configured kabupaten
        df1 = df1[df1['kab'] == kabupaten]
        df2 = df2[df2['kab'] == kabupaten]

        # Create output path
        output_path = output_dir / path.parent.name / path.name
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
            df1.to_excel(writer, sheet_name='Kecamatan', index=False)
            df2.to_excel(writer, sheet_name='Desa', index=False)
        print(f'Finished processing {path}')

    except Exception as e:
        print(f'Error processing {path}: {e}')

In [4]:
# Fan the per-workbook filtering out over a pool of worker threads
def process_files(paths: List[pathlib.Path], output_dir: pathlib.Path, max_workers: int = 8) -> None:
    """Run ``filter_kab`` on every path in ``paths`` using a thread pool.

    Args:
        paths: Workbooks to process.
        output_dir: Output root passed through to ``filter_kab``.
        max_workers: Size of the thread pool.

    Returns:
        None, once every submitted task has finished.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = []
        for source in paths:
            pending.append(pool.submit(filter_kab, source, output_dir))

        # Wait in submission order; result() re-raises anything a task raised
        for task in pending:
            task.result()

# Entry point: filter every .xlsx workbook found in each chapter ("Bab") folder
if __name__ == "__main__":
    data_dir = pathlib.Path('in')
    output_dir = pathlib.Path('out')

    # Chapter folders 'Bab 1' through 'Bab 11', in numeric order
    bab_folders = [f'Bab {nomor}' for nomor in range(1, 12)]

    # One folder at a time; the workbooks inside a folder run concurrently
    for bab in bab_folders:
        folder_path = data_dir / bab
        files = list(folder_path.glob('*.xlsx'))
        process_files(files, output_dir, max_workers=16)

Finished processing in\Bab 6\1.10.4 Nasional_Tabulasi_UTP_BAB_4_tabel_4_08_komoditas_2410.xlsx
Finished processing in\Bab 6\1.10.2 Nasional_Tabulasi_UTP_BAB_4_tabel_4_08_komoditas_2406.xlsx
Finished processing in\Bab 6\1.10.13 Nasional_Tabulasi_UTP_BAB_4_tabel_4_08_komoditas_2821.xlsx
Finished processing in\Bab 6\1.10.10 Nasional_Tabulasi_UTP_BAB_4_tabel_4_08_komoditas_2605.xlsx
Finished processing in\Bab 6\1.10.11 Nasional_Tabulasi_UTP_BAB_4_tabel_4_08_komoditas_2609.xlsx
Finished processing in\Bab 6\1.10.9 Nasional_Tabulasi_UTP_BAB_4_tabel_4_08_komoditas_2604.xlsx
Finished processing in\Bab 6\1.10.5 Nasional_Tabulasi_UTP_BAB_4_tabel_4_08_komoditas_2411.xlsx
Finished processing in\Bab 6\1.10.12 Nasional_Tabulasi_UTP_BAB_4_tabel_4_08_komoditas_2803.xlsx
Finished processing in\Bab 6\1.10.7 Nasional_Tabulasi_UTP_BAB_4_tabel_4_08_komoditas_2611.xlsx
Finished processing in\Bab 6\1.10.6 Nasional_Tabulasi_UTP_BAB_4_tabel_4_08_komoditas_2412.xlsx
Finished processing in\Bab 6\1.10.8 Nasional_T