# Description
Dump of the collection of Indonesian administrative regions from BPS (Badan Pusat Statistik). There are 82k+ rows of data.

In [53]:
import pandas as pd
import requests  as re
import csv

In [54]:
def get_bps_data(level: str = 'provinsi', parent: str = '0', timeout: float = 30):
  """Fetch one level of administrative regions from the BPS bridging endpoint.

  Args:
    level: Value sent as the ``level`` query parameter. This file uses
      'provinsi', 'kabupaten', 'kecamatan' and 'desa'.
    parent: Value sent as the ``parent`` query parameter, i.e. the code of
      the enclosing region. Callers in this file also pass plain ints;
      they are URL-encoded the same way as their string form.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout a stalled connection would block the whole crawl forever.

  Returns:
    The decoded JSON body of the response.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If the server does not answer within ``timeout``.
  """
  BASE_URL = 'https://sig.bps.go.id/rest-bridging/getwilayah'
  params = {
    'level': level,
    'parent': parent,
  }
  # `re` is the file-level alias for the `requests` package.
  response = re.get(url=BASE_URL, params=params, timeout=timeout)
  # Fail loudly on an error status instead of trying to parse an error page.
  response.raise_for_status()
  return response.json()

In [55]:
# Output CSV paths, one file per administrative level. Each file except the
# village one is also read back as the list of parent ids when fetching the
# next level down (province -> district -> subdistrict -> village).
OUTPATH_PROVINCE = 'output/separate/provinsi.csv'
OUTPATH_DISTRICT = 'output/separate/district.csv'
OUTPATH_SUBDISTRICT = 'output/separate/subdistrict.csv'
OUTPATH_VILLAGE = 'output/separate/village.csv'

def get_bps_province():
  """Fetch all provinces and write them to ``OUTPATH_PROVINCE``.

  The CSV gets a header row ``id,name`` followed by one row per province,
  filled from the ``kode_bps`` and ``nama_bps`` fields of each returned
  record. An existing file at that path is overwritten.
  """
  # Fetch before opening the file: opening in 'w' mode truncates it, so a
  # failed request must not be allowed to wipe a previously written dump.
  data_province = get_bps_data()

  # Explicit UTF-8 so the file does not depend on the platform locale and
  # matches the default encoding pandas uses when it is read back.
  with open(OUTPATH_PROVINCE, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['id', 'name'])
    writer.writerows(
      [province['kode_bps'], province['nama_bps']]
      for province in data_province
    )

def get_bps_district():
  """Fetch the districts of every province and write them to ``OUTPATH_DISTRICT``.

  Province ids are read from the ``id`` column of ``OUTPATH_PROVINCE``; for
  each one the 'kabupaten' level is requested. The CSV gets a header row
  ``id,parent_id,name`` followed by one row per district (``kode_bps``,
  the province id, ``nama_bps``). An existing file is overwritten.
  """
  # Read the parent ids before opening the output: opening in 'w' mode
  # truncates it, so a missing/invalid province file must not wipe an
  # earlier district dump.
  province_ids = pd.read_csv(OUTPATH_PROVINCE)['id']

  # Explicit UTF-8 so the file does not depend on the platform locale and
  # matches the default encoding pandas uses when it is read back.
  with open(OUTPATH_DISTRICT, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['id', 'parent_id', 'name'])

    # Rows are written as each response arrives, so an interrupted run
    # still leaves the districts fetched so far on disk.
    for province_id in province_ids:
      data_district = get_bps_data(level='kabupaten', parent=int(province_id))
      for district in data_district:
        writer.writerow([district['kode_bps'], province_id, district['nama_bps']])

def get_bps_subdistrict():
  """Fetch the subdistricts of every district and write them to ``OUTPATH_SUBDISTRICT``.

  District ids are read from the ``id`` column of ``OUTPATH_DISTRICT``; for
  each one the 'kecamatan' level is requested. The CSV gets a header row
  ``id,parent_id,name`` followed by one row per subdistrict (``kode_bps``,
  the district id, ``nama_bps``). An existing file is overwritten.
  """
  # Read the parent ids before opening the output: opening in 'w' mode
  # truncates it, so a missing/invalid district file must not wipe an
  # earlier subdistrict dump.
  district_ids = pd.read_csv(OUTPATH_DISTRICT)['id']

  # Explicit UTF-8 so the file does not depend on the platform locale and
  # matches the default encoding pandas uses when it is read back.
  with open(OUTPATH_SUBDISTRICT, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['id', 'parent_id', 'name'])

    # Rows are written as each response arrives, so an interrupted run
    # still leaves the subdistricts fetched so far on disk.
    for district_id in district_ids:
      data_subdistrict = get_bps_data(level='kecamatan', parent=int(district_id))
      for subdistrict in data_subdistrict:
        writer.writerow([subdistrict['kode_bps'], district_id, subdistrict['nama_bps']])

def get_bps_villages():
  """Fetch the villages of every subdistrict and write them to ``OUTPATH_VILLAGE``.

  Subdistrict ids are read from the ``id`` column of ``OUTPATH_SUBDISTRICT``;
  for each one the 'desa' level is requested. The CSV gets a header row
  ``id,parent_id,kode_dagri,name`` followed by one row per village
  (``kode_bps``, the subdistrict id, ``kode_dagri``, ``nama_bps``). An
  existing file is overwritten.
  """
  # Read the parent ids before opening the output: opening in 'w' mode
  # truncates it, so a missing/invalid subdistrict file must not wipe an
  # earlier village dump (which is slow to rebuild).
  subdistrict_ids = pd.read_csv(OUTPATH_SUBDISTRICT)['id']

  # Explicit UTF-8 so the file does not depend on the platform locale.
  with open(OUTPATH_VILLAGE, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['id', 'parent_id', 'kode_dagri', 'name'])

    # Rows are written as each response arrives, so an interrupted run
    # still leaves the villages fetched so far on disk.
    for subdistrict_id in subdistrict_ids:
      data_village = get_bps_data(level='desa', parent=int(subdistrict_id))
      for village in data_village:
        writer.writerow([
          village['kode_bps'],
          subdistrict_id,
          village['kode_dagri'],
          village['nama_bps'],
        ])

In [56]:
# get_bps_province()

# get_bps_district()
# 1 min 5 sec

#get_bps_subdistrict() 
# 3 min 46 sec

# get_bps_villages()
# WARNING: takes an estimated 90 min 5 sec to request all the data