In [2]:
import os
import requests
import multitasking

In [3]:
# Output target: <OUTPUT_LOCATION>/<FILE_NAME>.
OUTPUT_LOCATION = "dataset"
FILE_NAME = "suara-tps.csv"
dataset_path = os.path.join(OUTPUT_LOCATION, FILE_NAME)

# Host serving the JSON documents, and the candidate-pair listing on it.
BASE_URL = "https://sirekap-obj-data.kpu.go.id"
PPWP = "{}/pemilu/ppwp.json".format(BASE_URL)

# Headers sent with every request: a Firefox user agent with
# pemilu2024.kpu.go.id as origin/referer.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:122.0) Gecko/20100101 Firefox/122.0",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "en,en-US;q=0.5",
    # 'Accept-Encoding': 'gzip, deflate, br',  (intentionally left disabled)
    "Origin": "https://pemilu2024.kpu.go.id",
    "Connection": "keep-alive",
    "Referer": "https://pemilu2024.kpu.go.id/",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site",
    "Sec-GPC": "1",
}

def base_request(url, *kode):
    """Build a ``.json`` endpoint from a base URL and a code path.

    Every positional ``kode`` value is converted with ``str`` and the
    results are joined with ``/``; that path is appended to ``url`` and
    suffixed with ``.json``.
    """
    segments = [str(part) for part in kode]
    return "{}/{}.json".format(url, "/".join(segments))

def wilayah(*kode):
    """Return the URL of the region (wilayah) listing for a code path."""
    endpoint = f"{BASE_URL}/wilayah/pemilu/ppwp"
    return base_request(endpoint, *kode)

def hhcw(*kode):
    """Return the URL of the ``hhcw`` result document for a code path."""
    endpoint = f"{BASE_URL}/pemilu/hhcw/ppwp"
    return base_request(endpoint, *kode)

@multitasking.task
def req_parallel(url, fn, timeout=30):
    """Fetch ``url`` as JSON in a background task and pass the result to ``fn``.

    The ``@multitasking.task`` decorator runs the body off the calling
    thread, so the call itself does not return the response.

    Args:
        url: address to GET, sent with the shared ``HEADERS``.
        fn: callback invoked with the decoded JSON body.
        timeout: seconds to wait for the server before ``requests`` raises.
            Without a timeout a stalled connection would block the worker
            indefinitely.
    """
    res = requests.get(url, headers=HEADERS, timeout=timeout).json()
    fn(res)
    
def req(url, timeout=30):
    """GET ``url`` with the shared ``HEADERS`` and return the decoded JSON.

    Args:
        url: address to fetch.
        timeout: seconds to wait for the server before ``requests`` raises.
            Without a timeout a stalled connection would hang the caller
            indefinitely.

    Returns:
        Whatever ``Response.json()`` decodes from the body.
    """
    return requests.get(url, headers=HEADERS, timeout=timeout).json()

In [4]:
# Display the resolved path of the output CSV.
dataset_path

'dataset/suara-tps.csv'

In [5]:
# Fetch the result document of a single TPS as a sample payload to explore.
sample_tps_path = (11, 1105, 110507, 1105072002, 1105072002001)
tpsaceh = req(hhcw(*sample_tps_path))

In [6]:
# Inspect the "administrasi" section of the sample payload.
tpsaceh.get('administrasi')

{'suara_sah': 136,
 'suara_total': 142,
 'pemilih_dpt_j': 156,
 'pemilih_dpt_l': 74,
 'pemilih_dpt_p': 82,
 'pengguna_dpt_j': 140,
 'pengguna_dpt_l': 65,
 'pengguna_dpt_p': 75,
 'pengguna_dptb_j': 0,
 'pengguna_dptb_l': 0,
 'pengguna_dptb_p': 0,
 'suara_tidak_sah': 6,
 'pengguna_total_j': 142,
 'pengguna_total_l': 66,
 'pengguna_total_p': 76,
 'pengguna_non_dpt_j': 2,
 'pengguna_non_dpt_l': 1,
 'pengguna_non_dpt_p': 1}

In [7]:
# Scratch check: append the sample's image entries to a placeholder tuple,
# mirroring how image links end up as the last fields of a row.
a = (1, 2, 3, 4, 5)
images = tpsaceh.get('images', ('', '', ''))
data = (*a, *images)

In [11]:
# Dropping the last three items leaves only the leading placeholder fields.
data[:-3]

(1, 2, 3, 4, 5)

In [12]:
def create_file():
    """Make sure the output directory and CSV exist, writing the header once.

    The directory is created if missing. The header row is written only
    when the CSV is absent or empty, so calling this again never duplicates
    it. When the CSV already has content, a notice is printed instead.

    The header lists the same eleven columns, in the same order, that
    ``process_suara`` passes to ``write``.
    """
    # exist_ok avoids failing (and skipping the header) when only the
    # directory is already there.
    os.makedirs(OUTPUT_LOCATION, exist_ok=True)

    if os.path.exists(dataset_path) and os.path.getsize(dataset_path) > 0:
        print("file %s exists" % dataset_path)
        return

    with open(dataset_path, 'a') as f:
        f.write("tps,paslon01,paslon02,paslon03,suara_sah,suara_tidak_sah,total_suara,total_suara_paslon,img1,img2,img3\n")

def write(*data):
    """Append one comma-separated row to the dataset CSV.

    All positional values are stringified and joined with commas. Before
    writing, the row without its last three values is echoed to stdout.
    """
    preview = data[:-3]
    print(f"writing: {preview}")
    with open(dataset_path, 'a') as out:
        row = ",".join(str(value) for value in data)
        out.write(row + "\n")

In [16]:
# Prepare the output directory and CSV before any rows are appended.
create_file()

file dataset/suara-tps.csv exists


In [17]:
# Fetch the candidate-pair listing synchronously.
pwp = req(PPWP)

In [18]:
# Smoke-test req_parallel: the callback prints the "nama" of entry 100025.
req_parallel(PPWP, fn=lambda x: print(x.get('100025').get('nama')))

<Thread(Thread-4, started 123145386524672)>

H. ANIES RASYID BASWEDAN, Ph.D. - Dr. (H.C.) H. A. MUHAIMIN ISKANDAR


In [19]:
# Show the candidate-pair listing fetched into `pwp`.
pwp

{'100025': {'ts': '2024-02-14 18:44:00',
  'nama': 'H. ANIES RASYID BASWEDAN, Ph.D. - Dr. (H.C.) H. A. MUHAIMIN ISKANDAR',
  'warna': '#8CB9BD',
  'nomor_urut': 1},
 '100026': {'ts': '2024-02-14 18:44:00',
  'nama': 'H. PRABOWO SUBIANTO - GIBRAN RAKABUMING RAKA',
  'warna': '#C7B7A3',
  'nomor_urut': 2},
 '100027': {'ts': '2024-02-14 18:44:00',
  'nama': 'H. GANJAR PRANOWO, S.H., M.I.P. - Prof. Dr. H. M. MAHFUD MD',
  'warna': '#B67352',
  'nomor_urut': 3}}

In [20]:
# Fetch the listing under code 0, kept here as the list of provinces.
provinces = req(wilayah(0))

In [21]:
# Sorted "kode" values of every entry in the province listing.
kode_provinsi = sorted(item.get('kode') for item in provinces)

In [22]:
def safe_get(obj, key, default="-"):
    """Return ``obj.get(key, default)``, or ``default`` if that lookup fails.

    This tolerates ``obj`` being ``None`` or any object without a usable
    ``.get`` method. A key that is present with a ``None`` value is returned
    as ``None``, not replaced by ``default``.

    Only ``Exception`` subclasses are swallowed, so ``KeyboardInterrupt``
    and ``SystemExit`` still propagate and a running crawl can be
    interrupted.
    """
    try:
        return obj.get(key, default)
    except Exception:
        return default

In [14]:
@multitasking.task
def get_province(kode=0):
    """Start the crawl: fetch the listing under ``kode`` and hand it to ``loop_province``."""
    def _on_listing(listing):
        return loop_province(listing)

    req_parallel(wilayah(kode), _on_listing)
    
def loop_province(data):
    """Call ``get_kabupaten`` with the "kode" of every entry in ``data``."""
    for kode_prov in (entry.get("kode") for entry in data):
        get_kabupaten(kode_prov)

@multitasking.task
def get_kabupaten(prov):
    """Fetch the listing under province ``prov`` and pass it to ``loop_kabupaten``."""
    def _on_listing(listing):
        return loop_kabupaten(prov, listing)

    req_parallel(wilayah(prov), _on_listing)
    
def loop_kabupaten(prov, data):
    """Call ``get_kecamatan(prov, kode)`` for the "kode" of every entry in ``data``."""
    for kode_kab in (entry.get("kode") for entry in data):
        get_kecamatan(prov, kode_kab)

@multitasking.task
def get_kecamatan(prov, kab):
    """Fetch the listing under ``prov/kab`` and pass it to ``loop_kecamatan``."""
    def _on_listing(listing):
        return loop_kecamatan(prov, kab, listing)

    req_parallel(wilayah(prov, kab), _on_listing)
    
def loop_kecamatan(prov, kab, data):
    """Call ``get_lurah(prov, kab, kode)`` for the "kode" of every entry in ``data``."""
    parents = (prov, kab)
    for entry in data:
        kode_kec = entry.get("kode")
        get_lurah(*parents, kode_kec)

@multitasking.task
def get_lurah(prov, kab, kec):
    """Fetch the listing under ``prov/kab/kec`` and pass it to ``loop_lurah``."""
    def _on_listing(listing):
        return loop_lurah(prov, kab, kec, listing)

    req_parallel(wilayah(prov, kab, kec), _on_listing)

def loop_lurah(prov, kab, kec, data):
    """Call ``get_tps(prov, kab, kec, kode)`` for the "kode" of every entry in ``data``."""
    parents = (prov, kab, kec)
    for entry in data:
        kode_lurah = entry.get("kode")
        get_tps(*parents, kode_lurah)
    

@multitasking.task
def get_tps(prov, kab, kec, lurah):
    """Fetch the listing under ``prov/kab/kec/lurah`` and pass it to ``loop_tps``."""
    def _on_listing(listing):
        return loop_tps(prov, kab, kec, lurah, listing)

    req_parallel(wilayah(prov, kab, kec, lurah), _on_listing)
    
def loop_tps(prov, kab, kec, lurah, data):
    """Call ``suara_tps`` for every entry in ``data``, passing the whole entry."""
    parents = (prov, kab, kec, lurah)
    for station in data:
        suara_tps(*parents, station)

@multitasking.task
def suara_tps(prov, kab, kec, lurah, tps):
    """Fetch the ``hhcw`` result document of one TPS and pass it to ``process_suara``.

    ``tps`` is the listing entry; its "kode" forms the last URL segment.
    """
    kode_tps = tps.get("kode")
    result_url = hhcw(prov, kab, kec, lurah, kode_tps)
    process_suara(tps, req(result_url))
    
def process_suara(tps, suara):
    """Flatten one TPS result payload into a row and append it via ``write``.

    Args:
        tps: listing entry of the polling station; only its "kode" is used.
        suara: decoded result payload. Its "chart", "administrasi" and
            "images" entries are read; the payload or any of these may be
            missing or ``None``.

    The row passed to ``write`` is, in order: the TPS code, the chart values
    for keys 100025 / 100026 / 100027, ``suara_sah``, ``suara_tidak_sah``,
    ``suara_total``, the sum of the three chart values, and three image
    entries.

    Missing or null vote counts are written as 0, matching the defaults
    used for absent keys. The image list is padded with '' or truncated so
    exactly three entries are always written.
    """
    kode_tps = tps.get("kode")

    # safe_get returns None for keys that are present but null; the `or`
    # fallbacks turn those into empty containers.
    administrasi = safe_get(suara, 'administrasi', {}) or {}
    chart = safe_get(suara, 'chart', {}) or {}
    images = list(safe_get(suara, 'images', None) or ())
    img1, img2, img3 = (images + ['', '', ''])[:3]

    # A null count would make int() raise and lose the whole row.
    suara1 = safe_get(chart, '100025', 0) or 0
    suara2 = safe_get(chart, "100026", 0) or 0
    suara3 = safe_get(chart, "100027", 0) or 0

    suara_sah = safe_get(administrasi, "suara_sah", 0)
    suara_tidak_sah = safe_get(administrasi, "suara_tidak_sah", 0)
    total_suara = safe_get(administrasi, 'suara_total', 0)
    total_suara_paslon = int(suara1) + int(suara2) + int(suara3)
    write(kode_tps,suara1,suara2,suara3,suara_sah,suara_tidak_sah,total_suara,total_suara_paslon,img1,img2,img3)

In [24]:
# Start the crawl from the default root code (0); rows are appended to the
# CSV by background tasks as responses arrive.
get_province()

<Thread(Thread-5, stopped 123145386524672)>

writing: ('5203182008007', 56, 117, 19, 192, 5, 197, 192, 'https://sirekap-obj-formc.kpu.go.id/0de1/pemilu/ppwp/52/03/18/20/08/5203182008007-20240214-212243--44348a1b-4e8a-46d6-9297-9ac930e395aa.jpg', 'https://sirekap-obj-formc.kpu.go.id/0de1/pemilu/ppwp/52/03/18/20/08/5203182008007-20240214-205410--176d4d79-b003-4593-a8cb-eb9a21f18c3a.jpg', 'https://sirekap-obj-formc.kpu.go.id/0de1/pemilu/ppwp/52/03/18/20/08/5203182008007-20240214-202506--366aeb7e-b21c-40dd-97c9-ed9609404e0e.jpg')
writing: ('5272041011007', 0, 0, 0, 264, 2, 266, 0, 'https://sirekap-obj-formc.kpu.go.id/aff8/pemilu/ppwp/52/72/04/10/11/5272041011007-20240216-145849--0ddc2805-caf3-4ded-a36e-895cdef6f5d6.jpg', None, 'https://sirekap-obj-formc.kpu.go.id/aff8/pemilu/ppwp/52/72/04/10/11/5272041011007-20240216-145849--b8d9cebf-e64f-4093-94db-8d900e802f84.jpg')
writing: ('5272041011010', 136, 122, 13, 271, 4, 275, 271, 'https://sirekap-obj-formc.kpu.go.id/6a1b/pemilu/ppwp/52/72/04/10/11/5272041011010-20240215-094126--c9bd7a5e-e