# Precinct Result Scraper
The result for each voting center is obtained by querying the endpoint identified by the `vbc` field in the scraped region-level clustered precinct breakdowns.

In [2]:
import pandas as pd

# Read the clustered-precinct table from CSV and preview its first five rows.
nle_pop = pd.read_csv(filepath_or_buffer='2022NLEPOP.csv', encoding='utf-8')
nle_pop.head(5)

Unnamed: 0,region,province,municipality,barangay,precinct_id,cluster,clustertotal,clustered_precincts,pollingcenter
0,BARMM,BASILAN,AKBAR,LINONGAN,7080001,1,423,"0012A, 0012P1, 0011A","LINONGAN ELEMENTARY SCHOOL, BARANGAY LINONGAN,..."
1,BARMM,BASILAN,AKBAR,LINONGAN,7080002,2,411,"0011B, 0011C, 0012B","LINONGAN ELEMENTARY SCHOOL, BARANGAY LINONGAN,..."
2,BARMM,BASILAN,AKBAR,MANGUSO,7080003,3,313,"0035A, 0035C, 0035P1, 0035B","AKBAR ELEMENTARY SCHOOL, BARANGAY UPPER BATO-B..."
3,BARMM,BASILAN,AKBAR,UPPER SINANGKAPAN,7080004,4,655,"0050A, 0050B, 0050C, 0050D, 0050P1",BARANGAY UPPER SINANGKAPAN COVERED COURT (BUSC...
4,BARMM,BASILAN,AKBAR,CADDAYAN,7080005,5,479,"0054A, 0055A, 0055B, 0055C, 0055P1","CADDAYAN ELEMENTARY SCHOOL, BARANGAY CADDAYAN,..."


In [30]:
import os

# Folder that receives one JSON file per scraped code.
RESULTS_DIRECTORY = 'results/'
# exist_ok=True keeps the cell safe to re-run and avoids the race between a
# separate existence check and the directory creation.
os.makedirs(RESULTS_DIRECTORY, exist_ok=True)

In [6]:
from tqdm.notebook import tqdm
import json

REGION_DIRECTORY = 'regions/'

# Add up, over every file in the regions folder, the number of top-level keys
# other than the one named after the file itself.
bob = 0
for region_file in tqdm(os.listdir(REGION_DIRECTORY)):
    region_key = region_file.split('.')[0]
    with open(REGION_DIRECTORY+region_file, 'r', encoding='utf-8') as region_handle:
        region_keys = json.load(region_handle).keys()
        # NOTE(review): the extra -1 presumably discounts a 'timestamp' key -- confirm.
        entry_count = sum(1 for key in region_keys if key != region_key) - 1
        assert entry_count >= 0
        bob += entry_count
bob

  0%|          | 0/1582 [00:00<?, ?it/s]

101498

There are 101,498 election returns (ERs) on the COMELEC website.

In [20]:
REGION_DIRECTORY = 'regions/'

# Collect, as strings, the `vbc` value of every entry in every region file.
# The key named after the file and the 'timestamp' key are not entries.
bob = []
for region_file in tqdm(os.listdir(REGION_DIRECTORY)):
    skipped_keys = (region_file.split('.')[0], 'timestamp')
    with open(REGION_DIRECTORY+region_file, 'r', encoding='utf-8') as region_handle:
        region_codes = []
        for key, entry in json.load(region_handle).items():
            if key not in skipped_keys:
                region_codes.append(str(entry['vbc']))
        # NOTE(review): a length is never negative, so this check cannot fail.
        assert len(region_codes) >= 0
        bob.extend(region_codes)
len(bob)

  0%|          | 0/1582 [00:00<?, ?it/s]

101498

In [21]:
def endpoint(x):
    """Build the results URL for one code.

    The first three characters of the code select the sub-folder, e.g.
    '114073' -> '.../data/results/114/114073.json'.

    Args:
        x: The code, as a string or as any value whose str() form is the
            code (for example an integer `vbc` value).

    Returns:
        The URL string of the JSON results file for that code.
    """
    x = str(x)  # accept raw integer codes as well as pre-stringified ones
    return f"https://2022electionresults.comelec.gov.ph/data/results/{x[:3]}/{x}.json"
# Sanity check: display the URL built for the first collected code.
endpoint(bob[0])

'https://2022electionresults.comelec.gov.ph/data/results/114/114073.json'

In [22]:
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import SoftwareName, OperatingSystem


# you can also import SoftwareEngine, HardwareType, SoftwareType, Popularity from random_user_agent.params
# you can also set number of user agents required by providing `limit` as parameter

# Restrict the generated user agents to Chrome on Windows or Linux.
software_names = [SoftwareName.CHROME.value]
operating_systems = [
    OperatingSystem.WINDOWS.value,
    OperatingSystem.LINUX.value,
]

# `limit` is the number of user agents requested for the rotation pool.
user_agent_rotator = UserAgent(
    software_names=software_names,
    operating_systems=operating_systems,
    limit=100,
)

# Display one randomly drawn user agent as a quick check.
user_agent_rotator.get_random_user_agent()

'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.0.9757 Safari/537.36'

In [23]:
import requests as r
# Fetch one code's results with a random User-Agent and display the URL
# together with the HTTP status.
link = endpoint(bob[140])
req = r.get(
    link,
    headers={"User-Agent": user_agent_rotator.get_random_user_agent()},
)
(link, req.status_code)

('https://2022electionresults.comelec.gov.ph/data/results/114/114213.json',
 200)

Create sample JSON data.

In [24]:
import json

# Save the decoded response as an indented, ASCII-only JSON sample file,
# then display the decoded response.
with open('sample_results_data.json', mode='w', encoding='utf-8') as sample_file:
    sample_file.write(json.dumps(req.json(), ensure_ascii=True, indent=4))
req.json()

{'vbc': 114213,
 'rs': [{'cc': 5587,
   'bo': 46438,
   'v': 1,
   'tot': 503,
   'per': '0.19',
   'ser': 'HPM180PA01046117'},
  {'cc': 5587,
   'bo': 46439,
   'v': 0,
   'tot': 503,
   'per': '0.00',
   'ser': 'HPM180PA01046117'},
  {'cc': 5587,
   'bo': 46440,
   'v': 25,
   'tot': 503,
   'per': '4.97',
   'ser': 'HPM180PA01046117'},
  {'cc': 5587,
   'bo': 46441,
   'v': 1,
   'tot': 503,
   'per': '0.19',
   'ser': 'HPM180PA01046117'},
  {'cc': 5587,
   'bo': 46442,
   'v': 19,
   'tot': 503,
   'per': '3.77',
   'ser': 'HPM180PA01046117'},
  {'cc': 5587,
   'bo': 46443,
   'v': 0,
   'tot': 503,
   'per': '0.00',
   'ser': 'HPM180PA01046117'},
  {'cc': 5587,
   'bo': 46444,
   'v': 219,
   'tot': 503,
   'per': '43.53',
   'ser': 'HPM180PA01046117'},
  {'cc': 5587,
   'bo': 46445,
   'v': 0,
   'tot': 503,
   'per': '0.00',
   'ser': 'HPM180PA01046117'},
  {'cc': 5587,
   'bo': 46446,
   'v': 19,
   'tot': 503,
   'per': '3.77',
   'ser': 'HPM180PA01046117'},
  {'cc': 5587,
   

Let's add a timestamp too!

In [25]:
import time
# Rewrite the sample file with the current epoch time stored under 'timestamp'.
seconds = time.time()
payload = {**req.json(), 'timestamp': seconds}

with open('sample_results_data.json', mode='w', encoding='utf-8') as sample_file:
    sample_file.write(json.dumps(payload, ensure_ascii=True, indent=4))

In [27]:
def scrape_results(code, user_agent_rotator, endpoint, directory, thresh=3):
    """Download the results JSON for one code and save it with a timestamp.

    The URL is `endpoint(code)`. A response whose status is not 200 is retried
    up to `thresh` more times, pausing 0.4 s before each retry. The payload is
    written to `directory + code + '.json'` only when the final response has
    status 200, so an error response is never stored as a result.

    Args:
        code: Code string used for both the URL and the output file name.
        user_agent_rotator: Object whose `get_random_user_agent()` supplies
            the User-Agent header of each request.
        endpoint: Callable mapping `code` to the URL to fetch.
        directory: Output path prefix. It is concatenated as-is, so it must
            end with a path separator.
        thresh: Maximum number of retries after the first request.

    Returns:
        Tuple `(status_code, code)` holding the status of the last response.

    Raises:
        Whatever the first `r.get` call raises, since it runs outside the try
        block. Errors raised by retries, JSON decoding or the file write are
        printed instead of propagated.
    """
    link = endpoint(code)
    req = r.get(link, headers={"User-Agent": user_agent_rotator.get_random_user_agent()})
    try:
        attempt = thresh
        # `> 0` rather than `!= 0`, so a negative thresh cannot loop forever.
        while attempt > 0 and req.status_code != 200:
            print('Retrying '+link)
            time.sleep(0.4)
            req = r.get(link, headers={"User-Agent": user_agent_rotator.get_random_user_agent()})
            attempt -= 1
        # An explicit status check replaces the former `while ... else`: with
        # no `break` in the loop, that `else` ran even after all retries failed.
        if req.status_code == 200:
            seconds = time.time()
            payload = req.json()
            payload['timestamp'] = seconds
            with open(directory+code+'.json', 'w', encoding='utf-8') as f:
                json.dump(payload, f, ensure_ascii=True, indent=4)
        else:
            print(f'Giving up on {link} (status {req.status_code})')
    except Exception as e:
        # Best effort: report the problem and still return the last status.
        print(e)
    return (req.status_code, code)

In [31]:
# Trial run on a single code; the result is saved under RESULTS_DIRECTORY.
scrape_results(bob[93], user_agent_rotator, endpoint, RESULTS_DIRECTORY)

Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114166.json


(200, '114166')

In [35]:
# Scrape every collected code. scrape_results returns (status_code, code);
# codes whose final status is not 200 are kept so they can be retried later
# instead of being silently lost.
failed_codes = []
for code in tqdm(bob):
    status_code = scrape_results(code, user_agent_rotator, endpoint, RESULTS_DIRECTORY)[0]
    if status_code != 200:
        failed_codes.append(code)
len(failed_codes)

  0%|          | 0/101498 [00:00<?, ?it/s]

Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114090.json
Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114108.json
Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114116.json
Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114130.json
Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114133.json
Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114136.json
Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114217.json
Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114229.json
Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114252.json
Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114257.json
Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114326.json
Retrying https://2022electionresults.comelec.gov.ph/data/results/114/114329.json
Retrying https://2022electio

KeyboardInterrupt: 