In [None]:
import asyncio
from bs4 import BeautifulSoup
import aiohttp
import pandas as pd

# First and last job-post ids to request (the driver loop treats END as inclusive).
START = 3_570_001
END = 3_736_000

# Number of ids requested concurrently in one batch; the driver pauses for
# about one second between batches.
REQ_PER_SEC = 40

# Row labels whose values are collected from a job page. Each label is the
# field name followed by ' :', which is the text compared against the page's
# <b> label when parsing.
col_names = [
    f'{field_name} :'
    for field_name in (
        'Place of Assignment',
        'Position Title',
        'Plantilla Item No.',
        'Salary/Job/Pay Grade',
        'Monthly Salary',
        'Eligibility',
        'Education',
        'Training',
        'Work Experience',
        'Posting Date',
        'Closing Date',
    )
]

# Empty result table: one row is added per requested job id, indexed by id.
# Each column starts from an empty list.
job_posts = pd.DataFrame({
    column: []
    for column in (
        'place_of_assignment',
        'position_title',
        'plantilla_item_no',
        'salary_grade',
        'monthly_salary',
        'eligibility',
        'education',
        'training',
        'work_experience',
        'posting_date',
        'closing_date',
        'region',
        'agency',
        'http_status_code',
    )
})

# Field names (page labels without the trailing ' :') in the same order as the
# first eleven columns of ``job_posts``.
_JOB_FIELDS = (
    'Place of Assignment',
    'Position Title',
    'Plantilla Item No.',
    'Salary/Job/Pay Grade',
    'Monthly Salary',
    'Eligibility',
    'Education',
    'Training',
    'Work Experience',
    'Posting Date',
    'Closing Date',
)
_LABEL_SUFFIX = ' :'


async def _download(session, url):
    """Request *url* and return ``(status, html)``; html is None unless status is 200."""
    async with session.get(url) as response:
        # The body must be read while the response is still open.
        html = await response.text() if response.status == 200 else None
        return response.status, html


def _parse_job_row(html):
    """Return the 13 non-status row values (11 fields, region, agency) for a page.

    Every value is None when the page has no ``ModalLongTitle`` heading.
    Individual fields that cannot be found on the page are None.
    """
    soup = BeautifulSoup(html, 'html.parser')
    modal_title = soup.find('h4', {'id': 'ModalLongTitle'})
    if modal_title is None:
        return [None] * (len(_JOB_FIELDS) + 2)

    # The heading text is split on '|': the first part is stored as the
    # agency, the second as the region.
    title_parts = modal_title.text.split('|')
    agency = title_parts[0].strip()
    region = title_parts[1].strip() if len(title_parts) > 1 else None

    fields = {}
    job_table = soup.find('div', {'id': 'job-tbl'})
    rows = job_table.find_all('div', {'class': 'row'}) if job_table is not None else []
    for row in rows:
        cols = row.find_all('div', {'class': 'col'})
        if len(cols) < 2:
            continue
        label_tag = cols[0].find('b')
        value_tag = cols[1].find('div')
        if label_tag is None or value_tag is None:
            continue
        label = label_tag.text
        if label in col_names:
            # Remove the ' :' suffix explicitly instead of slicing by a
            # length derived from the tag object.
            fields[label.rsplit(_LABEL_SUFFIX, 1)[0]] = value_tag.text

    return [fields.get(name) for name in _JOB_FIELDS] + [region, agency]


async def fetch_html(id, session=None):
    """Download one job posting and store it as row ``id`` of ``job_posts``.

    The page ``https://csc.gov.ph/career/jobs/{id}`` is requested. On a 200
    response the HTML is parsed and the recognised fields, region and agency
    are stored; fields missing from the page are stored as None. On any other
    status, or when the page lacks the ``ModalLongTitle`` heading, all data
    columns are None. The HTTP status code is always stored in the last column.

    Args:
        id: Job-post id; used in the URL and as the row label in ``job_posts``.
        session: Optional ``aiohttp.ClientSession`` to reuse across calls.
            When omitted, a session is opened and closed for this request.

    Raises:
        Connection-level errors from aiohttp are not caught here and
        propagate to the caller.
    """
    url = f'https://csc.gov.ph/career/jobs/{id}'
    if session is None:
        async with aiohttp.ClientSession() as own_session:
            status, html = await _download(own_session, url)
    else:
        status, html = await _download(session, url)

    if status == 200:
        values = _parse_job_row(html)
    else:
        values = [None] * (len(_JOB_FIELDS) + 2)

    job_posts.loc[id] = values + [status]
                
for i in range(((END + 1) - START) // REQ_PER_SEC):
    ids = [START + (i * REQ_PER_SEC) + j for j in range(REQ_PER_SEC)]
    tasks = [fetch_html(id) for id in ids]
    await asyncio.gather(*tasks)
    await asyncio.sleep(1)
    
job_posts.to_csv('job-posts.csv')