In [1]:
import pandas as pd 
import requests
import re
from time import sleep

In [2]:
base_url = 'https://www.techinasia.com/api/2.0/job-postings'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'}
params = '?page='

In [3]:
base_req = requests.get(url=base_url, params=params, headers=headers)
base_json = base_req.json()
base_json.keys()

dict_keys(['data', 'current_page', 'from', 'last_page', 'next_page_url', 'per_page', 'prev_page_url', 'to', 'total', 'after', 'before'])

In [4]:
def clean_tags(text):
    '''
    Clean html tags in content.
    ---------------------------
    Parameter:
    text(str): input text containing html.
    ---------------------------
    Return:
    clean_text
    '''
    pattern = re.compile('<.*?>')
    clean_text = re.sub(pattern, '', text).replace('\n', '')
    return clean_text

def get_jobs(url, params, headers):
    '''
    Get jobs data from API
    ---------------------------
    Parameters:
    url(str): input url to get data.
    params(str): page parameter for pagination.
    headers(*args): headers to avoid 418 teapot.
    ---------------------------
    Return:
    pandas DataFrame
    '''
    all_data = []

    for page in range(1, base_json['last_page'] + 1):
        req = requests.get(url=base_url + params + str(page), headers=headers)
        json = req.json()
        print(f"Page: {page}/{base_json['last_page']}", end='\r')

        for idx, job in enumerate(json['data']):
            data = {
                'id': job['id'],
                'title': job['title'],
                'is_remote': job['is_remote'],
                'salary_min': job['salary_min'],
                'salary_max': job['salary_max'],
                'has_equity': job['has_equity'],
                'vacancy_count': job['vacancy_count'],
                'experience_min': job['experience_min'],
                'experience_max': job['experience_max'],
                'description': clean_tags(job['description']),
                'city': json['data'][idx]['city']['name'],
                'country': json['data'][idx]['city']['country']['name'],
                'company_name': json['data'][idx]['company']['name'],
                'company_date_founded': json['data'][idx]['company']['date_founded'],
                'company_employee_min_count': json['data'][idx]['company']['employee_min_count'],
                'company_employee_max_count': json['data'][idx]['company']['employee_max_count'],
                'currency': json['data'][idx]['currency']['currency_code'],
                'skills': [skill['name'] for skill in json['data'][idx]['job_skills']],
                'job_type': json['data'][idx]['job_type']['name'],
                'position': json['data'][idx]['position']['name']
            }
            all_data.append(data)
        sleep(3)
    return pd.DataFrame(all_data)

In [5]:
df = get_jobs(url=base_url, params=params, headers=headers)
df.tail()

Page: 136/136

Unnamed: 0,id,title,is_remote,salary_min,salary_max,has_equity,vacancy_count,experience_min,experience_max,description,city,country,company_name,company_date_founded,company_employee_min_count,company_employee_max_count,currency,skills,job_type,position
3383,1c38d214-f6cd-4641-ad5a-b5037af75b48,On-Site Sales Advisor,False,4000000,5000000,False,1,1,4,About RukitaRukita is the nation’s leading com...,Jakarta,Indonesia,Rukita,2019-04-01,201,500,IDR,"[Cold Calling, Sales, Communication Skills, Sa...",Full-time,Sales & Business Development
3384,dc92d1e8-2d64-4c33-9f1b-2e0a2a408fa0,Sales Support Associate,False,4000000,4500000,False,1,0,1,About RukitaRukita is the nation’s leading com...,Jakarta,Indonesia,Rukita,2019-04-01,201,500,IDR,"[Business Development & Partnerships, Sales St...",Full-time,Sales & Business Development
3385,71fed12b-af8e-461b-9ca4-514f2dfd30ca,Telemarketer,False,4000000,4500000,False,1,1,4,About RukitaRukita is the nation’s leading com...,Jakarta,Indonesia,Rukita,2019-04-01,201,500,IDR,"[Sales, Communication Skills, Communications, ...",Full-time,Sales & Business Development
3386,70ce5d25-4da2-4357-872c-7abd80bd6bfb,Referral Partner Manager,False,5000000,9000000,False,1,1,4,Job Description:Working together with the Part...,Jakarta,Indonesia,Mekari (PT. Mid Solusi Nusantara),2014-10-01,501,1000,IDR,"[Business Development & Partnerships, Communit...",Full-time,Marketing & PR
3387,c9e67e8d-2093-48b9-bbf9-06b4193054f2,Junior Product Manager,True,1500,2000,True,1,1,4,This job is for Globelise; one of ventures in ...,Singapore,Singapore,Jesselton Capital,,2,10,SGD,"[Project Management, Product Management, Busin...",Full-time,Project & Product Management


In [6]:
df.to_csv('tia_jobs.csv', index=False)