# MyCareersFuture.SG API Calls
Calling the API directly runs much faster than manually running an automated web driver.

In [1]:
import requests
from urllib.parse import urlencode
from bs4 import BeautifulSoup

In [2]:
# Base endpoint of the job-search API. The trailing '?' is kept so that a
# URL-encoded query string can be appended to it directly.
API_LINK = "https://api1.mycareersfuture.sg/v2/jobs?"

## Variables to change
Please change the *search query* and the *total number of jobs* you would like to query.

In [3]:
# Accumulator for the raw job records returned by the API.
jobs = []
# Page size per request. Limit should not exceed 100. The smaller the number,
# the gentler it is on the server.
LIMIT = 100
# Search query sent with every request.
# NOTE(review): presumably an empty string applies no filter - confirm against the API.
SEARCH_QUERY = ''
# Number of jobs to be queried.
TOTAL_JOBS = 20 * 1178
# Number of pages needed to cover TOTAL_JOBS. Ceiling division is used so that
# a final, partially filled page is still requested; plain floor division
# dropped it (and produced 0 pages whenever TOTAL_JOBS < LIMIT). As a result
# up to LIMIT - 1 more jobs than TOTAL_JOBS may be requested.
N_PAGES = -(-TOTAL_JOBS // LIMIT)

## Running query

In [4]:
# For limited queries: request pages 0 .. N_PAGES - 1 (LIMIT jobs per request)
# and append every returned record to `jobs`.
for page in range(N_PAGES):
    query = {'limit': LIMIT, 'page': page, 'search': SEARCH_QUERY}
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(API_LINK + urlencode(query), timeout=30)
    # Surface HTTP errors (4xx/5xx) as such, instead of as a confusing
    # JSON-decoding error or KeyError on the line below.
    r.raise_for_status()
    jobs.extend(r.json()["results"])

In [None]:
# To query all pages: request consecutive pages, starting at page 0, until a
# page comes back with an empty "results" list. Records are appended to the
# existing `jobs` list, so anything already collected there is kept.
page = 0
while True:
    query = {'limit': LIMIT, 'page': page, 'search': SEARCH_QUERY}
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(API_LINK + urlencode(query), timeout=30)
    # Surface HTTP errors (4xx/5xx) as such rather than as a JSON/KeyError.
    r.raise_for_status()
    # Parse the response body once per page instead of once for the loop
    # test and again for the extend.
    results = r.json()["results"]
    if not results:
        break
    jobs.extend(results)
    page += 1

## Extract the Information Out

In [5]:
def _joined(items, key):
    """Return the `key` value of every dict in `items`, joined with '; '."""
    return '; '.join([item[key] for item in items])


# One list per output column; every list has one entry per record in `jobs`,
# in the same order.
job_id = [posting['uuid'] for posting in jobs]
ext_job_id = [posting['metadata']['jobPostId'] for posting in jobs]
job_title = [posting['title'] for posting in jobs]
# The description is parsed with BeautifulSoup (lxml parser) and only its
# text content is kept.
job_description = [
    BeautifulSoup(posting['description'], 'lxml').text for posting in jobs
]
minimum_years_experience = [posting['minimumYearsExperience'] for posting in jobs]
ssoc_code = [posting['ssocCode'] for posting in jobs]

# Multi-valued fields are flattened into a single '; '-separated string.
categories = [_joined(posting['categories'], 'category') for posting in jobs]
employment_types = [
    _joined(posting['employmentTypes'], 'employmentType') for posting in jobs
]
position_levels = [
    _joined(posting['positionLevels'], 'position') for posting in jobs
]
skills = [_joined(posting['skills'], 'skill') for posting in jobs]

organisation = [posting['postedCompany']['name'] for posting in jobs]
last_updated = [posting['metadata']['updatedAt'] for posting in jobs]

# Salary details live in the nested 'salary' object of each record.
salary_minimum = [posting['salary']['minimum'] for posting in jobs]
salary_maximum = [posting['salary']['maximum'] for posting in jobs]
salary_type = [posting['salary']['type']['salaryType'] for posting in jobs]

api_link = [posting['_links']['self']['href'] for posting in jobs]

## Save as Dataframe and Export as CSV

In [6]:
import pandas as pd

In [7]:
# Column name -> list of values. Insertion order is preserved, so it also
# fixes the column order of the DataFrame built from this mapping.
col = dict(
    job_id=job_id,
    ext_job_id=ext_job_id,
    job_title=job_title,
    job_description=job_description,
    minimum_years_experience=minimum_years_experience,
    ssoc_code=ssoc_code,
    categories=categories,
    employment_types=employment_types,
    position_levels=position_levels,
    skills=skills,
    organisation=organisation,
    salary_minimum=salary_minimum,
    salary_maximum=salary_maximum,
    salary_type=salary_type,
    api_link=api_link,
    last_updated=last_updated,
)

In [8]:
# Build the table from the column mapping. Note that this rebinds `jobs`:
# from here on it is a DataFrame, no longer the list of raw API records.
jobs = pd.DataFrame(data=col)

In [9]:
# Output path of the CSV export - change the filename here.
FILENAME = "mycareersfuturesg_results.csv"
# index=False leaves the DataFrame's row index out of the file.
jobs.to_csv(path_or_buf=FILENAME, index=False)