# Job Portal - JOBLUM

## Imports used (to be described)

* `os` - a module that provides functions to interact with the operating system.
* `pandas` - is a tool that helps analyze data.
* `numpy` - Library that contains multiple functions that help ease the work with arrays, matrices, and alike to better reassemble data.
* `json` - enables import and export from and to JSON files
* `re` - Short for Regular Expressions, help recognize patterns on strings of data and is used to orderly reassemble them.
* `gensim` - Library that efficiently handles large, unmanaged text collections of data.
* `nltk` - Short for Natural Language Toolkit. It helps the program apply statistical natural language processing to human language data.
* `requests` - Requests allows the program to send HTTP requests easily.
* `Seaborn` - A library in python that is used to better visualize data through drawing informative graphs.
* `math` - Imported library that allows quick computations of mathematical tasks
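* `calplot` - Library that draws calendar heatmaps from time-series data.
* `matplotlib.pyplot` - Plotting interface of Matplotlib, used to create and customize figures and charts.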
* `gensim.utils` `simple_preprocess` - used to preprocess text by lower-casing it and splitting it into a list of word tokens (tokenizing)
* `gensim.parsing.preprocessing` `STOPWORDS` - a set of stop words: common words that carry little meaning and are often removed in pre-processing
* `gensim` `corpora` - used to work with corpus and words
* `gensim` `models` - used for topic modelling and model training
* `nltk.stem` `WordNetLemmatizer` - used to reduce words to their base (dictionary) form so that inflected forms of the same word are grouped together
* `bs4` `BeautifulSoup` - library used to web scrape HTML from websites
* `datetime` `datetime` - An imported module in python to create an object that properly resembles date and time. Used for converting string of time into datetime format to month, day, and year.
* `datetime` `timedelta` - used for finding delta of time ago with time scraped if date has minutes, hours, days, or weeks ago
* `dateutil.relativedelta` `relativedelta` - used for finding delta of time ago with time scraped if date has months and years

In [1]:
import os
import pandas as pd
import numpy as np
import json
import re
import gensim
import nltk
import requests
import datetime
import seaborn as sns
import calplot
import matplotlib.pyplot as plt
import math

from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from gensim import corpora, models
from nltk.stem import WordNetLemmatizer
from bs4 import BeautifulSoup
from datetime import datetime
from datetime import timedelta
from dateutil.relativedelta import relativedelta

# Naive local timestamp captured once, when this cell runs.
# NOTE(review): presumably the reference point for relative posting dates;
# it is not used by any of the cells visible here - confirm.
today = datetime.today()



Joblum.com is an online career portal that features relevant and up-to-the-minute job listings. 

### Check number of jobs in soup

In [None]:
#Get number of jobs per category (JOBLUM)
def getNumJobs(soup):
    """Return the total number of job postings reported on a listing page.

    The count is read from the second ``<p class="no-of-jobs">`` element,
    whose markup is searched for the phrase ``" of <N> jobs"``.

    Args:
        soup: Parsed listing page exposing ``find_all`` (e.g. BeautifulSoup).

    Returns:
        The job count as an ``int``, or ``0`` when the counter element or
        the ``of <N> jobs`` phrase is missing.

    Raises:
        ValueError: If the captured text is not an integer (after removing
            thousands separators).
    """
    counters = soup.find_all('p',{'class':'no-of-jobs'})
    # The count lives in the second matching element; a page with fewer
    # matches has no usable counter, so report zero jobs instead of crashing.
    if len(counters) < 2:
        return 0
    matches = re.findall(r'(?s)(?<= of ).*?(?= jobs)',str(counters[1]))
    if not matches:
        return 0
    # Tolerate thousands separators such as "1,234".
    return int(matches[0].replace(',', ''))

### Souptest

Getting the HTML of the URL of the Information Communications Technology job openings, it can be observed that it contains the list of jobs that we are interested in.

In [None]:
#Loading the page and parsing HTML data (JOBLUM)
def getSoup(JOBLUM_JOB_URL, timeout=30):
    """Download a page and return it parsed with BeautifulSoup's 'html.parser'.

    Args:
        JOBLUM_JOB_URL: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``BeautifulSoup`` object built from the response body.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive
    # server, which would stall the whole scrape on a single page.
    page = requests.get(JOBLUM_JOB_URL, timeout=timeout)
    return BeautifulSoup(page.content, 'html.parser')

### Check number of pages in soup

In [None]:
#Find number of pages (JOBLUM)
def getNumPages(NUM_JOBS):
    """Return the number of listing pages needed at 10 jobs per page, rounded up."""
    jobs_per_page = 10
    page_count = math.ceil(NUM_JOBS / jobs_per_page)
    return page_count

### Get links for each page

In [None]:
#Get the links of each page (JOBLUM)
def getLinks(Num_Pages, JOBLUM_URLs):
    """Build the URL of every listing page of a category.

    Args:
        Num_Pages: Number of pages to generate links for.
        JOBLUM_URLs: URL prefix to which the 1-based page number is appended.

    Returns:
        A list of ``Num_Pages`` URLs for pages 1..Num_Pages (empty when
        ``Num_Pages`` is 0).
    """
    # Use the argument itself rather than a same-named notebook global, so
    # the result depends only on what the caller passed in.
    return [JOBLUM_URLs + str(page_number) for page_number in range(1, Num_Pages + 1)]

### Get url for each job in each page

In [None]:
#Getting the URLs of each job posting (JOBLUM)
def getJobURL(JOBLUM_SOUP):
    """Return the href values found in the mobile company-logo blocks of a listing page."""
    logo_blocks = JOBLUM_SOUP.find_all('div',{'class':'mobile-company-logo hidden-md hidden-lg'})
    # The search runs over the string form of the whole result list and
    # captures everything between ' href="' and the next '" '.
    href_pattern = re.compile(r'(?s)(?<= href=").*?(?=" )')
    return href_pattern.findall(str(logo_blocks))

### Get the job description of the job

In [None]:
#Getting Job Description (JOBLUM)
def getJobDescription(JOB_SOUP):
    """Extract the job description of a posting as one whitespace-normalised string.

    The text is collected from the children (and their direct children) of
    the first element inside the second child of
    ``<span itemprop="description">``.

    Args:
        JOB_SOUP: Parsed job-posting page exposing ``find`` (e.g. BeautifulSoup).

    Returns:
        The description with non-breaking spaces replaced and runs of spaces
        collapsed, or ``None`` when the expected structure is not present.
    """
    try:
        JOB_INFO = JOB_SOUP.find('span',{'itemprop':'description'})
        container = JOB_INFO.contents[1].contents[0]
        JOB_INFO_ARRAY = []
        for child in container.contents:
            # Text nodes are str subclasses, so a plain isinstance check tells
            # them apart from tags without needing the bs4 class names.
            if isinstance(child, str):
                JOB_INFO_ARRAY.append(child)
                continue
            for grandchild in child.contents:
                if isinstance(grandchild, str):
                    JOB_INFO_ARRAY.append(grandchild)
                else:
                    # Nested tag: take its rendered text.
                    JOB_INFO_ARRAY.append(grandchild.text)
        JOB_DESCRIPTION = ' '.join(JOB_INFO_ARRAY)
    except (AttributeError, IndexError, TypeError):
        # Missing description span or unexpected markup: best effort, no text.
        return None
    JOB_DESCRIPTION = JOB_DESCRIPTION.replace("\xa0"," ")
    JOB_DESCRIPTION = re.sub(' +', ' ', JOB_DESCRIPTION)
    return JOB_DESCRIPTION

### Get the job salary of the job

In [None]:
#Getting Job Salary (JOBLUM)
def getJobSalary(JOB_SOUP):
    """Extract the advertised salary of a posting.

    Comma-grouped amounts are read from the ``job-subinfo`` paragraph, falling
    back to the description span. They are only trusted when the page mentions
    "Salary" (sub-info or description), "PHP" or "pesos" (description).

    Returns:
        A ``(salary, minimum, maximum)`` tuple of strings. One amount fills all
        three fields. Two amounts form a ``min-max`` range when the second is
        larger than the first and above 1000. Otherwise the result is
        ``("", "Not Specified", "Not Specified")``.
    """
    description_html = str(JOB_SOUP.find('span',{'itemprop':'description'}))
    subinfo_html = str(JOB_SOUP.find('p',{'class':'job-subinfo'}))
    unspecified = ("", "Not Specified", "Not Specified")

    amount_pattern = '[0-9]+,[0-9]+'
    amounts = (re.findall(amount_pattern, subinfo_html)
               or re.findall(amount_pattern, description_html))

    salary_mentioned = (re.findall('(?i)Salary', subinfo_html)
                        or re.findall('(?i)Salary', description_html)
                        or re.findall('(?i)PHP', description_html)
                        or re.findall('(?i)pesos', description_html))
    if not salary_mentioned:
        return unspecified

    if len(amounts) == 1:
        only_amount = amounts[0]
        return only_amount, only_amount, only_amount

    if len(amounts) == 2:
        low, high = amounts
        low_value = int(low.replace(",",""))
        high_value = int(high.replace(",",""))
        if high_value > low_value and high_value > 1000:
            return '-'.join(amounts), low, high

    return unspecified

### Get the job type of the job

In [None]:
#Getting Job Type (JOBLUM)
def getJobType(JOB_SOUP):
    """Classify a posting as "Full Time", "Part Time" or "Not Specified".

    The main job column is searched case-insensitively for the hyphenated,
    joined and spaced spellings. Full-time spellings take precedence over
    part-time ones.
    """
    main_column_html = str(JOB_SOUP.find('div',{'class':'col-sm-8 job-main-col'}))
    spellings_by_label = (
        ("Full Time", ('(?i)Full-time', '(?i)Fulltime', '(?i)Full time')),
        ("Part Time", ('(?i)Part-time', '(?i)Parttime', '(?i)Part time')),
    )
    for label, spellings in spellings_by_label:
        for spelling in spellings:
            if re.search(spelling, main_column_html) is not None:
                return label
    return "Not Specified"

### Web Scraping function based on selected category

In [None]:
#Scraping data of each job posting (JOBLUM)
def scrapeJob(URL, JOB_STATUS_LIST, JOB_SALARY_LIST, JOB_SALARY_MIN_LIST, 
              JOB_SALARY_MAX_LIST, JOB_DESCRIPTION_LIST):
    """Fetch every posting in ``URL`` and append its details to the given lists.

    Args:
        URL: Site-relative paths of the postings; each is appended to
            'https://ph.joblum.com' before fetching.
        JOB_STATUS_LIST, JOB_SALARY_LIST, JOB_SALARY_MIN_LIST,
        JOB_SALARY_MAX_LIST, JOB_DESCRIPTION_LIST: Lists extended in place
            with one entry per posting.

    Returns:
        None. Results are delivered through the mutated lists.
    """
    for relative_path in URL:
        posting_soup = getSoup('https://ph.joblum.com' + relative_path)
        description = getJobDescription(posting_soup)
        salary, salary_min, salary_max = getJobSalary(posting_soup)
        job_type = getJobType(posting_soup)
        # Append only after every field was extracted, so the lists stay aligned.
        JOB_STATUS_LIST.append(job_type)
        JOB_SALARY_LIST.append(salary)
        JOB_SALARY_MIN_LIST.append(salary_min)
        JOB_SALARY_MAX_LIST.append(salary_max)
        JOB_DESCRIPTION_LIST.append(description)

### For Categorizing

<a href="https://www.onetonline.org/find/stem?t=0">
    onetonline.org
</a> and <a href="https://www.istemnetwork.org/parents-students/stem-career-opportunities/">
    careerwise.minnstate.edu
</a>

- Basis for careers from AGRI were classified as Agriculture

<a href="https://www.bestcolleges.com/careers/stem/">
    bestcolleges.com
</a> 

- Basis for careers from IT_HARDWARE, IT_SYS, IT_SOFTWARE were classified as IT
- Basis for careers from ARCHI, AVI, CHEMENG, CIVILENG, CONSTRUCTION, ELEC, ELECENG, ELECTRO, ELECTROENG, ENVIENG, INDUSENG, MAINTENANCE, MECH, MECHENG, NURSE, OIL, OILENG, ENG, QUALITY, QUANTITY were classified as Engineering
- Basis for careers from STAT were classified as Mathematics
- Basis for careers from BIOMED, BIOTECH, DIAGNOSIS, DOCTOR, PHARMA, and PRAC were classified as Medicine

### CATEGORY - Actuarial/Statistic First Half

In [None]:
#Scraping data of Actuarial/Statistics (JOBLUM) - FIRST HALF

# Independent accumulators, one per output column, for the first half of the pages.
STAT_TITLE_FIRST, STAT_COMPANY_FIRST, STAT_DATE_FIRST = [], [], []
STAT_LOCATION_FIRST, STAT_STATUS_FIRST, STAT_SALARY_FIRST = [], [], []
STAT_SALARY_MIN_FIRST, STAT_SALARY_MAX_FIRST, STAT_DESCRIPTION_FIRST = [], [], []

# The category URL with an empty page number is fetched to read the job count,
# from which the page count and the per-page links are derived.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-actuarial-statistics?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES/2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2',{'class':'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'location location-desktop'})
    # Listing-level fields are matched by position to the title elements.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        STAT_TITLE_FIRST.append(title_tag.text.strip())
        STAT_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        STAT_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        STAT_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # Status, salary and description are read from each posting's own page.
    scrapeJob(URL, STAT_STATUS_FIRST, STAT_SALARY_FIRST, STAT_SALARY_MIN_FIRST,
              STAT_SALARY_MAX_FIRST, STAT_DESCRIPTION_FIRST)

### CATEGORY - Actuarial/Statistic Second Half

In [None]:
#Scraping data of Actuarial/Statistics (JOBLUM) - SECOND HALF

# Independent accumulators, one per output column, for the remaining pages.
STAT_TITLE_SECOND, STAT_COMPANY_SECOND, STAT_DATE_SECOND = [], [], []
STAT_LOCATION_SECOND, STAT_STATUS_SECOND, STAT_SALARY_SECOND = [], [], []
STAT_SALARY_MIN_SECOND, STAT_SALARY_MAX_SECOND, STAT_DESCRIPTION_SECOND = [], [], []

# Continue from the page index where the first half stopped.
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2',{'class':'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'location location-desktop'})
    # Listing-level fields are matched by position to the title elements.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        STAT_TITLE_SECOND.append(title_tag.text.strip())
        STAT_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        STAT_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        STAT_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # Status, salary and description are read from each posting's own page.
    scrapeJob(URL, STAT_STATUS_SECOND, STAT_SALARY_SECOND, STAT_SALARY_MIN_SECOND,
              STAT_SALARY_MAX_SECOND, STAT_DESCRIPTION_SECOND)

### CATEGORY - Joined Actuarial/Statistic

In [None]:
#Joining Scrape data of Actuarial/Statistics (JOBLUM) 

# Each column is the first-half values followed by the second-half values.
STAT_TITLE_LIST = np.concatenate([STAT_TITLE_FIRST, STAT_TITLE_SECOND])
STAT_COMPANY_LIST = np.concatenate([STAT_COMPANY_FIRST, STAT_COMPANY_SECOND])
STAT_DATE_LIST = np.concatenate([STAT_DATE_FIRST, STAT_DATE_SECOND])
STAT_LOCATION_LIST = np.concatenate([STAT_LOCATION_FIRST, STAT_LOCATION_SECOND])
STAT_STATUS_LIST = np.concatenate([STAT_STATUS_FIRST, STAT_STATUS_SECOND])
STAT_SALARY_LIST = np.concatenate([STAT_SALARY_FIRST, STAT_SALARY_SECOND])
STAT_SALARY_MIN_LIST = np.concatenate([STAT_SALARY_MIN_FIRST, STAT_SALARY_MIN_SECOND])
STAT_SALARY_MAX_LIST = np.concatenate([STAT_SALARY_MAX_FIRST, STAT_SALARY_MAX_SECOND])
STAT_DESCRIPTION_LIST = np.concatenate([STAT_DESCRIPTION_FIRST, STAT_DESCRIPTION_SECOND])

### CATEGORY - Actuarial/Statistic Dataframe

In [None]:
#Creating Data Frame for Actuarial/Statistics (JOBLUM) 
# Scalar entries are repeated on every row; array entries supply one value per job.
STAT = {
    'Website': "Joblum",
    'Job Title': STAT_TITLE_LIST,
    'Category': "Actuarial/Statistics",
    'Company': STAT_COMPANY_LIST,
    'Date Posted': STAT_DATE_LIST,
    'Location': STAT_LOCATION_LIST,
    'Status': STAT_STATUS_LIST,
    'Salary': STAT_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': STAT_DESCRIPTION_LIST,
    'Min Salary': STAT_SALARY_MIN_LIST,
    'Max Salary': STAT_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Mathematics",
}
STAT_df = pd.DataFrame(STAT)
STAT_df

### Parse data into a csv

In [None]:
# os.path.join keeps the path portable: a hard-coded backslash is an invalid
# "\J" escape sequence and is only a directory separator on Windows.
STAT_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-STAT.csv'), index = False)

### CATEGORY - Agriculture

In [None]:
#Scraping data of Agriculture (JOBLUM) - FIRST HALF

# Independent accumulators, one per output column, for the first half of the pages.
AGRI_TITLE_FIRST, AGRI_COMPANY_FIRST, AGRI_DATE_FIRST = [], [], []
AGRI_LOCATION_FIRST, AGRI_STATUS_FIRST, AGRI_SALARY_FIRST = [], [], []
AGRI_SALARY_MIN_FIRST, AGRI_SALARY_MAX_FIRST, AGRI_DESCRIPTION_FIRST = [], [], []

# The category URL with an empty page number is fetched to read the job count,
# from which the page count and the per-page links are derived.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-agriculture?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES/2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2',{'class':'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'location location-desktop'})
    # Listing-level fields are matched by position to the title elements.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        AGRI_TITLE_FIRST.append(title_tag.text.strip())
        AGRI_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        AGRI_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        AGRI_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # Status, salary and description are read from each posting's own page.
    scrapeJob(URL, AGRI_STATUS_FIRST, AGRI_SALARY_FIRST, AGRI_SALARY_MIN_FIRST,
              AGRI_SALARY_MAX_FIRST, AGRI_DESCRIPTION_FIRST)

In [None]:
#Scraping data of Agriculture (JOBLUM) - SECOND HALF

# Independent accumulators, one per output column, for the remaining pages.
AGRI_TITLE_SECOND, AGRI_COMPANY_SECOND, AGRI_DATE_SECOND = [], [], []
AGRI_LOCATION_SECOND, AGRI_STATUS_SECOND, AGRI_SALARY_SECOND = [], [], []
AGRI_SALARY_MIN_SECOND, AGRI_SALARY_MAX_SECOND, AGRI_DESCRIPTION_SECOND = [], [], []

# Continue from the page index where the first half stopped.
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2',{'class':'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'location location-desktop'})
    # Listing-level fields are matched by position to the title elements.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        AGRI_TITLE_SECOND.append(title_tag.text.strip())
        AGRI_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        AGRI_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        AGRI_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # Status, salary and description are read from each posting's own page.
    scrapeJob(URL, AGRI_STATUS_SECOND, AGRI_SALARY_SECOND, AGRI_SALARY_MIN_SECOND,
              AGRI_SALARY_MAX_SECOND, AGRI_DESCRIPTION_SECOND)

In [None]:
#Joining Scrape data of Agriculture (JOBLUM) 

# Each column is the first-half values followed by the second-half values.
AGRI_TITLE_LIST = np.concatenate([AGRI_TITLE_FIRST, AGRI_TITLE_SECOND])
AGRI_COMPANY_LIST = np.concatenate([AGRI_COMPANY_FIRST, AGRI_COMPANY_SECOND])
AGRI_DATE_LIST = np.concatenate([AGRI_DATE_FIRST, AGRI_DATE_SECOND])
AGRI_LOCATION_LIST = np.concatenate([AGRI_LOCATION_FIRST, AGRI_LOCATION_SECOND])
AGRI_STATUS_LIST = np.concatenate([AGRI_STATUS_FIRST, AGRI_STATUS_SECOND])
AGRI_SALARY_LIST = np.concatenate([AGRI_SALARY_FIRST, AGRI_SALARY_SECOND])
AGRI_SALARY_MIN_LIST = np.concatenate([AGRI_SALARY_MIN_FIRST, AGRI_SALARY_MIN_SECOND])
AGRI_SALARY_MAX_LIST = np.concatenate([AGRI_SALARY_MAX_FIRST, AGRI_SALARY_MAX_SECOND])
AGRI_DESCRIPTION_LIST = np.concatenate([AGRI_DESCRIPTION_FIRST, AGRI_DESCRIPTION_SECOND])

In [None]:
#Creating Data Frame for Agriculture (JOBLUM) 
# Scalar entries are repeated on every row; array entries supply one value per job.
AGRI = {
    'Website': "Joblum",
    'Job Title': AGRI_TITLE_LIST,
    'Category': "Agriculture",
    'Company': AGRI_COMPANY_LIST,
    'Date Posted': AGRI_DATE_LIST,
    'Location': AGRI_LOCATION_LIST,
    'Status': AGRI_STATUS_LIST,
    'Salary': AGRI_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': AGRI_DESCRIPTION_LIST,
    'Min Salary': AGRI_SALARY_MIN_LIST,
    'Max Salary': AGRI_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Agriculture",
}
AGRI_df = pd.DataFrame(AGRI)
AGRI_df

In [None]:
# os.path.join keeps the path portable: a hard-coded backslash is an invalid
# "\J" escape sequence and is only a directory separator on Windows.
AGRI_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-AGRI.csv'), index = False)

### CATEGORY - Architect/Interior Design

In [None]:
#Scraping data of Architect/Interior Design (JOBLUM) - FIRST HALF

# Independent accumulators, one per output column, for the first half of the pages.
ARCHI_TITLE_FIRST, ARCHI_COMPANY_FIRST, ARCHI_DATE_FIRST = [], [], []
ARCHI_LOCATION_FIRST, ARCHI_STATUS_FIRST, ARCHI_SALARY_FIRST = [], [], []
ARCHI_SALARY_MIN_FIRST, ARCHI_SALARY_MAX_FIRST, ARCHI_DESCRIPTION_FIRST = [], [], []

# The category URL with an empty page number is fetched to read the job count,
# from which the page count and the per-page links are derived.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-architect-interior-design?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES/2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2',{'class':'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'location location-desktop'})
    # Listing-level fields are matched by position to the title elements.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ARCHI_TITLE_FIRST.append(title_tag.text.strip())
        ARCHI_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        ARCHI_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        ARCHI_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # Status, salary and description are read from each posting's own page.
    scrapeJob(URL, ARCHI_STATUS_FIRST, ARCHI_SALARY_FIRST, ARCHI_SALARY_MIN_FIRST,
              ARCHI_SALARY_MAX_FIRST, ARCHI_DESCRIPTION_FIRST)

In [None]:
#Scraping data of Architect/Interior Design (JOBLUM) - SECOND HALF

# Independent accumulators, one per output column, for the remaining pages.
ARCHI_TITLE_SECOND, ARCHI_COMPANY_SECOND, ARCHI_DATE_SECOND = [], [], []
ARCHI_LOCATION_SECOND, ARCHI_STATUS_SECOND, ARCHI_SALARY_SECOND = [], [], []
ARCHI_SALARY_MIN_SECOND, ARCHI_SALARY_MAX_SECOND, ARCHI_DESCRIPTION_SECOND = [], [], []

# Continue from the page index where the first half stopped.
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2',{'class':'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'location location-desktop'})
    # Listing-level fields are matched by position to the title elements.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ARCHI_TITLE_SECOND.append(title_tag.text.strip())
        ARCHI_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        ARCHI_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        ARCHI_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # Status, salary and description are read from each posting's own page.
    scrapeJob(URL, ARCHI_STATUS_SECOND, ARCHI_SALARY_SECOND, ARCHI_SALARY_MIN_SECOND,
              ARCHI_SALARY_MAX_SECOND, ARCHI_DESCRIPTION_SECOND)

In [None]:
#Joining Scrape data of Architect/Interior Design (JOBLUM) 

# Each column is the first-half values followed by the second-half values.
ARCHI_TITLE_LIST = np.concatenate([ARCHI_TITLE_FIRST, ARCHI_TITLE_SECOND])
ARCHI_COMPANY_LIST = np.concatenate([ARCHI_COMPANY_FIRST, ARCHI_COMPANY_SECOND])
ARCHI_DATE_LIST = np.concatenate([ARCHI_DATE_FIRST, ARCHI_DATE_SECOND])
ARCHI_LOCATION_LIST = np.concatenate([ARCHI_LOCATION_FIRST, ARCHI_LOCATION_SECOND])
ARCHI_STATUS_LIST = np.concatenate([ARCHI_STATUS_FIRST, ARCHI_STATUS_SECOND])
ARCHI_SALARY_LIST = np.concatenate([ARCHI_SALARY_FIRST, ARCHI_SALARY_SECOND])
ARCHI_SALARY_MIN_LIST = np.concatenate([ARCHI_SALARY_MIN_FIRST, ARCHI_SALARY_MIN_SECOND])
ARCHI_SALARY_MAX_LIST = np.concatenate([ARCHI_SALARY_MAX_FIRST, ARCHI_SALARY_MAX_SECOND])
ARCHI_DESCRIPTION_LIST = np.concatenate([ARCHI_DESCRIPTION_FIRST, ARCHI_DESCRIPTION_SECOND])

In [None]:
#Creating Data Frame for Architect/Interior Design (JOBLUM) 
# Scalar entries are repeated on every row; array entries supply one value per job.
ARCHI = {
    'Website': "Joblum",
    'Job Title': ARCHI_TITLE_LIST,
    'Category': "Architect/Interior Design",
    'Company': ARCHI_COMPANY_LIST,
    'Date Posted': ARCHI_DATE_LIST,
    'Location': ARCHI_LOCATION_LIST,
    'Status': ARCHI_STATUS_LIST,
    'Salary': ARCHI_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': ARCHI_DESCRIPTION_LIST,
    'Min Salary': ARCHI_SALARY_MIN_LIST,
    'Max Salary': ARCHI_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Engineering",
}
ARCHI_df = pd.DataFrame(ARCHI)
ARCHI_df

In [None]:
# os.path.join keeps the path portable: a hard-coded backslash is an invalid
# "\J" escape sequence and is only a directory separator on Windows.
ARCHI_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-ARCHI.csv'), index = False)

### CATEGORY - Aviation

In [None]:
#Scraping data of Aviation (JOBLUM) - FIRST HALF

# Independent accumulators, one per output column, for the first half of the pages.
AVI_TITLE_FIRST, AVI_COMPANY_FIRST, AVI_DATE_FIRST = [], [], []
AVI_LOCATION_FIRST, AVI_STATUS_FIRST, AVI_SALARY_FIRST = [], [], []
AVI_SALARY_MIN_FIRST, AVI_SALARY_MAX_FIRST, AVI_DESCRIPTION_FIRST = [], [], []

# The category URL with an empty page number is fetched to read the job count,
# from which the page count and the per-page links are derived.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-aviation?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES/2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2',{'class':'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'location location-desktop'})
    # Listing-level fields are matched by position to the title elements.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        AVI_TITLE_FIRST.append(title_tag.text.strip())
        AVI_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        AVI_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        AVI_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # Status, salary and description are read from each posting's own page.
    scrapeJob(URL, AVI_STATUS_FIRST, AVI_SALARY_FIRST, AVI_SALARY_MIN_FIRST,
              AVI_SALARY_MAX_FIRST, AVI_DESCRIPTION_FIRST)

In [None]:
#Scraping data of Aviation (JOBLUM) - SECOND HALF

# Independent accumulators, one per output column, for the remaining pages.
AVI_TITLE_SECOND, AVI_COMPANY_SECOND, AVI_DATE_SECOND = [], [], []
AVI_LOCATION_SECOND, AVI_STATUS_SECOND, AVI_SALARY_SECOND = [], [], []
AVI_SALARY_MIN_SECOND, AVI_SALARY_MAX_SECOND, AVI_DESCRIPTION_SECOND = [], [], []

# Continue from the page index where the first half stopped.
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2',{'class':'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'location location-desktop'})
    # Listing-level fields are matched by position to the title elements.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        AVI_TITLE_SECOND.append(title_tag.text.strip())
        AVI_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        AVI_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        AVI_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # Status, salary and description are read from each posting's own page.
    scrapeJob(URL, AVI_STATUS_SECOND, AVI_SALARY_SECOND, AVI_SALARY_MIN_SECOND,
              AVI_SALARY_MAX_SECOND, AVI_DESCRIPTION_SECOND)

In [None]:
#Joining Scrape data of Aviation (JOBLUM) 

# Each column is the first-half values followed by the second-half values.
AVI_TITLE_LIST = np.concatenate([AVI_TITLE_FIRST, AVI_TITLE_SECOND])
AVI_COMPANY_LIST = np.concatenate([AVI_COMPANY_FIRST, AVI_COMPANY_SECOND])
AVI_DATE_LIST = np.concatenate([AVI_DATE_FIRST, AVI_DATE_SECOND])
AVI_LOCATION_LIST = np.concatenate([AVI_LOCATION_FIRST, AVI_LOCATION_SECOND])
AVI_STATUS_LIST = np.concatenate([AVI_STATUS_FIRST, AVI_STATUS_SECOND])
AVI_SALARY_LIST = np.concatenate([AVI_SALARY_FIRST, AVI_SALARY_SECOND])
AVI_SALARY_MIN_LIST = np.concatenate([AVI_SALARY_MIN_FIRST, AVI_SALARY_MIN_SECOND])
AVI_SALARY_MAX_LIST = np.concatenate([AVI_SALARY_MAX_FIRST, AVI_SALARY_MAX_SECOND])
AVI_DESCRIPTION_LIST = np.concatenate([AVI_DESCRIPTION_FIRST, AVI_DESCRIPTION_SECOND])

In [None]:
#Creating Data Frame for Aviation (JOBLUM) 
# Scalar entries are repeated on every row; array entries supply one value per job.
AVI = {
    'Website': "Joblum",
    'Job Title': AVI_TITLE_LIST,
    'Category': "Aviation",
    'Company': AVI_COMPANY_LIST,
    'Date Posted': AVI_DATE_LIST,
    'Location': AVI_LOCATION_LIST,
    'Status': AVI_STATUS_LIST,
    'Salary': AVI_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': AVI_DESCRIPTION_LIST,
    'Min Salary': AVI_SALARY_MIN_LIST,
    'Max Salary': AVI_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Engineering",
}
AVI_df = pd.DataFrame(AVI)
AVI_df

In [None]:
# os.path.join keeps the path portable: a hard-coded backslash is an invalid
# "\J" escape sequence and is only a directory separator on Windows.
AVI_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-AVI.csv'), index = False)

### CATEGORY - Biomedical

In [None]:
#Scraping data of Biomedical (JOBLUM) - FIRST HALF

# Independent accumulators, one per output column, for the first half of the pages.
BIOMED_TITLE_FIRST, BIOMED_COMPANY_FIRST, BIOMED_DATE_FIRST = [], [], []
BIOMED_LOCATION_FIRST, BIOMED_STATUS_FIRST, BIOMED_SALARY_FIRST = [], [], []
BIOMED_SALARY_MIN_FIRST, BIOMED_SALARY_MAX_FIRST, BIOMED_DESCRIPTION_FIRST = [], [], []

# The category URL with an empty page number is fetched to read the job count,
# from which the page count and the per-page links are derived.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-biomedical?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES/2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2',{'class':'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'location location-desktop'})
    # Listing-level fields are matched by position to the title elements.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        BIOMED_TITLE_FIRST.append(title_tag.text.strip())
        BIOMED_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        BIOMED_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        BIOMED_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # Status, salary and description are read from each posting's own page.
    scrapeJob(URL, BIOMED_STATUS_FIRST, BIOMED_SALARY_FIRST, BIOMED_SALARY_MIN_FIRST,
              BIOMED_SALARY_MAX_FIRST, BIOMED_DESCRIPTION_FIRST)

In [None]:
#Scraping data of Biomedical (JOBLUM) - SECOND HALF

# Independent accumulators, one per output column, for the remaining pages.
BIOMED_TITLE_SECOND, BIOMED_COMPANY_SECOND, BIOMED_DATE_SECOND = [], [], []
BIOMED_LOCATION_SECOND, BIOMED_STATUS_SECOND, BIOMED_SALARY_SECOND = [], [], []
BIOMED_SALARY_MIN_SECOND, BIOMED_SALARY_MAX_SECOND, BIOMED_DESCRIPTION_SECOND = [], [], []

# Continue from the page index where the first half stopped.
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2',{'class':'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'location location-desktop'})
    # Listing-level fields are matched by position to the title elements.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        BIOMED_TITLE_SECOND.append(title_tag.text.strip())
        BIOMED_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        BIOMED_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        BIOMED_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # Status, salary and description are read from each posting's own page.
    scrapeJob(URL, BIOMED_STATUS_SECOND, BIOMED_SALARY_SECOND, BIOMED_SALARY_MIN_SECOND,
              BIOMED_SALARY_MAX_SECOND, BIOMED_DESCRIPTION_SECOND)

In [None]:
#Joining Scrape data of Biomedical (JOBLUM) 

# Each column is the first-half values followed by the second-half values.
BIOMED_TITLE_LIST = np.concatenate([BIOMED_TITLE_FIRST, BIOMED_TITLE_SECOND])
BIOMED_COMPANY_LIST = np.concatenate([BIOMED_COMPANY_FIRST, BIOMED_COMPANY_SECOND])
BIOMED_DATE_LIST = np.concatenate([BIOMED_DATE_FIRST, BIOMED_DATE_SECOND])
BIOMED_LOCATION_LIST = np.concatenate([BIOMED_LOCATION_FIRST, BIOMED_LOCATION_SECOND])
BIOMED_STATUS_LIST = np.concatenate([BIOMED_STATUS_FIRST, BIOMED_STATUS_SECOND])
BIOMED_SALARY_LIST = np.concatenate([BIOMED_SALARY_FIRST, BIOMED_SALARY_SECOND])
BIOMED_SALARY_MIN_LIST = np.concatenate([BIOMED_SALARY_MIN_FIRST, BIOMED_SALARY_MIN_SECOND])
BIOMED_SALARY_MAX_LIST = np.concatenate([BIOMED_SALARY_MAX_FIRST, BIOMED_SALARY_MAX_SECOND])
BIOMED_DESCRIPTION_LIST = np.concatenate([BIOMED_DESCRIPTION_FIRST, BIOMED_DESCRIPTION_SECOND])

In [None]:
#Creating Data Frame for Biomedical (JOBLUM) 
# Scalar entries are repeated on every row; array entries supply one value per job.
BIOMED = {
    'Website': "Joblum",
    'Job Title': BIOMED_TITLE_LIST,
    'Category': "Biomedical",
    'Company': BIOMED_COMPANY_LIST,
    'Date Posted': BIOMED_DATE_LIST,
    'Location': BIOMED_LOCATION_LIST,
    'Status': BIOMED_STATUS_LIST,
    'Salary': BIOMED_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': BIOMED_DESCRIPTION_LIST,
    'Min Salary': BIOMED_SALARY_MIN_LIST,
    'Max Salary': BIOMED_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Medicine",
}
BIOMED_df = pd.DataFrame(BIOMED)
BIOMED_df

In [None]:
# os.path.join keeps the path portable: a hard-coded backslash is an invalid
# "\J" escape sequence and is only a directory separator on Windows.
BIOMED_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-BIOMED.csv'), index = False)

### CATEGORY - Biotechnology

In [None]:
#Scraping data of Biotechnology (JOBLUM) - FIRST HALF

# Independent accumulators, one per output column, for the first half of the pages.
BIOTECH_TITLE_FIRST, BIOTECH_COMPANY_FIRST, BIOTECH_DATE_FIRST = [], [], []
BIOTECH_LOCATION_FIRST, BIOTECH_STATUS_FIRST, BIOTECH_SALARY_FIRST = [], [], []
BIOTECH_SALARY_MIN_FIRST, BIOTECH_SALARY_MAX_FIRST, BIOTECH_DESCRIPTION_FIRST = [], [], []

# The category URL with an empty page number is fetched to read the job count,
# from which the page count and the per-page links are derived.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-biotechnology?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES/2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2',{'class':'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span',{'class':'location location-desktop'})
    # Listing-level fields are matched by position to the title elements.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        BIOTECH_TITLE_FIRST.append(title_tag.text.strip())
        BIOTECH_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        BIOTECH_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        BIOTECH_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # Status, salary and description are read from each posting's own page.
    scrapeJob(URL, BIOTECH_STATUS_FIRST, BIOTECH_SALARY_FIRST, BIOTECH_SALARY_MIN_FIRST,
              BIOTECH_SALARY_MAX_FIRST, BIOTECH_DESCRIPTION_FIRST)

In [None]:
# Biotechnology (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the FIRST HALF cell.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
BIOTECH_TITLE_SECOND, BIOTECH_COMPANY_SECOND = [], []
BIOTECH_DATE_SECOND, BIOTECH_LOCATION_SECOND = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
BIOTECH_STATUS_SECOND, BIOTECH_SALARY_SECOND = [], []
BIOTECH_SALARY_MIN_SECOND, BIOTECH_SALARY_MAX_SECOND = [], []
BIOTECH_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        BIOTECH_TITLE_SECOND.append(title_tag.text.strip())
        BIOTECH_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        BIOTECH_DATE_SECOND.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        BIOTECH_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        BIOTECH_STATUS_SECOND,
        BIOTECH_SALARY_SECOND,
        BIOTECH_SALARY_MIN_SECOND,
        BIOTECH_SALARY_MAX_SECOND,
        BIOTECH_DESCRIPTION_SECOND,
    )

In [None]:
# Biotechnology (JOBLUM) - join the two scraped halves.
# Every *_LIST holds the FIRST-half values followed by the SECOND-half values.

BIOTECH_TITLE_LIST = np.concatenate([BIOTECH_TITLE_FIRST, BIOTECH_TITLE_SECOND], axis=0)
BIOTECH_COMPANY_LIST = np.concatenate([BIOTECH_COMPANY_FIRST, BIOTECH_COMPANY_SECOND], axis=0)
BIOTECH_DATE_LIST = np.concatenate([BIOTECH_DATE_FIRST, BIOTECH_DATE_SECOND], axis=0)
BIOTECH_LOCATION_LIST = np.concatenate([BIOTECH_LOCATION_FIRST, BIOTECH_LOCATION_SECOND], axis=0)
BIOTECH_STATUS_LIST = np.concatenate([BIOTECH_STATUS_FIRST, BIOTECH_STATUS_SECOND], axis=0)
BIOTECH_SALARY_LIST = np.concatenate([BIOTECH_SALARY_FIRST, BIOTECH_SALARY_SECOND], axis=0)
BIOTECH_SALARY_MIN_LIST = np.concatenate([BIOTECH_SALARY_MIN_FIRST, BIOTECH_SALARY_MIN_SECOND], axis=0)
BIOTECH_SALARY_MAX_LIST = np.concatenate([BIOTECH_SALARY_MAX_FIRST, BIOTECH_SALARY_MAX_SECOND], axis=0)
BIOTECH_DESCRIPTION_LIST = np.concatenate([BIOTECH_DESCRIPTION_FIRST, BIOTECH_DESCRIPTION_SECOND], axis=0)

In [None]:
# Biotechnology (JOBLUM) - build the category DataFrame from the joined columns.
# Plain strings are constant for the whole category; the *_LIST values supply
# the per-job data. Keys are listed in the intended column order.
BIOTECH = {
    'Website': 'Joblum',
    'Job Title': BIOTECH_TITLE_LIST,
    'Category': 'Biotechnology',
    'Company': BIOTECH_COMPANY_LIST,
    'Date Posted': BIOTECH_DATE_LIST,
    'Location': BIOTECH_LOCATION_LIST,
    'Status': BIOTECH_STATUS_LIST,
    'Salary': BIOTECH_SALARY_LIST,
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': BIOTECH_DESCRIPTION_LIST,
    'Min Salary': BIOTECH_SALARY_MIN_LIST,
    'Max Salary': BIOTECH_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Science',
}
BIOTECH_df = pd.DataFrame(BIOTECH)
BIOTECH_df  # bare expression: renders the frame as the cell output

In [None]:
# Save the Biotechnology table without the index column. The raw string keeps
# the backslash literal: '\J' is not a valid escape sequence and a plain
# string literal triggers an invalid-escape warning for it.
BIOTECH_df.to_csv(r'Joblum Data\JOBLUM-BIOTECH.csv', index=False)

### CATEGORY - Chemical Engineering

In [None]:
# Chemical Engineering (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
CHEMENG_TITLE_FIRST, CHEMENG_COMPANY_FIRST = [], []
CHEMENG_DATE_FIRST, CHEMENG_LOCATION_FIRST = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
CHEMENG_STATUS_FIRST, CHEMENG_SALARY_FIRST = [], []
CHEMENG_SALARY_MIN_FIRST, CHEMENG_SALARY_MAX_FIRST = [], []
CHEMENG_DESCRIPTION_FIRST = []

# Pagination set-up; NUM_PAGES, JOB_LINKS and FIRST_HALF are reused by the
# SECOND HALF cell, so that cell must run after this one.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-chemical-engineering?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        CHEMENG_TITLE_FIRST.append(title_tag.text.strip())
        CHEMENG_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        CHEMENG_DATE_FIRST.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        CHEMENG_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        CHEMENG_STATUS_FIRST,
        CHEMENG_SALARY_FIRST,
        CHEMENG_SALARY_MIN_FIRST,
        CHEMENG_SALARY_MAX_FIRST,
        CHEMENG_DESCRIPTION_FIRST,
    )

In [None]:
# Chemical Engineering (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the FIRST HALF cell.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
CHEMENG_TITLE_SECOND, CHEMENG_COMPANY_SECOND = [], []
CHEMENG_DATE_SECOND, CHEMENG_LOCATION_SECOND = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
CHEMENG_STATUS_SECOND, CHEMENG_SALARY_SECOND = [], []
CHEMENG_SALARY_MIN_SECOND, CHEMENG_SALARY_MAX_SECOND = [], []
CHEMENG_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        CHEMENG_TITLE_SECOND.append(title_tag.text.strip())
        CHEMENG_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        CHEMENG_DATE_SECOND.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        CHEMENG_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        CHEMENG_STATUS_SECOND,
        CHEMENG_SALARY_SECOND,
        CHEMENG_SALARY_MIN_SECOND,
        CHEMENG_SALARY_MAX_SECOND,
        CHEMENG_DESCRIPTION_SECOND,
    )

In [None]:
# Chemical Engineering (JOBLUM) - join the two scraped halves.
# Every *_LIST holds the FIRST-half values followed by the SECOND-half values.

CHEMENG_TITLE_LIST = np.concatenate([CHEMENG_TITLE_FIRST, CHEMENG_TITLE_SECOND], axis=0)
CHEMENG_COMPANY_LIST = np.concatenate([CHEMENG_COMPANY_FIRST, CHEMENG_COMPANY_SECOND], axis=0)
CHEMENG_DATE_LIST = np.concatenate([CHEMENG_DATE_FIRST, CHEMENG_DATE_SECOND], axis=0)
CHEMENG_LOCATION_LIST = np.concatenate([CHEMENG_LOCATION_FIRST, CHEMENG_LOCATION_SECOND], axis=0)
CHEMENG_STATUS_LIST = np.concatenate([CHEMENG_STATUS_FIRST, CHEMENG_STATUS_SECOND], axis=0)
CHEMENG_SALARY_LIST = np.concatenate([CHEMENG_SALARY_FIRST, CHEMENG_SALARY_SECOND], axis=0)
CHEMENG_SALARY_MIN_LIST = np.concatenate([CHEMENG_SALARY_MIN_FIRST, CHEMENG_SALARY_MIN_SECOND], axis=0)
CHEMENG_SALARY_MAX_LIST = np.concatenate([CHEMENG_SALARY_MAX_FIRST, CHEMENG_SALARY_MAX_SECOND], axis=0)
CHEMENG_DESCRIPTION_LIST = np.concatenate([CHEMENG_DESCRIPTION_FIRST, CHEMENG_DESCRIPTION_SECOND], axis=0)

In [None]:
# Chemical Engineering (JOBLUM) - build the category DataFrame from the joined columns.
# Plain strings are constant for the whole category; the *_LIST values supply
# the per-job data. Keys are listed in the intended column order.
CHEMENG = {
    'Website': 'Joblum',
    'Job Title': CHEMENG_TITLE_LIST,
    'Category': 'Chemical Engineering',
    'Company': CHEMENG_COMPANY_LIST,
    'Date Posted': CHEMENG_DATE_LIST,
    'Location': CHEMENG_LOCATION_LIST,
    'Status': CHEMENG_STATUS_LIST,
    'Salary': CHEMENG_SALARY_LIST,
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': CHEMENG_DESCRIPTION_LIST,
    'Min Salary': CHEMENG_SALARY_MIN_LIST,
    'Max Salary': CHEMENG_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Engineering',
}
CHEMENG_df = pd.DataFrame(CHEMENG)
CHEMENG_df  # bare expression: renders the frame as the cell output

In [None]:
# Save the Chemical Engineering table without the index column. The raw string
# keeps the backslash literal: '\J' is not a valid escape sequence and a plain
# string literal triggers an invalid-escape warning for it.
CHEMENG_df.to_csv(r'Joblum Data\JOBLUM-CHEMENG.csv', index=False)

### CATEGORY - Chemistry

In [None]:
# Chemistry (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
CHEM_TITLE_FIRST, CHEM_COMPANY_FIRST = [], []
CHEM_DATE_FIRST, CHEM_LOCATION_FIRST = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
CHEM_STATUS_FIRST, CHEM_SALARY_FIRST = [], []
CHEM_SALARY_MIN_FIRST, CHEM_SALARY_MAX_FIRST = [], []
CHEM_DESCRIPTION_FIRST = []

# Pagination set-up; NUM_PAGES, JOB_LINKS and FIRST_HALF are reused by the
# SECOND HALF cell, so that cell must run after this one.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-chemistry?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        CHEM_TITLE_FIRST.append(title_tag.text.strip())
        CHEM_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        CHEM_DATE_FIRST.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        CHEM_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        CHEM_STATUS_FIRST,
        CHEM_SALARY_FIRST,
        CHEM_SALARY_MIN_FIRST,
        CHEM_SALARY_MAX_FIRST,
        CHEM_DESCRIPTION_FIRST,
    )

In [None]:
# Chemistry (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the FIRST HALF cell.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
CHEM_TITLE_SECOND, CHEM_COMPANY_SECOND = [], []
CHEM_DATE_SECOND, CHEM_LOCATION_SECOND = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
CHEM_STATUS_SECOND, CHEM_SALARY_SECOND = [], []
CHEM_SALARY_MIN_SECOND, CHEM_SALARY_MAX_SECOND = [], []
CHEM_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        CHEM_TITLE_SECOND.append(title_tag.text.strip())
        CHEM_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        CHEM_DATE_SECOND.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        CHEM_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        CHEM_STATUS_SECOND,
        CHEM_SALARY_SECOND,
        CHEM_SALARY_MIN_SECOND,
        CHEM_SALARY_MAX_SECOND,
        CHEM_DESCRIPTION_SECOND,
    )

In [None]:
# Chemistry (JOBLUM) - join the two scraped halves.
# Every *_LIST holds the FIRST-half values followed by the SECOND-half values.

CHEM_TITLE_LIST = np.concatenate([CHEM_TITLE_FIRST, CHEM_TITLE_SECOND], axis=0)
CHEM_COMPANY_LIST = np.concatenate([CHEM_COMPANY_FIRST, CHEM_COMPANY_SECOND], axis=0)
CHEM_DATE_LIST = np.concatenate([CHEM_DATE_FIRST, CHEM_DATE_SECOND], axis=0)
CHEM_LOCATION_LIST = np.concatenate([CHEM_LOCATION_FIRST, CHEM_LOCATION_SECOND], axis=0)
CHEM_STATUS_LIST = np.concatenate([CHEM_STATUS_FIRST, CHEM_STATUS_SECOND], axis=0)
CHEM_SALARY_LIST = np.concatenate([CHEM_SALARY_FIRST, CHEM_SALARY_SECOND], axis=0)
CHEM_SALARY_MIN_LIST = np.concatenate([CHEM_SALARY_MIN_FIRST, CHEM_SALARY_MIN_SECOND], axis=0)
CHEM_SALARY_MAX_LIST = np.concatenate([CHEM_SALARY_MAX_FIRST, CHEM_SALARY_MAX_SECOND], axis=0)
CHEM_DESCRIPTION_LIST = np.concatenate([CHEM_DESCRIPTION_FIRST, CHEM_DESCRIPTION_SECOND], axis=0)

In [None]:
# Chemistry (JOBLUM) - build the category DataFrame from the joined columns.
# Plain strings are constant for the whole category; the *_LIST values supply
# the per-job data. Keys are listed in the intended column order.
CHEM = {
    'Website': 'Joblum',
    'Job Title': CHEM_TITLE_LIST,
    'Category': 'Chemistry',
    'Company': CHEM_COMPANY_LIST,
    'Date Posted': CHEM_DATE_LIST,
    'Location': CHEM_LOCATION_LIST,
    'Status': CHEM_STATUS_LIST,
    'Salary': CHEM_SALARY_LIST,
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': CHEM_DESCRIPTION_LIST,
    'Min Salary': CHEM_SALARY_MIN_LIST,
    'Max Salary': CHEM_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Science',
}
CHEM_df = pd.DataFrame(CHEM)
CHEM_df  # bare expression: renders the frame as the cell output

In [None]:
# Save the Chemistry table without the index column. The raw string keeps the
# backslash literal: '\J' is not a valid escape sequence and a plain string
# literal triggers an invalid-escape warning for it.
CHEM_df.to_csv(r'Joblum Data\JOBLUM-CHEM.csv', index=False)

### CATEGORY - Civil Engineering/Construction

In [None]:
# Civil Engineering/Construction (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
CIVILENG_TITLE_FIRST, CIVILENG_COMPANY_FIRST = [], []
CIVILENG_DATE_FIRST, CIVILENG_LOCATION_FIRST = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
CIVILENG_STATUS_FIRST, CIVILENG_SALARY_FIRST = [], []
CIVILENG_SALARY_MIN_FIRST, CIVILENG_SALARY_MAX_FIRST = [], []
CIVILENG_DESCRIPTION_FIRST = []

# Pagination set-up; NUM_PAGES, JOB_LINKS and FIRST_HALF are reused by the
# SECOND HALF cell, so that cell must run after this one.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-civil-engineering-construction?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        CIVILENG_TITLE_FIRST.append(title_tag.text.strip())
        CIVILENG_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        CIVILENG_DATE_FIRST.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        CIVILENG_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        CIVILENG_STATUS_FIRST,
        CIVILENG_SALARY_FIRST,
        CIVILENG_SALARY_MIN_FIRST,
        CIVILENG_SALARY_MAX_FIRST,
        CIVILENG_DESCRIPTION_FIRST,
    )

In [None]:
# Civil Engineering/Construction (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the FIRST HALF cell.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
CIVILENG_TITLE_SECOND, CIVILENG_COMPANY_SECOND = [], []
CIVILENG_DATE_SECOND, CIVILENG_LOCATION_SECOND = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
CIVILENG_STATUS_SECOND, CIVILENG_SALARY_SECOND = [], []
CIVILENG_SALARY_MIN_SECOND, CIVILENG_SALARY_MAX_SECOND = [], []
CIVILENG_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        CIVILENG_TITLE_SECOND.append(title_tag.text.strip())
        CIVILENG_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        CIVILENG_DATE_SECOND.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        CIVILENG_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        CIVILENG_STATUS_SECOND,
        CIVILENG_SALARY_SECOND,
        CIVILENG_SALARY_MIN_SECOND,
        CIVILENG_SALARY_MAX_SECOND,
        CIVILENG_DESCRIPTION_SECOND,
    )

In [None]:
# Civil Engineering/Construction (JOBLUM) - join the two scraped halves.
# Every *_LIST holds the FIRST-half values followed by the SECOND-half values.

CIVILENG_TITLE_LIST = np.concatenate([CIVILENG_TITLE_FIRST, CIVILENG_TITLE_SECOND], axis=0)
CIVILENG_COMPANY_LIST = np.concatenate([CIVILENG_COMPANY_FIRST, CIVILENG_COMPANY_SECOND], axis=0)
CIVILENG_DATE_LIST = np.concatenate([CIVILENG_DATE_FIRST, CIVILENG_DATE_SECOND], axis=0)
CIVILENG_LOCATION_LIST = np.concatenate([CIVILENG_LOCATION_FIRST, CIVILENG_LOCATION_SECOND], axis=0)
CIVILENG_STATUS_LIST = np.concatenate([CIVILENG_STATUS_FIRST, CIVILENG_STATUS_SECOND], axis=0)
CIVILENG_SALARY_LIST = np.concatenate([CIVILENG_SALARY_FIRST, CIVILENG_SALARY_SECOND], axis=0)
CIVILENG_SALARY_MIN_LIST = np.concatenate([CIVILENG_SALARY_MIN_FIRST, CIVILENG_SALARY_MIN_SECOND], axis=0)
CIVILENG_SALARY_MAX_LIST = np.concatenate([CIVILENG_SALARY_MAX_FIRST, CIVILENG_SALARY_MAX_SECOND], axis=0)
CIVILENG_DESCRIPTION_LIST = np.concatenate([CIVILENG_DESCRIPTION_FIRST, CIVILENG_DESCRIPTION_SECOND], axis=0)

In [None]:
# Civil Engineering/Construction (JOBLUM) - build the category DataFrame from the joined columns.
# Plain strings are constant for the whole category; the *_LIST values supply
# the per-job data. Keys are listed in the intended column order.
CIVILENG = {
    'Website': 'Joblum',
    'Job Title': CIVILENG_TITLE_LIST,
    'Category': 'Civil Engineering/Construction',
    'Company': CIVILENG_COMPANY_LIST,
    'Date Posted': CIVILENG_DATE_LIST,
    'Location': CIVILENG_LOCATION_LIST,
    'Status': CIVILENG_STATUS_LIST,
    'Salary': CIVILENG_SALARY_LIST,
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': CIVILENG_DESCRIPTION_LIST,
    'Min Salary': CIVILENG_SALARY_MIN_LIST,
    'Max Salary': CIVILENG_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Engineering',
}
CIVILENG_df = pd.DataFrame(CIVILENG)
CIVILENG_df  # bare expression: renders the frame as the cell output

In [None]:
# Save the Civil Engineering/Construction table without the index column. The
# raw string keeps the backslash literal: '\J' is not a valid escape sequence
# and a plain string literal triggers an invalid-escape warning for it.
CIVILENG_df.to_csv(r'Joblum Data\JOBLUM-CIVILENG.csv', index=False)

### CATEGORY - Civil/Construction

In [None]:
# Civil/Construction (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
CONSTRUCTION_TITLE_FIRST, CONSTRUCTION_COMPANY_FIRST = [], []
CONSTRUCTION_DATE_FIRST, CONSTRUCTION_LOCATION_FIRST = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
CONSTRUCTION_STATUS_FIRST, CONSTRUCTION_SALARY_FIRST = [], []
CONSTRUCTION_SALARY_MIN_FIRST, CONSTRUCTION_SALARY_MAX_FIRST = [], []
CONSTRUCTION_DESCRIPTION_FIRST = []

# Pagination set-up; NUM_PAGES, JOB_LINKS and FIRST_HALF are reused by the
# SECOND HALF cell, so that cell must run after this one.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-civil-construction?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        CONSTRUCTION_TITLE_FIRST.append(title_tag.text.strip())
        CONSTRUCTION_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        CONSTRUCTION_DATE_FIRST.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        CONSTRUCTION_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        CONSTRUCTION_STATUS_FIRST,
        CONSTRUCTION_SALARY_FIRST,
        CONSTRUCTION_SALARY_MIN_FIRST,
        CONSTRUCTION_SALARY_MAX_FIRST,
        CONSTRUCTION_DESCRIPTION_FIRST,
    )

In [None]:
# Civil/Construction (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the FIRST HALF cell.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
CONSTRUCTION_TITLE_SECOND, CONSTRUCTION_COMPANY_SECOND = [], []
CONSTRUCTION_DATE_SECOND, CONSTRUCTION_LOCATION_SECOND = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
CONSTRUCTION_STATUS_SECOND, CONSTRUCTION_SALARY_SECOND = [], []
CONSTRUCTION_SALARY_MIN_SECOND, CONSTRUCTION_SALARY_MAX_SECOND = [], []
CONSTRUCTION_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        CONSTRUCTION_TITLE_SECOND.append(title_tag.text.strip())
        CONSTRUCTION_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        CONSTRUCTION_DATE_SECOND.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        CONSTRUCTION_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        CONSTRUCTION_STATUS_SECOND,
        CONSTRUCTION_SALARY_SECOND,
        CONSTRUCTION_SALARY_MIN_SECOND,
        CONSTRUCTION_SALARY_MAX_SECOND,
        CONSTRUCTION_DESCRIPTION_SECOND,
    )

In [None]:
# Civil/Construction (JOBLUM) - join the two scraped halves.
# Every *_LIST holds the FIRST-half values followed by the SECOND-half values.

CONSTRUCTION_TITLE_LIST = np.concatenate([CONSTRUCTION_TITLE_FIRST, CONSTRUCTION_TITLE_SECOND], axis=0)
CONSTRUCTION_COMPANY_LIST = np.concatenate([CONSTRUCTION_COMPANY_FIRST, CONSTRUCTION_COMPANY_SECOND], axis=0)
CONSTRUCTION_DATE_LIST = np.concatenate([CONSTRUCTION_DATE_FIRST, CONSTRUCTION_DATE_SECOND], axis=0)
CONSTRUCTION_LOCATION_LIST = np.concatenate([CONSTRUCTION_LOCATION_FIRST, CONSTRUCTION_LOCATION_SECOND], axis=0)
CONSTRUCTION_STATUS_LIST = np.concatenate([CONSTRUCTION_STATUS_FIRST, CONSTRUCTION_STATUS_SECOND], axis=0)
CONSTRUCTION_SALARY_LIST = np.concatenate([CONSTRUCTION_SALARY_FIRST, CONSTRUCTION_SALARY_SECOND], axis=0)
CONSTRUCTION_SALARY_MIN_LIST = np.concatenate([CONSTRUCTION_SALARY_MIN_FIRST, CONSTRUCTION_SALARY_MIN_SECOND], axis=0)
CONSTRUCTION_SALARY_MAX_LIST = np.concatenate([CONSTRUCTION_SALARY_MAX_FIRST, CONSTRUCTION_SALARY_MAX_SECOND], axis=0)
CONSTRUCTION_DESCRIPTION_LIST = np.concatenate([CONSTRUCTION_DESCRIPTION_FIRST, CONSTRUCTION_DESCRIPTION_SECOND], axis=0)

In [None]:
# Civil/Construction (JOBLUM) - build the category DataFrame from the joined columns.
# Plain strings are constant for the whole category; the *_LIST values supply
# the per-job data. Keys are listed in the intended column order.
CONSTRUCTION = {
    'Website': 'Joblum',
    'Job Title': CONSTRUCTION_TITLE_LIST,
    'Category': 'Civil/Construction',
    'Company': CONSTRUCTION_COMPANY_LIST,
    'Date Posted': CONSTRUCTION_DATE_LIST,
    'Location': CONSTRUCTION_LOCATION_LIST,
    'Status': CONSTRUCTION_STATUS_LIST,
    'Salary': CONSTRUCTION_SALARY_LIST,
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': CONSTRUCTION_DESCRIPTION_LIST,
    'Min Salary': CONSTRUCTION_SALARY_MIN_LIST,
    'Max Salary': CONSTRUCTION_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Engineering',
}
CONSTRUCTION_df = pd.DataFrame(CONSTRUCTION)
CONSTRUCTION_df  # bare expression: renders the frame as the cell output

In [None]:
# Save the Civil/Construction table without the index column. The raw string
# keeps the backslash literal: '\J' is not a valid escape sequence and a plain
# string literal triggers an invalid-escape warning for it.
CONSTRUCTION_df.to_csv(r'Joblum Data\JOBLUM-CONSTRUCTION.csv', index=False)

### CATEGORY - Diagnosis/Others

In [None]:
# Diagnosis/Others (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
DIAGNOSIS_TITLE_FIRST, DIAGNOSIS_COMPANY_FIRST = [], []
DIAGNOSIS_DATE_FIRST, DIAGNOSIS_LOCATION_FIRST = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
DIAGNOSIS_STATUS_FIRST, DIAGNOSIS_SALARY_FIRST = [], []
DIAGNOSIS_SALARY_MIN_FIRST, DIAGNOSIS_SALARY_MAX_FIRST = [], []
DIAGNOSIS_DESCRIPTION_FIRST = []

# Pagination set-up; NUM_PAGES, JOB_LINKS and FIRST_HALF are reused by the
# SECOND HALF cell, so that cell must run after this one.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-diagnosis-others?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        DIAGNOSIS_TITLE_FIRST.append(title_tag.text.strip())
        DIAGNOSIS_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        DIAGNOSIS_DATE_FIRST.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        DIAGNOSIS_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        DIAGNOSIS_STATUS_FIRST,
        DIAGNOSIS_SALARY_FIRST,
        DIAGNOSIS_SALARY_MIN_FIRST,
        DIAGNOSIS_SALARY_MAX_FIRST,
        DIAGNOSIS_DESCRIPTION_FIRST,
    )

In [None]:
# Diagnosis/Others (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the FIRST HALF cell.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
DIAGNOSIS_TITLE_SECOND, DIAGNOSIS_COMPANY_SECOND = [], []
DIAGNOSIS_DATE_SECOND, DIAGNOSIS_LOCATION_SECOND = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
DIAGNOSIS_STATUS_SECOND, DIAGNOSIS_SALARY_SECOND = [], []
DIAGNOSIS_SALARY_MIN_SECOND, DIAGNOSIS_SALARY_MAX_SECOND = [], []
DIAGNOSIS_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        DIAGNOSIS_TITLE_SECOND.append(title_tag.text.strip())
        DIAGNOSIS_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        DIAGNOSIS_DATE_SECOND.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        DIAGNOSIS_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        DIAGNOSIS_STATUS_SECOND,
        DIAGNOSIS_SALARY_SECOND,
        DIAGNOSIS_SALARY_MIN_SECOND,
        DIAGNOSIS_SALARY_MAX_SECOND,
        DIAGNOSIS_DESCRIPTION_SECOND,
    )

In [None]:
# Diagnosis/Others (JOBLUM) - join the two scraped halves.
# Every *_LIST holds the FIRST-half values followed by the SECOND-half values.

DIAGNOSIS_TITLE_LIST = np.concatenate([DIAGNOSIS_TITLE_FIRST, DIAGNOSIS_TITLE_SECOND], axis=0)
DIAGNOSIS_COMPANY_LIST = np.concatenate([DIAGNOSIS_COMPANY_FIRST, DIAGNOSIS_COMPANY_SECOND], axis=0)
DIAGNOSIS_DATE_LIST = np.concatenate([DIAGNOSIS_DATE_FIRST, DIAGNOSIS_DATE_SECOND], axis=0)
DIAGNOSIS_LOCATION_LIST = np.concatenate([DIAGNOSIS_LOCATION_FIRST, DIAGNOSIS_LOCATION_SECOND], axis=0)
DIAGNOSIS_STATUS_LIST = np.concatenate([DIAGNOSIS_STATUS_FIRST, DIAGNOSIS_STATUS_SECOND], axis=0)
DIAGNOSIS_SALARY_LIST = np.concatenate([DIAGNOSIS_SALARY_FIRST, DIAGNOSIS_SALARY_SECOND], axis=0)
DIAGNOSIS_SALARY_MIN_LIST = np.concatenate([DIAGNOSIS_SALARY_MIN_FIRST, DIAGNOSIS_SALARY_MIN_SECOND], axis=0)
DIAGNOSIS_SALARY_MAX_LIST = np.concatenate([DIAGNOSIS_SALARY_MAX_FIRST, DIAGNOSIS_SALARY_MAX_SECOND], axis=0)
DIAGNOSIS_DESCRIPTION_LIST = np.concatenate([DIAGNOSIS_DESCRIPTION_FIRST, DIAGNOSIS_DESCRIPTION_SECOND], axis=0)

In [None]:
# Diagnosis/Others (JOBLUM) - build the category DataFrame from the joined columns.
# Plain strings are constant for the whole category; the *_LIST values supply
# the per-job data. Keys are listed in the intended column order.
DIAGNOSIS = {
    'Website': 'Joblum',
    'Job Title': DIAGNOSIS_TITLE_LIST,
    'Category': 'Diagnosis/Others',
    'Company': DIAGNOSIS_COMPANY_LIST,
    'Date Posted': DIAGNOSIS_DATE_LIST,
    'Location': DIAGNOSIS_LOCATION_LIST,
    'Status': DIAGNOSIS_STATUS_LIST,
    'Salary': DIAGNOSIS_SALARY_LIST,
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': DIAGNOSIS_DESCRIPTION_LIST,
    'Min Salary': DIAGNOSIS_SALARY_MIN_LIST,
    'Max Salary': DIAGNOSIS_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Medicine',
}
DIAGNOSIS_df = pd.DataFrame(DIAGNOSIS)
DIAGNOSIS_df  # bare expression: renders the frame as the cell output

In [None]:
# Save the Diagnosis/Others table without the index column. The raw string
# keeps the backslash literal: '\J' is not a valid escape sequence and a plain
# string literal triggers an invalid-escape warning for it.
DIAGNOSIS_df.to_csv(r'Joblum Data\JOBLUM-DIAGNOSIS.csv', index=False)

### CATEGORY - Doctor/Diagnosis

In [None]:
# Doctor/Diagnosis (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
DOCTOR_TITLE_FIRST, DOCTOR_COMPANY_FIRST = [], []
DOCTOR_DATE_FIRST, DOCTOR_LOCATION_FIRST = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
DOCTOR_STATUS_FIRST, DOCTOR_SALARY_FIRST = [], []
DOCTOR_SALARY_MIN_FIRST, DOCTOR_SALARY_MAX_FIRST = [], []
DOCTOR_DESCRIPTION_FIRST = []

# Pagination set-up; NUM_PAGES, JOB_LINKS and FIRST_HALF are reused by the
# SECOND HALF cell, so that cell must run after this one.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-doctor-diagnosis?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        DOCTOR_TITLE_FIRST.append(title_tag.text.strip())
        DOCTOR_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        DOCTOR_DATE_FIRST.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        DOCTOR_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        DOCTOR_STATUS_FIRST,
        DOCTOR_SALARY_FIRST,
        DOCTOR_SALARY_MIN_FIRST,
        DOCTOR_SALARY_MAX_FIRST,
        DOCTOR_DESCRIPTION_FIRST,
    )

In [None]:
# Doctor/Diagnosis (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the FIRST HALF cell.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
DOCTOR_TITLE_SECOND, DOCTOR_COMPANY_SECOND = [], []
DOCTOR_DATE_SECOND, DOCTOR_LOCATION_SECOND = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
DOCTOR_STATUS_SECOND, DOCTOR_SALARY_SECOND = [], []
DOCTOR_SALARY_MIN_SECOND, DOCTOR_SALARY_MAX_SECOND = [], []
DOCTOR_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        DOCTOR_TITLE_SECOND.append(title_tag.text.strip())
        DOCTOR_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        DOCTOR_DATE_SECOND.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        DOCTOR_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        DOCTOR_STATUS_SECOND,
        DOCTOR_SALARY_SECOND,
        DOCTOR_SALARY_MIN_SECOND,
        DOCTOR_SALARY_MAX_SECOND,
        DOCTOR_DESCRIPTION_SECOND,
    )

In [None]:
# Doctor/Diagnosis (JOBLUM) - join the two scraped halves.
# Every *_LIST holds the FIRST-half values followed by the SECOND-half values.

DOCTOR_TITLE_LIST = np.concatenate([DOCTOR_TITLE_FIRST, DOCTOR_TITLE_SECOND], axis=0)
DOCTOR_COMPANY_LIST = np.concatenate([DOCTOR_COMPANY_FIRST, DOCTOR_COMPANY_SECOND], axis=0)
DOCTOR_DATE_LIST = np.concatenate([DOCTOR_DATE_FIRST, DOCTOR_DATE_SECOND], axis=0)
DOCTOR_LOCATION_LIST = np.concatenate([DOCTOR_LOCATION_FIRST, DOCTOR_LOCATION_SECOND], axis=0)
DOCTOR_STATUS_LIST = np.concatenate([DOCTOR_STATUS_FIRST, DOCTOR_STATUS_SECOND], axis=0)
DOCTOR_SALARY_LIST = np.concatenate([DOCTOR_SALARY_FIRST, DOCTOR_SALARY_SECOND], axis=0)
DOCTOR_SALARY_MIN_LIST = np.concatenate([DOCTOR_SALARY_MIN_FIRST, DOCTOR_SALARY_MIN_SECOND], axis=0)
DOCTOR_SALARY_MAX_LIST = np.concatenate([DOCTOR_SALARY_MAX_FIRST, DOCTOR_SALARY_MAX_SECOND], axis=0)
DOCTOR_DESCRIPTION_LIST = np.concatenate([DOCTOR_DESCRIPTION_FIRST, DOCTOR_DESCRIPTION_SECOND], axis=0)

In [None]:
# Doctor/Diagnosis (JOBLUM) - build the category DataFrame from the joined columns.
# Plain strings are constant for the whole category; the *_LIST values supply
# the per-job data. Keys are listed in the intended column order.
DOCTOR = {
    'Website': 'Joblum',
    'Job Title': DOCTOR_TITLE_LIST,
    # Was "Doctor/DOCTOR", which looks like a search-and-replace slip; the
    # scraped specialization (jobs-spec-doctor-diagnosis) is Doctor/Diagnosis.
    'Category': 'Doctor/Diagnosis',
    'Company': DOCTOR_COMPANY_LIST,
    'Date Posted': DOCTOR_DATE_LIST,
    'Location': DOCTOR_LOCATION_LIST,
    'Status': DOCTOR_STATUS_LIST,
    'Salary': DOCTOR_SALARY_LIST,
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': DOCTOR_DESCRIPTION_LIST,
    'Min Salary': DOCTOR_SALARY_MIN_LIST,
    'Max Salary': DOCTOR_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Medicine',
}
DOCTOR_df = pd.DataFrame(data=DOCTOR)
DOCTOR_df  # bare expression: renders the frame as the cell output

In [None]:
# Save the Doctor/Diagnosis table without the index column. The raw string
# keeps the backslash literal: '\J' is not a valid escape sequence and a plain
# string literal triggers an invalid-escape warning for it.
DOCTOR_df.to_csv(r'Joblum Data\JOBLUM-DOCTOR.csv', index=False)

### CATEGORY - Electrical

In [None]:
# Electrical (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Filled in this cell, one entry per 'h2.job-title' element found on a page.
ELEC_TITLE_FIRST, ELEC_COMPANY_FIRST = [], []
ELEC_DATE_FIRST, ELEC_LOCATION_FIRST = [], []
# Passed to scrapeJob below (presumably filled there - confirm in scrapeJob).
ELEC_STATUS_FIRST, ELEC_SALARY_FIRST = [], []
ELEC_SALARY_MIN_FIRST, ELEC_SALARY_MAX_FIRST = [], []
ELEC_DESCRIPTION_FIRST = []

# Pagination set-up; NUM_PAGES, JOB_LINKS and FIRST_HALF are reused by the
# SECOND HALF cell, so that cell must run after this one.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-electrical?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)  # the first half takes the extra page when the count is odd

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four result sets are read in lockstep, by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ELEC_TITLE_FIRST.append(title_tag.text.strip())
        ELEC_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        posted_on = JOB_DATE_ARRAY[j].text.strip()  # parsed as "<Month name> <day>, <year>"
        ELEC_DATE_FIRST.append(datetime.strptime(posted_on, '%B %d, %Y').date())
        ELEC_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(
        URL,
        ELEC_STATUS_FIRST,
        ELEC_SALARY_FIRST,
        ELEC_SALARY_MIN_FIRST,
        ELEC_SALARY_MAX_FIRST,
        ELEC_DESCRIPTION_FIRST,
    )

In [None]:
# Scrape Electrical listings from Joblum: remaining (second half) result pages.
# Reads FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the Electrical
# FIRST HALF cell, so that cell must be the last scraping cell run before this one.

# Listing fields, appended to in the loop below.
ELEC_TITLE_SECOND, ELEC_COMPANY_SECOND = [], []
ELEC_DATE_SECOND, ELEC_LOCATION_SECOND = [], []
# Fields handed to scrapeJob for each result page.
ELEC_STATUS_SECOND, ELEC_SALARY_SECOND = [], []
ELEC_SALARY_MIN_SECOND, ELEC_SALARY_MAX_SECOND = [], []
ELEC_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ELEC_TITLE_SECOND.append(title_tag.text.strip())
        ELEC_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        ELEC_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ELEC_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, ELEC_STATUS_SECOND, ELEC_SALARY_SECOND, ELEC_SALARY_MIN_SECOND,
              ELEC_SALARY_MAX_SECOND, ELEC_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Electrical (JOBLUM), field by field.
# np.concatenate returns NumPy arrays rather than Python lists.

# Fields read from the result pages.
ELEC_TITLE_LIST = np.concatenate([ELEC_TITLE_FIRST, ELEC_TITLE_SECOND])
ELEC_COMPANY_LIST = np.concatenate([ELEC_COMPANY_FIRST, ELEC_COMPANY_SECOND])
ELEC_DATE_LIST = np.concatenate([ELEC_DATE_FIRST, ELEC_DATE_SECOND])
ELEC_LOCATION_LIST = np.concatenate([ELEC_LOCATION_FIRST, ELEC_LOCATION_SECOND])
# Fields collected through scrapeJob.
ELEC_STATUS_LIST = np.concatenate([ELEC_STATUS_FIRST, ELEC_STATUS_SECOND])
ELEC_SALARY_LIST = np.concatenate([ELEC_SALARY_FIRST, ELEC_SALARY_SECOND])
ELEC_SALARY_MIN_LIST = np.concatenate([ELEC_SALARY_MIN_FIRST, ELEC_SALARY_MIN_SECOND])
ELEC_SALARY_MAX_LIST = np.concatenate([ELEC_SALARY_MAX_FIRST, ELEC_SALARY_MAX_SECOND])
ELEC_DESCRIPTION_LIST = np.concatenate([ELEC_DESCRIPTION_FIRST, ELEC_DESCRIPTION_SECOND])

In [None]:
# Build the Electrical (JOBLUM) DataFrame.
# The *_LIST values supply one entry per scraped job; plain string values are
# constants that pandas repeats on every row.
ELEC = {
    'Website': "Joblum",
    'Job Title': ELEC_TITLE_LIST,
    'Category': "Electrical",
    'Company': ELEC_COMPANY_LIST,
    'Date Posted': ELEC_DATE_LIST,
    'Location': ELEC_LOCATION_LIST,
    'Status': ELEC_STATUS_LIST,
    'Salary': ELEC_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': ELEC_DESCRIPTION_LIST,
    'Min Salary': ELEC_SALARY_MIN_LIST,
    'Max Salary': ELEC_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Engineering",
}
ELEC_df = pd.DataFrame(ELEC)
ELEC_df  # last expression of the cell: shown as the notebook output

In [None]:
# Write the Electrical records to CSV (row index omitted).
# The path is assembled with os.path.join: a backslash inside a normal string
# literal is an invalid escape ("\J") and is only a directory separator on Windows.
os.makedirs('Joblum Data', exist_ok=True)  # to_csv does not create missing folders
ELEC_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-ELEC.csv'), index=False)

### CATEGORY - Electrical Engineering

In [None]:
# Scrape Electrical Engineering listings from Joblum: first half of the result pages.

# Listing fields, appended to in the loop below.
ELECENG_TITLE_FIRST, ELECENG_COMPANY_FIRST = [], []
ELECENG_DATE_FIRST, ELECENG_LOCATION_FIRST = [], []
# Fields handed to scrapeJob for each result page.
ELECENG_STATUS_FIRST, ELECENG_SALARY_FIRST = [], []
ELECENG_SALARY_MIN_FIRST, ELECENG_SALARY_MAX_FIRST = [], []
ELECENG_DESCRIPTION_FIRST = []

# Paging state. NUM_PAGES, JOB_LINKS and FIRST_HALF are read again by the
# SECOND HALF cell.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-electrical-engineering?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ELECENG_TITLE_FIRST.append(title_tag.text.strip())
        ELECENG_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        ELECENG_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ELECENG_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, ELECENG_STATUS_FIRST, ELECENG_SALARY_FIRST, ELECENG_SALARY_MIN_FIRST,
              ELECENG_SALARY_MAX_FIRST, ELECENG_DESCRIPTION_FIRST)

In [None]:
# Scrape Electrical Engineering listings from Joblum: remaining (second half) result pages.
# Reads FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the Electrical Engineering
# FIRST HALF cell, so that cell must be the last scraping cell run before this one.

# Listing fields, appended to in the loop below.
ELECENG_TITLE_SECOND, ELECENG_COMPANY_SECOND = [], []
ELECENG_DATE_SECOND, ELECENG_LOCATION_SECOND = [], []
# Fields handed to scrapeJob for each result page.
ELECENG_STATUS_SECOND, ELECENG_SALARY_SECOND = [], []
ELECENG_SALARY_MIN_SECOND, ELECENG_SALARY_MAX_SECOND = [], []
ELECENG_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ELECENG_TITLE_SECOND.append(title_tag.text.strip())
        ELECENG_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        ELECENG_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ELECENG_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, ELECENG_STATUS_SECOND, ELECENG_SALARY_SECOND, ELECENG_SALARY_MIN_SECOND,
              ELECENG_SALARY_MAX_SECOND, ELECENG_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Electrical Engineering (JOBLUM), field by field.
# np.concatenate returns NumPy arrays rather than Python lists.

# Fields read from the result pages.
ELECENG_TITLE_LIST = np.concatenate([ELECENG_TITLE_FIRST, ELECENG_TITLE_SECOND])
ELECENG_COMPANY_LIST = np.concatenate([ELECENG_COMPANY_FIRST, ELECENG_COMPANY_SECOND])
ELECENG_DATE_LIST = np.concatenate([ELECENG_DATE_FIRST, ELECENG_DATE_SECOND])
ELECENG_LOCATION_LIST = np.concatenate([ELECENG_LOCATION_FIRST, ELECENG_LOCATION_SECOND])
# Fields collected through scrapeJob.
ELECENG_STATUS_LIST = np.concatenate([ELECENG_STATUS_FIRST, ELECENG_STATUS_SECOND])
ELECENG_SALARY_LIST = np.concatenate([ELECENG_SALARY_FIRST, ELECENG_SALARY_SECOND])
ELECENG_SALARY_MIN_LIST = np.concatenate([ELECENG_SALARY_MIN_FIRST, ELECENG_SALARY_MIN_SECOND])
ELECENG_SALARY_MAX_LIST = np.concatenate([ELECENG_SALARY_MAX_FIRST, ELECENG_SALARY_MAX_SECOND])
ELECENG_DESCRIPTION_LIST = np.concatenate([ELECENG_DESCRIPTION_FIRST, ELECENG_DESCRIPTION_SECOND])

In [None]:
# Build the Electrical Engineering (JOBLUM) DataFrame.
# The *_LIST values supply one entry per scraped job; plain string values are
# constants that pandas repeats on every row.
ELECENG = {
    'Website': "Joblum",
    'Job Title': ELECENG_TITLE_LIST,
    'Category': "Electrical Engineering",
    'Company': ELECENG_COMPANY_LIST,
    'Date Posted': ELECENG_DATE_LIST,
    'Location': ELECENG_LOCATION_LIST,
    'Status': ELECENG_STATUS_LIST,
    'Salary': ELECENG_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': ELECENG_DESCRIPTION_LIST,
    'Min Salary': ELECENG_SALARY_MIN_LIST,
    'Max Salary': ELECENG_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Engineering",
}
ELECENG_df = pd.DataFrame(ELECENG)
ELECENG_df  # last expression of the cell: shown as the notebook output

In [None]:
# Write the Electrical Engineering records to CSV (row index omitted).
# The path is assembled with os.path.join: a backslash inside a normal string
# literal is an invalid escape ("\J") and is only a directory separator on Windows.
os.makedirs('Joblum Data', exist_ok=True)  # to_csv does not create missing folders
ELECENG_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-ELECENG.csv'), index=False)

### CATEGORY - Electronics

In [None]:
# Scrape Electronics listings from Joblum: first half of the result pages.

# Listing fields, appended to in the loop below.
ELECTRO_TITLE_FIRST, ELECTRO_COMPANY_FIRST = [], []
ELECTRO_DATE_FIRST, ELECTRO_LOCATION_FIRST = [], []
# Fields handed to scrapeJob for each result page.
ELECTRO_STATUS_FIRST, ELECTRO_SALARY_FIRST = [], []
ELECTRO_SALARY_MIN_FIRST, ELECTRO_SALARY_MAX_FIRST = [], []
ELECTRO_DESCRIPTION_FIRST = []

# Paging state. NUM_PAGES, JOB_LINKS and FIRST_HALF are read again by the
# SECOND HALF cell.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-electronics?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ELECTRO_TITLE_FIRST.append(title_tag.text.strip())
        ELECTRO_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        ELECTRO_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ELECTRO_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, ELECTRO_STATUS_FIRST, ELECTRO_SALARY_FIRST, ELECTRO_SALARY_MIN_FIRST,
              ELECTRO_SALARY_MAX_FIRST, ELECTRO_DESCRIPTION_FIRST)

In [None]:
# Scrape Electronics listings from Joblum: remaining (second half) result pages.
# Reads FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the Electronics
# FIRST HALF cell, so that cell must be the last scraping cell run before this one.

# Listing fields, appended to in the loop below.
ELECTRO_TITLE_SECOND, ELECTRO_COMPANY_SECOND = [], []
ELECTRO_DATE_SECOND, ELECTRO_LOCATION_SECOND = [], []
# Fields handed to scrapeJob for each result page.
ELECTRO_STATUS_SECOND, ELECTRO_SALARY_SECOND = [], []
ELECTRO_SALARY_MIN_SECOND, ELECTRO_SALARY_MAX_SECOND = [], []
ELECTRO_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ELECTRO_TITLE_SECOND.append(title_tag.text.strip())
        ELECTRO_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        ELECTRO_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ELECTRO_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, ELECTRO_STATUS_SECOND, ELECTRO_SALARY_SECOND, ELECTRO_SALARY_MIN_SECOND,
              ELECTRO_SALARY_MAX_SECOND, ELECTRO_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Electronics (JOBLUM), field by field.
# np.concatenate returns NumPy arrays rather than Python lists.

# Fields read from the result pages.
ELECTRO_TITLE_LIST = np.concatenate([ELECTRO_TITLE_FIRST, ELECTRO_TITLE_SECOND])
ELECTRO_COMPANY_LIST = np.concatenate([ELECTRO_COMPANY_FIRST, ELECTRO_COMPANY_SECOND])
ELECTRO_DATE_LIST = np.concatenate([ELECTRO_DATE_FIRST, ELECTRO_DATE_SECOND])
ELECTRO_LOCATION_LIST = np.concatenate([ELECTRO_LOCATION_FIRST, ELECTRO_LOCATION_SECOND])
# Fields collected through scrapeJob.
ELECTRO_STATUS_LIST = np.concatenate([ELECTRO_STATUS_FIRST, ELECTRO_STATUS_SECOND])
ELECTRO_SALARY_LIST = np.concatenate([ELECTRO_SALARY_FIRST, ELECTRO_SALARY_SECOND])
ELECTRO_SALARY_MIN_LIST = np.concatenate([ELECTRO_SALARY_MIN_FIRST, ELECTRO_SALARY_MIN_SECOND])
ELECTRO_SALARY_MAX_LIST = np.concatenate([ELECTRO_SALARY_MAX_FIRST, ELECTRO_SALARY_MAX_SECOND])
ELECTRO_DESCRIPTION_LIST = np.concatenate([ELECTRO_DESCRIPTION_FIRST, ELECTRO_DESCRIPTION_SECOND])

In [None]:
# Build the Electronics (JOBLUM) DataFrame.
# The *_LIST values supply one entry per scraped job; plain string values are
# constants that pandas repeats on every row.
ELECTRO = {
    'Website': "Joblum",
    'Job Title': ELECTRO_TITLE_LIST,
    'Category': "Electronics",
    'Company': ELECTRO_COMPANY_LIST,
    'Date Posted': ELECTRO_DATE_LIST,
    'Location': ELECTRO_LOCATION_LIST,
    'Status': ELECTRO_STATUS_LIST,
    'Salary': ELECTRO_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': ELECTRO_DESCRIPTION_LIST,
    'Min Salary': ELECTRO_SALARY_MIN_LIST,
    'Max Salary': ELECTRO_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Engineering",
}
ELECTRO_df = pd.DataFrame(ELECTRO)
ELECTRO_df  # last expression of the cell: shown as the notebook output

In [None]:
# Write the Electronics records to CSV (row index omitted).
# The path is assembled with os.path.join: a backslash inside a normal string
# literal is an invalid escape ("\J") and is only a directory separator on Windows.
os.makedirs('Joblum Data', exist_ok=True)  # to_csv does not create missing folders
ELECTRO_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-ELECTRO.csv'), index=False)

### CATEGORY - Electronics Engineering

In [None]:
# Scrape Electronics Engineering listings from Joblum: first half of the result pages.

# Listing fields, appended to in the loop below.
ELECTROENG_TITLE_FIRST, ELECTROENG_COMPANY_FIRST = [], []
ELECTROENG_DATE_FIRST, ELECTROENG_LOCATION_FIRST = [], []
# Fields handed to scrapeJob for each result page.
ELECTROENG_STATUS_FIRST, ELECTROENG_SALARY_FIRST = [], []
ELECTROENG_SALARY_MIN_FIRST, ELECTROENG_SALARY_MAX_FIRST = [], []
ELECTROENG_DESCRIPTION_FIRST = []

# Paging state. NUM_PAGES, JOB_LINKS and FIRST_HALF are read again by the
# SECOND HALF cell.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-electronics-engineering?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ELECTROENG_TITLE_FIRST.append(title_tag.text.strip())
        ELECTROENG_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        ELECTROENG_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ELECTROENG_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, ELECTROENG_STATUS_FIRST, ELECTROENG_SALARY_FIRST, ELECTROENG_SALARY_MIN_FIRST,
              ELECTROENG_SALARY_MAX_FIRST, ELECTROENG_DESCRIPTION_FIRST)

In [None]:
# Scrape Electronics Engineering listings from Joblum: remaining (second half) result pages.
# Reads FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the Electronics Engineering
# FIRST HALF cell, so that cell must be the last scraping cell run before this one.

# Listing fields, appended to in the loop below.
ELECTROENG_TITLE_SECOND, ELECTROENG_COMPANY_SECOND = [], []
ELECTROENG_DATE_SECOND, ELECTROENG_LOCATION_SECOND = [], []
# Fields handed to scrapeJob for each result page.
ELECTROENG_STATUS_SECOND, ELECTROENG_SALARY_SECOND = [], []
ELECTROENG_SALARY_MIN_SECOND, ELECTROENG_SALARY_MAX_SECOND = [], []
ELECTROENG_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ELECTROENG_TITLE_SECOND.append(title_tag.text.strip())
        ELECTROENG_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        ELECTROENG_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ELECTROENG_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, ELECTROENG_STATUS_SECOND, ELECTROENG_SALARY_SECOND, ELECTROENG_SALARY_MIN_SECOND,
              ELECTROENG_SALARY_MAX_SECOND, ELECTROENG_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Electronics Engineering (JOBLUM), field by field.
# np.concatenate returns NumPy arrays rather than Python lists.

# Fields read from the result pages.
ELECTROENG_TITLE_LIST = np.concatenate([ELECTROENG_TITLE_FIRST, ELECTROENG_TITLE_SECOND])
ELECTROENG_COMPANY_LIST = np.concatenate([ELECTROENG_COMPANY_FIRST, ELECTROENG_COMPANY_SECOND])
ELECTROENG_DATE_LIST = np.concatenate([ELECTROENG_DATE_FIRST, ELECTROENG_DATE_SECOND])
ELECTROENG_LOCATION_LIST = np.concatenate([ELECTROENG_LOCATION_FIRST, ELECTROENG_LOCATION_SECOND])
# Fields collected through scrapeJob.
ELECTROENG_STATUS_LIST = np.concatenate([ELECTROENG_STATUS_FIRST, ELECTROENG_STATUS_SECOND])
ELECTROENG_SALARY_LIST = np.concatenate([ELECTROENG_SALARY_FIRST, ELECTROENG_SALARY_SECOND])
ELECTROENG_SALARY_MIN_LIST = np.concatenate([ELECTROENG_SALARY_MIN_FIRST, ELECTROENG_SALARY_MIN_SECOND])
ELECTROENG_SALARY_MAX_LIST = np.concatenate([ELECTROENG_SALARY_MAX_FIRST, ELECTROENG_SALARY_MAX_SECOND])
ELECTROENG_DESCRIPTION_LIST = np.concatenate([ELECTROENG_DESCRIPTION_FIRST, ELECTROENG_DESCRIPTION_SECOND])

In [None]:
# Build the Electronics Engineering (JOBLUM) DataFrame.
# The *_LIST values supply one entry per scraped job; plain string values are
# constants that pandas repeats on every row.
ELECTROENG = {
    'Website': "Joblum",
    'Job Title': ELECTROENG_TITLE_LIST,
    'Category': "Electronics Engineering",
    'Company': ELECTROENG_COMPANY_LIST,
    'Date Posted': ELECTROENG_DATE_LIST,
    'Location': ELECTROENG_LOCATION_LIST,
    'Status': ELECTROENG_STATUS_LIST,
    'Salary': ELECTROENG_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': ELECTROENG_DESCRIPTION_LIST,
    'Min Salary': ELECTROENG_SALARY_MIN_LIST,
    'Max Salary': ELECTROENG_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Engineering",
}
ELECTROENG_df = pd.DataFrame(ELECTROENG)
ELECTROENG_df  # last expression of the cell: shown as the notebook output

In [None]:
# Write the Electronics Engineering records to CSV (row index omitted).
# The path is assembled with os.path.join: a backslash inside a normal string
# literal is an invalid escape ("\J") and is only a directory separator on Windows.
os.makedirs('Joblum Data', exist_ok=True)  # to_csv does not create missing folders
ELECTROENG_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-ELECTROENG.csv'), index=False)

### CATEGORY - Environmental

In [None]:
# Scrape Environmental listings from Joblum: first half of the result pages.

# Listing fields, appended to in the loop below.
ENVI_TITLE_FIRST, ENVI_COMPANY_FIRST = [], []
ENVI_DATE_FIRST, ENVI_LOCATION_FIRST = [], []
# Fields handed to scrapeJob for each result page.
ENVI_STATUS_FIRST, ENVI_SALARY_FIRST = [], []
ENVI_SALARY_MIN_FIRST, ENVI_SALARY_MAX_FIRST = [], []
ENVI_DESCRIPTION_FIRST = []

# Paging state. NUM_PAGES, JOB_LINKS and FIRST_HALF are read again by the
# SECOND HALF cell.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-environmental?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ENVI_TITLE_FIRST.append(title_tag.text.strip())
        ENVI_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        ENVI_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ENVI_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, ENVI_STATUS_FIRST, ENVI_SALARY_FIRST, ENVI_SALARY_MIN_FIRST,
              ENVI_SALARY_MAX_FIRST, ENVI_DESCRIPTION_FIRST)

In [None]:
# Scrape Environmental listings from Joblum: remaining (second half) result pages.
# Reads FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the Environmental
# FIRST HALF cell, so that cell must be the last scraping cell run before this one.

# Listing fields, appended to in the loop below.
ENVI_TITLE_SECOND, ENVI_COMPANY_SECOND = [], []
ENVI_DATE_SECOND, ENVI_LOCATION_SECOND = [], []
# Fields handed to scrapeJob for each result page.
ENVI_STATUS_SECOND, ENVI_SALARY_SECOND = [], []
ENVI_SALARY_MIN_SECOND, ENVI_SALARY_MAX_SECOND = [], []
ENVI_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ENVI_TITLE_SECOND.append(title_tag.text.strip())
        ENVI_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        ENVI_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ENVI_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, ENVI_STATUS_SECOND, ENVI_SALARY_SECOND, ENVI_SALARY_MIN_SECOND,
              ENVI_SALARY_MAX_SECOND, ENVI_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Environmental (JOBLUM), field by field.
# np.concatenate returns NumPy arrays rather than Python lists.

# Fields read from the result pages.
ENVI_TITLE_LIST = np.concatenate([ENVI_TITLE_FIRST, ENVI_TITLE_SECOND])
ENVI_COMPANY_LIST = np.concatenate([ENVI_COMPANY_FIRST, ENVI_COMPANY_SECOND])
ENVI_DATE_LIST = np.concatenate([ENVI_DATE_FIRST, ENVI_DATE_SECOND])
ENVI_LOCATION_LIST = np.concatenate([ENVI_LOCATION_FIRST, ENVI_LOCATION_SECOND])
# Fields collected through scrapeJob.
ENVI_STATUS_LIST = np.concatenate([ENVI_STATUS_FIRST, ENVI_STATUS_SECOND])
ENVI_SALARY_LIST = np.concatenate([ENVI_SALARY_FIRST, ENVI_SALARY_SECOND])
ENVI_SALARY_MIN_LIST = np.concatenate([ENVI_SALARY_MIN_FIRST, ENVI_SALARY_MIN_SECOND])
ENVI_SALARY_MAX_LIST = np.concatenate([ENVI_SALARY_MAX_FIRST, ENVI_SALARY_MAX_SECOND])
ENVI_DESCRIPTION_LIST = np.concatenate([ENVI_DESCRIPTION_FIRST, ENVI_DESCRIPTION_SECOND])

In [None]:
# Build the Environmental (JOBLUM) DataFrame.
# The *_LIST values supply one entry per scraped job; plain string values are
# constants that pandas repeats on every row.
ENVI = {
    'Website': "Joblum",
    'Job Title': ENVI_TITLE_LIST,
    'Category': "Environmental",
    'Company': ENVI_COMPANY_LIST,
    'Date Posted': ENVI_DATE_LIST,
    'Location': ENVI_LOCATION_LIST,
    'Status': ENVI_STATUS_LIST,
    'Salary': ENVI_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': ENVI_DESCRIPTION_LIST,
    'Min Salary': ENVI_SALARY_MIN_LIST,
    'Max Salary': ENVI_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Science",
}
ENVI_df = pd.DataFrame(ENVI)
ENVI_df  # last expression of the cell: shown as the notebook output

In [None]:
# Write the Environmental records to CSV (row index omitted).
# The path is assembled with os.path.join: a backslash inside a normal string
# literal is an invalid escape ("\J") and is only a directory separator on Windows.
os.makedirs('Joblum Data', exist_ok=True)  # to_csv does not create missing folders
ENVI_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-ENVI.csv'), index=False)

### CATEGORY - Environmental Engineering

In [None]:
# Scrape Environmental Engineering listings from Joblum: first half of the result pages.

# Listing fields, appended to in the loop below.
ENVIENG_TITLE_FIRST, ENVIENG_COMPANY_FIRST = [], []
ENVIENG_DATE_FIRST, ENVIENG_LOCATION_FIRST = [], []
# Fields handed to scrapeJob for each result page.
ENVIENG_STATUS_FIRST, ENVIENG_SALARY_FIRST = [], []
ENVIENG_SALARY_MIN_FIRST, ENVIENG_SALARY_MAX_FIRST = [], []
ENVIENG_DESCRIPTION_FIRST = []

# Paging state. NUM_PAGES, JOB_LINKS and FIRST_HALF are read again by the
# SECOND HALF cell.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-environmental-engineering?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ENVIENG_TITLE_FIRST.append(title_tag.text.strip())
        ENVIENG_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        ENVIENG_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ENVIENG_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, ENVIENG_STATUS_FIRST, ENVIENG_SALARY_FIRST, ENVIENG_SALARY_MIN_FIRST,
              ENVIENG_SALARY_MAX_FIRST, ENVIENG_DESCRIPTION_FIRST)

In [None]:
# Scrape Environmental Engineering listings from Joblum: remaining (second half) result pages.
# Reads FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the Environmental Engineering
# FIRST HALF cell, so that cell must be the last scraping cell run before this one.

# Listing fields, appended to in the loop below.
ENVIENG_TITLE_SECOND, ENVIENG_COMPANY_SECOND = [], []
ENVIENG_DATE_SECOND, ENVIENG_LOCATION_SECOND = [], []
# Fields handed to scrapeJob for each result page.
ENVIENG_STATUS_SECOND, ENVIENG_SALARY_SECOND = [], []
ENVIENG_SALARY_MIN_SECOND, ENVIENG_SALARY_MAX_SECOND = [], []
ENVIENG_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ENVIENG_TITLE_SECOND.append(title_tag.text.strip())
        ENVIENG_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        ENVIENG_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ENVIENG_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, ENVIENG_STATUS_SECOND, ENVIENG_SALARY_SECOND, ENVIENG_SALARY_MIN_SECOND,
              ENVIENG_SALARY_MAX_SECOND, ENVIENG_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Environmental Engineering (JOBLUM), field by field.
# np.concatenate returns NumPy arrays rather than Python lists.

# Fields read from the result pages.
ENVIENG_TITLE_LIST = np.concatenate([ENVIENG_TITLE_FIRST, ENVIENG_TITLE_SECOND])
ENVIENG_COMPANY_LIST = np.concatenate([ENVIENG_COMPANY_FIRST, ENVIENG_COMPANY_SECOND])
ENVIENG_DATE_LIST = np.concatenate([ENVIENG_DATE_FIRST, ENVIENG_DATE_SECOND])
ENVIENG_LOCATION_LIST = np.concatenate([ENVIENG_LOCATION_FIRST, ENVIENG_LOCATION_SECOND])
# Fields collected through scrapeJob.
ENVIENG_STATUS_LIST = np.concatenate([ENVIENG_STATUS_FIRST, ENVIENG_STATUS_SECOND])
ENVIENG_SALARY_LIST = np.concatenate([ENVIENG_SALARY_FIRST, ENVIENG_SALARY_SECOND])
ENVIENG_SALARY_MIN_LIST = np.concatenate([ENVIENG_SALARY_MIN_FIRST, ENVIENG_SALARY_MIN_SECOND])
ENVIENG_SALARY_MAX_LIST = np.concatenate([ENVIENG_SALARY_MAX_FIRST, ENVIENG_SALARY_MAX_SECOND])
ENVIENG_DESCRIPTION_LIST = np.concatenate([ENVIENG_DESCRIPTION_FIRST, ENVIENG_DESCRIPTION_SECOND])

In [None]:
# Build the Environmental Engineering (JOBLUM) DataFrame.
# The *_LIST values supply one entry per scraped job; plain string values are
# constants that pandas repeats on every row.
ENVIENG = {
    'Website': "Joblum",
    'Job Title': ENVIENG_TITLE_LIST,
    'Category': "Environmental Engineering",
    'Company': ENVIENG_COMPANY_LIST,
    'Date Posted': ENVIENG_DATE_LIST,
    'Location': ENVIENG_LOCATION_LIST,
    'Status': ENVIENG_STATUS_LIST,
    'Salary': ENVIENG_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': ENVIENG_DESCRIPTION_LIST,
    'Min Salary': ENVIENG_SALARY_MIN_LIST,
    'Max Salary': ENVIENG_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Engineering",
}
ENVIENG_df = pd.DataFrame(ENVIENG)
ENVIENG_df  # last expression of the cell: shown as the notebook output

In [None]:
# Write the Environmental Engineering records to CSV (row index omitted).
# The path is assembled with os.path.join: a backslash inside a normal string
# literal is an invalid escape ("\J") and is only a directory separator on Windows.
os.makedirs('Joblum Data', exist_ok=True)  # to_csv does not create missing folders
ENVIENG_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-ENVIENG.csv'), index=False)

### CATEGORY - Food Tech/Nutritionist

In [None]:
# Scrape Food Tech/Nutritionist listings from Joblum: first half of the result pages.

# Listing fields, appended to in the loop below.
NUTRI_TITLE_FIRST, NUTRI_COMPANY_FIRST = [], []
NUTRI_DATE_FIRST, NUTRI_LOCATION_FIRST = [], []
# Fields handed to scrapeJob for each result page.
NUTRI_STATUS_FIRST, NUTRI_SALARY_FIRST = [], []
NUTRI_SALARY_MIN_FIRST, NUTRI_SALARY_MAX_FIRST = [], []
NUTRI_DESCRIPTION_FIRST = []

# Paging state. NUM_PAGES, JOB_LINKS and FIRST_HALF are read again by the
# SECOND HALF cell.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-food-tech-nutritionist?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        NUTRI_TITLE_FIRST.append(title_tag.text.strip())
        NUTRI_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        NUTRI_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        NUTRI_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, NUTRI_STATUS_FIRST, NUTRI_SALARY_FIRST, NUTRI_SALARY_MIN_FIRST,
              NUTRI_SALARY_MAX_FIRST, NUTRI_DESCRIPTION_FIRST)

In [None]:
# Scrape Food Tech/Nutritionist listings from Joblum: remaining (second half) result pages.
# Reads FIRST_HALF, NUM_PAGES and JOB_LINKS as left by the Food Tech/Nutritionist
# FIRST HALF cell, so that cell must be the last scraping cell run before this one.

# Listing fields, appended to in the loop below.
NUTRI_TITLE_SECOND, NUTRI_COMPANY_SECOND = [], []
NUTRI_DATE_SECOND, NUTRI_LOCATION_SECOND = [], []
# Fields handed to scrapeJob for each result page.
NUTRI_STATUS_SECOND, NUTRI_SALARY_SECOND = [], []
NUTRI_SALARY_MIN_SECOND, NUTRI_SALARY_MAX_SECOND = [], []
NUTRI_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        NUTRI_TITLE_SECOND.append(title_tag.text.strip())
        NUTRI_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        NUTRI_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        NUTRI_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, NUTRI_STATUS_SECOND, NUTRI_SALARY_SECOND, NUTRI_SALARY_MIN_SECOND,
              NUTRI_SALARY_MAX_SECOND, NUTRI_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Food Tech/Nutritionist (JOBLUM), field by field.
# np.concatenate returns NumPy arrays rather than Python lists.

# Fields read from the result pages.
NUTRI_TITLE_LIST = np.concatenate([NUTRI_TITLE_FIRST, NUTRI_TITLE_SECOND])
NUTRI_COMPANY_LIST = np.concatenate([NUTRI_COMPANY_FIRST, NUTRI_COMPANY_SECOND])
NUTRI_DATE_LIST = np.concatenate([NUTRI_DATE_FIRST, NUTRI_DATE_SECOND])
NUTRI_LOCATION_LIST = np.concatenate([NUTRI_LOCATION_FIRST, NUTRI_LOCATION_SECOND])
# Fields collected through scrapeJob.
NUTRI_STATUS_LIST = np.concatenate([NUTRI_STATUS_FIRST, NUTRI_STATUS_SECOND])
NUTRI_SALARY_LIST = np.concatenate([NUTRI_SALARY_FIRST, NUTRI_SALARY_SECOND])
NUTRI_SALARY_MIN_LIST = np.concatenate([NUTRI_SALARY_MIN_FIRST, NUTRI_SALARY_MIN_SECOND])
NUTRI_SALARY_MAX_LIST = np.concatenate([NUTRI_SALARY_MAX_FIRST, NUTRI_SALARY_MAX_SECOND])
NUTRI_DESCRIPTION_LIST = np.concatenate([NUTRI_DESCRIPTION_FIRST, NUTRI_DESCRIPTION_SECOND])

In [None]:
# Build the Food Tech/Nutritionist (JOBLUM) DataFrame.
# The *_LIST values supply one entry per scraped job; plain string values are
# constants that pandas repeats on every row.
NUTRI = {
    'Website': "Joblum",
    'Job Title': NUTRI_TITLE_LIST,
    'Category': "Food Tech/Nutritionist",
    'Company': NUTRI_COMPANY_LIST,
    'Date Posted': NUTRI_DATE_LIST,
    'Location': NUTRI_LOCATION_LIST,
    'Status': NUTRI_STATUS_LIST,
    'Salary': NUTRI_SALARY_LIST,
    'Education': "Not Specified / In Description",
    'Years of Work Experience': "Not Specified / In Description",
    'Job Description': NUTRI_DESCRIPTION_LIST,
    'Min Salary': NUTRI_SALARY_MIN_LIST,
    'Max Salary': NUTRI_SALARY_MAX_LIST,
    'Min Years of Work Experience': "Not Specified",
    'Max Years of Work Experience': "Not Specified",
    'Field': "Science",
}
NUTRI_df = pd.DataFrame(NUTRI)
NUTRI_df  # last expression of the cell: shown as the notebook output

In [None]:
# Write the Food Tech/Nutritionist records to CSV (row index omitted).
# The path is assembled with os.path.join: a backslash inside a normal string
# literal is an invalid escape ("\J") and is only a directory separator on Windows.
os.makedirs('Joblum Data', exist_ok=True)  # to_csv does not create missing folders
NUTRI_df.to_csv(os.path.join('Joblum Data', 'JOBLUM-NUTRI.csv'), index=False)

### CATEGORY - Geology/Geophysics

In [None]:
# Scrape Geology/Geophysics listings from Joblum: first half of the result pages.

# Listing fields, appended to in the loop below.
GEO_TITLE_FIRST, GEO_COMPANY_FIRST = [], []
GEO_DATE_FIRST, GEO_LOCATION_FIRST = [], []
# Fields handed to scrapeJob for each result page.
GEO_STATUS_FIRST, GEO_SALARY_FIRST = [], []
GEO_SALARY_MIN_FIRST, GEO_SALARY_MAX_FIRST = [], []
GEO_DESCRIPTION_FIRST = []

# Paging state. NUM_PAGES, JOB_LINKS and FIRST_HALF are read again by the
# SECOND HALF cell.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-geology-geophysics?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})
    # The four tag lists are paired by position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        GEO_TITLE_FIRST.append(title_tag.text.strip())
        GEO_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # Posting dates are parsed as "<full month name> <day>, <year>".
        date_text = JOB_DATE_ARRAY[j].text.strip()
        GEO_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        GEO_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    scrapeJob(URL, GEO_STATUS_FIRST, GEO_SALARY_FIRST, GEO_SALARY_MIN_FIRST,
              GEO_SALARY_MAX_FIRST, GEO_DESCRIPTION_FIRST)

In [None]:
# Geology/Geophysics (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS set by the FIRST HALF cell.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere).
GEO_TITLE_SECOND, GEO_COMPANY_SECOND = [], []
GEO_DATE_SECOND, GEO_LOCATION_SECOND = [], []
GEO_STATUS_SECOND, GEO_SALARY_SECOND = [], []
GEO_SALARY_MIN_SECOND, GEO_SALARY_MAX_SECOND = [], []
GEO_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        GEO_TITLE_SECOND.append(title_tag.text.strip())
        GEO_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        GEO_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        GEO_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        GEO_STATUS_SECOND,
        GEO_SALARY_SECOND,
        GEO_SALARY_MIN_SECOND,
        GEO_SALARY_MAX_SECOND,
        GEO_DESCRIPTION_SECOND,
    )

In [None]:
# Geology/Geophysics (JOBLUM) - merge the FIRST and SECOND half results,
# column by column, with first-half rows ahead of second-half rows.

GEO_TITLE_LIST = np.concatenate([GEO_TITLE_FIRST, GEO_TITLE_SECOND])
GEO_COMPANY_LIST = np.concatenate([GEO_COMPANY_FIRST, GEO_COMPANY_SECOND])
GEO_DATE_LIST = np.concatenate([GEO_DATE_FIRST, GEO_DATE_SECOND])
GEO_LOCATION_LIST = np.concatenate([GEO_LOCATION_FIRST, GEO_LOCATION_SECOND])
GEO_STATUS_LIST = np.concatenate([GEO_STATUS_FIRST, GEO_STATUS_SECOND])
GEO_SALARY_LIST = np.concatenate([GEO_SALARY_FIRST, GEO_SALARY_SECOND])
GEO_SALARY_MIN_LIST = np.concatenate([GEO_SALARY_MIN_FIRST, GEO_SALARY_MIN_SECOND])
GEO_SALARY_MAX_LIST = np.concatenate([GEO_SALARY_MAX_FIRST, GEO_SALARY_MAX_SECOND])
GEO_DESCRIPTION_LIST = np.concatenate([GEO_DESCRIPTION_FIRST, GEO_DESCRIPTION_SECOND])

In [None]:
# Geology/Geophysics (JOBLUM) - assemble the joined columns into a DataFrame.
# Array-valued entries supply the rows; plain-string entries are constants
# that pandas repeats on every row.
GEO = {
    "Website": "Joblum",
    "Job Title": GEO_TITLE_LIST,
    "Category": "Geology/Geophysics",
    "Company": GEO_COMPANY_LIST,
    "Date Posted": GEO_DATE_LIST,
    "Location": GEO_LOCATION_LIST,
    "Status": GEO_STATUS_LIST,
    "Salary": GEO_SALARY_LIST,
    "Education": "Not Specified / In Description",
    "Years of Work Experience": "Not Specified / In Description",
    "Job Description": GEO_DESCRIPTION_LIST,
    "Min Salary": GEO_SALARY_MIN_LIST,
    "Max Salary": GEO_SALARY_MAX_LIST,
    "Min Years of Work Experience": "Not Specified",
    "Max Years of Work Experience": "Not Specified",
    "Field": "Science",
}
GEO_df = pd.DataFrame(GEO)
GEO_df

In [None]:
# Raw string: '\J' is not a valid escape sequence, so the plain literal only
# worked by accident and warns on newer Python versions. The path is unchanged.
GEO_df.to_csv(r'Joblum Data\JOBLUM-GEO.csv', index=False)

### CATEGORY - Industrial Engineering

In [None]:
# Industrial Engineering (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere), which
# presumably fills them per job.
INDUSENG_TITLE_FIRST, INDUSENG_COMPANY_FIRST = [], []
INDUSENG_DATE_FIRST, INDUSENG_LOCATION_FIRST = [], []
INDUSENG_STATUS_FIRST, INDUSENG_SALARY_FIRST = [], []
INDUSENG_SALARY_MIN_FIRST, INDUSENG_SALARY_MAX_FIRST = [], []
INDUSENG_DESCRIPTION_FIRST = []

# Category listing URL and the page bookkeeping derived from it.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-industrial-engineering?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Pages [0, FIRST_HALF) are handled here; the SECOND HALF cell covers the rest.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        INDUSENG_TITLE_FIRST.append(title_tag.text.strip())
        INDUSENG_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        INDUSENG_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        INDUSENG_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        INDUSENG_STATUS_FIRST,
        INDUSENG_SALARY_FIRST,
        INDUSENG_SALARY_MIN_FIRST,
        INDUSENG_SALARY_MAX_FIRST,
        INDUSENG_DESCRIPTION_FIRST,
    )

In [None]:
# Industrial Engineering (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS set by the FIRST HALF cell.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere).
INDUSENG_TITLE_SECOND, INDUSENG_COMPANY_SECOND = [], []
INDUSENG_DATE_SECOND, INDUSENG_LOCATION_SECOND = [], []
INDUSENG_STATUS_SECOND, INDUSENG_SALARY_SECOND = [], []
INDUSENG_SALARY_MIN_SECOND, INDUSENG_SALARY_MAX_SECOND = [], []
INDUSENG_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        INDUSENG_TITLE_SECOND.append(title_tag.text.strip())
        INDUSENG_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        INDUSENG_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        INDUSENG_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        INDUSENG_STATUS_SECOND,
        INDUSENG_SALARY_SECOND,
        INDUSENG_SALARY_MIN_SECOND,
        INDUSENG_SALARY_MAX_SECOND,
        INDUSENG_DESCRIPTION_SECOND,
    )

In [None]:
# Industrial Engineering (JOBLUM) - merge the FIRST and SECOND half results,
# column by column, with first-half rows ahead of second-half rows.

INDUSENG_TITLE_LIST = np.concatenate([INDUSENG_TITLE_FIRST, INDUSENG_TITLE_SECOND])
INDUSENG_COMPANY_LIST = np.concatenate([INDUSENG_COMPANY_FIRST, INDUSENG_COMPANY_SECOND])
INDUSENG_DATE_LIST = np.concatenate([INDUSENG_DATE_FIRST, INDUSENG_DATE_SECOND])
INDUSENG_LOCATION_LIST = np.concatenate([INDUSENG_LOCATION_FIRST, INDUSENG_LOCATION_SECOND])
INDUSENG_STATUS_LIST = np.concatenate([INDUSENG_STATUS_FIRST, INDUSENG_STATUS_SECOND])
INDUSENG_SALARY_LIST = np.concatenate([INDUSENG_SALARY_FIRST, INDUSENG_SALARY_SECOND])
INDUSENG_SALARY_MIN_LIST = np.concatenate([INDUSENG_SALARY_MIN_FIRST, INDUSENG_SALARY_MIN_SECOND])
INDUSENG_SALARY_MAX_LIST = np.concatenate([INDUSENG_SALARY_MAX_FIRST, INDUSENG_SALARY_MAX_SECOND])
INDUSENG_DESCRIPTION_LIST = np.concatenate([INDUSENG_DESCRIPTION_FIRST, INDUSENG_DESCRIPTION_SECOND])

In [None]:
# Industrial Engineering (JOBLUM) - assemble the joined columns into a DataFrame.
# Array-valued entries supply the rows; plain-string entries are constants
# that pandas repeats on every row.
INDUSENG = {
    "Website": "Joblum",
    "Job Title": INDUSENG_TITLE_LIST,
    "Category": "Industrial Engineering",
    "Company": INDUSENG_COMPANY_LIST,
    "Date Posted": INDUSENG_DATE_LIST,
    "Location": INDUSENG_LOCATION_LIST,
    "Status": INDUSENG_STATUS_LIST,
    "Salary": INDUSENG_SALARY_LIST,
    "Education": "Not Specified / In Description",
    "Years of Work Experience": "Not Specified / In Description",
    "Job Description": INDUSENG_DESCRIPTION_LIST,
    "Min Salary": INDUSENG_SALARY_MIN_LIST,
    "Max Salary": INDUSENG_SALARY_MAX_LIST,
    "Min Years of Work Experience": "Not Specified",
    "Max Years of Work Experience": "Not Specified",
    "Field": "Engineering",
}
INDUSENG_df = pd.DataFrame(INDUSENG)
INDUSENG_df

In [None]:
# Raw string: '\J' is not a valid escape sequence, so the plain literal only
# worked by accident and warns on newer Python versions. The path is unchanged.
INDUSENG_df.to_csv(r'Joblum Data\JOBLUM-INDUSENG.csv', index=False)

### CATEGORY - IT - Hardware

In [None]:
# IT - Hardware (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere), which
# presumably fills them per job.
IT_HARDWARE_TITLE_FIRST, IT_HARDWARE_COMPANY_FIRST = [], []
IT_HARDWARE_DATE_FIRST, IT_HARDWARE_LOCATION_FIRST = [], []
IT_HARDWARE_STATUS_FIRST, IT_HARDWARE_SALARY_FIRST = [], []
IT_HARDWARE_SALARY_MIN_FIRST, IT_HARDWARE_SALARY_MAX_FIRST = [], []
IT_HARDWARE_DESCRIPTION_FIRST = []

# Category listing URL and the page bookkeeping derived from it.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-it-hardware?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Pages [0, FIRST_HALF) are handled here; the SECOND HALF cell covers the rest.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        IT_HARDWARE_TITLE_FIRST.append(title_tag.text.strip())
        IT_HARDWARE_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        IT_HARDWARE_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        IT_HARDWARE_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        IT_HARDWARE_STATUS_FIRST,
        IT_HARDWARE_SALARY_FIRST,
        IT_HARDWARE_SALARY_MIN_FIRST,
        IT_HARDWARE_SALARY_MAX_FIRST,
        IT_HARDWARE_DESCRIPTION_FIRST,
    )

In [None]:
# IT - Hardware (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS set by the FIRST HALF cell.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere).
IT_HARDWARE_TITLE_SECOND, IT_HARDWARE_COMPANY_SECOND = [], []
IT_HARDWARE_DATE_SECOND, IT_HARDWARE_LOCATION_SECOND = [], []
IT_HARDWARE_STATUS_SECOND, IT_HARDWARE_SALARY_SECOND = [], []
IT_HARDWARE_SALARY_MIN_SECOND, IT_HARDWARE_SALARY_MAX_SECOND = [], []
IT_HARDWARE_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        IT_HARDWARE_TITLE_SECOND.append(title_tag.text.strip())
        IT_HARDWARE_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        IT_HARDWARE_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        IT_HARDWARE_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        IT_HARDWARE_STATUS_SECOND,
        IT_HARDWARE_SALARY_SECOND,
        IT_HARDWARE_SALARY_MIN_SECOND,
        IT_HARDWARE_SALARY_MAX_SECOND,
        IT_HARDWARE_DESCRIPTION_SECOND,
    )

In [None]:
# IT - Hardware (JOBLUM) - merge the FIRST and SECOND half results,
# column by column, with first-half rows ahead of second-half rows.

IT_HARDWARE_TITLE_LIST = np.concatenate([IT_HARDWARE_TITLE_FIRST, IT_HARDWARE_TITLE_SECOND])
IT_HARDWARE_COMPANY_LIST = np.concatenate([IT_HARDWARE_COMPANY_FIRST, IT_HARDWARE_COMPANY_SECOND])
IT_HARDWARE_DATE_LIST = np.concatenate([IT_HARDWARE_DATE_FIRST, IT_HARDWARE_DATE_SECOND])
IT_HARDWARE_LOCATION_LIST = np.concatenate([IT_HARDWARE_LOCATION_FIRST, IT_HARDWARE_LOCATION_SECOND])
IT_HARDWARE_STATUS_LIST = np.concatenate([IT_HARDWARE_STATUS_FIRST, IT_HARDWARE_STATUS_SECOND])
IT_HARDWARE_SALARY_LIST = np.concatenate([IT_HARDWARE_SALARY_FIRST, IT_HARDWARE_SALARY_SECOND])
IT_HARDWARE_SALARY_MIN_LIST = np.concatenate([IT_HARDWARE_SALARY_MIN_FIRST, IT_HARDWARE_SALARY_MIN_SECOND])
IT_HARDWARE_SALARY_MAX_LIST = np.concatenate([IT_HARDWARE_SALARY_MAX_FIRST, IT_HARDWARE_SALARY_MAX_SECOND])
IT_HARDWARE_DESCRIPTION_LIST = np.concatenate([IT_HARDWARE_DESCRIPTION_FIRST, IT_HARDWARE_DESCRIPTION_SECOND])

In [None]:
# IT - Hardware (JOBLUM) - assemble the joined columns into a DataFrame.
# Array-valued entries supply the rows; plain-string entries are constants
# that pandas repeats on every row.
IT_HARDWARE = {
    "Website": "Joblum",
    "Job Title": IT_HARDWARE_TITLE_LIST,
    "Category": "IT - Hardware",
    "Company": IT_HARDWARE_COMPANY_LIST,
    "Date Posted": IT_HARDWARE_DATE_LIST,
    "Location": IT_HARDWARE_LOCATION_LIST,
    "Status": IT_HARDWARE_STATUS_LIST,
    "Salary": IT_HARDWARE_SALARY_LIST,
    "Education": "Not Specified / In Description",
    "Years of Work Experience": "Not Specified / In Description",
    "Job Description": IT_HARDWARE_DESCRIPTION_LIST,
    "Min Salary": IT_HARDWARE_SALARY_MIN_LIST,
    "Max Salary": IT_HARDWARE_SALARY_MAX_LIST,
    "Min Years of Work Experience": "Not Specified",
    "Max Years of Work Experience": "Not Specified",
    "Field": "IT",
}
IT_HARDWARE_df = pd.DataFrame(IT_HARDWARE)
IT_HARDWARE_df

In [None]:
# Raw string: '\J' is not a valid escape sequence, so the plain literal only
# worked by accident and warns on newer Python versions. The path is unchanged.
IT_HARDWARE_df.to_csv(r'Joblum Data\JOBLUM-IT_HARDWARE.csv', index=False)

### CATEGORY - IT - Network/Sys/DB Admin

In [None]:
# IT - Network/Sys/DB Admin (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere), which
# presumably fills them per job.
IT_SYS_TITLE_FIRST, IT_SYS_COMPANY_FIRST = [], []
IT_SYS_DATE_FIRST, IT_SYS_LOCATION_FIRST = [], []
IT_SYS_STATUS_FIRST, IT_SYS_SALARY_FIRST = [], []
IT_SYS_SALARY_MIN_FIRST, IT_SYS_SALARY_MAX_FIRST = [], []
IT_SYS_DESCRIPTION_FIRST = []

# Category listing URL and the page bookkeeping derived from it.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-it-network-sys-db-admin?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Pages [0, FIRST_HALF) are handled here; the SECOND HALF cell covers the rest.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        IT_SYS_TITLE_FIRST.append(title_tag.text.strip())
        IT_SYS_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        IT_SYS_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        IT_SYS_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        IT_SYS_STATUS_FIRST,
        IT_SYS_SALARY_FIRST,
        IT_SYS_SALARY_MIN_FIRST,
        IT_SYS_SALARY_MAX_FIRST,
        IT_SYS_DESCRIPTION_FIRST,
    )

In [None]:
# IT - Network/Sys/DB Admin (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS set by the FIRST HALF cell.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere).
IT_SYS_TITLE_SECOND, IT_SYS_COMPANY_SECOND = [], []
IT_SYS_DATE_SECOND, IT_SYS_LOCATION_SECOND = [], []
IT_SYS_STATUS_SECOND, IT_SYS_SALARY_SECOND = [], []
IT_SYS_SALARY_MIN_SECOND, IT_SYS_SALARY_MAX_SECOND = [], []
IT_SYS_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        IT_SYS_TITLE_SECOND.append(title_tag.text.strip())
        IT_SYS_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        IT_SYS_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        IT_SYS_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        IT_SYS_STATUS_SECOND,
        IT_SYS_SALARY_SECOND,
        IT_SYS_SALARY_MIN_SECOND,
        IT_SYS_SALARY_MAX_SECOND,
        IT_SYS_DESCRIPTION_SECOND,
    )

In [None]:
# IT - Network/Sys/DB Admin (JOBLUM) - merge the FIRST and SECOND half results,
# column by column, with first-half rows ahead of second-half rows.

IT_SYS_TITLE_LIST = np.concatenate([IT_SYS_TITLE_FIRST, IT_SYS_TITLE_SECOND])
IT_SYS_COMPANY_LIST = np.concatenate([IT_SYS_COMPANY_FIRST, IT_SYS_COMPANY_SECOND])
IT_SYS_DATE_LIST = np.concatenate([IT_SYS_DATE_FIRST, IT_SYS_DATE_SECOND])
IT_SYS_LOCATION_LIST = np.concatenate([IT_SYS_LOCATION_FIRST, IT_SYS_LOCATION_SECOND])
IT_SYS_STATUS_LIST = np.concatenate([IT_SYS_STATUS_FIRST, IT_SYS_STATUS_SECOND])
IT_SYS_SALARY_LIST = np.concatenate([IT_SYS_SALARY_FIRST, IT_SYS_SALARY_SECOND])
IT_SYS_SALARY_MIN_LIST = np.concatenate([IT_SYS_SALARY_MIN_FIRST, IT_SYS_SALARY_MIN_SECOND])
IT_SYS_SALARY_MAX_LIST = np.concatenate([IT_SYS_SALARY_MAX_FIRST, IT_SYS_SALARY_MAX_SECOND])
IT_SYS_DESCRIPTION_LIST = np.concatenate([IT_SYS_DESCRIPTION_FIRST, IT_SYS_DESCRIPTION_SECOND])

In [None]:
# IT - Network/Sys/DB Admin (JOBLUM) - assemble the joined columns into a DataFrame.
# Array-valued entries supply the rows; plain-string entries are constants
# that pandas repeats on every row.
IT_SYS = {
    "Website": "Joblum",
    "Job Title": IT_SYS_TITLE_LIST,
    "Category": "IT - Network/Sys/DB Admin",
    "Company": IT_SYS_COMPANY_LIST,
    "Date Posted": IT_SYS_DATE_LIST,
    "Location": IT_SYS_LOCATION_LIST,
    "Status": IT_SYS_STATUS_LIST,
    "Salary": IT_SYS_SALARY_LIST,
    "Education": "Not Specified / In Description",
    "Years of Work Experience": "Not Specified / In Description",
    "Job Description": IT_SYS_DESCRIPTION_LIST,
    "Min Salary": IT_SYS_SALARY_MIN_LIST,
    "Max Salary": IT_SYS_SALARY_MAX_LIST,
    "Min Years of Work Experience": "Not Specified",
    "Max Years of Work Experience": "Not Specified",
    "Field": "IT",
}
IT_SYS_df = pd.DataFrame(IT_SYS)
IT_SYS_df

In [None]:
# Raw string: '\J' is not a valid escape sequence, so the plain literal only
# worked by accident and warns on newer Python versions. The path is unchanged.
IT_SYS_df.to_csv(r'Joblum Data\JOBLUM-IT_SYS.csv', index=False)

### CATEGORY - IT - Software

In [None]:
# IT - Software (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere), which
# presumably fills them per job.
IT_SOFTWARE_TITLE_FIRST, IT_SOFTWARE_COMPANY_FIRST = [], []
IT_SOFTWARE_DATE_FIRST, IT_SOFTWARE_LOCATION_FIRST = [], []
IT_SOFTWARE_STATUS_FIRST, IT_SOFTWARE_SALARY_FIRST = [], []
IT_SOFTWARE_SALARY_MIN_FIRST, IT_SOFTWARE_SALARY_MAX_FIRST = [], []
IT_SOFTWARE_DESCRIPTION_FIRST = []

# Category listing URL and the page bookkeeping derived from it.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-it-software?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Pages [0, FIRST_HALF) are handled here; the SECOND HALF cell covers the rest.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        IT_SOFTWARE_TITLE_FIRST.append(title_tag.text.strip())
        IT_SOFTWARE_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        IT_SOFTWARE_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        IT_SOFTWARE_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        IT_SOFTWARE_STATUS_FIRST,
        IT_SOFTWARE_SALARY_FIRST,
        IT_SOFTWARE_SALARY_MIN_FIRST,
        IT_SOFTWARE_SALARY_MAX_FIRST,
        IT_SOFTWARE_DESCRIPTION_FIRST,
    )

In [None]:
# IT - Software (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS set by the FIRST HALF cell.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere).
IT_SOFTWARE_TITLE_SECOND, IT_SOFTWARE_COMPANY_SECOND = [], []
IT_SOFTWARE_DATE_SECOND, IT_SOFTWARE_LOCATION_SECOND = [], []
IT_SOFTWARE_STATUS_SECOND, IT_SOFTWARE_SALARY_SECOND = [], []
IT_SOFTWARE_SALARY_MIN_SECOND, IT_SOFTWARE_SALARY_MAX_SECOND = [], []
IT_SOFTWARE_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        IT_SOFTWARE_TITLE_SECOND.append(title_tag.text.strip())
        IT_SOFTWARE_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        IT_SOFTWARE_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        IT_SOFTWARE_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        IT_SOFTWARE_STATUS_SECOND,
        IT_SOFTWARE_SALARY_SECOND,
        IT_SOFTWARE_SALARY_MIN_SECOND,
        IT_SOFTWARE_SALARY_MAX_SECOND,
        IT_SOFTWARE_DESCRIPTION_SECOND,
    )

In [None]:
# IT - Software (JOBLUM) - merge the FIRST and SECOND half results,
# column by column, with first-half rows ahead of second-half rows.

IT_SOFTWARE_TITLE_LIST = np.concatenate([IT_SOFTWARE_TITLE_FIRST, IT_SOFTWARE_TITLE_SECOND])
IT_SOFTWARE_COMPANY_LIST = np.concatenate([IT_SOFTWARE_COMPANY_FIRST, IT_SOFTWARE_COMPANY_SECOND])
IT_SOFTWARE_DATE_LIST = np.concatenate([IT_SOFTWARE_DATE_FIRST, IT_SOFTWARE_DATE_SECOND])
IT_SOFTWARE_LOCATION_LIST = np.concatenate([IT_SOFTWARE_LOCATION_FIRST, IT_SOFTWARE_LOCATION_SECOND])
IT_SOFTWARE_STATUS_LIST = np.concatenate([IT_SOFTWARE_STATUS_FIRST, IT_SOFTWARE_STATUS_SECOND])
IT_SOFTWARE_SALARY_LIST = np.concatenate([IT_SOFTWARE_SALARY_FIRST, IT_SOFTWARE_SALARY_SECOND])
IT_SOFTWARE_SALARY_MIN_LIST = np.concatenate([IT_SOFTWARE_SALARY_MIN_FIRST, IT_SOFTWARE_SALARY_MIN_SECOND])
IT_SOFTWARE_SALARY_MAX_LIST = np.concatenate([IT_SOFTWARE_SALARY_MAX_FIRST, IT_SOFTWARE_SALARY_MAX_SECOND])
IT_SOFTWARE_DESCRIPTION_LIST = np.concatenate([IT_SOFTWARE_DESCRIPTION_FIRST, IT_SOFTWARE_DESCRIPTION_SECOND])

In [None]:
# IT - Software (JOBLUM) - assemble the joined columns into a DataFrame.
# Array-valued entries supply the rows; plain-string entries are constants
# that pandas repeats on every row.
IT_SOFTWARE = {
    "Website": "Joblum",
    "Job Title": IT_SOFTWARE_TITLE_LIST,
    "Category": "IT - Software",
    "Company": IT_SOFTWARE_COMPANY_LIST,
    "Date Posted": IT_SOFTWARE_DATE_LIST,
    "Location": IT_SOFTWARE_LOCATION_LIST,
    "Status": IT_SOFTWARE_STATUS_LIST,
    "Salary": IT_SOFTWARE_SALARY_LIST,
    "Education": "Not Specified / In Description",
    "Years of Work Experience": "Not Specified / In Description",
    "Job Description": IT_SOFTWARE_DESCRIPTION_LIST,
    "Min Salary": IT_SOFTWARE_SALARY_MIN_LIST,
    "Max Salary": IT_SOFTWARE_SALARY_MAX_LIST,
    "Min Years of Work Experience": "Not Specified",
    "Max Years of Work Experience": "Not Specified",
    "Field": "IT",
}
IT_SOFTWARE_df = pd.DataFrame(IT_SOFTWARE)
IT_SOFTWARE_df

In [None]:
# Raw string: '\J' is not a valid escape sequence, so the plain literal only
# worked by accident and warns on newer Python versions. The path is unchanged.
IT_SOFTWARE_df.to_csv(r'Joblum Data\JOBLUM-IT_SOFTWARE.csv', index=False)

### CATEGORY - Maintenance

In [None]:
# Maintenance (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere), which
# presumably fills them per job.
MAINTENANCE_TITLE_FIRST, MAINTENANCE_COMPANY_FIRST = [], []
MAINTENANCE_DATE_FIRST, MAINTENANCE_LOCATION_FIRST = [], []
MAINTENANCE_STATUS_FIRST, MAINTENANCE_SALARY_FIRST = [], []
MAINTENANCE_SALARY_MIN_FIRST, MAINTENANCE_SALARY_MAX_FIRST = [], []
MAINTENANCE_DESCRIPTION_FIRST = []

# Category listing URL and the page bookkeeping derived from it.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-maintenance?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Pages [0, FIRST_HALF) are handled here; the SECOND HALF cell covers the rest.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        MAINTENANCE_TITLE_FIRST.append(title_tag.text.strip())
        MAINTENANCE_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        MAINTENANCE_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        MAINTENANCE_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        MAINTENANCE_STATUS_FIRST,
        MAINTENANCE_SALARY_FIRST,
        MAINTENANCE_SALARY_MIN_FIRST,
        MAINTENANCE_SALARY_MAX_FIRST,
        MAINTENANCE_DESCRIPTION_FIRST,
    )

In [None]:
# Maintenance (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS set by the FIRST HALF cell.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere).
MAINTENANCE_TITLE_SECOND, MAINTENANCE_COMPANY_SECOND = [], []
MAINTENANCE_DATE_SECOND, MAINTENANCE_LOCATION_SECOND = [], []
MAINTENANCE_STATUS_SECOND, MAINTENANCE_SALARY_SECOND = [], []
MAINTENANCE_SALARY_MIN_SECOND, MAINTENANCE_SALARY_MAX_SECOND = [], []
MAINTENANCE_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        MAINTENANCE_TITLE_SECOND.append(title_tag.text.strip())
        MAINTENANCE_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        MAINTENANCE_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        MAINTENANCE_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        MAINTENANCE_STATUS_SECOND,
        MAINTENANCE_SALARY_SECOND,
        MAINTENANCE_SALARY_MIN_SECOND,
        MAINTENANCE_SALARY_MAX_SECOND,
        MAINTENANCE_DESCRIPTION_SECOND,
    )

In [None]:
# Maintenance (JOBLUM) - merge the FIRST and SECOND half results,
# column by column, with first-half rows ahead of second-half rows.

MAINTENANCE_TITLE_LIST = np.concatenate([MAINTENANCE_TITLE_FIRST, MAINTENANCE_TITLE_SECOND])
MAINTENANCE_COMPANY_LIST = np.concatenate([MAINTENANCE_COMPANY_FIRST, MAINTENANCE_COMPANY_SECOND])
MAINTENANCE_DATE_LIST = np.concatenate([MAINTENANCE_DATE_FIRST, MAINTENANCE_DATE_SECOND])
MAINTENANCE_LOCATION_LIST = np.concatenate([MAINTENANCE_LOCATION_FIRST, MAINTENANCE_LOCATION_SECOND])
MAINTENANCE_STATUS_LIST = np.concatenate([MAINTENANCE_STATUS_FIRST, MAINTENANCE_STATUS_SECOND])
MAINTENANCE_SALARY_LIST = np.concatenate([MAINTENANCE_SALARY_FIRST, MAINTENANCE_SALARY_SECOND])
MAINTENANCE_SALARY_MIN_LIST = np.concatenate([MAINTENANCE_SALARY_MIN_FIRST, MAINTENANCE_SALARY_MIN_SECOND])
MAINTENANCE_SALARY_MAX_LIST = np.concatenate([MAINTENANCE_SALARY_MAX_FIRST, MAINTENANCE_SALARY_MAX_SECOND])
MAINTENANCE_DESCRIPTION_LIST = np.concatenate([MAINTENANCE_DESCRIPTION_FIRST, MAINTENANCE_DESCRIPTION_SECOND])

In [None]:
# Maintenance (JOBLUM) - assemble the joined columns into a DataFrame.
# Array-valued entries supply the rows; plain-string entries are constants
# that pandas repeats on every row.
MAINTENANCE = {
    "Website": "Joblum",
    "Job Title": MAINTENANCE_TITLE_LIST,
    "Category": "Maintenance",
    "Company": MAINTENANCE_COMPANY_LIST,
    "Date Posted": MAINTENANCE_DATE_LIST,
    "Location": MAINTENANCE_LOCATION_LIST,
    "Status": MAINTENANCE_STATUS_LIST,
    "Salary": MAINTENANCE_SALARY_LIST,
    "Education": "Not Specified / In Description",
    "Years of Work Experience": "Not Specified / In Description",
    "Job Description": MAINTENANCE_DESCRIPTION_LIST,
    "Min Salary": MAINTENANCE_SALARY_MIN_LIST,
    "Max Salary": MAINTENANCE_SALARY_MAX_LIST,
    "Min Years of Work Experience": "Not Specified",
    "Max Years of Work Experience": "Not Specified",
    "Field": "Engineering",
}
MAINTENANCE_df = pd.DataFrame(MAINTENANCE)
MAINTENANCE_df

In [None]:
# Raw string: '\J' is not a valid escape sequence, so the plain literal only
# worked by accident and warns on newer Python versions. The path is unchanged.
MAINTENANCE_df.to_csv(r'Joblum Data\JOBLUM-MAINTENANCE.csv', index=False)

### CATEGORY - Mechanical

In [None]:
# Mechanical (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere), which
# presumably fills them per job.
MECH_TITLE_FIRST, MECH_COMPANY_FIRST = [], []
MECH_DATE_FIRST, MECH_LOCATION_FIRST = [], []
MECH_STATUS_FIRST, MECH_SALARY_FIRST = [], []
MECH_SALARY_MIN_FIRST, MECH_SALARY_MAX_FIRST = [], []
MECH_DESCRIPTION_FIRST = []

# Category listing URL and the page bookkeeping derived from it.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-mechanical?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Pages [0, FIRST_HALF) are handled here; the SECOND HALF cell covers the rest.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        MECH_TITLE_FIRST.append(title_tag.text.strip())
        MECH_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        MECH_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        MECH_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        MECH_STATUS_FIRST,
        MECH_SALARY_FIRST,
        MECH_SALARY_MIN_FIRST,
        MECH_SALARY_MAX_FIRST,
        MECH_DESCRIPTION_FIRST,
    )

In [None]:
# Mechanical (JOBLUM) - scrape the SECOND HALF of the listing pages.
# Relies on FIRST_HALF, NUM_PAGES and JOB_LINKS set by the FIRST HALF cell.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere).
MECH_TITLE_SECOND, MECH_COMPANY_SECOND = [], []
MECH_DATE_SECOND, MECH_LOCATION_SECOND = [], []
MECH_STATUS_SECOND, MECH_SALARY_SECOND = [], []
MECH_SALARY_MIN_SECOND, MECH_SALARY_MAX_SECOND = [], []
MECH_DESCRIPTION_SECOND = []

for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        MECH_TITLE_SECOND.append(title_tag.text.strip())
        MECH_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        MECH_DATE_SECOND.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        MECH_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        MECH_STATUS_SECOND,
        MECH_SALARY_SECOND,
        MECH_SALARY_MIN_SECOND,
        MECH_SALARY_MAX_SECOND,
        MECH_DESCRIPTION_SECOND,
    )

In [None]:
# Mechanical (JOBLUM) - merge the FIRST and SECOND half results,
# column by column, with first-half rows ahead of second-half rows.

MECH_TITLE_LIST = np.concatenate([MECH_TITLE_FIRST, MECH_TITLE_SECOND])
MECH_COMPANY_LIST = np.concatenate([MECH_COMPANY_FIRST, MECH_COMPANY_SECOND])
MECH_DATE_LIST = np.concatenate([MECH_DATE_FIRST, MECH_DATE_SECOND])
MECH_LOCATION_LIST = np.concatenate([MECH_LOCATION_FIRST, MECH_LOCATION_SECOND])
MECH_STATUS_LIST = np.concatenate([MECH_STATUS_FIRST, MECH_STATUS_SECOND])
MECH_SALARY_LIST = np.concatenate([MECH_SALARY_FIRST, MECH_SALARY_SECOND])
MECH_SALARY_MIN_LIST = np.concatenate([MECH_SALARY_MIN_FIRST, MECH_SALARY_MIN_SECOND])
MECH_SALARY_MAX_LIST = np.concatenate([MECH_SALARY_MAX_FIRST, MECH_SALARY_MAX_SECOND])
MECH_DESCRIPTION_LIST = np.concatenate([MECH_DESCRIPTION_FIRST, MECH_DESCRIPTION_SECOND])

In [None]:
# Mechanical (JOBLUM) - assemble the joined columns into a DataFrame.
# Array-valued entries supply the rows; plain-string entries are constants
# that pandas repeats on every row.
MECH = {
    "Website": "Joblum",
    "Job Title": MECH_TITLE_LIST,
    "Category": "Mechanical",
    "Company": MECH_COMPANY_LIST,
    "Date Posted": MECH_DATE_LIST,
    "Location": MECH_LOCATION_LIST,
    "Status": MECH_STATUS_LIST,
    "Salary": MECH_SALARY_LIST,
    "Education": "Not Specified / In Description",
    "Years of Work Experience": "Not Specified / In Description",
    "Job Description": MECH_DESCRIPTION_LIST,
    "Min Salary": MECH_SALARY_MIN_LIST,
    "Max Salary": MECH_SALARY_MAX_LIST,
    "Min Years of Work Experience": "Not Specified",
    "Max Years of Work Experience": "Not Specified",
    "Field": "Engineering",
}
MECH_df = pd.DataFrame(MECH)
MECH_df

In [None]:
# Raw string: '\J' is not a valid escape sequence, so the plain literal only
# worked by accident and warns on newer Python versions. The path is unchanged.
MECH_df.to_csv(r'Joblum Data\JOBLUM-MECH.csv', index=False)

### CATEGORY - Mechanical/Automotive Engineering

In [None]:
# Mechanical/Automotive Engineering (JOBLUM) - scrape the FIRST HALF of the listing pages.

# Accumulators: listing-page fields are appended in the loop below; the
# remaining lists are handed to scrapeJob (defined elsewhere), which
# presumably fills them per job.
MECHENG_TITLE_FIRST, MECHENG_COMPANY_FIRST = [], []
MECHENG_DATE_FIRST, MECHENG_LOCATION_FIRST = [], []
MECHENG_STATUS_FIRST, MECHENG_SALARY_FIRST = [], []
MECHENG_SALARY_MIN_FIRST, MECHENG_SALARY_MAX_FIRST = [], []
MECHENG_DESCRIPTION_FIRST = []

# Category listing URL and the page bookkeeping derived from it.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-mechanical-automotive-engineering?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Pages [0, FIRST_HALF) are handled here; the SECOND HALF cell covers the rest.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)

    # Listing-page tags, selected by their class attribute.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', {'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', {'class': 'location location-desktop'})

    # The four tag lists are read in lockstep, indexed by the title position.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        MECHENG_TITLE_FIRST.append(title_tag.text.strip())
        MECHENG_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        # '%B %d, %Y' = full month name, day, four-digit year; keep the date only.
        posted_text = JOB_DATE_ARRAY[j].text.strip()
        MECHENG_DATE_FIRST.append(datetime.strptime(posted_text, '%B %d, %Y').date())
        MECHENG_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())

    scrapeJob(
        URL,
        MECHENG_STATUS_FIRST,
        MECHENG_SALARY_FIRST,
        MECHENG_SALARY_MIN_FIRST,
        MECHENG_SALARY_MAX_FIRST,
        MECHENG_DESCRIPTION_FIRST,
    )

In [None]:
# Scrape Mechanical/Automotive Engineering postings (JOBLUM) - second half of the result pages.
# Relies on JOB_LINKS, FIRST_HALF and NUM_PAGES set by the FIRST HALF cell.

MECHENG_TITLE_SECOND, MECHENG_COMPANY_SECOND = [], []
MECHENG_DATE_SECOND, MECHENG_LOCATION_SECOND = [], []
MECHENG_STATUS_SECOND, MECHENG_SALARY_SECOND = [], []
MECHENG_SALARY_MIN_SECOND, MECHENG_SALARY_MAX_SECOND = [], []
MECHENG_DESCRIPTION_SECOND = []

# Continue where the first half stopped: indices [FIRST_HALF, NUM_PAGES).
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        MECHENG_TITLE_SECOND.append(title_tag.text.strip())
        MECHENG_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        MECHENG_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        MECHENG_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, MECHENG_STATUS_SECOND, MECHENG_SALARY_SECOND, MECHENG_SALARY_MIN_SECOND,
              MECHENG_SALARY_MAX_SECOND, MECHENG_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Mechanical/Automotive Engineering (JOBLUM).
# One array per field; first-half entries come before second-half entries.

MECHENG_TITLE_LIST = np.concatenate([MECHENG_TITLE_FIRST, MECHENG_TITLE_SECOND], axis=0)
MECHENG_COMPANY_LIST = np.concatenate([MECHENG_COMPANY_FIRST, MECHENG_COMPANY_SECOND], axis=0)
MECHENG_DATE_LIST = np.concatenate([MECHENG_DATE_FIRST, MECHENG_DATE_SECOND], axis=0)
MECHENG_LOCATION_LIST = np.concatenate([MECHENG_LOCATION_FIRST, MECHENG_LOCATION_SECOND], axis=0)
MECHENG_STATUS_LIST = np.concatenate([MECHENG_STATUS_FIRST, MECHENG_STATUS_SECOND], axis=0)
MECHENG_SALARY_LIST = np.concatenate([MECHENG_SALARY_FIRST, MECHENG_SALARY_SECOND], axis=0)
MECHENG_SALARY_MIN_LIST = np.concatenate([MECHENG_SALARY_MIN_FIRST, MECHENG_SALARY_MIN_SECOND], axis=0)
MECHENG_SALARY_MAX_LIST = np.concatenate([MECHENG_SALARY_MAX_FIRST, MECHENG_SALARY_MAX_SECOND], axis=0)
MECHENG_DESCRIPTION_LIST = np.concatenate([MECHENG_DESCRIPTION_FIRST, MECHENG_DESCRIPTION_SECOND], axis=0)

In [None]:
# Build the Mechanical/Automotive Engineering (JOBLUM) data frame.
# Key order fixes the column order; plain-string values are repeated on every row.
MECHENG = {
    'Website': 'Joblum',
    'Job Title': MECHENG_TITLE_LIST,
    'Category': 'Mechanical/Automotive Engineering',
    'Company': MECHENG_COMPANY_LIST,
    'Date Posted': MECHENG_DATE_LIST,
    'Location': MECHENG_LOCATION_LIST,
    'Status': MECHENG_STATUS_LIST,
    'Salary': MECHENG_SALARY_LIST,
    # Education and experience get fixed placeholder text instead of scraped values.
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': MECHENG_DESCRIPTION_LIST,
    'Min Salary': MECHENG_SALARY_MIN_LIST,
    'Max Salary': MECHENG_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Engineering',
}
MECHENG_df = pd.DataFrame(MECHENG)
MECHENG_df

In [None]:
# Save the MECHENG frame as CSV without the index column.
# Raw string: '\J' is not a valid escape sequence, so the non-raw literal triggers an
# invalid-escape warning; the resulting path text is unchanged.
# NOTE(review): the backslash separator is Windows-style - confirm the target platform.
MECHENG_df.to_csv(r'Joblum Data\JOBLUM-MECHENG.csv', index=False)

### CATEGORY - Nurse/Medical Support

In [None]:
# Scrape Nurse/Medical Support postings (JOBLUM) - first half of the result pages.

# One accumulator per scraped field; the SECOND HALF cell fills the *_SECOND twins.
NURSE_TITLE_FIRST, NURSE_COMPANY_FIRST = [], []
NURSE_DATE_FIRST, NURSE_LOCATION_FIRST = [], []
NURSE_STATUS_FIRST, NURSE_SALARY_FIRST = [], []
NURSE_SALARY_MIN_FIRST, NURSE_SALARY_MAX_FIRST = [], []
NURSE_DESCRIPTION_FIRST = []

# Category listing URL; presumably getLinks appends the page number after '?p=' (helper defined elsewhere).
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-nurse-medical-support?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Split point reused by the SECOND HALF cell: indices [0, FIRST_HALF) are handled here.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        NURSE_TITLE_FIRST.append(title_tag.text.strip())
        NURSE_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        NURSE_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        NURSE_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, NURSE_STATUS_FIRST, NURSE_SALARY_FIRST, NURSE_SALARY_MIN_FIRST,
              NURSE_SALARY_MAX_FIRST, NURSE_DESCRIPTION_FIRST)

In [None]:
# Scrape Nurse/Medical Support postings (JOBLUM) - second half of the result pages.
# Relies on JOB_LINKS, FIRST_HALF and NUM_PAGES set by the FIRST HALF cell.

NURSE_TITLE_SECOND, NURSE_COMPANY_SECOND = [], []
NURSE_DATE_SECOND, NURSE_LOCATION_SECOND = [], []
NURSE_STATUS_SECOND, NURSE_SALARY_SECOND = [], []
NURSE_SALARY_MIN_SECOND, NURSE_SALARY_MAX_SECOND = [], []
NURSE_DESCRIPTION_SECOND = []

# Continue where the first half stopped: indices [FIRST_HALF, NUM_PAGES).
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        NURSE_TITLE_SECOND.append(title_tag.text.strip())
        NURSE_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        NURSE_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        NURSE_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, NURSE_STATUS_SECOND, NURSE_SALARY_SECOND, NURSE_SALARY_MIN_SECOND,
              NURSE_SALARY_MAX_SECOND, NURSE_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Nurse/Medical Support (JOBLUM).
# One array per field; first-half entries come before second-half entries.

NURSE_TITLE_LIST = np.concatenate([NURSE_TITLE_FIRST, NURSE_TITLE_SECOND], axis=0)
NURSE_COMPANY_LIST = np.concatenate([NURSE_COMPANY_FIRST, NURSE_COMPANY_SECOND], axis=0)
NURSE_DATE_LIST = np.concatenate([NURSE_DATE_FIRST, NURSE_DATE_SECOND], axis=0)
NURSE_LOCATION_LIST = np.concatenate([NURSE_LOCATION_FIRST, NURSE_LOCATION_SECOND], axis=0)
NURSE_STATUS_LIST = np.concatenate([NURSE_STATUS_FIRST, NURSE_STATUS_SECOND], axis=0)
NURSE_SALARY_LIST = np.concatenate([NURSE_SALARY_FIRST, NURSE_SALARY_SECOND], axis=0)
NURSE_SALARY_MIN_LIST = np.concatenate([NURSE_SALARY_MIN_FIRST, NURSE_SALARY_MIN_SECOND], axis=0)
NURSE_SALARY_MAX_LIST = np.concatenate([NURSE_SALARY_MAX_FIRST, NURSE_SALARY_MAX_SECOND], axis=0)
NURSE_DESCRIPTION_LIST = np.concatenate([NURSE_DESCRIPTION_FIRST, NURSE_DESCRIPTION_SECOND], axis=0)

In [None]:
# Build the Nurse/Medical Support (JOBLUM) data frame.
# Key order fixes the column order; plain-string values are repeated on every row.
NURSE = {
    'Website': 'Joblum',
    'Job Title': NURSE_TITLE_LIST,
    'Category': 'Nurse/Medical Support',
    'Company': NURSE_COMPANY_LIST,
    'Date Posted': NURSE_DATE_LIST,
    'Location': NURSE_LOCATION_LIST,
    'Status': NURSE_STATUS_LIST,
    'Salary': NURSE_SALARY_LIST,
    # Education and experience get fixed placeholder text instead of scraped values.
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': NURSE_DESCRIPTION_LIST,
    'Min Salary': NURSE_SALARY_MIN_LIST,
    'Max Salary': NURSE_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Medicine',
}
NURSE_df = pd.DataFrame(NURSE)
NURSE_df

In [None]:
# Save the NURSE frame as CSV without the index column.
# Raw string: '\J' is not a valid escape sequence, so the non-raw literal triggers an
# invalid-escape warning; the resulting path text is unchanged.
# NOTE(review): the backslash separator is Windows-style - confirm the target platform.
NURSE_df.to_csv(r'Joblum Data\JOBLUM-NURSE.csv', index=False)

### CATEGORY - Oil/Gas

In [None]:
# Scrape Oil/Gas postings (JOBLUM) - first half of the result pages.

# One accumulator per scraped field; the SECOND HALF cell fills the *_SECOND twins.
OIL_TITLE_FIRST, OIL_COMPANY_FIRST = [], []
OIL_DATE_FIRST, OIL_LOCATION_FIRST = [], []
OIL_STATUS_FIRST, OIL_SALARY_FIRST = [], []
OIL_SALARY_MIN_FIRST, OIL_SALARY_MAX_FIRST = [], []
OIL_DESCRIPTION_FIRST = []

# Category listing URL; presumably getLinks appends the page number after '?p=' (helper defined elsewhere).
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-oil-gas?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Split point reused by the SECOND HALF cell: indices [0, FIRST_HALF) are handled here.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        OIL_TITLE_FIRST.append(title_tag.text.strip())
        OIL_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        OIL_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        OIL_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, OIL_STATUS_FIRST, OIL_SALARY_FIRST, OIL_SALARY_MIN_FIRST,
              OIL_SALARY_MAX_FIRST, OIL_DESCRIPTION_FIRST)

In [None]:
# Scrape Oil/Gas postings (JOBLUM) - second half of the result pages.
# Relies on JOB_LINKS, FIRST_HALF and NUM_PAGES set by the FIRST HALF cell.

OIL_TITLE_SECOND, OIL_COMPANY_SECOND = [], []
OIL_DATE_SECOND, OIL_LOCATION_SECOND = [], []
OIL_STATUS_SECOND, OIL_SALARY_SECOND = [], []
OIL_SALARY_MIN_SECOND, OIL_SALARY_MAX_SECOND = [], []
OIL_DESCRIPTION_SECOND = []

# Continue where the first half stopped: indices [FIRST_HALF, NUM_PAGES).
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        OIL_TITLE_SECOND.append(title_tag.text.strip())
        OIL_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        OIL_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        OIL_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, OIL_STATUS_SECOND, OIL_SALARY_SECOND, OIL_SALARY_MIN_SECOND,
              OIL_SALARY_MAX_SECOND, OIL_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Oil/Gas (JOBLUM).
# One array per field; first-half entries come before second-half entries.

OIL_TITLE_LIST = np.concatenate([OIL_TITLE_FIRST, OIL_TITLE_SECOND], axis=0)
OIL_COMPANY_LIST = np.concatenate([OIL_COMPANY_FIRST, OIL_COMPANY_SECOND], axis=0)
OIL_DATE_LIST = np.concatenate([OIL_DATE_FIRST, OIL_DATE_SECOND], axis=0)
OIL_LOCATION_LIST = np.concatenate([OIL_LOCATION_FIRST, OIL_LOCATION_SECOND], axis=0)
OIL_STATUS_LIST = np.concatenate([OIL_STATUS_FIRST, OIL_STATUS_SECOND], axis=0)
OIL_SALARY_LIST = np.concatenate([OIL_SALARY_FIRST, OIL_SALARY_SECOND], axis=0)
OIL_SALARY_MIN_LIST = np.concatenate([OIL_SALARY_MIN_FIRST, OIL_SALARY_MIN_SECOND], axis=0)
OIL_SALARY_MAX_LIST = np.concatenate([OIL_SALARY_MAX_FIRST, OIL_SALARY_MAX_SECOND], axis=0)
OIL_DESCRIPTION_LIST = np.concatenate([OIL_DESCRIPTION_FIRST, OIL_DESCRIPTION_SECOND], axis=0)

In [None]:
# Build the Oil/Gas (JOBLUM) data frame.
# Key order fixes the column order; plain-string values are repeated on every row.
OIL = {
    'Website': 'Joblum',
    'Job Title': OIL_TITLE_LIST,
    'Category': 'Oil/Gas',
    'Company': OIL_COMPANY_LIST,
    'Date Posted': OIL_DATE_LIST,
    'Location': OIL_LOCATION_LIST,
    'Status': OIL_STATUS_LIST,
    'Salary': OIL_SALARY_LIST,
    # Education and experience get fixed placeholder text instead of scraped values.
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': OIL_DESCRIPTION_LIST,
    'Min Salary': OIL_SALARY_MIN_LIST,
    'Max Salary': OIL_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Engineering',
}
OIL_df = pd.DataFrame(OIL)
OIL_df

In [None]:
# Save the OIL frame as CSV without the index column.
# Raw string: '\J' is not a valid escape sequence, so the non-raw literal triggers an
# invalid-escape warning; the resulting path text is unchanged.
# NOTE(review): the backslash separator is Windows-style - confirm the target platform.
OIL_df.to_csv(r'Joblum Data\JOBLUM-OIL.csv', index=False)

### CATEGORY - Oil/Gas Engineering

In [None]:
# Scrape Oil/Gas Engineering postings (JOBLUM) - first half of the result pages.

# One accumulator per scraped field; the SECOND HALF cell fills the *_SECOND twins.
OILENG_TITLE_FIRST, OILENG_COMPANY_FIRST = [], []
OILENG_DATE_FIRST, OILENG_LOCATION_FIRST = [], []
OILENG_STATUS_FIRST, OILENG_SALARY_FIRST = [], []
OILENG_SALARY_MIN_FIRST, OILENG_SALARY_MAX_FIRST = [], []
OILENG_DESCRIPTION_FIRST = []

# Category listing URL; presumably getLinks appends the page number after '?p=' (helper defined elsewhere).
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-oil-gas-engineering?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Split point reused by the SECOND HALF cell: indices [0, FIRST_HALF) are handled here.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        OILENG_TITLE_FIRST.append(title_tag.text.strip())
        OILENG_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        OILENG_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        OILENG_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, OILENG_STATUS_FIRST, OILENG_SALARY_FIRST, OILENG_SALARY_MIN_FIRST,
              OILENG_SALARY_MAX_FIRST, OILENG_DESCRIPTION_FIRST)

In [None]:
# Scrape Oil/Gas Engineering postings (JOBLUM) - second half of the result pages.
# Relies on JOB_LINKS, FIRST_HALF and NUM_PAGES set by the FIRST HALF cell.

OILENG_TITLE_SECOND, OILENG_COMPANY_SECOND = [], []
OILENG_DATE_SECOND, OILENG_LOCATION_SECOND = [], []
OILENG_STATUS_SECOND, OILENG_SALARY_SECOND = [], []
OILENG_SALARY_MIN_SECOND, OILENG_SALARY_MAX_SECOND = [], []
OILENG_DESCRIPTION_SECOND = []

# Continue where the first half stopped: indices [FIRST_HALF, NUM_PAGES).
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        OILENG_TITLE_SECOND.append(title_tag.text.strip())
        OILENG_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        OILENG_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        OILENG_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, OILENG_STATUS_SECOND, OILENG_SALARY_SECOND, OILENG_SALARY_MIN_SECOND,
              OILENG_SALARY_MAX_SECOND, OILENG_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Oil/Gas Engineering (JOBLUM).
# One array per field; first-half entries come before second-half entries.

OILENG_TITLE_LIST = np.concatenate([OILENG_TITLE_FIRST, OILENG_TITLE_SECOND], axis=0)
OILENG_COMPANY_LIST = np.concatenate([OILENG_COMPANY_FIRST, OILENG_COMPANY_SECOND], axis=0)
OILENG_DATE_LIST = np.concatenate([OILENG_DATE_FIRST, OILENG_DATE_SECOND], axis=0)
OILENG_LOCATION_LIST = np.concatenate([OILENG_LOCATION_FIRST, OILENG_LOCATION_SECOND], axis=0)
OILENG_STATUS_LIST = np.concatenate([OILENG_STATUS_FIRST, OILENG_STATUS_SECOND], axis=0)
OILENG_SALARY_LIST = np.concatenate([OILENG_SALARY_FIRST, OILENG_SALARY_SECOND], axis=0)
OILENG_SALARY_MIN_LIST = np.concatenate([OILENG_SALARY_MIN_FIRST, OILENG_SALARY_MIN_SECOND], axis=0)
OILENG_SALARY_MAX_LIST = np.concatenate([OILENG_SALARY_MAX_FIRST, OILENG_SALARY_MAX_SECOND], axis=0)
OILENG_DESCRIPTION_LIST = np.concatenate([OILENG_DESCRIPTION_FIRST, OILENG_DESCRIPTION_SECOND], axis=0)

In [None]:
# Build the Oil/Gas Engineering (JOBLUM) data frame.
# Key order fixes the column order; plain-string values are repeated on every row.
OILENG = {
    'Website': 'Joblum',
    'Job Title': OILENG_TITLE_LIST,
    'Category': 'Oil/Gas Engineering',
    'Company': OILENG_COMPANY_LIST,
    'Date Posted': OILENG_DATE_LIST,
    'Location': OILENG_LOCATION_LIST,
    'Status': OILENG_STATUS_LIST,
    'Salary': OILENG_SALARY_LIST,
    # Education and experience get fixed placeholder text instead of scraped values.
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': OILENG_DESCRIPTION_LIST,
    'Min Salary': OILENG_SALARY_MIN_LIST,
    'Max Salary': OILENG_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Engineering',
}
OILENG_df = pd.DataFrame(OILENG)
OILENG_df

In [None]:
# Save the OILENG frame as CSV without the index column.
# Raw string: '\J' is not a valid escape sequence, so the non-raw literal triggers an
# invalid-escape warning; the resulting path text is unchanged.
# NOTE(review): the backslash separator is Windows-style - confirm the target platform.
OILENG_df.to_csv(r'Joblum Data\JOBLUM-OILENG.csv', index=False)

### CATEGORY - Other Engineering

In [None]:
# Scrape Other Engineering postings (JOBLUM) - first half of the result pages.

# One accumulator per scraped field; the SECOND HALF cell fills the *_SECOND twins.
ENG_TITLE_FIRST, ENG_COMPANY_FIRST = [], []
ENG_DATE_FIRST, ENG_LOCATION_FIRST = [], []
ENG_STATUS_FIRST, ENG_SALARY_FIRST = [], []
ENG_SALARY_MIN_FIRST, ENG_SALARY_MAX_FIRST = [], []
ENG_DESCRIPTION_FIRST = []

# Category listing URL; presumably getLinks appends the page number after '?p=' (helper defined elsewhere).
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-other-engineering?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Split point reused by the SECOND HALF cell: indices [0, FIRST_HALF) are handled here.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ENG_TITLE_FIRST.append(title_tag.text.strip())
        ENG_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        ENG_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ENG_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, ENG_STATUS_FIRST, ENG_SALARY_FIRST, ENG_SALARY_MIN_FIRST,
              ENG_SALARY_MAX_FIRST, ENG_DESCRIPTION_FIRST)

In [None]:
# Scrape Other Engineering postings (JOBLUM) - second half of the result pages.
# Relies on JOB_LINKS, FIRST_HALF and NUM_PAGES set by the FIRST HALF cell.

ENG_TITLE_SECOND, ENG_COMPANY_SECOND = [], []
ENG_DATE_SECOND, ENG_LOCATION_SECOND = [], []
ENG_STATUS_SECOND, ENG_SALARY_SECOND = [], []
ENG_SALARY_MIN_SECOND, ENG_SALARY_MAX_SECOND = [], []
ENG_DESCRIPTION_SECOND = []

# Continue where the first half stopped: indices [FIRST_HALF, NUM_PAGES).
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        ENG_TITLE_SECOND.append(title_tag.text.strip())
        ENG_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        ENG_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        ENG_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, ENG_STATUS_SECOND, ENG_SALARY_SECOND, ENG_SALARY_MIN_SECOND,
              ENG_SALARY_MAX_SECOND, ENG_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Other Engineering (JOBLUM).
# One array per field; first-half entries come before second-half entries.

ENG_TITLE_LIST = np.concatenate([ENG_TITLE_FIRST, ENG_TITLE_SECOND], axis=0)
ENG_COMPANY_LIST = np.concatenate([ENG_COMPANY_FIRST, ENG_COMPANY_SECOND], axis=0)
ENG_DATE_LIST = np.concatenate([ENG_DATE_FIRST, ENG_DATE_SECOND], axis=0)
ENG_LOCATION_LIST = np.concatenate([ENG_LOCATION_FIRST, ENG_LOCATION_SECOND], axis=0)
ENG_STATUS_LIST = np.concatenate([ENG_STATUS_FIRST, ENG_STATUS_SECOND], axis=0)
ENG_SALARY_LIST = np.concatenate([ENG_SALARY_FIRST, ENG_SALARY_SECOND], axis=0)
ENG_SALARY_MIN_LIST = np.concatenate([ENG_SALARY_MIN_FIRST, ENG_SALARY_MIN_SECOND], axis=0)
ENG_SALARY_MAX_LIST = np.concatenate([ENG_SALARY_MAX_FIRST, ENG_SALARY_MAX_SECOND], axis=0)
ENG_DESCRIPTION_LIST = np.concatenate([ENG_DESCRIPTION_FIRST, ENG_DESCRIPTION_SECOND], axis=0)

In [None]:
# Build the Other Engineering (JOBLUM) data frame.
# Key order fixes the column order; plain-string values are repeated on every row.
ENG = {
    'Website': 'Joblum',
    'Job Title': ENG_TITLE_LIST,
    'Category': 'Other Engineering',
    'Company': ENG_COMPANY_LIST,
    'Date Posted': ENG_DATE_LIST,
    'Location': ENG_LOCATION_LIST,
    'Status': ENG_STATUS_LIST,
    'Salary': ENG_SALARY_LIST,
    # Education and experience get fixed placeholder text instead of scraped values.
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': ENG_DESCRIPTION_LIST,
    'Min Salary': ENG_SALARY_MIN_LIST,
    'Max Salary': ENG_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Engineering',
}
ENG_df = pd.DataFrame(ENG)
ENG_df

In [None]:
# Save the ENG frame as CSV without the index column.
# Raw string: '\J' is not a valid escape sequence, so the non-raw literal triggers an
# invalid-escape warning; the resulting path text is unchanged.
# NOTE(review): the backslash separator is Windows-style - confirm the target platform.
ENG_df.to_csv(r'Joblum Data\JOBLUM-ENG.csv', index=False)

### CATEGORY - Pharmacy

In [None]:
# Scrape Pharmacy postings (JOBLUM) - first half of the result pages.

# One accumulator per scraped field; the SECOND HALF cell fills the *_SECOND twins.
PHARMA_TITLE_FIRST, PHARMA_COMPANY_FIRST = [], []
PHARMA_DATE_FIRST, PHARMA_LOCATION_FIRST = [], []
PHARMA_STATUS_FIRST, PHARMA_SALARY_FIRST = [], []
PHARMA_SALARY_MIN_FIRST, PHARMA_SALARY_MAX_FIRST = [], []
PHARMA_DESCRIPTION_FIRST = []

# Category listing URL; presumably getLinks appends the page number after '?p=' (helper defined elsewhere).
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-pharmacy?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Split point reused by the SECOND HALF cell: indices [0, FIRST_HALF) are handled here.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        PHARMA_TITLE_FIRST.append(title_tag.text.strip())
        PHARMA_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        PHARMA_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        PHARMA_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, PHARMA_STATUS_FIRST, PHARMA_SALARY_FIRST, PHARMA_SALARY_MIN_FIRST,
              PHARMA_SALARY_MAX_FIRST, PHARMA_DESCRIPTION_FIRST)

In [None]:
# Scrape Pharmacy postings (JOBLUM) - second half of the result pages.
# Relies on JOB_LINKS, FIRST_HALF and NUM_PAGES set by the FIRST HALF cell.

PHARMA_TITLE_SECOND, PHARMA_COMPANY_SECOND = [], []
PHARMA_DATE_SECOND, PHARMA_LOCATION_SECOND = [], []
PHARMA_STATUS_SECOND, PHARMA_SALARY_SECOND = [], []
PHARMA_SALARY_MIN_SECOND, PHARMA_SALARY_MAX_SECOND = [], []
PHARMA_DESCRIPTION_SECOND = []

# Continue where the first half stopped: indices [FIRST_HALF, NUM_PAGES).
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        PHARMA_TITLE_SECOND.append(title_tag.text.strip())
        PHARMA_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        PHARMA_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        PHARMA_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, PHARMA_STATUS_SECOND, PHARMA_SALARY_SECOND, PHARMA_SALARY_MIN_SECOND,
              PHARMA_SALARY_MAX_SECOND, PHARMA_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Pharmacy (JOBLUM).
# One array per field; first-half entries come before second-half entries.

PHARMA_TITLE_LIST = np.concatenate([PHARMA_TITLE_FIRST, PHARMA_TITLE_SECOND], axis=0)
PHARMA_COMPANY_LIST = np.concatenate([PHARMA_COMPANY_FIRST, PHARMA_COMPANY_SECOND], axis=0)
PHARMA_DATE_LIST = np.concatenate([PHARMA_DATE_FIRST, PHARMA_DATE_SECOND], axis=0)
PHARMA_LOCATION_LIST = np.concatenate([PHARMA_LOCATION_FIRST, PHARMA_LOCATION_SECOND], axis=0)
PHARMA_STATUS_LIST = np.concatenate([PHARMA_STATUS_FIRST, PHARMA_STATUS_SECOND], axis=0)
PHARMA_SALARY_LIST = np.concatenate([PHARMA_SALARY_FIRST, PHARMA_SALARY_SECOND], axis=0)
PHARMA_SALARY_MIN_LIST = np.concatenate([PHARMA_SALARY_MIN_FIRST, PHARMA_SALARY_MIN_SECOND], axis=0)
PHARMA_SALARY_MAX_LIST = np.concatenate([PHARMA_SALARY_MAX_FIRST, PHARMA_SALARY_MAX_SECOND], axis=0)
PHARMA_DESCRIPTION_LIST = np.concatenate([PHARMA_DESCRIPTION_FIRST, PHARMA_DESCRIPTION_SECOND], axis=0)

In [None]:
# Build the Pharmacy (JOBLUM) data frame.
# Key order fixes the column order; plain-string values are repeated on every row.
PHARMA = {
    'Website': 'Joblum',
    'Job Title': PHARMA_TITLE_LIST,
    'Category': 'Pharmacy',
    'Company': PHARMA_COMPANY_LIST,
    'Date Posted': PHARMA_DATE_LIST,
    'Location': PHARMA_LOCATION_LIST,
    'Status': PHARMA_STATUS_LIST,
    'Salary': PHARMA_SALARY_LIST,
    # Education and experience get fixed placeholder text instead of scraped values.
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': PHARMA_DESCRIPTION_LIST,
    'Min Salary': PHARMA_SALARY_MIN_LIST,
    'Max Salary': PHARMA_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Medicine',
}
PHARMA_df = pd.DataFrame(PHARMA)
PHARMA_df

In [None]:
# Save the PHARMA frame as CSV without the index column.
# Raw string: '\J' is not a valid escape sequence, so the non-raw literal triggers an
# invalid-escape warning; the resulting path text is unchanged.
# NOTE(review): the backslash separator is Windows-style - confirm the target platform.
PHARMA_df.to_csv(r'Joblum Data\JOBLUM-PHARMA.csv', index=False)

### CATEGORY - Quality Assurance

In [None]:
# Scrape Quality Assurance postings (JOBLUM) - first half of the result pages.

# One accumulator per scraped field; the SECOND HALF cell fills the *_SECOND twins.
QUALITY_TITLE_FIRST, QUALITY_COMPANY_FIRST = [], []
QUALITY_DATE_FIRST, QUALITY_LOCATION_FIRST = [], []
QUALITY_STATUS_FIRST, QUALITY_SALARY_FIRST = [], []
QUALITY_SALARY_MIN_FIRST, QUALITY_SALARY_MAX_FIRST = [], []
QUALITY_DESCRIPTION_FIRST = []

# Category listing URL; presumably getLinks appends the page number after '?p=' (helper defined elsewhere).
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-quality-assurance?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Split point reused by the SECOND HALF cell: indices [0, FIRST_HALF) are handled here.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        QUALITY_TITLE_FIRST.append(title_tag.text.strip())
        QUALITY_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        QUALITY_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        QUALITY_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, QUALITY_STATUS_FIRST, QUALITY_SALARY_FIRST, QUALITY_SALARY_MIN_FIRST,
              QUALITY_SALARY_MAX_FIRST, QUALITY_DESCRIPTION_FIRST)

In [None]:
# Scrape Quality Assurance postings (JOBLUM) - second half of the result pages.
# Relies on JOB_LINKS, FIRST_HALF and NUM_PAGES set by the FIRST HALF cell.

QUALITY_TITLE_SECOND, QUALITY_COMPANY_SECOND = [], []
QUALITY_DATE_SECOND, QUALITY_LOCATION_SECOND = [], []
QUALITY_STATUS_SECOND, QUALITY_SALARY_SECOND = [], []
QUALITY_SALARY_MIN_SECOND, QUALITY_SALARY_MAX_SECOND = [], []
QUALITY_DESCRIPTION_SECOND = []

# Continue where the first half stopped: indices [FIRST_HALF, NUM_PAGES).
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        QUALITY_TITLE_SECOND.append(title_tag.text.strip())
        QUALITY_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        QUALITY_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        QUALITY_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, QUALITY_STATUS_SECOND, QUALITY_SALARY_SECOND, QUALITY_SALARY_MIN_SECOND,
              QUALITY_SALARY_MAX_SECOND, QUALITY_DESCRIPTION_SECOND)

In [None]:
# Join the FIRST and SECOND half results for Quality Assurance (JOBLUM).
# One array per field; first-half entries come before second-half entries.

QUALITY_TITLE_LIST = np.concatenate([QUALITY_TITLE_FIRST, QUALITY_TITLE_SECOND], axis=0)
QUALITY_COMPANY_LIST = np.concatenate([QUALITY_COMPANY_FIRST, QUALITY_COMPANY_SECOND], axis=0)
QUALITY_DATE_LIST = np.concatenate([QUALITY_DATE_FIRST, QUALITY_DATE_SECOND], axis=0)
QUALITY_LOCATION_LIST = np.concatenate([QUALITY_LOCATION_FIRST, QUALITY_LOCATION_SECOND], axis=0)
QUALITY_STATUS_LIST = np.concatenate([QUALITY_STATUS_FIRST, QUALITY_STATUS_SECOND], axis=0)
QUALITY_SALARY_LIST = np.concatenate([QUALITY_SALARY_FIRST, QUALITY_SALARY_SECOND], axis=0)
QUALITY_SALARY_MIN_LIST = np.concatenate([QUALITY_SALARY_MIN_FIRST, QUALITY_SALARY_MIN_SECOND], axis=0)
QUALITY_SALARY_MAX_LIST = np.concatenate([QUALITY_SALARY_MAX_FIRST, QUALITY_SALARY_MAX_SECOND], axis=0)
QUALITY_DESCRIPTION_LIST = np.concatenate([QUALITY_DESCRIPTION_FIRST, QUALITY_DESCRIPTION_SECOND], axis=0)

In [None]:
# Build the Quality Assurance (JOBLUM) data frame.
# Key order fixes the column order; plain-string values are repeated on every row.
QUALITY = {
    'Website': 'Joblum',
    'Job Title': QUALITY_TITLE_LIST,
    'Category': 'Quality Assurance',
    'Company': QUALITY_COMPANY_LIST,
    'Date Posted': QUALITY_DATE_LIST,
    'Location': QUALITY_LOCATION_LIST,
    'Status': QUALITY_STATUS_LIST,
    'Salary': QUALITY_SALARY_LIST,
    # Education and experience get fixed placeholder text instead of scraped values.
    'Education': 'Not Specified / In Description',
    'Years of Work Experience': 'Not Specified / In Description',
    'Job Description': QUALITY_DESCRIPTION_LIST,
    'Min Salary': QUALITY_SALARY_MIN_LIST,
    'Max Salary': QUALITY_SALARY_MAX_LIST,
    'Min Years of Work Experience': 'Not Specified',
    'Max Years of Work Experience': 'Not Specified',
    'Field': 'Engineering',
}
QUALITY_df = pd.DataFrame(QUALITY)
QUALITY_df

In [None]:
# Save the QUALITY frame as CSV without the index column.
# Raw string: '\J' is not a valid escape sequence, so the non-raw literal triggers an
# invalid-escape warning; the resulting path text is unchanged.
# NOTE(review): the backslash separator is Windows-style - confirm the target platform.
QUALITY_df.to_csv(r'Joblum Data\JOBLUM-QUALITY.csv', index=False)

### CATEGORY - Quantity Surveying

In [None]:
# Scrape Quantity Surveying postings (JOBLUM) - first half of the result pages.

# One accumulator per scraped field; the SECOND HALF cell fills the *_SECOND twins.
QUANTITY_TITLE_FIRST, QUANTITY_COMPANY_FIRST = [], []
QUANTITY_DATE_FIRST, QUANTITY_LOCATION_FIRST = [], []
QUANTITY_STATUS_FIRST, QUANTITY_SALARY_FIRST = [], []
QUANTITY_SALARY_MIN_FIRST, QUANTITY_SALARY_MAX_FIRST = [], []
QUANTITY_DESCRIPTION_FIRST = []

# Category listing URL; presumably getLinks appends the page number after '?p=' (helper defined elsewhere).
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-quantity-surveying?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# Split point reused by the SECOND HALF cell: indices [0, FIRST_HALF) are handled here.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for i in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        QUANTITY_TITLE_FIRST.append(title_tag.text.strip())
        QUANTITY_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        QUANTITY_DATE_FIRST.append(datetime.strptime(date_text, '%B %d, %Y').date())
        QUANTITY_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, QUANTITY_STATUS_FIRST, QUANTITY_SALARY_FIRST, QUANTITY_SALARY_MIN_FIRST,
              QUANTITY_SALARY_MAX_FIRST, QUANTITY_DESCRIPTION_FIRST)

In [None]:
# Scrape Quantity Surveying postings (JOBLUM) - second half of the result pages.
# Relies on JOB_LINKS, FIRST_HALF and NUM_PAGES set by the FIRST HALF cell.

QUANTITY_TITLE_SECOND, QUANTITY_COMPANY_SECOND = [], []
QUANTITY_DATE_SECOND, QUANTITY_LOCATION_SECOND = [], []
QUANTITY_STATUS_SECOND, QUANTITY_SALARY_SECOND = [], []
QUANTITY_SALARY_MIN_SECOND, QUANTITY_SALARY_MAX_SECOND = [], []
QUANTITY_DESCRIPTION_SECOND = []

# Continue where the first half stopped: indices [FIRST_HALF, NUM_PAGES).
for i in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[i])
    URL = getJobURL(JOBLUM_SOUP)
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})
    # The four tag lists are read in lockstep: position j refers to the same listing in each.
    for j, title_tag in enumerate(JOB_TITLE_ARRAY):
        QUANTITY_TITLE_SECOND.append(title_tag.text.strip())
        QUANTITY_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[j].text.strip())
        date_text = JOB_DATE_ARRAY[j].text.strip()
        # Parsed as "<month name> <day>, <year>"; only the calendar date is stored.
        QUANTITY_DATE_SECOND.append(datetime.strptime(date_text, '%B %d, %Y').date())
        QUANTITY_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[j].text.strip())
    # scrapeJob is handed the detail accumulators; presumably it appends per-job values to them (confirm in helper).
    scrapeJob(URL, QUANTITY_STATUS_SECOND, QUANTITY_SALARY_SECOND, QUANTITY_SALARY_MIN_SECOND,
              QUANTITY_SALARY_MAX_SECOND, QUANTITY_DESCRIPTION_SECOND)

In [None]:
# Quantity Surveying (JOBLUM): stitch the FIRST and SECOND half of every
# column back together, first half first.

# Columns read straight from the listing pages.
QUANTITY_TITLE_LIST = np.concatenate([QUANTITY_TITLE_FIRST, QUANTITY_TITLE_SECOND])
QUANTITY_COMPANY_LIST = np.concatenate([QUANTITY_COMPANY_FIRST, QUANTITY_COMPANY_SECOND])
QUANTITY_DATE_LIST = np.concatenate([QUANTITY_DATE_FIRST, QUANTITY_DATE_SECOND])
QUANTITY_LOCATION_LIST = np.concatenate([QUANTITY_LOCATION_FIRST, QUANTITY_LOCATION_SECOND])

# Columns that were handed to scrapeJob.
QUANTITY_STATUS_LIST = np.concatenate([QUANTITY_STATUS_FIRST, QUANTITY_STATUS_SECOND])
QUANTITY_SALARY_LIST = np.concatenate([QUANTITY_SALARY_FIRST, QUANTITY_SALARY_SECOND])
QUANTITY_SALARY_MIN_LIST = np.concatenate([QUANTITY_SALARY_MIN_FIRST, QUANTITY_SALARY_MIN_SECOND])
QUANTITY_SALARY_MAX_LIST = np.concatenate([QUANTITY_SALARY_MAX_FIRST, QUANTITY_SALARY_MAX_SECOND])
QUANTITY_DESCRIPTION_LIST = np.concatenate([QUANTITY_DESCRIPTION_FIRST, QUANTITY_DESCRIPTION_SECOND])

In [None]:
# Quantity Surveying (JOBLUM): assemble the final table.
# Plain string values are constants for the whole category; the *_LIST arrays
# carry one value per scraped listing.
QUANTITY = {
    "Website": "Joblum",
    "Job Title": QUANTITY_TITLE_LIST,
    "Category": "Quantity Surveying",
    "Company": QUANTITY_COMPANY_LIST,
    "Date Posted": QUANTITY_DATE_LIST,
    "Location": QUANTITY_LOCATION_LIST,
    "Status": QUANTITY_STATUS_LIST,
    "Salary": QUANTITY_SALARY_LIST,
    "Education": "Not Specified / In Description",
    "Years of Work Experience": "Not Specified / In Description",
    "Job Description": QUANTITY_DESCRIPTION_LIST,
    "Min Salary": QUANTITY_SALARY_MIN_LIST,
    "Max Salary": QUANTITY_SALARY_MAX_LIST,
    "Min Years of Work Experience": "Not Specified",
    "Max Years of Work Experience": "Not Specified",
    "Field": "Engineering",
}
QUANTITY_df = pd.DataFrame(QUANTITY)
# Trailing expression so the notebook renders the frame.
QUANTITY_df

In [None]:
# Save the Quantity Surveying table as CSV, leaving out the DataFrame index.
# The path is a raw string: in a normal literal "\J" is an invalid escape
# sequence, which newer Python versions warn about (SyntaxWarning since 3.12).
# The resulting path text is unchanged.
# NOTE(review): the backslash is only a directory separator on Windows - confirm
# the notebook is not expected to run elsewhere.
QUANTITY_df.to_csv(r'Joblum Data\JOBLUM-QUANTITY.csv', index=False)

### CATEGORY - Science & Technology

In [None]:
# Science & Technology (JOBLUM) - FIRST HALF of the result pages.

# One accumulator per output column.
SNT_TITLE_FIRST, SNT_COMPANY_FIRST = [], []
SNT_DATE_FIRST, SNT_LOCATION_FIRST = [], []
SNT_STATUS_FIRST, SNT_SALARY_FIRST = [], []
SNT_SALARY_MIN_FIRST, SNT_SALARY_MAX_FIRST = [], []
SNT_DESCRIPTION_FIRST = []

# Paging metadata for the category. NUM_PAGES, JOB_LINKS and FIRST_HALF are
# read again by the SECOND HALF cell.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-science-amp-technology?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# With an odd page count the extra page goes to the first half.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for page_no in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[page_no])
    URL = getJobURL(JOBLUM_SOUP)

    # Parallel tag lists: entry k of each list is read as belonging to listing k.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})

    for row, title_tag in enumerate(JOB_TITLE_ARRAY):
        SNT_TITLE_FIRST.append(title_tag.text.strip())
        SNT_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[row].text.strip())
        # Dates are parsed from text such as "January 5, 2022" and stored as date objects.
        posted_on = datetime.strptime(JOB_DATE_ARRAY[row].text.strip(), '%B %d, %Y')
        SNT_DATE_FIRST.append(posted_on.date())
        SNT_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[row].text.strip())

    # scrapeJob receives the remaining five lists
    # (presumably it fills them per job URL - confirm in its definition).
    scrapeJob(
        URL,
        SNT_STATUS_FIRST,
        SNT_SALARY_FIRST,
        SNT_SALARY_MIN_FIRST,
        SNT_SALARY_MAX_FIRST,
        SNT_DESCRIPTION_FIRST,
    )

In [None]:
# Science & Technology (JOBLUM) - SECOND HALF of the result pages.
# This cell reads FIRST_HALF, NUM_PAGES and JOB_LINKS, which it does not define
# itself, so the FIRST HALF cell has to be run beforehand.

# One accumulator per output column.
SNT_TITLE_SECOND, SNT_COMPANY_SECOND = [], []
SNT_DATE_SECOND, SNT_LOCATION_SECOND = [], []
SNT_STATUS_SECOND, SNT_SALARY_SECOND = [], []
SNT_SALARY_MIN_SECOND, SNT_SALARY_MAX_SECOND = [], []
SNT_DESCRIPTION_SECOND = []

for page_no in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[page_no])
    URL = getJobURL(JOBLUM_SOUP)

    # Parallel tag lists: entry k of each list is read as belonging to listing k.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})

    for row, title_tag in enumerate(JOB_TITLE_ARRAY):
        SNT_TITLE_SECOND.append(title_tag.text.strip())
        SNT_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[row].text.strip())
        # Dates are parsed from text such as "January 5, 2022" and stored as date objects.
        posted_on = datetime.strptime(JOB_DATE_ARRAY[row].text.strip(), '%B %d, %Y')
        SNT_DATE_SECOND.append(posted_on.date())
        SNT_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[row].text.strip())

    # scrapeJob receives the remaining five lists
    # (presumably it fills them per job URL - confirm in its definition).
    scrapeJob(
        URL,
        SNT_STATUS_SECOND,
        SNT_SALARY_SECOND,
        SNT_SALARY_MIN_SECOND,
        SNT_SALARY_MAX_SECOND,
        SNT_DESCRIPTION_SECOND,
    )

In [None]:
# Science & Technology (JOBLUM): stitch the FIRST and SECOND half of every
# column back together, first half first.

# Columns read straight from the listing pages.
SNT_TITLE_LIST = np.concatenate([SNT_TITLE_FIRST, SNT_TITLE_SECOND])
SNT_COMPANY_LIST = np.concatenate([SNT_COMPANY_FIRST, SNT_COMPANY_SECOND])
SNT_DATE_LIST = np.concatenate([SNT_DATE_FIRST, SNT_DATE_SECOND])
SNT_LOCATION_LIST = np.concatenate([SNT_LOCATION_FIRST, SNT_LOCATION_SECOND])

# Columns that were handed to scrapeJob.
SNT_STATUS_LIST = np.concatenate([SNT_STATUS_FIRST, SNT_STATUS_SECOND])
SNT_SALARY_LIST = np.concatenate([SNT_SALARY_FIRST, SNT_SALARY_SECOND])
SNT_SALARY_MIN_LIST = np.concatenate([SNT_SALARY_MIN_FIRST, SNT_SALARY_MIN_SECOND])
SNT_SALARY_MAX_LIST = np.concatenate([SNT_SALARY_MAX_FIRST, SNT_SALARY_MAX_SECOND])
SNT_DESCRIPTION_LIST = np.concatenate([SNT_DESCRIPTION_FIRST, SNT_DESCRIPTION_SECOND])

In [None]:
# Science & Technology (JOBLUM): assemble the final table.
# Plain string values are constants for the whole category; the *_LIST arrays
# carry one value per scraped listing.
SNT = {
    "Website": "Joblum",
    "Job Title": SNT_TITLE_LIST,
    "Category": "Science & Technology",
    "Company": SNT_COMPANY_LIST,
    "Date Posted": SNT_DATE_LIST,
    "Location": SNT_LOCATION_LIST,
    "Status": SNT_STATUS_LIST,
    "Salary": SNT_SALARY_LIST,
    "Education": "Not Specified / In Description",
    "Years of Work Experience": "Not Specified / In Description",
    "Job Description": SNT_DESCRIPTION_LIST,
    "Min Salary": SNT_SALARY_MIN_LIST,
    "Max Salary": SNT_SALARY_MAX_LIST,
    "Min Years of Work Experience": "Not Specified",
    "Max Years of Work Experience": "Not Specified",
    "Field": "Science",
}
SNT_df = pd.DataFrame(SNT)
# Trailing expression so the notebook renders the frame.
SNT_df

In [None]:
# Save the Science & Technology table as CSV, leaving out the DataFrame index.
# The path is a raw string: in a normal literal "\J" is an invalid escape
# sequence, which newer Python versions warn about (SyntaxWarning since 3.12).
# The resulting path text is unchanged.
# NOTE(review): the backslash is only a directory separator on Windows - confirm
# the notebook is not expected to run elsewhere.
SNT_df.to_csv(r'Joblum Data\JOBLUM-SNT.csv', index=False)

### CATEGORY - Practitioner/Medical Asst Jobs

In [None]:
# Practitioner/Medical Asst Jobs (JOBLUM) - FIRST HALF of the result pages.

# One accumulator per output column.
PRAC_TITLE_FIRST, PRAC_COMPANY_FIRST = [], []
PRAC_DATE_FIRST, PRAC_LOCATION_FIRST = [], []
PRAC_STATUS_FIRST, PRAC_SALARY_FIRST = [], []
PRAC_SALARY_MIN_FIRST, PRAC_SALARY_MAX_FIRST = [], []
PRAC_DESCRIPTION_FIRST = []

# Paging metadata for the category. NUM_PAGES, JOB_LINKS and FIRST_HALF are
# read again by the SECOND HALF cell.
JOBLUM_URLs = 'https://ph.joblum.com/jobs-spec-practitioner-medical-asst?p='
soup = getSoup(JOBLUM_URLs)
NUM_JOBS = getNumJobs(soup)
NUM_PAGES = getNumPages(NUM_JOBS)
JOB_LINKS = getLinks(NUM_PAGES, JOBLUM_URLs)
# With an odd page count the extra page goes to the first half.
FIRST_HALF = math.ceil(NUM_PAGES / 2)

for page_no in range(FIRST_HALF):
    JOBLUM_SOUP = getSoup(JOB_LINKS[page_no])
    URL = getJobURL(JOBLUM_SOUP)

    # Parallel tag lists: entry k of each list is read as belonging to listing k.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})

    for row, title_tag in enumerate(JOB_TITLE_ARRAY):
        PRAC_TITLE_FIRST.append(title_tag.text.strip())
        PRAC_COMPANY_FIRST.append(JOB_COMPANY_ARRAY[row].text.strip())
        # Dates are parsed from text such as "January 5, 2022" and stored as date objects.
        posted_on = datetime.strptime(JOB_DATE_ARRAY[row].text.strip(), '%B %d, %Y')
        PRAC_DATE_FIRST.append(posted_on.date())
        PRAC_LOCATION_FIRST.append(JOB_LOCATION_ARRAY[row].text.strip())

    # scrapeJob receives the remaining five lists
    # (presumably it fills them per job URL - confirm in its definition).
    scrapeJob(
        URL,
        PRAC_STATUS_FIRST,
        PRAC_SALARY_FIRST,
        PRAC_SALARY_MIN_FIRST,
        PRAC_SALARY_MAX_FIRST,
        PRAC_DESCRIPTION_FIRST,
    )

In [None]:
# Practitioner/Medical Asst Jobs (JOBLUM) - SECOND HALF of the result pages.
# This cell reads FIRST_HALF, NUM_PAGES and JOB_LINKS, which it does not define
# itself, so the FIRST HALF cell has to be run beforehand.

# One accumulator per output column.
PRAC_TITLE_SECOND, PRAC_COMPANY_SECOND = [], []
PRAC_DATE_SECOND, PRAC_LOCATION_SECOND = [], []
PRAC_STATUS_SECOND, PRAC_SALARY_SECOND = [], []
PRAC_SALARY_MIN_SECOND, PRAC_SALARY_MAX_SECOND = [], []
PRAC_DESCRIPTION_SECOND = []

for page_no in range(FIRST_HALF, NUM_PAGES):
    JOBLUM_SOUP = getSoup(JOB_LINKS[page_no])
    URL = getJobURL(JOBLUM_SOUP)

    # Parallel tag lists: entry k of each list is read as belonging to listing k.
    JOB_TITLE_ARRAY = JOBLUM_SOUP.find_all('h2', attrs={'class': 'job-title'})
    JOB_COMPANY_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'company-name'})
    JOB_DATE_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'date date-desktop'})
    JOB_LOCATION_ARRAY = JOBLUM_SOUP.find_all('span', attrs={'class': 'location location-desktop'})

    for row, title_tag in enumerate(JOB_TITLE_ARRAY):
        PRAC_TITLE_SECOND.append(title_tag.text.strip())
        PRAC_COMPANY_SECOND.append(JOB_COMPANY_ARRAY[row].text.strip())
        # Dates are parsed from text such as "January 5, 2022" and stored as date objects.
        posted_on = datetime.strptime(JOB_DATE_ARRAY[row].text.strip(), '%B %d, %Y')
        PRAC_DATE_SECOND.append(posted_on.date())
        PRAC_LOCATION_SECOND.append(JOB_LOCATION_ARRAY[row].text.strip())

    # scrapeJob receives the remaining five lists
    # (presumably it fills them per job URL - confirm in its definition).
    scrapeJob(
        URL,
        PRAC_STATUS_SECOND,
        PRAC_SALARY_SECOND,
        PRAC_SALARY_MIN_SECOND,
        PRAC_SALARY_MAX_SECOND,
        PRAC_DESCRIPTION_SECOND,
    )

In [None]:
# Practitioner/Medical Asst Jobs (JOBLUM): stitch the FIRST and SECOND half of
# every column back together, first half first.

# Columns read straight from the listing pages.
PRAC_TITLE_LIST = np.concatenate([PRAC_TITLE_FIRST, PRAC_TITLE_SECOND])
PRAC_COMPANY_LIST = np.concatenate([PRAC_COMPANY_FIRST, PRAC_COMPANY_SECOND])
PRAC_DATE_LIST = np.concatenate([PRAC_DATE_FIRST, PRAC_DATE_SECOND])
PRAC_LOCATION_LIST = np.concatenate([PRAC_LOCATION_FIRST, PRAC_LOCATION_SECOND])

# Columns that were handed to scrapeJob.
PRAC_STATUS_LIST = np.concatenate([PRAC_STATUS_FIRST, PRAC_STATUS_SECOND])
PRAC_SALARY_LIST = np.concatenate([PRAC_SALARY_FIRST, PRAC_SALARY_SECOND])
PRAC_SALARY_MIN_LIST = np.concatenate([PRAC_SALARY_MIN_FIRST, PRAC_SALARY_MIN_SECOND])
PRAC_SALARY_MAX_LIST = np.concatenate([PRAC_SALARY_MAX_FIRST, PRAC_SALARY_MAX_SECOND])
PRAC_DESCRIPTION_LIST = np.concatenate([PRAC_DESCRIPTION_FIRST, PRAC_DESCRIPTION_SECOND])

In [None]:
# Practitioner/Medical Asst Jobs (JOBLUM): assemble the final table.
# Plain string values are constants for the whole category; the *_LIST arrays
# carry one value per scraped listing.
PRAC = {
    "Website": "Joblum",
    "Job Title": PRAC_TITLE_LIST,
    "Category": "Practitioner/Medical Asst Jobs",
    "Company": PRAC_COMPANY_LIST,
    "Date Posted": PRAC_DATE_LIST,
    "Location": PRAC_LOCATION_LIST,
    "Status": PRAC_STATUS_LIST,
    "Salary": PRAC_SALARY_LIST,
    "Education": "Not Specified / In Description",
    "Years of Work Experience": "Not Specified / In Description",
    "Job Description": PRAC_DESCRIPTION_LIST,
    "Min Salary": PRAC_SALARY_MIN_LIST,
    "Max Salary": PRAC_SALARY_MAX_LIST,
    "Min Years of Work Experience": "Not Specified",
    "Max Years of Work Experience": "Not Specified",
    "Field": "Medicine",
}
PRAC_df = pd.DataFrame(PRAC)
# Trailing expression so the notebook renders the frame.
PRAC_df

In [None]:
# Save the Practitioner/Medical Asst table as CSV, leaving out the DataFrame index.
# The path is a raw string: in a normal literal "\J" is an invalid escape
# sequence, which newer Python versions warn about (SyntaxWarning since 3.12).
# The resulting path text is unchanged.
# NOTE(review): the backslash is only a directory separator on Windows - confirm
# the notebook is not expected to run elsewhere.
PRAC_df.to_csv(r'Joblum Data\JOBLUM-PRAC.csv', index=False)

In [220]:
# Combine every per-category Joblum frame into one table with a fresh 0..n-1 index.
# Every *_df below must already exist in the session, i.e. the whole notebook
# has to be run first; otherwise this cell stops with a NameError.
JOBLUM_df = pd.concat(
    [
        STAT_df,
        AGRI_df,
        ARCHI_df,
        AVI_df,
        BIOMED_df,
        BIOTECH_df,
        CHEMENG_df,
        CHEM_df,
        CIVILENG_df,
        CONSTRUCTION_df,
        DIAGNOSIS_df,
        DOCTOR_df,
        ELEC_df,
        ELECENG_df,
        ELECTRO_df,
        ELECTROENG_df,
        ENVI_df,
        ENVIENG_df,
        NUTRI_df,
        GEO_df,
        INDUSENG_df,
        IT_HARDWARE_df,
        IT_SYS_df,
        IT_SOFTWARE_df,
        MAINTENANCE_df,
        MECH_df,
        MECHENG_df,
        NURSE_df,
        OIL_df,
        OILENG_df,
        ENG_df,
        PHARMA_df,
        QUALITY_df,
        QUANTITY_df,
        SNT_df,
        PRAC_df,
    ],
    sort=False,
    ignore_index=True,
)
# Trailing expression so the notebook renders the combined frame.
JOBLUM_df

NameError: name 'STAT_df' is not defined