In [3]:
import time
from datetime import datetime
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

def generate_url(jobTitle,location):
    """Build the ca.indeed.com search URL for `jobTitle` jobs in `location`.

    Both values are inserted into the path verbatim (no escaping), giving
    ``https://ca.indeed.com/<jobTitle>-jobs-in-<location>``.
    """
    site_root = "https://ca.indeed.com/"
    search_slug = "{}-jobs-in-{}".format(jobTitle, location)
    return site_root + search_slug

def _first_text(card, tag_names, css_class, strip=True):
    """Return the text of the first tag in `tag_names` with class `css_class`, or '' if none is found."""
    for tag_name in tag_names:
        node = card.find(tag_name, class_=css_class)
        if node is not None:
            return node.text.strip() if strip else node.text
    return ''


def get_record(card):
    """Extract info from one job card.

    Args:
        card: parsed element for a single job card. It is queried through
            ``card.h2``, ``.a``, ``.get(...)`` and ``card.find(name, class_=...)``.

    Returns:
        tuple: ``(jobTitle, jobCompany, jobLocation, jobPostDate, today,
        jobSalary, jobSummary, jobURL)``. ``today`` is the current local date
        formatted as ``YYYY-MM-DD``. Any field whose element or attribute is
        absent from the card is returned as ``''`` rather than raising, so one
        incomplete card does not abort the caller's loop.
    """
    # The title and link both live on the anchor inside the card's <h2>.
    heading = card.h2
    atag = heading.a if heading is not None else None
    if atag is not None:
        jobTitle = atag.get('title') or ''
        href = atag.get('href')
    else:
        jobTitle, href = '', None
    # hrefs are site-relative; only prefix the host when there is a link at all.
    jobURL = "https://ca.indeed.com" + href if href else ''

    jobCompany = _first_text(card, ('span',), 'company')

    # for job location, sometimes it's <span> sometimes it's <div>
    jobLocation = _first_text(card, ('span', 'div'), 'location')

    jobSummary = _first_text(card, ('div',), 'summary')
    # The posting date is kept unstripped, as before.
    jobPostDate = _first_text(card, ('span',), 'date', strip=False)
    today = datetime.today().strftime('%Y-%m-%d')

    # sometimes there's salary
    jobSalary = _first_text(card, ('span',), 'salaryText')

    return (jobTitle,jobCompany,jobLocation,jobPostDate,today,jobSalary,jobSummary,jobURL)

In [9]:
# should maybe rotate
user_agent = {'User-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.3 Safari/605.1.15'}

records = []

job_title = ""  # empty string for all jobs
loc = "ontario"
url = generate_url(jobTitle = job_title, location = loc)

# Walk the result pages one by one, collecting a record per job card,
# until a page has no "Next" link.
# there will be pop-ups but that doesn't matter
while True:
    time.sleep(1)  # pause between consecutive page requests
    # The second positional argument of requests.get is `params` (query string),
    # so the User-agent dict has to be passed as headers= to be sent as a header.
    # The timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, headers=user_agent, timeout=30)
    if not response.ok:
        # Records gathered so far stay in `records`; just stop paging.
        print(f"Stopping: HTTP {response.status_code} for {url}")
        break

    soup = BeautifulSoup(response.text,'html.parser')
    cards = soup.find_all('div',class_='jobsearch-SerpJobCard')

    records.extend(get_record(card) for card in cards)    # one tuple per card

    # No "Next" anchor (or one without an href) means this was the last page.
    next_link = soup.find('a',{'aria-label':'Next'})
    next_href = next_link.get('href') if next_link is not None else None
    if not next_href:
        break
    url = "https://ca.indeed.com" + next_href

In [10]:
# Turn the list of scraped tuples into a table; the column order matches
# the field order of each tuple in `records`.
records_df = pd.DataFrame(
    data=records,
    columns=[
        'jobTitle',
        'jobCompany',
        'jobLocation',
        'jobPostDate',
        'today',
        'jobSalary',
        'jobSummary',
        'jobURL',
    ],
)

records_df

Unnamed: 0,jobTitle,jobCompany,jobLocation,jobPostDate,today,jobSalary,jobSummary,jobURL
0,Passport Officer/Citizen Services Officer (Inv...,Employment and Social Development Canada,Ontario,28 days ago,2021-01-21,"$54,878 - $61,379 a year",Experience working with different client group...,http://indeed.com/rc/clk?jk=30f688ad666d23be&f...
1,Order Picker,Uline,"Milton, ON",30+ days ago,2021-01-21,$27 - $33 an hour,Scholarship program for children of employees....,http://indeed.com/pagead/clk?mo=r&ad=-6NYlbfkN...
2,Management Positions - Entry Level - Customer ...,Platinum Empire Group,"Brantford, ON",2 days ago,2021-01-21,,Our reps average between $50k-$80k first year ...,http://indeed.com/rc/clk?jk=76b4fb968848a98f&f...
3,COVID-19 Chat Responder Volunteer,Certified Listeners Society,"Greater Toronto Area, ON",30+ days ago,2021-01-21,,"Experience towards a career in social work, ps...",http://indeed.com/pagead/clk?mo=r&ad=-6NYlbfkN...
4,Contact Centre Agent (Customer Service),Ontario One Call,"Guelph, ON",6 days ago,2021-01-21,$17.00 - $17.75 an hour,Make outbound phone calls to obtain clarificat...,http://indeed.com/pagead/clk?mo=r&ad=-6NYlbfkN...
...,...,...,...,...,...,...,...,...
916,Junior Architectural Coordinator,KNYMH Inc.,"Burlington, ON",6 days ago,2021-01-21,,Participate in all phases of project developme...,http://indeed.com/company/KNYMH-Incorporated/j...
917,Siebel Implementation Specialist,Arisoft Group,"Toronto, ON",Just posted,2021-01-21,,"Experience with documenting test strategies, s...",http://indeed.com/company/ARISOFT-INC./jobs/Si...
918,Summer Student - Retail Associate,Total Tech Pools and Leisure,"Oakville, ON",5 days ago,2021-01-21,$15 - $18 an hour,"Ultimately, you will ensure that customers lea...",http://indeed.com/company/Total-Tech-Pools-and...
919,Inventory Control Associate,Tesla,"Richmond Hill, ON",Today,2021-01-21,,The role includes processing of incoming parts...,http://indeed.com/rc/clk?jk=8181ae8e1972c9ef&f...


In [11]:
# to csv
# to_csv does not create missing folders, so make sure data/ exists first;
# otherwise the export fails after the whole scrape has already run.
output_dir = Path("data")
output_dir.mkdir(parents=True, exist_ok=True)
records_df.to_csv(output_dir / f"{job_title}_in_{loc}.csv", index=False)