In [2]:
import csv
from bs4 import BeautifulSoup
import requests

def scrape_job_details(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    job_details = {}

    # Extracting job details
    title_elem = soup.find('span', property='title')
    job_details['Title'] = title_elem.get_text().strip() if title_elem else ''

    date_elem = soup.find('span', property='datePosted')
    job_details['Date Posted'] = date_elem.get_text().strip() if date_elem else ''

    employer_elem = soup.find('span', property='name')
    job_details['Employer'] = employer_elem.get_text().strip() if employer_elem else ''

    location_elem = soup.find('span', property='addressLocality')
    region_elem = soup.find('span', property='addressRegion')
    job_details['Location'] = f"{location_elem.get_text().strip()}, {region_elem.get_text().strip()}" if location_elem and region_elem else ''

    salary_elem = soup.find('span', property='value')
    unit_elem = soup.find('span', class_='hidden', property='unitText')
    job_details['Salary'] = f"{salary_elem.get_text().strip()} {unit_elem.get_text().strip()}" if salary_elem and unit_elem else ''

    employment_type_elem = soup.find('span', property='employmentType')
    job_details['Employment Type'] = employment_type_elem.get_text().strip() if employment_type_elem else ''

    start_date_elem = soup.find('span', class_='wb-inv', string='Start date').find_next_sibling() if soup.find('span', class_='wb-inv', string='Start date') else None
    job_details['Start Date'] = start_date_elem.get_text().strip() if start_date_elem else ''

    vacancies_elem = soup.find('span', class_='wb-inv', string='vacancies').find_next_sibling() if soup.find('span', class_='wb-inv', string='vacancies') else None
    job_details['Vacancies'] = vacancies_elem.get_text().strip() if vacancies_elem else ''

    description_elem = soup.find('span', class_='hidden', property='description')
    job_details['Description'] = description_elem.get_text().strip() if description_elem else ''

    languages_elem = soup.find('h4', string='Languages').find_next_sibling() if soup.find('h4', string='Languages') else None
    job_details['Languages'] = languages_elem.get_text().strip() if languages_elem else ''

    education_elem = soup.find('h4', string='Education').find_next_sibling() if soup.find('h4', string='Education') else None
    job_details['Education'] = education_elem.get_text().strip() if education_elem else ''

    experience_elem = soup.find('h4', string='Experience').find_next_sibling() if soup.find('h4', string='Experience') else None
    job_details['Experience'] = experience_elem.get_text().strip() if experience_elem else ''

    # Additional scraping for responsibilities
    responsibilities_elem = soup.find('div', property='responsibilities')
    if responsibilities_elem:
        job_details['Responsibilities'] = ', '.join([li.get_text().strip() for li in responsibilities_elem.find_all('li')])
    else:
        job_details['Responsibilities'] = ''

    return job_details

# Read URLs from CSV file
job_links = []
with open('job_links.csv', 'r', newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)
    next(reader)  # Skip header
    for row in reader:
        job_links.append(row[0])

# Scrape job details for each URL
job_details_list = []
for url in job_links:
    job_details = scrape_job_details(url)
    job_details_list.append(job_details)

# Write job details to CSV file
with open('job_details.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['Title', 'Date Posted', 'Employer', 'Location', 'Salary', 'Employment Type', 'Start Date', 'Vacancies', 'Description', 'Languages', 'Education', 'Experience', 'Responsibilities']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for job_details in job_details_list:
        writer.writerow(job_details)

print("Job details have been saved to job_details.csv")


Job details have been saved to job_details.csv


In [2]:
import pandas as pd
df = pd.read_csv('job_details.csv')

In [5]:
df.columns

Index(['Title', 'Date Posted', 'Employer', 'Location', 'Salary',
       'Employment Type', 'Start Date', 'Vacancies', 'Description',
       'Languages', 'Education', 'Experience', 'Responsibilities'],
      dtype='object')

In [3]:
df.head()

Unnamed: 0,Title,Date Posted,Employer,Location,Salary,Employment Type,Start Date,Vacancies,Description,Languages,Education,Experience,Responsibilities
0,food service supervisor,"Posted on December 25, 2023",Government of Canada / Gouvernement du Canada,"Yellowknife, NT",$16.50HOUR hourly HOUR,Permanent employmentFull time,Starts as soon as possible,2 vacancies,Education: Secondary (high) school graduation ...,English,Secondary (high) school graduation certificate,1 year to less than 2 years,"Establish methods to meet work schedules, Supe..."
1,tour guide supervisor,"Posted on March 26, 2024",Government of Canada / Gouvernement du Canada,"Yellowknife, NT",$25.00HOUR hourly HOUR,Seasonal employmentFull time,,1 vacancy,Education: College/CEGEP. Work setting: Remote...,English,College/CEGEP,2 years to less than 3 years,"Perform same duties as workers supervised, Ass..."
2,"plumber, residential construction","Posted on March 15, 2024",Government of Canada / Gouvernement du Canada,"Yellowknife, NT",$50.00 to $70.00HOUR hourly (To be negotiated)...,Permanent employmentFull time,Starts as soon as possible,2 vacancies,Education: Registered Apprenticeship certifica...,English,Registered Apprenticeship certificate\n\nor eq...,1 year to less than 2 years,
3,veterinarian,"Posted on February 08, 2024",Government of Canada / Gouvernement du Canada,"Yellowknife, NT",$52.50 to $60.00HOUR hourly (To be negotiated)...,Permanent employmentFull time,Starts as soon as possible,3 vacancies,"Education: Degree in medicine, dentistry, vete...",English,"Degree in medicine, dentistry, veterinary medi...",1 year to less than 2 years,Diagnose diseases or abnormal conditions in in...
4,automotive service technician,"Posted on January 27, 2024",Government of Canada / Gouvernement du Canada,"Yellowknife, NT",$39.77HOUR hourly HOUR,Permanent employmentFull time,Starts as soon as possible,3 vacancies,Education: Registered Apprenticeship certifica...,English,Registered Apprenticeship certificate\n\nor eq...,3 years to less than 5 years,"Inspect motor in operation, road test motor ve..."


In [8]:
df.shape

(187, 13)

In [10]:
df['Start Date'].unique()

array(['Starts as soon as possible', nan], dtype=object)

In [12]:
df.tail()

Unnamed: 0,Title,Date Posted,Employer,Location,Salary,Employment Type,Start Date,Vacancies,Description,Languages,Education,Experience,Responsibilities
182,truck operator,"Posted on March 04, 2024",Government of Canada / Gouvernement du Canada,"Yellowknife, NT",,,Starts as soon as possible,1 vacancy,Make Great Money! Be Appreciated for What You ...,,,,
183,supermarket cashier,"Posted on February 27, 2024",Government of Canada / Gouvernement du Canada,"Yellowknife, NT",$16.05HOUR hourly HOUR,Permanent employmentFull time,Starts as soon as possible,5 vacancies,"Education: No degree, certificate or diploma. ...",English,"No degree, certificate or diploma",1 to less than 7 months,
184,delivery driver,"Posted on December 12, 2023",Government of Canada / Gouvernement du Canada,"Yellowknife, NT",$21.00HOUR hourly HOUR,Permanent employmentFull time,Starts as soon as possible,1 vacancy,"Education: No degree, certificate or diploma. ...",English,"No degree, certificate or diploma",Will train,"Deliver and pick up messages, parcels, and oth..."
185,office manager,"Posted on February 14, 2024",Government of Canada / Gouvernement du Canada,"Hay River, , NT",$25.00HOUR hourly HOUR,Permanent employmentFull time,Starts as soon as possible,2 vacancies,Education: Bachelor's degree. or equivalent ex...,Bilingual,Bachelor's degree\n\nor equivalent experience,1 year to less than 2 years,"Delegate work to office support staff, Establi..."
186,restaurant manager,"Posted on January 17, 2024",Government of Canada / Gouvernement du Canada,"Yellowknife, NT","$70,000YEAR annually YEAR",Permanent employmentFull time,Starts as soon as possible,1 vacancy,"Education: College, CEGEP or other non-univers...",English,"College, CEGEP or other non-university certifi...",1 year to less than 2 years,"Plan, organize, direct, control and evaluate d..."
