In [150]:
import re
from dataclasses import dataclass

<h4>Definition of Job Object</h4>

In [151]:
@dataclass
class Job:
    """One job posting extracted from a scraped job-detail HTML snippet.

    Instances are created by ``parse_job_soup``. All fields are plain strings;
    when a piece of information cannot be found, ``parse_job_soup`` stores a
    placeholder string rather than ``None`` (see the per-field notes).
    """
    # Title text of the posting.
    job_title: str
    # Company display name; 'Unknown Company' when it could not be extracted.
    company_name: str
    # href of the company link; the literal string 'None' when missing.
    company_link: str
    # Full description text; the literal string 'None' when missing.
    description: str

<h3>Main Code</h3>

In [152]:
from urllib.parse import urlencode

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def dummy_input():
    """Return a canned (job, location, page count) triple.

    Stand-in for ``get_input`` so the notebook can run without prompting.
    """
    desired_job = 'Software Engineer'
    location = 'Denver CO'
    pages = 1
    return desired_job, location, pages

def get_input(max_pages=10):
    """Prompt the user for the search parameters.

    Args:
        max_pages: Upper bound applied to the requested page count
            (defaults to 10, the original hard-coded limit).

    Returns:
        A ``(desired_job, location, pages)`` tuple where ``pages`` is an int
        clamped to the range ``1..max_pages``.
    """
    print('Enter Desired Job')
    desired_job = input()

    print('Enter Location')
    location = input()

    print('How many pages to search')
    # Keep asking until the answer parses as an integer instead of letting a
    # typo abort the notebook with a ValueError.
    while True:
        try:
            pages = int(input())
            break
        except ValueError:
            print('Please enter a whole number')

    # Clamp both ends: zero/negative requests become one page, and large
    # requests are capped at max_pages.
    pages = max(1, min(pages, max_pages))

    return desired_job, location, pages

def _build_search_url(desired_job, location, page):
    """Build the Indeed search URL for the given zero-based results page."""
    params = [('q', desired_job), ('l', location)]
    if page > 0:
        # Pagination query string: the offset advances by 10 per page.
        params.append(('start', page * 10))
    params.append(('sort', 'date'))
    # urlencode escapes spaces, '&', etc. in the user-supplied values.
    return 'https://www.indeed.com/jobs?' + urlencode(params)


def main():
    """Scrape Indeed search results and collect each job's detail-pane HTML.

    Opens a Firefox session, loads ``num_pages`` result pages for the job and
    location supplied by ``dummy_input``, clicks every job title on each page
    and captures the ``innerHTML`` of the right-hand detail pane.

    Returns:
        A list of HTML strings, one per job whose detail pane appeared within
        the wait timeout.
    """
    # Initialize Selenium options
    options = webdriver.FirefoxOptions()
    # Firefox equivalents of the Chrome-only '--ignore-certificate-errors'
    # and '--incognito' switches.
    options.accept_insecure_certs = True
    options.add_argument('-private')
    # Uncomment to run without a visible browser window.
    # options.add_argument('--headless')

    # Get input from user
    desired_job, location, num_pages = dummy_input()

    # Start selenium browser. Pass options by keyword: on some Selenium 4
    # releases the first positional parameter is not `options`.
    driver = webdriver.Firefox(options=options)

    pane_locator = (By.CSS_SELECTOR, 'div.jobsearch-RightPane')
    job_sources = []
    try:
        for page in range(num_pages):
            driver.get(_build_search_url(desired_job, location, page))
            # Find all job card elements
            job_links = driver.find_elements(By.CSS_SELECTOR, 'h2.jobTitle')
            for link in job_links:
                link.click()
                # NOTE(review): this waits only for the pane to be present; if
                # the pane from the previously clicked job is still in the DOM
                # the wait may return before the new content has loaded.
                try:
                    right_pane = WebDriverWait(driver, timeout=10).until(
                        EC.presence_of_element_located(pane_locator)
                    )
                except TimeoutException:
                    # Skip a job whose pane never appeared rather than
                    # discarding everything collected so far.
                    continue
                job_sources.append(right_pane.get_attribute('innerHTML'))
    finally:
        # Always shut the browser down, even when a page load or click fails.
        driver.quit()

    return job_sources


In [153]:
# Run the scraper. Despite the name, `pages` holds one HTML string per job
# listing (the detail pane's innerHTML), not one entry per results page.
pages = main()

In [154]:
# First captured job snippet; raises IndexError if the scrape returned nothing.
test_page = pages[0]

<h3>parse_job_soup</h3>
<ul>
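    <li>takes a job listing that has been parsed into a BeautifulSoup object</li>
    <li>extracts the job title, company name, company link, and description</li>
    <li>returns a Job object with the data filled in, or the string 'Bad Link' when no job title is found</li>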
</ul>

In [155]:
def _first_text(job_soup, selectors, default):
    """Return ``.text`` of the first selector that matches, else ``default``."""
    for selector in selectors:
        node = job_soup.select_one(selector)
        if node is not None:
            return node.text
    return default


def parse_job_soup(job_soup):
    """Extract job details from a parsed job-detail snippet.

    Args:
        job_soup: Parsed HTML exposing ``select_one`` (a BeautifulSoup object).

    Returns:
        A ``Job`` when a title is found. Missing pieces are filled with the
        placeholder strings 'Unknown Company' (company name) and 'None'
        (company link, description). Returns the string 'Bad Link' when the
        title element is absent or empty, which probably means the job
        contents did not load.
    """
    # Get job title
    title_span = job_soup.select_one('h2.jobsearch-JobInfoHeader-title span')
    if title_span is None or not title_span.contents:
        return 'Bad Link'
    # Only the first child is used, so text in any nested elements that
    # follow it is left out of the title.
    job_title = str(title_span.contents[0])

    # Company name and link both come from the first anchor in the snippet.
    # NOTE(review): assumes the first <a> is the company link - confirm
    # against the current page markup.
    anchor = job_soup.select_one('a')
    if anchor is None:
        company_name = 'Unknown Company'
        company_link = 'None'
    else:
        company_name = anchor.text
        company_link = anchor.get('href', 'None')

    # Get description, trying the primary container first and then the fallback.
    description = _first_text(
        job_soup,
        ('div.jobsearch-jobDescriptionText',
         'div.jobsearch-JobComponent-description'),
        'None',
    )

    return Job(job_title, company_name, company_link, description)

<h4>Testing Code Here</h4>

In [156]:
# Parse every captured snippet and print a short summary of each job found.
for page in pages:
    js = BeautifulSoup(page, 'html.parser')
    job_info = parse_job_soup(js)
    # parse_job_soup returns the string 'Bad Link' instead of a Job when no
    # title was found; skip those entries.
    if not isinstance(job_info, Job):
        continue
    print(job_info.job_title)
    print(job_info.company_name)
    # Preview only the first 75 characters of the description.
    print('\t ' + job_info.description[:75])
    print('')
    print('---------------------------------')

Integration Connector Developer
LyntonWeb
	 

LyntonWeb is seeking a highly motivated Connector Developer (C#.NET) with

---------------------------------
Principal Software Test Engineer
Sierra Space
	 
 Do you enjoy developing new products and services? Join us! Our Software 

---------------------------------
Senior Software Developer (up to 25% profit sharing benefit)
CACI
	 
 Senior Software Developer (up to 25% profit sharing benefit)
  Job Catego

---------------------------------
Sr. Software Test Engineer
Sierra Space
	 
 Do you enjoy developing new products and services? Join us! Our Software 

---------------------------------
Software Engineer Principal
Geologics Corporation
	 

  Software Engineer Principal
 

   Location: Aurora, CO
 

   Clearance 

---------------------------------
Sr. Technical Consultant
Computronix
	 
About Computronix
Our low turnover rate, challenging work, great client re

---------------------------------
Senior Software Engineer
Salesforce
	 
To