In [1]:
import requests
from bs4 import BeautifulSoup
import argparse

In [2]:
def scrape_jobs(location=None):
    """Scrapes Developer job postings from Monster, optionally by location.
    :param location: Where the job is located
    :type location: str
    :return: all job postings from first page that match the search results
    :rtype: BeautifulSoup object
    """
    if location:
        URL = f"https://www.monster.com/jobs/search/\
                ?q=Software-Developer&where={location}"
    else:
        URL = f"https://www.monster.com/jobs/search/?q=Software-Developer"
    page = requests.get(URL)

    soup = BeautifulSoup(page.content, "html.parser")
    results = soup.find(id="ResultsContainer")
    return results

In [3]:
def filter_jobs_by_keyword(results, word):
    """Filters job postings by word and prints matching job title plus link.
    :param results: Parsed HTML container with all job listings
    :type results: BeautifulSoup object
    :param word: keyword to filter by
    :type word: str
    :return: None - just meant to print results
    :rtype: None
    """
    filtered_jobs = results.find_all(
        "h2", string=lambda text: word in text.lower()
    )
    for f_job in filtered_jobs:
        link = f_job.find("a")["href"]
        print(f_job.text.strip())
        print(f"Apply here: {link}\n")

In [4]:
def print_all_jobs(results):
    """Print details of all jobs returned by the search.
    The printed details are title, link, company name and location of the job.
    :param results: Parsed HTML container with all job listings
    :type results: BeautifulSoup object
    :return: None - just meant to print results
    :rtype: None
    """
    job_elems = results.find_all("section", class_="card-content")

    for job_elem in job_elems:
        # keep in mind that each job_elem is another BeautifulSoup object!
        title_elem = job_elem.find("h2", class_="title")
        company_elem = job_elem.find("div", class_="company")
        location_elem = job_elem.find("div", class_="location")
        if None in (title_elem, company_elem, location_elem):
            continue
            # print(job_elem.prettify())  # to inspect the 'None' element
        print(title_elem.text.strip())
        link_elem = title_elem.find("a")
        print(link_elem["href"])
        print(company_elem.text.strip())
        print(location_elem.text.strip())
        print()

In [5]:

# USE THE SCRIPT AS A COMMAND-LINE INTERFACE
# ----------------------------------------------------------------------------
my_parser = argparse.ArgumentParser(
    prog="jobs", description="Find Developer Jobs"
)
my_parser.add_argument(
    "-location", metavar="location", type=str, help="The location of the job"
)
my_parser.add_argument(
    "-word", metavar="word", type=str, help="What keyword to filter by"
)

args = my_parser.parse_args()
location, keyword = args.location, args.word

results = scrape_jobs(location)
if keyword:
    filter_jobs_by_keyword(results, keyword.lower())
else:
    print_all_jobs(results)

usage: jobs [-h] [-location location] [-word word]
jobs: error: unrecognized arguments: -f C:\Users\mo.yosola\AppData\Roaming\jupyter\runtime\kernel-80fc0afa-cfd3-4486-bc60-48d0fd1fb15d.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [2]:
import requests
from bs4 import BeautifulSoup


# URL = "https://www.monster.com/jobs/search/?q=Software-Developer\
#         &where=Australia"

URL = 'https://www.propertypro.ng/property-for-rent/flat-apartment/in/lagos/alimosho/egbeda/?search=Egbeda+%2C+Alimosho+%2C+Lagos&auto=Egbeda+%2C+Alimosho+%2C+Lagos&bedroom=1&max_price=1000000'
page = requests.get(URL)

soup = BeautifulSoup(page.content, "html.parser")

ConnectionError: HTTPSConnectionPool(host='www.propertypro.ng', port=443): Max retries exceeded with url: /property-for-rent/flat-apartment/in/lagos/alimosho/egbeda/?search=Egbeda+%2C+Alimosho+%2C+Lagos&auto=Egbeda+%2C+Alimosho+%2C+Lagos&bedroom=1&max_price=1000000 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x00000143E36DE400>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))

In [73]:
# print(soup.prettify())

In [74]:
price = soup.find_all('p', class_='prop-price')

for prices in price:
    prices = prices.contents[3]
#     len(prices)
    print(prices) #.prettify())

<span content="280000.0" itemprop="price">280,000</span>
<span content="250000.0" itemprop="price">250,000</span>
<span content="150000.0" itemprop="price">150,000</span>
<span content="150000.0" itemprop="price">150,000</span>
<span content="150000.0" itemprop="price">150,000</span>


In [79]:
beds = soup.find_all('span', class_='prop-aminities float-left')

for bed in beds:
#     bed = bed.contents[1]
    print(bed)

<span class="prop-aminities float-left"> <span><i class="fa fa-bed"></i>1 bed</span> <span><i class="fa fa-bath"></i>1 bath</span> <span><i class="fa fa-bath"></i>1 toilet</span> </span>
<span class="prop-aminities float-left"> <span><i class="fa fa-bed"></i>1 bed</span> <span><i class="fa fa-bath"></i>2 bath</span> <span><i class="fa fa-bath"></i>2 toilet</span> </span>
<span class="prop-aminities float-left"> <span><i class="fa fa-bed"></i>1 bed</span> <span><i class="fa fa-bath"></i>1 bath</span> <span><i class="fa fa-bath"></i>1 toilet</span> </span>
<span class="prop-aminities float-left"> <span><i class="fa fa-bed"></i>1 bed</span> <span><i class="fa fa-bath"></i>1 bath</span> <span><i class="fa fa-bath"></i>1 toilet</span> </span>
<span class="prop-aminities float-left"> <span><i class="fa fa-bed"></i>1 bed</span> <span><i class="fa fa-bath"></i>1 bath</span> <span><i class="fa fa-bath"></i>1 toilet</span> </span>
<span class="prop-aminities float-left"> <span><i class="fa fa-be

In [78]:
baths = soup.find_all('span', class_='prop-aminities float-left')

for bath in baths:
    bath = bath.contents[3]
    print(bath)

<span><i class="fa fa-bath"></i>1 bath</span>
<span><i class="fa fa-bath"></i>2 bath</span>
<span><i class="fa fa-bath"></i>1 bath</span>
<span><i class="fa fa-bath"></i>1 bath</span>
<span><i class="fa fa-bath"></i>1 bath</span>
<span><i class="fa fa-bath"></i>1 bath</span>
<span><i class="fa fa-bath"></i>1 bath</span>
<span><i class="fa fa-bath"></i>1 bath</span>


In [None]:
results = soup.find(id="ResultsContainer")

# Look for Python jobs
beds = results.find_all("span", string=lambda t: "bed" in t.lower())

for bed in beds:
    link = bed.find("a")["href"]
    print(bed.text.strip())
    print(f"Apply here: {link}\n")

In [None]:
# Print out all available jobs from the scraped webpage
job_elems = results.find_all("div", class_="col-lg-12")
for job_elem in job_elems:
    title_elem = job_elem.find("h2", class_="title prop-title")
    price_elem = job_elem.find("p", class_="prop-price")
    bed_elem = job_elem.find("span")
    if None in (title_elem, company_elem, location_elem):
        continue
    print(title_elem.text.strip())
    print(company_elem.text.strip())
    print(location_elem.text.strip())
    print()