### Web Scraping Tutorial
* Reference: https://realpython.com/beautiful-soup-web-scraper-python/

In [11]:
import pandas as pd
import requests
import pprint

from bs4 import BeautifulSoup

### Obtain content

In [12]:
# Search-results page to scrape.
URL = 'https://www.monster.com/jobs/search/?q=Software-Developer&where=Australia'
# requests applies no timeout by default, so an unresponsive server would
# block this cell indefinitely; cap the wait at 30 seconds.
page = requests.get(URL, timeout=30)
# Stop here on a 4xx/5xx response instead of parsing an error page below.
page.raise_for_status()

In [13]:
# Pretty-format the raw response bytes, then write them to stdout.
formatted_content = pprint.pformat(page.content)
print(formatted_content)

(b'<!DOCTYPE html>\r\n<html xmlns="https://www.w3.org/1999/xhtml" xml:lang="e'
 b'n" lang="en">\r\n<head>\r\n    \r\n            <link rel="preconnect" href'
 b'="https://coda.newjobs.com" />\r\n            <link rel="preconnect" href='
 b'"https://js-seeker.newjobs.com" />\r\n            <link rel="preconnect" h'
 b'ref="https://css-seeker.newjobs.com" />\r\n            <link rel="preconne'
 b'ct" href="https://securemedia.newjobs.com" />\r\n            <link rel="pr'
 b'econnect" href="https://logs2.jobs.com" />\r\n            <link rel="preco'
 b'nnect" href="https://job-openings.monster.com" />\r\n            <link rel'
 b'="preconnect" href="https://apis.google.com" />\r\n            <link rel="'
 b'preconnect" href="https://www.google.com" />\r\n            <link rel="pre'
 b'connect" href="https://accounts.google.com" />\r\n            <link rel="p'
 b'reconnect" href="https://content.googleapis.com" />\r\n            <link r'
 b'el="preconnect" href="https://ssl.gstatic.com" />

 b'                            </li>\r\n                                    <'
 b'li id="mobile-nav-3" class="menu__item">\r\n                              '
 b'          <a class="menu__link" role="menuitem" href="https://www.monster.co'
 b'm/resumes/writing-services">Resume Help</a>\r\n                           '
 b'         </li>\r\n                                    <li id="mobile-nav-4'
 b'" class="menu__item">\r\n                                        <a class='
 b'"menu__link" role="menuitem" href="https://www.monster.com/career-advice">Ca'
 b'reer Advice</a>\r\n                                    </li>\r\n\r\n      '
 b'                          <li role="separator" class="divider"></li>\r\n  '
 b'                              <li id="mobile-nav-2" class="menu__item">\r'
 b'\n                                    <a class="menu__link" role="menuite'
 b'm" href="https://hiring.monster.com/?intcid=skr_LPF_TopNav_Employer&amp;ch=M'
 b'ONS">For Employers</a>\r\n                   

### Get element by ID

In [15]:
# Parse the raw response bytes with the 'html.parser' backend.
soup = BeautifulSoup(markup=page.content, features='html.parser')
# Look up the element whose id attribute is "ResultsContainer".
results = soup.find(attrs={'id': 'ResultsContainer'})

In [16]:
# Render the results container as indented HTML text and print it.
pretty_results = results.prettify()
print(pretty_results)

<div class="mux-custom-scroll" data-extend="left" data-mux="customScroll" data-target="html" id="ResultsContainer">
 <div class="scrollable" id="ResultsScrollable">
  <script type="application/ld+json">
   {"@context":"https://schema.org","@type":"ItemList","mainEntityOfPage":{
            "@type":"CollectionPage","@id":"https://www.monster.com/jobs/search/?q=Software-Developer&amp;where=Australia"
            }
            ,"itemListElement":[

                 {"@type":"ListItem","position":1,"url":"https://job-openings.monster.com/sql-bi-ssrs-ssis-developer-for-blackboard-nyc-new-york-wa-us-lancesoft-inc/56d17f16-f07d-4271-abda-b80155837c80"}
                    ,
                 {"@type":"ListItem","position":2,"url":""}
                    ,
                 {"@type":"ListItem","position":3,"url":"https://job-openings.monster.com/python-developer-woodlands-wa-us-lancesoft-inc/4755ec59-d0db-4ce9-8385-b4df7c1e9f7c"}
                    ,
                 {"@type":"ListIt

### Get element by Class

In [17]:
# Collect every <section> in the results container that carries the
# "card-content" class.
job_elems = results.find_all(name='section', attrs={'class': 'card-content'})

In [18]:
# Dump the raw HTML of each job card, followed by one blank line.
for job_elem in job_elems:
    print(job_elem, end='\n\n')

<section class="card-content" data-jobid="56d17f16-f07d-4271-abda-b80155837c80" onclick="MKImpressionTrackingMouseDownHijack(this, event)">
<div class="flex-row">
<div class="mux-company-logo thumbnail is-loaded">
<img alt="LanceSoft Inc" src="https://media.newjobs.com/clu/xlan/xlancesinx/branding/164991/LanceSoft-Inc-logo.png"/>
</div>
<div class="summary">
<header class="card-header">
<h2 class="title"><a data-bypass="true" data-m_impr_a_placement_id="JSR2CW" data-m_impr_j_cid="10" data-m_impr_j_coc="" data-m_impr_j_jawsid="359506666" data-m_impr_j_jobid="0" data-m_impr_j_jpm="2" data-m_impr_j_jpt="3" data-m_impr_j_lat="40.75" data-m_impr_j_lid="550" data-m_impr_j_long="-73.9967" data-m_impr_j_occid="11709" data-m_impr_j_p="1" data-m_impr_j_postingid="56d17f16-f07d-4271-abda-b80155837c80" data-m_impr_j_pvc="4496dab8-a60c-4f02-a2d1-6213320e7213" data-m_impr_s_t="t" data-m_impr_uuid="5218ab81-a5c6-4a6c-bda1-9b4c5a3c949e" href="https://job-openings.monster.com/sql-bi-ssrs-ssis-developer

### Display elements by Class inside a Tag

In [19]:
# (tag name, CSS class) pairs for the title, company and location elements.
field_selectors = (('h2', 'title'), ('div', 'company'), ('div', 'location'))

for job_elem in job_elems:
    # Every job card supports the same search methods as the parsed page,
    # so each field is looked up relative to the card itself.
    title_elem, company_elem, location_elem = (
        job_elem.find(tag_name, class_=css_class)
        for tag_name, css_class in field_selectors
    )
    # One element per line, then a blank line between cards.
    print(title_elem, company_elem, location_elem, sep='\n')
    print()

<h2 class="title"><a data-bypass="true" data-m_impr_a_placement_id="JSR2CW" data-m_impr_j_cid="10" data-m_impr_j_coc="" data-m_impr_j_jawsid="359506666" data-m_impr_j_jobid="0" data-m_impr_j_jpm="2" data-m_impr_j_jpt="3" data-m_impr_j_lat="40.75" data-m_impr_j_lid="550" data-m_impr_j_long="-73.9967" data-m_impr_j_occid="11709" data-m_impr_j_p="1" data-m_impr_j_postingid="56d17f16-f07d-4271-abda-b80155837c80" data-m_impr_j_pvc="4496dab8-a60c-4f02-a2d1-6213320e7213" data-m_impr_s_t="t" data-m_impr_uuid="5218ab81-a5c6-4a6c-bda1-9b4c5a3c949e" href="https://job-openings.monster.com/sql-bi-ssrs-ssis-developer-for-blackboard-nyc-new-york-wa-us-lancesoft-inc/56d17f16-f07d-4271-abda-b80155837c80" onclick="clickJobTitle('plid=550&amp;pcid=10&amp;poccid=11709','Software Developer',''); clickJobTitleSiteCat('{&quot;events.event48&quot;:&quot;true&quot;,&quot;eVar25&quot;:&quot;SQL BI (SSRS, SSIS) developer for Blackboard - NYC&quot;,&quot;eVar66&quot;:&quot;Monster&quot;,&quot;eVar67&quot;:&quot;J

### Get the tag content

In [20]:
for job_elem in job_elems:
    title_elem = job_elem.find('h2', class_='title')
    company_elem = job_elem.find('div', class_='company')
    location_elem = job_elem.find('div', class_='location')
    found_elems = (title_elem, company_elem, location_elem)
    # Only print cards where all three elements were found; cards missing
    # any of them are skipped.
    if all(elem is not None for elem in found_elems):
        # Stripped text of each field on its own line, then a blank line.
        print(*(elem.text.strip() for elem in found_elems), sep='\n')
        print()

SQL BI (SSRS, SSIS) developer for Blackboard - NYC
LanceSoft Inc
New york, WA

Python Developer
LanceSoft Inc
Woodlands, WA

Junior QA Analyst - Melbourne, Victoria
Mediaocean
Melbourne, VIC

Test Analyst
Dialog Group
Canberra, ACT

Senior Sales Engineer
Zuora
Melbourne, VIC

Enterprise Account Executive
Zuora
Melbourne, VIC

Account Manager
Dialog Group
Canberra, ACT

Customer Experience Technical Analyst - Sydney, New South Wales
Mediaocean
Sydney, NSW

Test Analyst / Senior Test Analyst
Dialog Group
Melbourne, VIC

Senior Practice Manager - IES (WA)
Blue Ocean Ventures
New York, WA

Software Developer/Senior Software Developer
Beacon Hill Staffing Group, LLC
Green Bay, WI



### Find Elements by Class Name and Text Content

In [25]:
# Search for <h2> elements using the literal string 'Python Developer'
# as the text filter.
python_jobs = results.find_all(
    name='h2',
    string='Python Developer',
)

### Pass a Function to a Beautiful Soup Method

In [22]:
# Select <h2> elements whose text contains "python", ignoring case.
# The filter can be called with None (a tag's .string is None when the tag
# does not hold exactly one string), so check for None before .lower() to
# avoid an AttributeError.
python_jobs = results.find_all(
    'h2',
    string=lambda text: text is not None and 'python' in text.lower(),
)

In [23]:
# Report how many headings matched the filter.
match_count = len(python_jobs)
print(match_count)

1


### Extract Attributes From HTML Elements

In [24]:
# Select <h2> elements whose text contains "python", ignoring case.
# The filter can be called with None (a tag's .string is None when the tag
# does not hold exactly one string), so check for None before .lower().
python_jobs = results.find_all(
    'h2',
    string=lambda text: text is not None and "python" in text.lower(),
)

for p_job in python_jobs:
    link_elem = p_job.find('a')
    # Skip headings with no <a> tag, or whose <a> has no href, instead of
    # raising TypeError/KeyError part-way through the listing.
    link = link_elem.get('href') if link_elem is not None else None
    if link is None:
        continue
    print(p_job.text.strip())
    print(f"Apply here: {link}\n")

Python Developer
Apply here: https://job-openings.monster.com/python-developer-woodlands-wa-us-lancesoft-inc/4755ec59-d0db-4ce9-8385-b4df7c1e9f7c

