In [40]:
import requests
from bs4 import BeautifulSoup
from fake_headers import Headers
from numpy import random
import time
# Shared header generator; get_page() calls header.generate() for every request.
header = Headers()
from JobsDb import JobsDb
# Database handle used below to store scraped jobs and to read them back.
# NOTE(review): JobsDb is presumably a project-local module — confirm.
db = JobsDb()

def get_page(url, params=None, timeout=30):
    """Fetch a URL with GET and return the raw response body.

    A new set of request headers is produced for every call through the
    module-level ``header`` object.

    Args:
        url: Address to request.
        params: Optional mapping of query-string parameters.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may wait indefinitely on a stalled
            connection, which would hang the whole scrape.

    Returns:
        The body of the response (``Response.content``).

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    headers = header.generate()
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    return response.content

def get_search_page(page_number=1):
    """Download one page of careerjet search results.

    The search is restricted to location 'USA' and sorted by date.

    Args:
        page_number: Which page of the result list to request.

    Returns:
        Whatever ``get_page`` returns for the search URL.
    """
    search_url = 'https://www.careerjet.com/search/jobs'
    return get_page(
        search_url,
        params={'l': 'USA', 'sort': 'date', 'p': page_number},
    )

def get_jobs_from_page(page):
    """Collect the job listing elements from a search-results page.

    Args:
        page: HTML markup of a search-results page.

    Returns:
        The result of ``find_all`` for ``<article>`` elements whose class
        is 'job clicky'.
    """
    parsed = BeautifulSoup(page, 'html.parser')
    return parsed.find_all('article', class_='job clicky')

def get_link_from_job(job):
    """Extract the posting URL and title from one job listing element.

    Args:
        job: Element for a single listing. Its first ``<h2>`` is expected
            to contain an ``<a>`` carrying ``href`` and ``title``
            attributes.

    Returns:
        A dict with two keys: 'url' (absolute address of the posting) and
        'title' (the anchor's ``title`` attribute).

    Raises:
        AttributeError: If the listing has no ``<h2>``.
        TypeError: If the ``<h2>`` contains no anchor with an ``href``.
        KeyError: If the anchor has no ``title`` attribute.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    anchor = job.find('h2').find('a', href=True)
    return {
        # urljoin resolves the href against the site root, so hrefs that are
        # already absolute or lack a leading slash still give a valid URL
        # (plain string concatenation would corrupt both).
        'url': urljoin('https://www.careerjet.com', anchor['href']),
        'title': anchor['title'],
    }

def get_description(url, max_wait=1.0):
    """Scrape the full job description from a careerjet job listing page.

    Sleeps for a random fraction of ``max_wait`` seconds before requesting
    the page, so consecutive requests are not issued back to back.

    Args:
        url: Address of the job listing.
        max_wait: Upper bound, in seconds, of the random delay.

    Returns:
        The text of the page's ``<section class="content">`` element.

    Raises:
        AttributeError: If the page contains no such section.
    """
    # random.rand(1) would return a one-element array; time.sleep expects a
    # plain number, so draw a scalar and convert it explicitly.
    wait_time = float(random.rand()) * max_wait
    time.sleep(wait_time)
    page = get_page(url)
    soup = BeautifulSoup(page, 'html.parser')
    description = soup.find('section', class_='content').text
    return description

def scrape_search_page(page_number=1):
    """Scrape every job on one search-results page and store it.

    For each listing on the page the link and title are extracted, the
    description is downloaded, and a row with 'title', 'url' and
    'description' is written to the 'jobs' table through ``db``. A listing
    that fails at any step is reported and skipped, so one bad listing does
    not stop the rest of the page.

    Args:
        page_number: Which page of the search results to scrape.

    Returns:
        None.
    """
    page = get_search_page(page_number=page_number)
    for job in get_jobs_from_page(page):
        url = None  # kept for the skip message if link extraction fails
        try:
            link = get_link_from_job(job)
            title = link['title']
            url = link['url']
            description = get_description(url)
            job_dict = {
                'title': title,
                'url': url,
                'description': description
            }
            db.write_row_to_table(table_name='jobs', row_dict=job_dict)
        except Exception as exc:
            # Best-effort scraping: skip the listing but say why. Catching
            # Exception (not a bare except) lets KeyboardInterrupt stop a
            # long-running scrape.
            print('Skipping job {!r} on page {}: {!r}'.format(url, page_number, exc))
            continue

def scrape_site(number_of_pages=2):
    """Scrape the first ``number_of_pages`` pages of search results.

    Pages are processed in order, starting from page 1.

    Args:
        number_of_pages: How many result pages to scrape.
    """
    for offset in range(number_of_pages):
        scrape_search_page(page_number=offset + 1)

In [41]:
# Run the scraper with its default of two search-result pages.
scrape_site()

In [42]:
# Display the contents of the 'jobs' table.
# NOTE(review): presumably returns a pandas DataFrame — confirm in JobsDb.
db.load_table_as_df('jobs')

Unnamed: 0,id,title,url,description
0,1,Commercial Masonry Restoration Estimator,https://www.careerjet.com/jobad/usa6c0e00e0d09...,\n \n Job Description Great Opportunity! E...
1,2,Part-Time Delivery,https://www.careerjet.com/jobad/us0ad1dc5b555d...,\n \n Deliver with Uber. Earn on your sche...
2,3,Flexible Schedule - Deliver with Uber Eats,https://www.careerjet.com/jobad/us8021e83a20fe...,\n \n Deliver with Uber. Earn on your sche...
3,4,Food Delivery - Weekly Pay,https://www.careerjet.com/jobad/uscbcb1f38c697...,\n \n Deliver with Uber. Earn on your sche...
4,5,Deliver with Uber Eats,https://www.careerjet.com/jobad/us6f8bc1fe6997...,\n \n Deliver with Uber. Earn on your sche...
5,6,Deliver Food with Uber Eats,https://www.careerjet.com/jobad/usf6bd263e0068...,\n \n Deliver with Uber. Earn on your sche...
6,7,Part-Time Food Delivery - Uber Eats,https://www.careerjet.com/jobad/usbc0ace62c629...,\n \n Deliver with Uber. Earn on your sche...
7,8,Research Technician - Genomic Medicine,https://www.careerjet.com/jobad/us04be25ad4ade...,\n \n The mission of The University of Tex...
8,9,Part-Time Delivery,https://www.careerjet.com/jobad/us9aebea489215...,\n \n Deliver with Uber. Earn on your sche...
9,10,Deliver with Uber Eats,https://www.careerjet.com/jobad/us6d9f47dbf28b...,\n \n Deliver with Uber. Earn on your sche...


In [39]:
# Finished with the database handle.
# NOTE(review): presumably releases the underlying connection — confirm in JobsDb.
db.close()