In [42]:
import requests
import sqlite3
import os
import csv

from bs4 import BeautifulSoup, Tag

# Listing page that is downloaded and scanned for job cards.
url = "https://realpython.github.io/fake-jobs/"

# SQLite database file and the table inside it that stores the jobs.
database_name = "database.db"
table_name = "Jobs"

# Working directory at the moment this cell runs; the CSV export is written here.
cwd = os.getcwd()

In [43]:
# Make sure the database file and the jobs table exist
def connect_database(database_name, table_name="Jobs"):
    """Create the jobs table in a SQLite database if it does not exist yet.

    Args:
        database_name: Path of the SQLite database file (created by SQLite
            when missing).
        table_name: Name of the table to create. Defaults to ``"Jobs"``.
            Must be a valid identifier because it is interpolated into the
            SQL statement.

    Raises:
        ValueError: If ``table_name`` is not a valid identifier.
    """
    # Table names cannot be bound as SQL parameters, so validate before use.
    if not table_name.isidentifier():
        raise ValueError("Invalid table name.")

    con = sqlite3.connect(database_name)
    try:
        # ``with con`` only wraps a transaction (commit on success, rollback
        # on error); it does not close the connection, hence the finally.
        with con:
            # Create the table if it does not exist
            con.execute(f"""
                CREATE TABLE IF NOT EXISTS {table_name} (
                        job_id INTEGER PRIMARY KEY AUTOINCREMENT,
                        job_title TEXT,
                        comp_name TEXT,
                        loc TEXT,
                        job_desc TEXT,
                        app_link TEXT,
                        added_time DATE
                        )
                """)
    finally:
        con.close()

In [44]:
# Download a page and parse it into a BeautifulSoup object

def get_url_as_bs(url: str, timeout: float = 10.0) -> BeautifulSoup:
    """Fetch ``url`` and return its HTML parsed with BeautifulSoup.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The parsed document, or ``None`` when the server answers with an
        error status (the response object is falsy in that case).
        Network-level failures, including the timeout, surface as
        exceptions raised by ``requests``.
    """
    response = requests.get(url, timeout=timeout)
    if not response:
        return None
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    return BeautifulSoup(response.text, "html.parser")

In [45]:
# Extract the job description from a job detail page
def get_job_description(url: str) -> str:
    """Return the description text found on the job detail page at ``url``.

    The description is taken from the first ``<p>`` matched by the empty
    ``id`` / ``class`` filters (presumably the paragraph that carries
    neither attribute; verify against the page markup).

    Args:
        url: Address of the job detail page.

    Returns:
        The paragraph text with surrounding whitespace removed, or an empty
        string when the page could not be fetched or no such paragraph
        exists.
    """
    soup = get_url_as_bs(url)
    if soup is None:
        return ""

    paragraph = soup.find("p", id="", class_="")
    if paragraph is None:
        return ""

    # get_text() instead of .string: .string is None when the paragraph
    # contains nested markup, which would be stored as the text "None".
    return paragraph.get_text().strip()

In [46]:
class Job():
    """A single job posting with the six fields that are persisted."""

    def __init__(self, job_title, comp_name, loc, job_desc, app_link, added_time):
        """Store the given values as attributes of the same name."""
        self.job_title, self.comp_name, self.loc = job_title, comp_name, loc
        self.job_desc, self.app_link, self.added_time = job_desc, app_link, added_time

    def as_tuple(self):
        """Return the six fields as a tuple, in constructor order."""
        field_names = ("job_title", "comp_name", "loc", "job_desc", "app_link", "added_time")
        return tuple(getattr(self, field_name) for field_name in field_names)

    @classmethod
    def from_tuple(cls, *values) -> "Job":
        """Build a Job from the first six positional values.

        Values beyond the sixth are ignored; fewer than six raise IndexError.
        """
        leading_six = [values[position] for position in range(6)]
        return cls(*leading_six)

In [47]:
class JobManager():
    """In-memory collection of jobs with a simple duplicate check."""

    def __init__(self, jobs: list = None) -> None:
        """Start the collection from ``jobs`` (or empty when omitted).

        The input is copied so that ``add_job`` never mutates a list owned
        by the caller.
        """
        self.jobs = [] if jobs is None else list(jobs)

    def add_job(self, job: "Job"):
        """Append ``job`` to the collection without any duplicate check."""
        self.jobs.append(job)

    def get_all_jobs(self):
        """Return the internal list of jobs (not a copy)."""
        return self.jobs

    def is_include(self, job: "Job") -> bool:
        """Tell whether a job with the same title, company and location is stored.

        Description, link and date are deliberately not part of the comparison.
        """
        return any(
            job.job_title == existing_job.job_title and
            job.comp_name == existing_job.comp_name and
            job.loc == existing_job.loc
            for existing_job in self.jobs
        )
            

In [48]:
# Write jobs to the database
def write2database(database_name: str, table_name: str, data: "JobManager") -> None:
    """Insert every job held by ``data`` into ``table_name``.

    All rows are inserted in one transaction: either every row is stored or,
    if an insert fails, none are.

    Args:
        database_name: Path of the SQLite database file.
        table_name: Target table; must be a valid identifier because it is
            interpolated into the SQL statement.
        data: Object whose ``get_all_jobs()`` yields items exposing
            ``as_tuple()`` with the six column values.

    Raises:
        ValueError: If ``table_name`` is not a valid identifier.
    """
    # Validate before touching the database; table names cannot be bound
    # as SQL parameters.
    if not table_name.isidentifier():
        raise ValueError("Invalid table name.")

    rows = [job.as_tuple() for job in data.get_all_jobs()]

    con = sqlite3.connect(database_name)
    try:
        # ``with con`` commits on success and rolls back on error, but it
        # does not close the connection, hence the finally.
        with con:
            con.executemany(f"""
                INSERT INTO {table_name} (job_title, comp_name, loc, job_desc, app_link, added_time)
                VALUES (?, ?, ?, ?, ?, ?)
                """, rows)
    finally:
        con.close()


In [49]:
def load_from_database(database_name, table_name) -> list:
    """Read every stored job from ``table_name`` and return them as a list.

    Args:
        database_name: Path of the SQLite database file.
        table_name: Table to read; must be a valid identifier because it is
            interpolated into the SQL statement.

    Returns:
        A plain list of ``Job`` objects, one per row (not a ``JobManager``).

    Raises:
        ValueError: If ``table_name`` is not a valid identifier.
    """
    # Same check as the write path: table names cannot be bound as SQL
    # parameters, so reject anything that is not a bare identifier.
    if not table_name.isidentifier():
        raise ValueError("Invalid table name.")

    con = sqlite3.connect(database_name)
    try:
        rows = con.execute(f"""
            SELECT job_title, comp_name, loc, job_desc, app_link, added_time from {table_name}
            """)
        return [Job.from_tuple(*row) for row in rows]
    finally:
        # The sqlite3 connection context manager would not close it for us.
        con.close()
    

In [50]:
# Export all jobs to a CSV file

def write_as_csv(job_manager: JobManager, filename: str = "jobs.csv"):
    """Write a header row plus one row per job to ``filename`` under ``cwd``.

    I/O errors are reported with a printed message instead of being raised.
    """
    column_titles = ["Job Title", "Company Name", "Location", "Job Description", "Application Link", "Added Time"]

    try:
        target_path = os.path.join(cwd, filename)
        with open(target_path, mode="w", newline="", encoding="utf-8") as out_file:
            csv_writer = csv.writer(out_file)

            # Header first, then every job in collection order
            csv_writer.writerow(column_titles)
            csv_writer.writerows(job.as_tuple() for job in job_manager.get_all_jobs())

        print(f"Jobs successfully written to {filename}")
    except IOError as e:
        print(f"Error writing to CSV file: {e}")

In [51]:
# Make sure the database and its table exist
connect_database(database_name)

# Retrieve the already stored jobs from the database as a list
jobs = load_from_database(database_name, table_name)

# Manager holding every known job (stored ones plus those found in this run)
job_manager = JobManager(jobs)

# Only the jobs discovered in this run. Writing job_manager itself would
# re-insert every row loaded above and duplicate the table on each run.
new_jobs = JobManager()

# All available jobs on the listing page
soup = get_url_as_bs(url)
if soup is None:
    raise RuntimeError(f"Could not download the job listing from {url}")

cards_body = soup.find("div", id = "ResultsContainer")
if cards_body is None:
    raise RuntimeError("Job listing page has no 'ResultsContainer' element")

for card in cards_body.children:
    # Children also include bare text nodes between the cards; skip them
    if not isinstance(card, Tag):
        continue

    job_title = str(card.find("h2", class_="title is-5").string).strip()
    comp_name = str(card.find("h3", class_="subtitle is-6 company").string).strip()
    loc = str(card.find("p", class_="location").string).strip()
    added_time = str(card.find("time").string).strip()
    app_link = card.find("a", string="Apply")['href']

    # The duplicate check only looks at title, company and location, so run
    # it before paying for the extra request that fetches the description.
    job = Job(job_title, comp_name, loc, "", app_link, added_time)
    if job_manager.is_include(job):
        continue

    job_desc = get_job_description(app_link)
    job.job_desc = job_desc
    job_manager.add_job(job)
    new_jobs.add_job(job)


# Persist only the new jobs; export the complete collection
write2database(database_name, table_name, new_jobs)
write_as_csv(job_manager)

Jobs successfully written to jobs.csv
