In [None]:
import os
from dotenv import load_dotenv
from typing import List, TypeVar
import json
import requests
from bs4 import BeautifulSoup

# Load key/value pairs from a .env file into the process environment.
# load_jobs() below reads JOBS_FILE from os.environ, so that variable is
# presumably defined there -- confirm against the project's .env file.
load_dotenv()

def scrape_html(url, timeout=10):
    """Fetch ``url`` and return its HTML parsed as a ``BeautifulSoup`` tree.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The parsed document, or ``None`` if the request failed (connection
        error, timeout, or a non-2xx status code).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so they are reported below.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return None

    # Parsing happens outside the try block: only network errors are handled.
    return BeautifulSoup(response.text, 'html.parser')
    
def extract_text_from_path(soup, path):
    """Return the text of the first element selected by ``path``.

    Args:
        soup: Object exposing ``select_one`` (e.g. the tree returned by
            ``scrape_html``).
        path: Selector handed to ``soup.select_one``.

    Returns:
        The element's text, with fragments stripped and joined by single
        spaces, or ``None`` when nothing (truthy) was selected.
    """
    match = soup.select_one(path)
    if not match:
        return None
    return match.get_text(separator=' ', strip=True)
    
def load_jobs() -> List:
    """Load the parsed JSON content of the file named by ``JOBS_FILE``.

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        KeyError: If the ``JOBS_FILE`` environment variable is not set.
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's default
    # text encoding, which differs between operating systems.
    with open(os.environ['JOBS_FILE'], 'r', encoding='utf-8') as f:
        return json.load(f)

In [None]:
class Job:
    """A job posting described by title, description, company and URL.

    Every field is optional and defaults to ``None``.
    """

    def __init__(self, title=None, description=None, company=None, url=None):
        """Store the given fields on the instance unchanged."""
        self.title = title
        self.description = description
        self.company = company
        self.url = url

    @classmethod
    def from_url(cls, url, header_path, description_path):
        """Build a job by scraping the page at ``url``.

        Declared as a classmethod so that subclasses get instances of their
        own type; calling ``Job.from_url(...)`` works exactly as before.

        Args:
            url: Page to download via ``scrape_html``.
            header_path: Selector passed to ``extract_text_from_path`` to
                obtain the title.
            description_path: Selector passed to ``extract_text_from_path``
                to obtain the description.

        Returns:
            A new instance with ``title``, ``description`` and ``url`` set
            (``company`` stays ``None``), or ``None`` when ``scrape_html``
            yields nothing. ``title``/``description`` are ``None`` when their
            selector matches no element.
        """
        soup = scrape_html(url)
        if not soup:
            return None
        return cls(
            title=extract_text_from_path(soup, header_path),
            description=extract_text_from_path(soup, description_path),
            url=url,
        )

    def __repr__(self):
        # The description is omitted: it can be arbitrarily long page text.
        return (
            f"{type(self).__name__}(title={self.title!r}, "
            f"company={self.company!r}, url={self.url!r})"
        )

    def to_dict(self) -> dict:
        """Return the four fields as a plain dictionary."""
        return {
            'title': self.title,
            'description': self.description,
            'company': self.company,
            'url': self.url
        }

class JobList(List[Job]):
    """List of ``Job`` objects; adds no behavior of its own."""
    
    