In [3]:
import sqlite3
import requests
from bs4 import BeautifulSoup
import csv

In [4]:
def create_db():
    """Create the ``jobs`` table in ``jobs.db`` if it does not already exist.

    The table has five TEXT columns (``job_title``, ``company_name``,
    ``location``, ``job_description``, ``application_link``) and a UNIQUE
    constraint over ``(job_title, company_name, location)``.

    Calling this function repeatedly is safe because of ``IF NOT EXISTS``.
    """
    con = sqlite3.connect('jobs.db')
    try:
        # `with con` only scopes the transaction (commit on success, rollback
        # on error); it does NOT close the connection, hence the finally.
        with con:
            con.execute('''
                CREATE TABLE IF NOT EXISTS jobs (
                    job_title TEXT,
                    company_name TEXT,
                    location TEXT,
                    job_description TEXT,
                    application_link TEXT,
                    UNIQUE(job_title, company_name, location)
                )
            ''')
    finally:
        con.close()


In [5]:
def get_job_description(application_link, timeout=10):
    """Fetch a job detail page and return the text of its description block.

    Args:
        application_link: URL of the job detail page.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The stripped text of the first ``<div class="content">`` on the page,
        ``"No description found."`` when the page has no such element, or an
        ``"Error fetching description: ..."`` message when the request fails
        (connection error, timeout, or a 4xx/5xx response). Request errors
        are reported through the return value, never raised.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        res = requests.get(application_link, timeout=timeout)
        # Treat 4xx/5xx as failures instead of scraping the error page.
        res.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"Error fetching description: {e}"

    soup = BeautifulSoup(res.content, 'html.parser')
    description = soup.find('div', class_='content')
    if description is None:
        return "No description found."
    return description.text.strip()

    


In [6]:
def scrap_jobs(url='https://realpython.github.io/fake-jobs', timeout=10):
    """Scrape the job listing page and return one dict per job card.

    For every ``<div class="card-content">`` on the page, the title, company
    and location are read from the card, the first footer link whose text
    contains "Apply" is taken as the application link, and the description is
    fetched from that link via ``get_job_description``.

    Args:
        url: Listing page to scrape. Defaults to the Real Python fake-jobs
            demo site.
        timeout: Seconds to wait for the listing page before giving up.

    Returns:
        A list of dicts with the keys ``'Job Title'``, ``'Company Name'``,
        ``'Location'``, ``'Job Description'`` and ``'Application Link'``.
        ``'Application Link'`` is ``None`` and the description is
        ``"No description available"`` when a card has no usable Apply link.

    Raises:
        requests.exceptions.RequestException: If the listing page cannot be
            fetched or responds with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than silently returning an empty list.
    response.raise_for_status()
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and to match get_job_description).
    soup = BeautifulSoup(response.content, 'html.parser')

    jobs = []
    for card in soup.find_all('div', class_='card-content'):
        # NOTE(review): a card missing any of these three elements raises
        # AttributeError here, as it did before — confirm whether such cards
        # should be skipped instead.
        job_title = card.find('h2', class_='title').text.strip()
        company_name = card.find('h3', class_='subtitle').text.strip()
        location = card.find('p', class_='location').text.strip()

        # The footer holds several links; only the "Apply" one is wanted.
        application_link = None
        for link in card.find_all('a', class_='card-footer-item'):
            if "Apply" in link.text:
                # .get() avoids a KeyError on an anchor without an href.
                application_link = link.get('href')
                break

        # Default text is kept when there is no link to follow.
        job_description = "No description available"
        if application_link:
            job_description = get_job_description(application_link)

        jobs.append({
            'Job Title': job_title,
            'Company Name': company_name,
            'Location': location,
            'Job Description': job_description,
            'Application Link': application_link
        })
    return jobs



In [7]:
def store_to_db(jobs):
    """Insert or update scraped jobs in the ``jobs`` table of ``jobs.db``.

    A job whose (job_title, company_name, location) already exists has its
    ``job_description`` and ``application_link`` overwritten; otherwise a new
    row is inserted. All rows are written in a single transaction.

    Args:
        jobs: Iterable of dicts with the keys ``'Job Title'``,
            ``'Company Name'``, ``'Location'``, ``'Job Description'`` and
            ``'Application Link'``.

    Raises:
        KeyError: If a job dict is missing one of the expected keys (raised
            before anything is written).
    """
    rows = [
        (
            job['Job Title'],
            job['Company Name'],
            job['Location'],
            job['Job Description'],
            job['Application Link'],
        )
        for job in jobs
    ]

    con = sqlite3.connect('jobs.db')
    try:
        # `with con` commits on success and rolls back on error, so no
        # explicit commit() is needed; it does not close the connection.
        with con:
            con.executemany('''
                INSERT INTO jobs (job_title, company_name, location, job_description, application_link)
                VALUES (?, ?, ?, ?, ?)
                ON CONFLICT(job_title, company_name, location)
                DO UPDATE SET
                    job_description = excluded.job_description,
                    application_link = excluded.application_link
            ''', rows)
    finally:
        con.close()


In [8]:
def filter_jobs(filter_by, value):
    """Return the rows of ``jobs`` whose ``filter_by`` column contains ``value``.

    Args:
        filter_by: Name of the column to match on (e.g. ``'location'`` or
            ``'company_name'``). Must be one of the ``jobs`` table columns;
            surrounding whitespace and letter case are ignored.
        value: Substring to look for. It is wrapped in ``%`` wildcards and
            bound as a parameter to a ``LIKE`` clause.

    Returns:
        A list of ``(job_title, company_name, location, job_description,
        application_link)`` tuples.

    Raises:
        ValueError: If ``filter_by`` is not a known column name.
    """
    allowed_columns = (
        'job_title',
        'company_name',
        'location',
        'job_description',
        'application_link',
    )
    # Column names cannot be bound as SQL parameters, so the name is checked
    # against a fixed whitelist before being interpolated into the query.
    column = filter_by.strip().lower() if isinstance(filter_by, str) else None
    if column not in allowed_columns:
        raise ValueError(
            f"Cannot filter by {filter_by!r}; expected one of {', '.join(allowed_columns)}"
        )

    con = sqlite3.connect('jobs.db')
    try:
        cursor = con.execute(f'''
            SELECT job_title, company_name, location, job_description, application_link
            FROM jobs
            WHERE {column} LIKE ?
        ''', (f'%{value}%',))
        return cursor.fetchall()
    finally:
        # The sqlite3 context manager never closes the connection; do it here.
        con.close()


In [9]:
def export_to_csv(data, filename):
    """Write job rows to ``filename`` as CSV, preceded by a header row.

    Args:
        data: Iterable of rows, each a sequence of five values in the order
            job title, company name, location, job description, application
            link.
        filename: Path of the CSV file to create (overwritten if it exists).
    """
    # newline='' lets the csv module control line endings (otherwise Windows
    # gets a blank line after every row); UTF-8 keeps non-ASCII text in the
    # scraped fields from failing on platforms with a narrower default codec.
    with open(filename, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Job Title', 'Company Name', 'Location', 'Job Description', 'Application Link'])
        writer.writerows(data)

In [10]:
def main():
    """Run the whole pipeline: set up the table, scrape, store, filter, export."""
    # Make sure the table exists, then persist a fresh scrape.
    create_db()
    store_to_db(scrap_jobs())

    # Example run: export only the jobs whose location matches this value.
    location_filter = "Remote"
    output_path = f'jobs_filtered_by_{location_filter}.csv'
    export_to_csv(filter_jobs('location', location_filter), output_path)

    print(f"Filtered jobs for location '{location_filter}' have been exported to CSV.")

# Entry point: run the pipeline only when this code is executed directly
# (module name is "__main__"), not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

Filtered jobs for location 'Remote' have been exported to CSV.
