# In [None]:
import logging
import time
from urllib.parse import quote_plus

import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup

# Search configuration: every keyword is queried on every board for one location.
keywords = [
    "data scientist",
    "data engineer",
    "data analyst",
]
location = "remote"

# URL templates, one per job board. Each is filled positionally with
# str.format: the first "{}" takes the keyword, the second the location.
job_boards = [
    "https://www.indeed.com/jobs?q={}&l={}",
    "https://www.linkedin.com/jobs/search?keywords={}&location={}",
    "https://www.glassdoor.com/Job/jobs.htm?sc.keyword={}&locT=C&locId=1&locKeyword={}",
    "https://remoterocketship.com/jobs?q={}&l={}",
]

# Function to scrape job postings
def scrape_jobs():
    """Scrape job titles and links from every board for every keyword.

    Each URL template in ``job_boards`` is filled in with each entry of
    ``keywords`` and the module-level ``location``, fetched, and parsed with
    board-specific selectors. Scraping is best-effort: a request that fails,
    a non-200 response, or a result card without a title or link is skipped
    (and, for requests, logged) instead of aborting the whole run.

    Returns:
        list[tuple[str, str]]: ``(title, link)`` pairs in the order found.
    """
    log = logging.getLogger(__name__)
    headers = {'User-Agent': 'Mozilla/5.0'}
    # quote_plus encodes spaces as "+" and additionally escapes characters
    # such as "&" or "#" that would otherwise corrupt the query string.
    encoded_location = quote_plus(location)
    job_list = []

    for board in job_boards:
        for keyword in keywords:
            url = board.format(quote_plus(keyword), encoded_location)
            try:
                # Without a timeout a stalled server would block forever.
                response = requests.get(url, headers=headers, timeout=30)
            except requests.RequestException as exc:
                log.warning("Skipping %s: request failed (%s)", url, exc)
                continue

            if response.status_code != 200:
                log.warning("Skipping %s: HTTP %s", url, response.status_code)
                continue

            soup = BeautifulSoup(response.text, 'html.parser')

            # Dispatch on the template rather than the formatted URL so that
            # keyword text can never influence which parser is selected.
            if 'indeed' in board:
                for card in soup.find_all('a', class_='tapItem'):
                    # The result card is itself the anchor element.
                    _append_job(job_list, card.find('h2'), card,
                                "https://www.indeed.com")
            elif 'linkedin' in board:
                for card in soup.find_all('div', class_='base-search-card'):
                    _append_job(job_list, card.find('h3'), card.find('a'))
            elif 'glassdoor' in board:
                for card in soup.find_all('li', class_='react-job-listing'):
                    # Title text and link both come from the first anchor.
                    anchor = card.find('a')
                    _append_job(job_list, anchor, anchor,
                                "https://www.glassdoor.com")
            elif 'remoterocketship' in board:
                for card in soup.find_all('div', class_='job-card'):
                    _append_job(job_list, card.find('h2'), card.find('a'))

    return job_list


def _append_job(job_list, title_tag, link_tag, prefix=""):
    """Append ``(title, prefix + href)`` to *job_list* when both parts exist."""
    if title_tag is None or link_tag is None:
        return
    href = link_tag.get('href')
    if not href:
        return
    job_list.append((title_tag.text.strip(), prefix + href))

# Function to display job postings
def display_jobs(job_list):
    """Render the scraped jobs in the Streamlit page and return them as a table.

    Args:
        job_list: Sequence of ``(title, link)`` pairs.

    Returns:
        pandas.DataFrame with the columns ``Job Title`` and ``Job Link``.
    """
    column_names = ['Job Title', 'Job Link']
    table = pd.DataFrame(job_list, columns=column_names)

    # Markdown heading first, then the interactive table underneath it.
    st.write("## New Remote Data Job Openings")
    st.dataframe(table)
    return table

# Streamlit page: a scrape is triggered only when the user presses the button.
st.title("Remote Data Job Scraper")
scrape_requested = st.button("Scrape Jobs Now")
if scrape_requested:
    jobs = scrape_jobs()
    if not jobs:
        st.warning("No jobs found. Try again later.")
    else:
        df = display_jobs(jobs)
        # Save the latest results to the current working directory,
        # leaving out the DataFrame index column.
        df.to_csv('job_listings.csv', index=False)
        st.success("Job scraping completed!")

# Running automated scraping every 24 hours
def automated_scraping(interval_seconds=86400):
    """Scrape, display and save job listings in an endless loop.

    Each cycle calls ``scrape_jobs``; when it returns any jobs they are
    passed to ``display_jobs`` and the resulting DataFrame is written to
    ``job_listings.csv``. The function never returns.

    Args:
        interval_seconds: Pause between cycles, in seconds. Defaults to
            86400 (24 hours).
    """
    log = logging.getLogger(__name__)
    while True:
        try:
            jobs = scrape_jobs()
            if jobs:
                df = display_jobs(jobs)
                df.to_csv('job_listings.csv', index=False)
        except Exception:
            # Top-level boundary of a long-running loop: one failed cycle
            # (network error, changed page markup, unwritable file) must not
            # cancel every future run. KeyboardInterrupt still propagates.
            log.exception(
                "Scheduled scrape failed; retrying in %s seconds",
                interval_seconds,
            )
        time.sleep(interval_seconds)

def _running_under_streamlit():
    """Return True when a Streamlit runtime is executing this script."""
    try:
        from streamlit import runtime
        return runtime.exists()
    except (ImportError, AttributeError):
        # Streamlit releases without this API: keep the previous behaviour.
        return False


# Streamlit executes scripts with __name__ == "__main__", so the guard alone
# would start the endless scraping loop on every page run and block the
# script. Run the loop only for a plain `python <script>` invocation.
if __name__ == "__main__" and not _running_under_streamlit():
    automated_scraping()
