In [2]:
import json
import os
import time

import undetected_chromedriver as uc
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

class WellfoundScraper:
    """Log in to Wellfound through a browser and query its GraphQL job search.

    Attributes:
        driver: Selenium-style driver used for every page interaction.
        wait: ``WebDriverWait`` bound to ``driver`` with a 10 second timeout.
        base_url: Site root that all URLs are built from.
        cookies_file: Path of the JSON file used to persist session cookies.
    """

    # Executed inside the page by ``execute_async_script``. Selenium passes the
    # Python arguments first and appends its completion callback as the LAST
    # argument. The headers Accept-Encoding, Origin and Referer are not set
    # here: they are forbidden request headers that the browser controls
    # itself and refuses to let scripts override.
    _GRAPHQL_XHR_SCRIPT = """
        var url = arguments[0];
        var payload = arguments[1];
        var callback = arguments[arguments.length - 1];
        var xhr = new XMLHttpRequest();
        xhr.open('POST', url, true);
        xhr.setRequestHeader('Content-Type', 'application/json');
        xhr.setRequestHeader('Accept', '*/*');
        xhr.setRequestHeader('Accept-Language', 'en-US,en;q=0.9');
        xhr.setRequestHeader('Apollographql-Client-Name', 'talent-web');
        xhr.setRequestHeader('X-Requested-With', 'XMLHttpRequest');
        xhr.onreadystatechange = function() {
            if (xhr.readyState == 4) {
                callback({
                    status: xhr.status,
                    statusText: xhr.statusText,
                    body: xhr.responseText
                });
            }
        };
        xhr.send(JSON.stringify(payload));
    """

    def __init__(self, driver):
        """Bind the scraper to ``driver`` and set its default URLs and paths."""
        self.driver = driver
        self.wait = WebDriverWait(driver, 10)
        self.base_url = "https://wellfound.com"
        self.cookies_file = "cookies.json"

    def _ensure_live_window(self):
        """Refocus the driver on an open window if the focused one is gone.

        Raises:
            RuntimeError: If the browser reports no open windows at all.
        """
        handles = self.driver.window_handles
        if not handles:
            raise RuntimeError("Browser has no open windows.")
        try:
            current = self.driver.current_window_handle
        except Exception:
            # Selenium raises here when the focused window was closed; treat
            # any failure to read the handle as "no usable focus".
            current = None
        if current not in handles:
            # NOTE(review): assumes the most recently opened window is the one
            # showing the site -- confirm if the site opens extra tabs.
            self.driver.switch_to.window(handles[-1])

    def check_if_captcha(self):
        """Return True if a ``data-cfasync="false"`` element shows up in time.

        Blocks for up to the ``wait`` timeout when no such element exists.

        NOTE(review): this attribute is presumably used as a Cloudflare
        challenge marker; it may also appear on ordinary pages, so confirm the
        selector against a real challenge page.
        """
        try:
            self.wait.until(EC.presence_of_element_located((By.XPATH, '//*[@data-cfasync="false"]')))
            print("Captcha detected.")
            return True
        except TimeoutException:
            print("No captcha detected.")
            return False

    def save_cookies(self):
        """Write the driver's current cookies to ``cookies_file`` as JSON."""
        with open(self.cookies_file, 'w') as f:
            json.dump(self.driver.get_cookies(), f)

    def load_cookies(self):
        """Add every cookie stored in ``cookies_file`` to the driver.

        A missing file is reported on stdout and otherwise ignored.
        """
        try:
            with open(self.cookies_file, 'r') as f:
                cookies = json.load(f)
            for cookie in cookies:
                self.driver.add_cookie(cookie)
            print("Cookies loaded successfully.")
        except FileNotFoundError:
            print("Cookies file not found.")

    def login(self, email, password):
        """Submit the login form and persist cookies once redirected to /jobs.

        Args:
            email: Account e-mail typed into the ``user_email`` field.
            password: Account password typed into the ``user_password`` field.

        Raises:
            Exception: Any failure is printed and re-raised; a missing
                redirect to the jobs page is raised as "Login failed.".
        """
        self.driver.get(f"{self.base_url}/login")
        jobs_url = f"{self.base_url}/jobs"
        try:
            email_input = self.driver.find_element(By.ID, "user_email")
            email_input.send_keys(email)
            password_input = self.driver.find_element(By.ID, "user_password")
            password_input.send_keys(password)

            login_button = self.driver.find_element(By.NAME, "commit")
            login_button.click()

            # Poll for the post-login redirect instead of sleeping a fixed
            # time: returns as soon as it happens and tolerates slow logins
            # up to the wait timeout.
            try:
                self.wait.until(lambda d: d.current_url.startswith(jobs_url))
            except TimeoutException:
                raise RuntimeError("Login failed.") from None

            self.save_cookies()
            print("Login successful and cookies saved.")
        except Exception as e:
            print(f"Error during login: {e}")
            raise

    def use_session(self):
        """Open the site, restore saved cookies, then navigate to the jobs page."""
        # Cookies can only be added for the domain currently loaded, so the
        # site root is opened before loading them.
        self.driver.get(self.base_url)
        self.load_cookies()
        self.driver.get(f"{self.base_url}/jobs")
        time.sleep(2)

    def scrape_jobs(self, payload):
        """POST ``payload`` to the GraphQL endpoint from inside the page.

        Args:
            payload: JSON-serialisable GraphQL request body.

        Returns:
            The parsed JSON response on success. On any failure (captcha,
            closed browser, non-200 status, invalid JSON) the error is printed
            and an empty list is returned.
        """
        try:
            # A previous run of this notebook failed here with "no such
            # window"; recover when another window is still open.
            self._ensure_live_window()
            if self.check_if_captcha():
                raise RuntimeError("Captcha detected, manual intervention required.")

            result = self.driver.execute_async_script(
                self._GRAPHQL_XHR_SCRIPT,
                f"{self.base_url}/graphql?fallbackAOR=talent",
                payload,
            )
            status = result.get("status")
            body = result.get("body")
            print("Raw response:", body)
            if status != 200:
                # Surface the HTTP failure itself rather than letting the
                # JSON parser choke on an error page.
                status_text = result.get("statusText") or ""
                raise RuntimeError(
                    f"GraphQL request failed with HTTP status {status} {status_text}".strip()
                )
            return json.loads(body)
        except Exception as e:
            print(f"Error executing GraphQL request: {e}")
            return []

# Credentials are read from the environment so they are never stored in the
# notebook. They are validated before the browser starts so that a
# misconfigured run does not leave a Chrome process behind.
EMAIL = os.environ.get("WELLFOUND_EMAIL")
PASSWORD = os.environ.get("WELLFOUND_PASSWORD")
if not EMAIL or not PASSWORD:
    raise RuntimeError(
        "Set the WELLFOUND_EMAIL and WELLFOUND_PASSWORD environment variables before running."
    )

# GraphQL request body for the job search. ``operationId`` and the tag ids are
# opaque values copied from the site's own requests.
PAYLOAD = {
    "operationName": "JobSearchResultsX",
    "variables": {
        "filterConfigurationInput": {
            "page": 1,
            "locationTagIds": ["392260"],
            "remoteCompanyLocationTagIds": ["153509"],
            "roleTagIds": ["14726"],
            "equity": {"min": None, "max": None},
            "jobTypes": ["internship"],
            "remotePreference": "REMOTE_OPEN",
            "salary": {"min": None, "max": None},
            "yearsExperience": {"min": None, "max": None},
        }
    },
    "extensions": {
        "operationId": "tfe/2aeb9d7cc572a94adfe2b888b32e64eb8b7fb77215b168ba4256b08f9a94f37b"
    },
}

options = uc.ChromeOptions()
options.add_argument("--disable-blink-features=AutomationControlled")
driver = uc.Chrome(options=options)

scraper = WellfoundScraper(driver)

try:
    scraper.login(EMAIL, PASSWORD)
    scraper.use_session()
    jobs = scraper.scrape_jobs(PAYLOAD)
    if isinstance(jobs, dict):
        # A successful call returns the parsed GraphQL response object, not a
        # list of jobs: counting or iterating it would only report its
        # top-level keys. Show the reported errors and the data payload.
        # TODO: extract the listing array once the response schema is confirmed.
        for error in jobs.get("errors") or []:
            print(f"GraphQL error: {error}")
        print(json.dumps(jobs.get("data"), indent=2))
    else:
        # scrape_jobs returns an empty list when the request failed.
        print(f"Found {len(jobs)} job listings.")
        for job in jobs:
            print(job)
except Exception as e:
    print(f"Error: {e}")
finally:
    # Always close the browser, even when login or scraping failed.
    driver.quit()

Login successful and cookies saved.
Cookies loaded successfully.
Error executing GraphQL request: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=131.0.6778.86)
Stacktrace:
	GetHandleVerifier [0x008B33E3+25059]
	(No symbol) [0x0083CDE4]
	(No symbol) [0x0071BEC3]
	(No symbol) [0x006FD93B]
	(No symbol) [0x0078800F]
	(No symbol) [0x0079AE49]
	(No symbol) [0x00781C96]
	(No symbol) [0x00753FAC]
	(No symbol) [0x00754F3D]
	GetHandleVerifier [0x00BA5543+3113795]
	GetHandleVerifier [0x00BBA20A+3198986]
	GetHandleVerifier [0x00BB29E2+3168226]
	GetHandleVerifier [0x00953250+680016]
	(No symbol) [0x0084572D]
	(No symbol) [0x008429D8]
	(No symbol) [0x00842B75]
	(No symbol) [0x008357D0]
	BaseThreadInitThunk [0x76D1FCC9+25]
	RtlGetAppContainerNamedObjectPath [0x7732809E+286]
	RtlGetAppContainerNamedObjectPath [0x7732806E+238]

Found 0 job listings.
