In [1]:
!pip install selenium



In [2]:
import os
import csv
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains

In [4]:


# Initialize the Chrome WebDriver
service = Service(executable_path="./chromedriver")
driver = webdriver.Chrome(service=service)

csv_file_path = "job_data_energyjobline.csv"

# Check if the file exists and is empty
file_exists = os.path.isfile(csv_file_path)
file_empty = os.stat(csv_file_path).st_size == 0 if file_exists else True

# Write the header row if the file doesn't exist or is empty
if not file_exists or file_empty:
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Job Title", "Job Description"])

# Function to get elements with retry
def get_elements_with_retry(driver, by, value, max_attempts=3):
    attempts = 0
    while attempts < max_attempts:
        try:
            elements = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((by, value))
            )
            return elements
        except (StaleElementReferenceException, NoSuchElementException):
            attempts += 1
    return []

# Function to get element text with retry
def get_element_text_with_retry(element, max_attempts=3):
    attempts = 0
    while attempts < max_attempts:
        try:
            return element.text
        except StaleElementReferenceException:
            attempts += 1
    return ""
    
base_url = "https://www.energyjobline.com/jobs?search=Wind%20Technician&job_geo_location=United%20States&radius=80.47&Find_Jobs=Find%20Jobs&lat=37.09024&lon=-95.712891&country=United%20States&administrative_area_level_1=undefined&page="
# Initialize page parameter for pagination
page = 0

while True:
    # Construct the URL for the current page
    current_url = f"{base_url}{page}"
    driver.get(current_url)
    
    # Wait for the job list to load
    time.sleep(5)
    
    jobs = get_elements_with_retry(driver, By.CLASS_NAME, "recruiter-job-link recruiter-jobs-new-tab-processed")
    
    if not jobs:
        break  # Exit the loop if no more jobs are found
    
    for j in range(len(jobs)):
        try:
            # Refresh job links list and scroll the job into view
            jobs = get_elements_with_retry(driver, By.CLASS_NAME, "recruiter-job-link recruiter-jobs-new-tab-processed")
            job = jobs[j]
            
            job_title = get_element_text_with_retry(job)
            ActionChains(driver).move_to_element(job).perform()
            job.click()
            
            # Wait for job description to load
            job_description_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "field__item even"))
            )
            job_description = get_element_text_with_retry(job_description_element)
            
            with open(csv_file_path, mode='a', newline='', encoding='utf-8') as file:
                writer = csv.writer(file)
                writer.writerow([job_title, job_description])
            
            # Navigate back to the job list
            driver.back()
            
            # Wait for the job list to reload
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "recruiter-job-link recruiter-jobs-new-tab-processed"))
            )
            
        except Exception as e:
            print(f"Error processing job: {e}")
            continue
    
    # Increment the page parameter for the next page
    page += 1

driver.quit()

print(f"Job data has been appended to {csv_file_path}")


TimeoutException: Message: 


In [6]:
import csv
import os
import time
import random
import logging
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize the Chrome WebDriver
service = Service(executable_path="./chromedriver")
chrome_options = Options()
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-popup-blocking")
driver = webdriver.Chrome(service=service, options=chrome_options)

csv_file_path = "job_data_energyjobline.csv"

# Check if the file exists and is empty
file_exists = os.path.isfile(csv_file_path)
file_empty = os.stat(csv_file_path).st_size == 0 if file_exists else True

# Write the header row if the file doesn't exist or is empty
if not file_exists or file_empty:
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Job Title", "Job Description"])

# Function to get elements with retry
def get_elements_with_retry(driver, by, value, max_attempts=3):
    attempts = 0
    while attempts < max_attempts:
        try:
            elements = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((by, value))
            )
            return elements
        except (StaleElementReferenceException, NoSuchElementException):
            attempts += 1
    return []

# Function to get element text with retry
def get_element_text_with_retry(element, max_attempts=3):
    attempts = 0
    while attempts < max_attempts:
        try:
            return element.text
        except StaleElementReferenceException:
            attempts += 1
    return ""

# Function to close popups
def close_popup(driver):
    try:
        # Wait for a short time to see if a popup appears
        WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.ID, "popup-id")))
        # If a popup is found, close it (you'll need to replace "popup-id" with the actual ID or another identifier)
        close_button = driver.find_element(By.XPATH, "//button[@aria-label='Close']")
        close_button.click()
    except:
        # If no popup is found or there's an error closing it, just continue
        pass

base_url = "https://www.energyjobline.com/jobs?search=Wind%20Technician&job_geo_location=United%20States&radius=80.47&Find_Jobs=Find%20Jobs&lat=37.09024&lon=-95.712891&country=United%20States&administrative_area_level_1=undefined&page="

# Initialize page parameter for pagination
page = 0

while True:
    try:
        # Construct the URL for the current page
        current_url = f"{base_url}{page}"
        driver.get(current_url)
        
        # Try to close any popup that appears
        close_popup(driver)
        
        # Wait for the job list to load
        time.sleep(5)
        
        jobs = get_elements_with_retry(driver, By.CLASS_NAME, "recruiter-job-link.recruiter-jobs-new-tab-processed")
        
        if not jobs:
            logger.info("No more jobs found. Exiting loop.")
            break  # Exit the loop if no more jobs are found
        
        for job in jobs:
            try:
                job_title = get_element_text_with_retry(job)
                
                # Get the href attribute
                job_url = job.get_attribute('href')
                
                # Open new tab
                driver.execute_script("window.open('');")
                
                # Switch to the new tab
                driver.switch_to.window(driver.window_handles[-1])
                
                # Navigate to the job URL in the new tab
                driver.get(job_url)
                
                # Wait for job description to load
                job_description_element = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CLASS_NAME, "field__item.even"))
                )
                job_description = get_element_text_with_retry(job_description_element)
                
                with open(csv_file_path, mode='a', newline='', encoding='utf-8') as file:
                    writer = csv.writer(file)
                    writer.writerow([job_title, job_description])
                
                logger.info(f"Scraped job: {job_title}")
                
                # Close the current tab
                driver.close()
                
                # Switch back to the main tab
                driver.switch_to.window(driver.window_handles[0])
                
            except Exception as e:
                logger.error(f"Error processing job on page {page}: {e}")
                # If there was an error, make sure we're back on the main tab
                if len(driver.window_handles) > 1:
                    driver.close()
                    driver.switch_to.window(driver.window_handles[0])
                continue
        
        # Increment the page parameter for the next page
        page += 1
        
        # Add a random delay between requests
        time.sleep(random.uniform(1, 3))
        
    except Exception as e:
        logger.error(f"Error processing page {page}: {e}")
        break

driver.quit()
logger.info(f"Job data has been appended to {csv_file_path}")

INFO:__main__:Scraped job: 
INFO:__main__:Scraped job: HSE Technician - Wind Farms (Spain and availability to travel)
INFO:__main__:Scraped job: Wind Turbine Service Technician
INFO:__main__:Scraped job: Wind Turbine Technician
INFO:__main__:Scraped job: Wind Turbine Service Technician
INFO:__main__:Scraped job: Wind Turbine Service Technician
INFO:__main__:Scraped job: Wind Turbine Service Technician
INFO:__main__:Scraped job: Wind Turbine Service Technician
INFO:__main__:Scraped job: HVAC Service Technician
INFO:__main__:Scraped job: Field Technician
INFO:__main__:Scraped job: HVAC Truck Based Service Controls Technician
INFO:__main__:Scraped job: Diesel Mechanic/Technician - Starting at $30/hour
INFO:__main__:Scraped job: Diesel Mechanic, Service Technician
INFO:__main__:Scraped job: HVAC Service Technician
INFO:__main__:Scraped job: Field Service Technician (Hydraulics / Pumps / Motors)
INFO:__main__:Scraped job: Mobile Diesel Mechanic/Technician - OTR (1257)
INFO:__main__:Scraped 