In [1]:
import pandas as pd

def scrape_jobs(job_title, location):
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    import time
    from selenium.common.exceptions import NoSuchElementException
    import logging

    # Set up logging to capture errors
    logging.basicConfig(filename='web_scraping_errors.log', level=logging.ERROR)

    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Construct the URL
    url = f"https://www.simplyhired.com/search?q={job_title}&l={location}"

    # Initialize data lists
    job_titles, company_names, job_locations, est_salary, job_details, job_hrefs = [], [], [], [], [], []

    total_job_count = 0

    try:
        # Visit the URL
        driver.get(url)

        while True:
            # Extract the total job count from the element
            total_job_element = driver.find_element(By.CSS_SELECTOR, "p.css-gu0het")
            total_job_count = int(total_job_element.text)

            # Find all job title elements and corresponding company name elements on the current page
            job_title_elements = driver.find_elements(By.CSS_SELECTOR, "[data-testid='searchSerpJobTitle'] a")
            company_name_elements = driver.find_elements(By.CSS_SELECTOR, "[data-testid='companyName']")
            job_location_elements = driver.find_elements(By.CSS_SELECTOR, "[data-testid='searchSerpJobLocation']")
            est_salary_elements = driver.find_elements(By.CSS_SELECTOR, "[data-testid='searchSerpJobSalaryEst']")
            job_href_elements = driver.find_elements(By.CSS_SELECTOR, "[data-testid='searchSerpJobTitle'] a")

            # Iterate through the job elements to extract job titles, company names, and href links
            for job_title_element, company_name_element, job_location_element, est_salary_element, job_href_element in zip(job_title_elements, company_name_elements, job_location_elements, est_salary_elements, job_href_elements):
                job_titles.append(job_title_element.text)
                company_names.append(company_name_element.text)
                job_locations.append(job_location_element.text)
                est_salary.append(est_salary_element.text)

                # Click on the job href to get details and open in a new window
                job_href = job_href_element.get_attribute("href")
                job_hrefs.append(job_href)  # Store the href link
                driver.execute_script("window.open('', '_blank');")
                driver.switch_to.window(driver.window_handles[1])
                driver.get(job_href)

                try:
                    job_detail_element = driver.find_element(By.CSS_SELECTOR, "[data-testid='viewJobBodyJobFullDescriptionContent']")
                    job_details.append(job_detail_element.text)
                except NoSuchElementException:
                    job_details.append("N/A")

                driver.close()  # Close the job details tab
                driver.switch_to.window(driver.window_handles[0])  # Switch back to the job listing tab

            # Find the next page button
            next_page_button = driver.find_element(By.XPATH, "//a[@aria-label='Next page']")

            # Scroll to the next page button to make it clickable
            driver.execute_script("arguments[0].scrollIntoView();", next_page_button)

            # Click the next page button
            next_page_button.click()

            # Wait for a short period before loading the next page
            time.sleep(5)  # Wait for 5 seconds

            # Break the loop if we have collected data from all pages
            if len(job_titles) >= total_job_count:
                break

    except NoSuchElementException as e:
        # Handle the specific exception (element not found)
        logging.error(f"Element not found error: {str(e)}")
    except Exception as e:
        # Handle other exceptions and log them
        logging.error(f"An error occurred: {str(e)}")

    finally:
        # Close the browser when done
        driver.quit()

    # Create a dictionary to store the data
    data = {
        'Job Title': job_titles,
        'Company Name': company_names,
        'Job Location': job_locations,
        'Estimated Salary': est_salary,
        'Job details': job_details,
        'Job Href': job_hrefs  # Include the href links
    }

    # Check if all lists have the same length
    lengths = set(len(lst) for lst in data.values())
    if len(lengths) == 1:
        # All lists have the same length, create the DataFrame
        df = pd.DataFrame(data)
    else:
        # Lists have different lengths, handle the error
        print("Error: Lists have different lengths")
        df = None  # Set df to None or handle the error as needed

    # Save the data to a CSV file if the DataFrame is not None
    if df is not None:
        file_name = f'{job_title}_{location}_{time.strftime("%Y-%m-%d")}.csv'
        df.to_csv(file_name, index=False)

    # Return the DataFrame (or None)
    return df


In [None]:
data_engineer = scrape_jobs("Data Engineer", "California")
data_engineer

In [34]:
Software_engineer = scrape_jobs("Software Engineer", "California")
Software_engineer

Unnamed: 0,Job Title,Company Name,Job Location,Estimated Salary,Job details,Job Href
0,Software Engineer (C# / C++),ETAP,"Irvine, CA","Estimated: $95,000 - $115,000 a year",About us\nETAP® provides market-leading softwa...,https://www.simplyhired.com/job/WOeUz3Wmhue7cm...
1,FIRMWARE ENGINEER (Motion Control Emphasis),Advanced Motion Controls,"Camarillo, CA","Estimated: $110,000 - $145,000 a year",We are looking for an experienced Firmware Eng...,https://www.simplyhired.com/job/gD-YhZPzbnnR1J...
2,Software Engineer - Top Secret Clearance Required,Northrop Grumman,"Woodland Hills, CA","Estimated: $118,000 - $155,000 a year",Requisition ID: R10125304\nCategory: Engineeri...,https://www.simplyhired.com/job/8Dt5EkuJEy1Qkx...
3,Software Engineer III (C++),Forward Slope Inc.,"San Diego, CA","Estimated: $110,000 - $145,000 a year",Would you love to work for a fast-growing comp...,https://www.simplyhired.com/job/oOktxId2yUK8po...
4,FIRMWARE ENGINEER (Communications Emphasis),Advanced Motion Controls,"Camarillo, CA",Estimated: $160K - $202K a year,We are looking for an experienced Firmware Eng...,https://www.simplyhired.com/job/kauDSOuUKxctXC...
...,...,...,...,...,...,...
201,"Senior System Software Engineer, Performance -...",NVIDIA,"Santa Clara, CA","Estimated: $112,200 - $147,000 a year",We are hiring senior engineers to work on the ...,https://www.simplyhired.com/job/T2b5HAF7ucJ1Yi...
202,Frontend Software Engineer Graduate (Effect Pl...,TikTok,"San Jose, CA","Estimated: $99,900 - $168,400 a year",Responsibilities\n\nAbout TikTok:\nTikTok is t...,https://www.simplyhired.com/job/RbfRg2-v9p2wp7...
203,Gameplay Engineer (Apex Legends),Electronic Arts,"Los Angeles, CA","Estimated: $91,500 - $118,860 a year","Founded in 2010, Respawn was created with the ...",https://www.simplyhired.com/job/ax1sdnQX8028e2...
204,Software Engineer,Albertsons Companies,"Pleasanton, CA",Estimated: $145K - $184K a year,*Prior to applying it is required that you inf...,https://www.simplyhired.com/job/xRLTwdjXCKr4PO...


In [35]:
Data_Scientist = scrape_jobs("Data Scientist", "California")
Data_Scientist

Unnamed: 0,Job Title,Company Name,Job Location,Estimated Salary,Job details,Job Href
0,Sr. Data Science Lead - Enterprise Risk,Pacific Life,"Newport Beach, CA",Estimated: $146K - $185K a year,Job Description:\nPacific Life is investing in...,https://www.simplyhired.com/job/7DiD5ctr5EEr_v...
1,Data Scientist/Algorithm Developer,Raptor Scientific,"Livermore, CA","Estimated: $110,000 - $150,000 a year",DATA SCIENTIST/ALGORITHM DEVELOPER\nRaptor Sci...,https://www.simplyhired.com/job/a-dQrnEnhElDX3...
2,"RESEARCH MANAGER, MICROBE DATA SCIENCE - DAVIS...",Novozymes,"Davis, CA","Estimated: $115,000 - $169,000 a year",Rethink your impact!\nAre you passionate about...,https://www.simplyhired.com/job/E91USaegKmr3pu...
3,AI Perception Algorithm Engineer/Scientist,Black Sesame Technologies Inc,"San Jose, CA","Estimated: $130,000 - $400,000 a year",The selected candidate will work closely with ...,https://www.simplyhired.com/job/tGULwr1WZBYH2Y...
4,E-commerce Data Scientist - Health and Wellnes...,Stingray Direct,"California, CA","Estimated: $110,000 - $130,000 a year",THE ROLE: E-commerce Data Scientist\nStingray ...,https://www.simplyhired.com/job/P3AoF1gK6Xrxks...
5,"Senior Scientist, Machine Learning",Bristol Myers Squibb,"San Diego, CA",Estimated: $143K - $181K a year,Working with Us\nChallenging. Meaningful. Life...,https://www.simplyhired.com/job/vhnw4kU8Sk4OPf...
6,Artificial Intelligence (AI) Engineer/Machine ...,MyHHub,"Folsom, CA","Estimated: $50,000 - $100,000 a year","MUST LIVE NEAR FOLSOM, CA. WILL NOT ADVANCE AP...",https://www.simplyhired.com/job/iyGQ8Q8sd4Bqjv...
7,Senior Sys SW Manager - AI,NVIDIA,"Santa Clara, CA","Estimated: $268,000 - $414,000 a year",NVIDIA has been transforming computer graphics...,https://www.simplyhired.com/job/DfPBG48qgS476M...
8,"Software Engineer, AI Compiler",Black Sesame Technologies Inc,"San Jose, CA","Estimated: $130,000 - $300,000 a year",Job description\nWe are looking for an experie...,https://www.simplyhired.com/job/zNXZ5cQdL3C_x7...
9,Data Scientist / Analyst,Study.com,"Mountain View, CA","Estimated: $105,000 - $125,000 a year",About Study.com\nOur mission is to open the do...,https://www.simplyhired.com/job/WY-VMo7F2RzAwZ...


In [36]:
Data_Analyst = scrape_jobs("Data Analyst", "California")
Data_Analyst

Unnamed: 0,Job Title,Company Name,Job Location,Estimated Salary,Job details,Job Href
0,Behavior Analyst- Bakersfield,Centre for Neuro Skills,"Bakersfield, CA","Estimated: $70,000 - $85,000 a year","Up to a $25,000 sign on bonus! *\nThe Behavior...",https://www.simplyhired.com/job/0GLevvXnc5SXeI...
1,IA6 - Data & Impact Analyst - Full Time - 40 H...,Episcopal Community Services,"San Francisco, CA",Estimated: $27 an hour,"POSITION SUMMARY:\nEvery night, ECS provides s...",https://www.simplyhired.com/job/tGhKLx7d-BYJf-...
2,Autonomous Vehicle Software Video Data Analyst,Kett Engineering Corporation,"Santa Clara, CA","Estimated: $118,800 - $178,200 a year",Kett Engineering is seeking AI / ML Autonomous...,https://www.simplyhired.com/job/NyUdQi5wFb3-wJ...
3,"IT Data Analyst, Consultant",Blue Shield of California,"Oakland, CA",Estimated: $84.3K - $107K a year,JOB DESCRIPTION\nYour Role\nThe Data Solutions...,https://www.simplyhired.com/job/URr8x_vmidpAB5...
4,Board Certified Behavior Analyst (BCBA),Applied Family Solutions,"Sacramento, CA",Estimated: $24.35 an hour,We are looking for a BCBA to join our team. Th...,https://www.simplyhired.com/job/XgCPpDOj98xmCG...
...,...,...,...,...,...,...
310,Trainee - Business Analyst,LeewayHertz,"San Francisco, CA","Estimated: $90,000 - $100,000 a year",Job Information\nPriority\nUrgent Important\nN...,https://www.simplyhired.com/job/9wgSKRoVwaa4GM...
311,Power Supply and Markets Data Analyst,GDS Associates Inc,"Folsom, CA",Estimated: $66.7K - $84.5K a year,Job Description:\n\nAbout us:\n\nGDS Associate...,https://www.simplyhired.com/job/8W0ZKLOS2ID-ml...
312,Data Analyst (Operations Research Analyst),LAs Totally Awesome,"Buena Park, CA",Estimated: $46.36 - $60.27 an hour,To develop and manage automated solutions for ...,https://www.simplyhired.com/job/dojMuXszidFOKc...
313,Performance Improvement Analyst,Stanford Health Care,"Palo Alto, CA",Estimated: $45 an hour,If you're ready to be part of our legacy of ho...,https://www.simplyhired.com/job/7MeT4WRVHt2qkr...


In [38]:
Business_Systems_Analyst = scrape_jobs("Business Systems Analyst", "California")
Business_Systems_Analyst

Unnamed: 0,Job Title,Company Name,Job Location,Estimated Salary,Job details,Job Href
0,IS Analyst - Server/System Administrator,Ponderosa,"O'Neals, CA",Estimated: From $28.60 an hour,POSITION: Information Services Analyst (Server...,https://www.simplyhired.com/job/hpOBKQecx4bd0W...
1,Business System Analyst (Spanish),"Odoo, Inc.","San Francisco Bay Area, CA","Estimated: $70,000 - $95,000 a year",Business System Analyst (Spanish Speaker)\nThi...,https://www.simplyhired.com/job/n21Gd6c9HhFbDu...
2,IT Business Analyst,Harrington Industrial Plastics,"Chino, CA",Estimated: $71.9K - $91K a year,Harrington Industrial Plastics is the leading ...,https://www.simplyhired.com/job/k1_e5nIwRXNrzb...
3,Quality Control Analyst,Trayer Engineering Corporation,"Livermore, CA",Estimated: $26.92 - $34.62 an hour,Job description\nTrayer Engineering Corporatio...,https://www.simplyhired.com/job/0WIJRMyp1hyV41...
4,Sr Business Systems Analyst (RPA experience re...,Golden 1 Credit Union,"Sacramento, CA","Estimated: $95,300 - $105,000 a year",JOB TITLE: Senior Business Systems Analyst\nDE...,https://www.simplyhired.com/job/O3mfUf1835P-NG...
...,...,...,...,...,...,...
72,Principal Business Systems Analyst,Disney,"Los Angeles, CA","Estimated: $76,598 - $114,897 a year",Principal Business Systems Analyst\nJob ID\n10...,https://www.simplyhired.com/job/UbmuDOG7ySqhQZ...
73,Senior Applications Developer - Android,Buildertrend,United States,"Estimated: $134,000 - $168,000 a year",Job Title: Senior Applications Developer - And...,https://www.simplyhired.com/job/_dRK0tPiLZ07vy...
74,Business Systems Analyst I/II/III,Santa Clara Family Health Plan,"San Jose, CA","Estimated: $120,000 - $150,000 a year","Salary Range: $80,428 - $120,642\nThe expected...",https://www.simplyhired.com/job/b54bLivtPNmvtf...
75,JDE Business Systems Analyst,"Cupertino Electric, Inc.","San Jose, CA",Estimated: Up to $60 an hour,Posting Title: JDE Business Systems Analyst\nR...,https://www.simplyhired.com/job/Bu_NElSdopF3gl...


In [39]:
Business_Analyst = scrape_jobs("Business Analyst", "California")
Business_Analyst

Unnamed: 0,Job Title,Company Name,Job Location,Estimated Salary,Job details,Job Href
0,Business System Analyst (Spanish),"Odoo, Inc.","San Francisco Bay Area, CA","Estimated: $70,000 - $95,000 a year",Business System Analyst (Spanish Speaker)\nThi...,https://www.simplyhired.com/job/n21Gd6c9HhFbDu...
1,Sales Tax Analyst,Peterson CAT,"San Leandro, CA","Estimated: Up to $95,000 a year",Peterson Cat has an immediate need for an expe...,https://www.simplyhired.com/job/CmzVuS9jhyna38...
2,Business Development Manager,"Tiger Lines, LLC","Lodi, CA","Estimated: $70,000 - $100,000 a year",Business Development Manager\nTiger Lines is r...,https://www.simplyhired.com/job/ZBVyXDqBplBQoY...
3,Digital Business Solutions Analyst,Rheem Manufacturing,"Oxnard, CA","Estimated: $85,000 - $100,000 a year",Digital Solutions Analyst -* (*23000558)\nDesc...,https://www.simplyhired.com/job/fo8O0o2DEiAag-...
4,IT Business Analyst,Harrington Industrial Plastics,"Chino, CA",Estimated: $71.9K - $91K a year,Harrington Industrial Plastics is the leading ...,https://www.simplyhired.com/job/k1_e5nIwRXNrzb...
...,...,...,...,...,...,...
636,Business Intelligence Analyst - Healthcare Ver...,The County of Santa Clara,"San Jose, CA","Estimated: $148,000 - $230,000 a year","Salary\n$142,168.00 - $172,814.72 Annually\nLo...",https://www.simplyhired.com/job/hB2VNCpg0WVHMh...
637,Business Operations Analyst,Yokohama Tire Corporation,"Santa Ana, CA",Estimated: $75.5K - $95.6K a year,Summary:\nThe Business Operations Analyst is r...,https://www.simplyhired.com/job/4o9v-vPAOtyBFB...
638,Senior Business Systems Analyst,NVIDIA,"Santa Clara, CA",Estimated: $94.5K - $120K a year,Our work at NVIDIA is dedicated towards a comp...,https://www.simplyhired.com/job/EDDci8LEBN6kXT...
639,Business Analyst - Remote (WFH),Cognitive Medical Systems,"San Diego, CA",Estimated: $82.2K - $104K a year,Position Title: Business Analyst\nPosition Sum...,https://www.simplyhired.com/job/PUsa-LW7yq3OU8...


In [43]:
Java_Developer = scrape_jobs("Java Developer", "California")
Java_Developer

Unnamed: 0,Job Title,Company Name,Job Location,Estimated Salary,Job details,Job Href
0,"Staff Software Engineer, Mask and Optical Model",ASML,"San Jose, CA",Estimated: $160K - $202K a year,"Introduction\nASML US, including its affiliate...",https://www.simplyhired.com/job/vFhIn-wTJSy4wH...
1,Solution Developer II - Automation & Innovatio...,Inland Empire Health Plans,"Rancho Cucamonga, CA","Estimated: $91,000 - $116,022 a year",A reasonable salary expectation is between $91...,https://www.simplyhired.com/job/DCyqFcvBkluK-L...
2,Software Development Engineer,HCL Technologies,"San Leandro, CA",Estimated: $50 - $70 an hour,"We are HCLTech, one of the fastest-growing lar...",https://www.simplyhired.com/job/oBqHVsvSw67eHp...
3,Backend Java Developer (Contract),QuinStreet,"Foster City, CA","Estimated: $100,000 - $120,000 a year",Powering Performance Marketplaces in Digital M...,https://www.simplyhired.com/job/IXSHgOVY-p4uSM...
4,"iOS Developer (Mobile) ($5,000 SIGN-ON BONUS)",Atlas Lift Tech,"San Ramon, CA",Estimated: $70 - $80 an hour,"ABOUT THE COMPANY\nAtlas Lift Tech, Inc. is a ...",https://www.simplyhired.com/job/Ho_8zXwcNDymyv...
5,Senior Application Developer,"Pyramid Consulting, Inc","San Francisco, CA","Estimated: $95,000 - $115,000 a year",Immediate need for a talented Senior Applicati...,https://www.simplyhired.com/job/IMe1HSVV7UGdTj...
6,Senior/Principal Software Engineer (PGS Space ...,Northrop Grumman,"Los Angeles, CA",Estimated: $101K - $127K a year,Requisition ID: R10123188\nCategory: Engineeri...,https://www.simplyhired.com/job/wxbeQdYYCiJdWl...
7,NGG Software Engineer / Principal Software Eng...,Northrop Grumman,"Azusa, CA","Estimated: $60,000 - $65,000 a year",Requisition ID: R10122869\nCategory: Engineeri...,https://www.simplyhired.com/job/sNZxSIFP-z9-cI...
8,Software Engineer (.NET Developer),Medica Talent Group,"Montebello, CA",Estimated: $55 - $70 an hour,Medica Talent Group is excited to share this d...,https://www.simplyhired.com/job/GJxHfqxs9fcppa...
9,Principal/Sr. Principal Embedded Software Engi...,Northrop Grumman,"Manhattan Beach, CA",Estimated: $50 - $70 an hour,Requisition ID: R10111765\nCategory: Engineeri...,https://www.simplyhired.com/job/CnoXJZifoL6q6i...


In [47]:
Software_Developer = scrape_jobs("Software_Developer", "California")
Software_Developer

Unnamed: 0,Job Title,Company Name,Job Location,Estimated Salary,Job details,Job Href
0,"Manager, Analytics Platform Operations",UCLA Health,"Los Angeles, CA",Estimated: $130K - $164K a year,Description\nThe Manager Analytics Platform Op...,https://www.simplyhired.com/job/45-tabNAwY2QBY...
1,Machine Vision Engineer,Tensor ID,"Oceanside, CA","Estimated: $90,000 - $105,000 a year","Tensor ID, Inc. is seeking a Machine Vision En...",https://www.simplyhired.com/job/Nwt0cD4Kq28Ixe...
2,Embedded Systems Software Developer,Santa Barbara Control Systems,"Goleta, CA","Estimated: $95,000 - $130,000 a year","Embedded Systems Software Developer, Santa Bar...",https://www.simplyhired.com/job/wbYg2dezCjAzG7...
3,Entry Level Software Developer,Property Matrix,"Culver City, CA","Estimated: $60,000 - $65,000 a year",Minimum Qualifications:\nCompleted bachelor’s ...,https://www.simplyhired.com/job/CKyZC1uypWCMsT...
4,Software Developer,Exxact Corporation,"Fremont, CA","Estimated: $88,008 - $152,413 a year",Exxact Corporation is looking for a senior lev...,https://www.simplyhired.com/job/lagy_DvRrhwYNb...
...,...,...,...,...,...,...
67,Software Developer,INFRAPOD TECHNOLOGIES,"San Jose, CA",Estimated: $23 - $32 an hour,Infrapod Technologies is fastly emerging compa...,https://www.simplyhired.com/job/xqiG8m6J9FCib7...
68,Jr. Software Developer,4Creeks,"Visalia, CA",Estimated: $129K - $163K a year,Job Description:\nJr. Software Developer\nFull...,https://www.simplyhired.com/job/5uEF5y12pheZNP...
69,Experienced Software Developer,VYNYL,"San Jose, CA",Estimated: $98.3K - $124K a year,Vynyl delivers high-fidelity software and desi...,https://www.simplyhired.com/job/-1jgS5ZobEGQZq...
70,"Research Lead, Financial Industry Disruptors",Fidelity Investments,"San Francisco, CA","Estimated: $157,000 - $209,000 a year",Job Description:\nThe Role\nFidelity has a str...,https://www.simplyhired.com/job/NJ35NoAV3itfEn...


In [49]:
Python_developer

Unnamed: 0,Job Title,Company Name,Job Location,Estimated Salary,Job details,Job Href
0,Senior Software Engineer 2023-2394,Kyocera Document Solutions Development America...,"Gardena, CA","Estimated: $100,000 - $135,000 a year",Overview:\nJob Title: Senior Software Engineer...,https://www.simplyhired.com/job/5yP1kKVQCtND64...
1,"Staff Software Engineer, Mask and Optical Model",ASML,"San Jose, CA",Estimated: $160K - $202K a year,"Introduction\nASML US, including its affiliate...",https://www.simplyhired.com/job/vFhIn-wTJSy4wH...
2,Senior Software Engineer,ASML,"San Jose, CA",Estimated: $136K - $172K a year,Sr. Product Engineer - Computational Lithograp...,https://www.simplyhired.com/job/VJ75rtVRlCyohc...
3,Principal Engineer Software/ Sr. Prinicpal Eng...,Northrop Grumman,"Palmdale, CA",Estimated: $60 - $75 an hour,Requisition ID: R10127243\nCategory: Engineeri...,https://www.simplyhired.com/job/od__6YatO1ac6h...
4,Staff Software Engineer - Top Secret Clearance...,Northrop Grumman,"Woodland Hills, CA",Estimated: $128K - $162K a year,Requisition ID: R10116350\nCategory: Engineeri...,https://www.simplyhired.com/job/7iuEWZuFzLoAhn...
...,...,...,...,...,...,...
122,C++ Software Developer (gRPC),Catapult Solutions Group,"San Diego, CA","Estimated: $85,000 - $110,000 a year",We are hiring C++ Software Developer in San Di...,https://www.simplyhired.com/job/CFx41xjqh3n5G1...
123,Software Engineer,Sorting Robotics,"Van Nuys, CA","Estimated: $187,040 - $280,000 a year",Do you have what it takes to be our next Softw...,https://www.simplyhired.com/job/5kgsm-c-wm7V60...
124,Aerospace Software Engineer 2023,NovaWurks,"Los Alamitos, CA","Estimated: $138,000 - $190,000 a year",Company Overview:\nAre you in search of a comp...,https://www.simplyhired.com/job/q_OWS80BZ8_T_6...
125,"Network Software Development Engineer, Intent-...",TikTok,"San Jose, CA",Estimated: $31 an hour,Responsibilities\nTikTok is the leading destin...,https://www.simplyhired.com/job/laEgInHya9Cajg...


# Mapping the column names in all the csv files and merge csv files

In [56]:
import pandas as pd

# Define the list of CSV file paths
file_paths = [
    'BI_Engineer_Indeed.csv',
'Business Analyst_California_2023-09-11.csv',
'Business Systems Analyst_California_2023-09-11.csv',
'Business_Analyst_Indeed.csv',
'Data Analyst_California_2023-09-11.csv',
'Data Engineer_California_2023-09-11.csv',
'Data Scientist_California_2023-09-11.csv',
'Data_Analyst_Indeed.csv',
'Data_Engineer_Indeed.csv',
'Data_Scientist_Indeed.csv',
'Java Developer_California_2023-09-11.csv',
'Java_Developer_Indeed.csv',
'Monster_Firefox_bi_developer_9_8_23.csv',
'Monster_Firefox_data_engineer_9_8_23.csv',
'Monster_Firefox_devops_engineer_9_8_23.csv',
'Monster_Firefox_python_developer_9_8_23.csv',
'Python Developer_California_2023-09-11.csv',
'Software Engineer_California_2023-09-11.csv',
'Software_Developer_California_2023-09-11.csv',
'Software_Developer_Indeed.csv',
'Software_Engineer_Jobs_Indeed.csv',
]

# Define a dictionary to map old column names to new column names
column_name_mapping = {
    'Job Title': 'Job_Title',
    'Company Name': 'Company_Name',
    'Company' : 'Company_Name',
    'Location': 'Job_Location',
    'Job Location': 'Job_Location',
    'Salary': 'Estimated_Salary',
    'Estimated Salary': 'Estimated_Salary',
    'Href': 'Hyperlink',
    'Job Href': 'Hyperlink',
    'Job Description': 'Job_Details',
    'Job details': 'Job_Details'
}

# Read all CSV files into DataFrames and rename columns
dataframes = []

for file_path in file_paths:
    df = pd.read_csv(file_path)
    df.rename(columns=column_name_mapping, inplace=True)
    dataframes.append(df)

# Concatenate the DataFrames row-wise
merged_df = pd.concat(dataframes, ignore_index=True)

# Define the desired column order
desired_columns = [
    'Job_Title',
    'Company_Name',
    'Job_Location',
    'Estimated_Salary',
    'Job_Details',
    'Hyperlink',
]

# Reorder columns to match the desired order
merged_df = merged_df[desired_columns]

# Save the merged DataFrame to a CSV file
merged_df.to_csv('merged_file.csv', index=False)


In [57]:
merged_df

Unnamed: 0,Job_Title,Company_Name,Job_Location,Estimated_Salary,Job_Details,Hyperlink
0,Business Intelligence Engineer\n- job post,California State University,"San Luis Obispo, CA","$87,000 - $110,000 a year - Full-time",,
1,Business Intelligence Engineer\n- job post,Flip Fit,"El Segundo, CA","$80,000 - $125,000 a year - Full-time",,
2,Business Intelligence Engineer\n- job post,"Cal Poly, San Luis Obispo","1 Grand Ave., San Luis Obispo, CA 93407","$87,000 - $110,000 a year - Full-time",,
3,Business Intelligence Engineer\n- job post,Flip,"El Segundo, CA","$80,000 - $125,000 a year - Full-time",,
4,Business Intelligence Developer III\n- job post,Sutter Health,"Sacramento, CA",$47.28 - $70.92 an hour - Full-time,,
...,...,...,...,...,...,...
2478,"Software Engineer, Apple Pay\n- job post",Apple,"Cupertino, CA",Full-time,,
2479,Web Developer/Programmer\n- job post,Vegan Liftz,"1300 Avenida Vista Hermosa, San Clemente, CA 9...","$10,000 a month",,
2480,Machine Learning Engineer (Remote)\n- job post,Curai,"San Francisco Bay Area, CA•Remote","$150,000 - $200,000 a year - Full-time",,
2481,Trainee - Software Developer\n- job post,LeewayHertz,"San Francisco, CA 94111•Remote",Full-time,,


In [58]:
merged_df['Job_Title'][0]

'Business Intelligence Engineer\n- job post'

In [59]:
merged_df.Job_Details.isna().sum()

511

In [2]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from selenium.common.exceptions import NoSuchElementException
import logging

def scrape_jobs(job_title, location):
    # Set up logging to capture errors
    logging.basicConfig(filename='web_scraping_errors.log', level=logging.ERROR)

    # Create a Chrome browser instance
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run Chrome in headless mode (no GUI)
    driver = webdriver.Chrome(options=options)

    # Construct the URL
    url = f"https://www.monster.com/jobs/search?q={job_title}&where={location}&page="

    # Initialize data lists
    job_titles, company_names, job_locations, est_salary, job_details, job_hrefs = [], [], [], [], [], []

    total_job_count = 0
    page_number = 1

    try:
        while True:
            # Visit the URL
            driver.get(url + str(page_number))

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            if not job_card_elements:
                break  # No more job cards found, exit the loop

            for job_card_element in job_card_elements:
                # Extract job details from the job card
                try:
                    job_title_element = job_card_element.find_element(By.CSS_SELECTOR, "a[data-testid='jobTitle']")
                    job_titles.append(job_title_element.text)
                except NoSuchElementException:
                    job_titles.append("N/A")

                try:
                    company_name_element = job_card_element.find_element(By.CSS_SELECTOR, "span[data-testid='company']")
                    company_names.append(company_name_element.text)
                except NoSuchElementException:
                    company_names.append("N/A")

                try:
                    job_location_element = job_card_element.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']")
                    job_locations.append(job_location_element.text)
                except NoSuchElementException:
                    job_locations.append("N/A")

                try:
                    est_salary_element = job_card_element.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailDateRecency']")
                    est_salary.append(est_salary_element.text)
                except NoSuchElementException:
                    est_salary.append("N/A")

                try:
                    job_href_element = job_card_element.find_element(By.CSS_SELECTOR, "a[data-testid='jobTitle']")
                    job_href = job_href_element.get_attribute("href")
                    job_hrefs.append(job_href)
                except NoSuchElementException:
                    job_hrefs.append("N/A")

                try:
                    job_detail_element = job_card_element.find_element(By.CSS_SELECTOR, "div[data-testid='jobCardTags']")
                    job_details.append(job_detail_element.text)
                except NoSuchElementException:
                    job_details.append("N/A")

            # Increment the page number
            page_number += 1

    except NoSuchElementException as e:
        # Handle the specific exception (element not found)
        logging.error(f"Element not found error: {str(e)}")
    except Exception as e:
        # Handle other exceptions and log them
        logging.error(f"An error occurred: {str(e)}")

    finally:
        # Close the browser when done
        driver.quit()

    # Create a dictionary to store the data
    data = {
        'Job Title': job_titles,
        'Company Name': company_names,
        'Job Location': job_locations,
        'Estimated Salary': est_salary,
        'Job Details': job_details,
        'Job Href': job_hrefs
    }

    # Create a DataFrame from the data
    df = pd.DataFrame(data)

    # Save the data to a CSV file
    file_name = f'{job_title}_{location}_{time.strftime("%Y-%m-%d")}.csv'
    df.to_csv(file_name, index=False)

    return df


In [3]:
data_engineer = scrape_jobs("Data Engineer", "California")
data_engineer

Unnamed: 0,Job Title,Company Name,Job Location,Estimated Salary,Job Details,Job Href


In [46]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Initialize data lists
    job_titles, company_names, job_locations, job_details, job_hrefs = [], [], [], [], []

    try:
        for page in range(start_page, end_page + 1):
            # Construct the URL with the current page number
            url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page={page}"

            # Visit the URL
            driver.get(url)

            # Find the job card scroll container
            scroll_container = driver.find_element(By.CLASS_NAME, 'infinite-scroll-component__outerdiv')

            # Scroll down to load more job listings on each page
            for _ in range(5):  # Scroll 5 times to load more listings on each page
                driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", scroll_container)
                time.sleep(2)  # Wait for the page to load

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            # Iterate through the job card elements to extract data
            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                company_names.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                job_locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Get the job href directly
                job_href = job_card.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
                job_hrefs.append(job_href)  # Store the href link

                # Extract the job details directly
                try:
                    job_detail_element = job_card.find_element(By.CSS_SELECTOR, "div[class='descriptionstyles__DescriptionBody-sc-13ve12b-4 crOoVX']")
                    job_details.append(job_detail_element.text)
                except:
                    job_details.append("N/A")

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        # Close the browser when done
        driver.quit()

    # Create a dictionary to store the data
    data = {
        'Job Title': job_titles,
        'Company Name': company_names,
        'Job Location': job_locations,
        'Job Details': job_details,
        'Job Href': job_hrefs  # Include the href links
    }

    # Create a DataFrame from the collected data
    df = pd.DataFrame(data)

    # Save the data to a CSV file
    df.to_csv(f'{job_title}_{location}_jobs.csv', index=False)

    return df

# Example usage:
df = scrape_jobs("Data Engineer", "California", start_page=1, end_page=10)
if df is not None:
    print(df.head())


                                    Job Title         Company Name  \
0                      Software Engineer, C++      DCS Corporation   
1                    Senior Software Engineer      Rumba Solutions   
2  Data Transfer Device (DTD) System Engineer      DCS Corporation   
3          AngularJS Senior Software Engineer              Walmart   
4     Senior Software Engineer - Applications  Gables Search Group   

     Job Location Job Details  \
0  Ridgecrest, CA         N/A   
1   Riverside, CA         N/A   
2  Ridgecrest, CA         N/A   
3   SUNNYVALE, CA         N/A   
4    Carlsbad, CA         N/A   

                                            Job Href  
0  https://www.monster.com/job-openings/software-...  
1  https://www.monster.com/job-openings/senior-so...  
2  https://www.monster.com/job-openings/data-tran...  
3  https://www.monster.com/job-openings/angularjs...  
4  https://www.monster.com/job-openings/senior-so...  


In [47]:
data_engineer = scrape_jobs("Data Engineer", "California")
data_engineer

KeyboardInterrupt: 

In [50]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Initialize data lists
    job_titles, company_names, job_locations, job_hrefs = [], [], [], []

    try:
        for page in range(start_page, end_page + 1):
            # Construct the URL with the current page number
            url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page={page}"

            # Visit the URL
            driver.get(url)

            # Wait for the job card scroll container to load
            scroll_container = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'infinite-scroll-component__outerdiv'))
            )

            # Scroll down to load more job listings on each page
            for _ in range(5):  # Scroll 5 times to load more listings on each page
                driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", scroll_container)
                time.sleep(2)  # Wait for the page to load

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            # Iterate through the job card elements to extract data
            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                company_names.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                job_locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Get the job href directly
                job_href = job_card.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
                job_hrefs.append(job_href)  # Store the href link

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        # Close the browser when done with job card scraping
        driver.quit()

    # Create a dictionary to store the job card data
    data = {
        'Job Title': job_titles,
        'Company Name': company_names,
        'Job Location': job_locations,
        'Job Href': job_hrefs  # Include the href links
    }

    # Create a DataFrame from the collected job card data
    df_job_cards = pd.DataFrame(data)

    # Initialize data lists for descriptions
    job_descriptions = []

    try:
        # Create a new Chrome browser instance for scraping descriptions
        driver = webdriver.Chrome()

        for job_href in job_hrefs:
            # Visit the job detail page
            driver.get(job_href)

            # Wait for the description to load
            description_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div[class='descriptionstyles__DescriptionBody-sc-13ve12b-4 crOoVX']"))
            )

            # Extract the job description
            job_description = description_element.text
            job_descriptions.append(job_description)

    except Exception as e:
        print(f"An error occurred while scraping descriptions: {str(e)}")

    finally:
        # Close the browser when done with description scraping
        driver.quit()

    # Add descriptions to the DataFrame
    df_job_cards['Job Description'] = job_descriptions

    # Save the data to a CSV file
    df_job_cards.to_csv(f'{job_title}_{location}_job_cards.csv', index=False, mode='a', header=False)

    return df_job_cards

# Example usage:
df = scrape_jobs("Data Engineer", "California", start_page=1, end_page=10)
if df is not None:
    print(df.head())


                                           Job Title           Company Name  \
0  2024 Technology Early Career Development Progr...            Wells Fargo   
1       AIML Software Engineer (Mid-Level or Senior)                 BOEING   
2                        Mid-Level Software Engineer                 BOEING   
3  Senior Associate, Systems Engineering (Data An...  L3Harris Technologies   
4      SolidWorks Applications Engineer - California                    GSC   

           Job Location                                           Job Href  \
0     San Francisco, CA  https://www.monster.com/job-openings/2024-tech...   
1        El Segundo, CA  https://www.monster.com/job-openings/aiml-soft...   
2       Los Angeles, CA  https://www.monster.com/job-openings/mid-level...   
3        Dodgertown, CA  https://www.monster.com/job-openings/senior-as...   
4  Santa Fe Springs, CA  https://www.monster.com/job-openings/solidwork...   

                                     Job Description  
0

In [51]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Initialize data lists
    job_titles, company_names, job_locations, job_hrefs = [], [], [], []

    try:
        for page in range(start_page, end_page + 1):
            # Construct the URL with the current page number
            url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page={page}"

            # Visit the URL
            driver.get(url)

            # Wait for the job card scroll container to load
            scroll_container = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'infinite-scroll-component__outerdiv'))
            )

            # Scroll down to load more job listings on each page
            for _ in range(5):  # Scroll 5 times to load more listings on each page
                driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", scroll_container)
                time.sleep(2)  # Wait for the page to load

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            # Iterate through the job card elements to extract data
            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                company_names.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                job_locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Get the job href directly
                job_href = job_card.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
                job_hrefs.append(job_href)  # Store the href link

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        # Close the browser when done with job card scraping
        driver.quit()

    # Create a dictionary to store the job card data
    data = {
        'Job Title': job_titles,
        'Company Name': company_names,
        'Job Location': job_locations,
        'Job Href': job_hrefs  # Include the href links
    }

    # Create a DataFrame from the collected job card data
    df_job_cards = pd.DataFrame(data)

    # Initialize data lists for descriptions
    job_descriptions = []

    try:
        # Create a new Chrome browser instance for scraping descriptions
        driver = webdriver.Chrome()

        for job_href in job_hrefs:
            # Visit the job detail page
            driver.get(job_href)

            # Wait for the description to load
            description_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div[class='descriptionstyles__DescriptionBody-sc-13ve12b-4 crOoVX']"))
            )

            # Extract the job description
            job_description = description_element.text
            job_descriptions.append(job_description)

    except Exception as e:
        print(f"An error occurred while scraping descriptions: {str(e)}")

    finally:
        # Close the browser when done with description scraping
        driver.quit()

    # Add descriptions to the DataFrame
    df_job_cards['Job Description'] = job_descriptions

    # Save the data to a CSV file
    df_job_cards.to_csv(f'{job_title}_{location}_job_cards.csv', index=False, mode='a', header=False)

    return df_job_cards

# Example usage:
df = scrape_jobs("Data Engineer", "California", start_page=1, end_page=10)
if df is not None:
    print(df.head())


An error occurred while scraping descriptions: Message: disconnected: Unable to receive message from renderer
  (failed to check if window was closed: disconnected: not connected to DevTools)
  (Session info: chrome=116.0.5845.188)
Stacktrace:
	GetHandleVerifier [0x00007FF639C752A2+57122]
	(No symbol) [0x00007FF639BEEA92]
	(No symbol) [0x00007FF639ABE3AB]
	(No symbol) [0x00007FF639AAC01A]
	(No symbol) [0x00007FF639AABF63]
	(No symbol) [0x00007FF639AAA9E5]
	(No symbol) [0x00007FF639AAB31E]
	(No symbol) [0x00007FF639AC1FE7]
	(No symbol) [0x00007FF639AC3D2F]
	(No symbol) [0x00007FF639AC3E20]
	(No symbol) [0x00007FF639AF367E]
	(No symbol) [0x00007FF639B16FDA]
	(No symbol) [0x00007FF639AEEB76]
	(No symbol) [0x00007FF639B171F0]
	(No symbol) [0x00007FF639B2EB82]
	(No symbol) [0x00007FF639B16DB3]
	(No symbol) [0x00007FF639AED2B1]
	(No symbol) [0x00007FF639AEE494]
	GetHandleVerifier [0x00007FF639F1EF82+2849794]
	GetHandleVerifier [0x00007FF639F71D24+3189156]
	GetHandleVerifier [0x00007FF639F6AC

ValueError: Length of values (89) does not match length of index (99)

In [52]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Initialize data lists
    job_titles, company_names, job_locations, job_hrefs = [], [], [], []

    try:
        for page_number in range(start_page, end_page + 1):
            # Construct the URL with the current page number
            url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page={page_number}"

            # Visit the URL
            driver.get(url)

            # Wait for the job card scroll container to load
            scroll_container = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'infinite-scroll-component__outerdiv'))
            )

            # Scroll down to load more job listings on each page
            for _ in range(5):  # Scroll 5 times to load more listings on each page
                driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", scroll_container)
                time.sleep(2)  # Wait for the page to load

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            # Iterate through the job card elements to extract data
            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                company_names.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                job_locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Get the job href directly
                job_href = job_card.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
                job_hrefs.append(job_href)  # Store the href link

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        # Close the browser when done with job card scraping
        driver.quit()

    # Create a dictionary to store the job card data
    data = {
        'Job Title': job_titles,
        'Company Name': company_names,
        'Job Location': job_locations,
        'Job Href': job_hrefs  # Include the href links
    }

    # Create a DataFrame from the collected job card data
    df_job_cards = pd.DataFrame(data)

    # Initialize data lists for descriptions
    job_descriptions = []

    try:
        # Create a new Chrome browser instance for scraping descriptions
        driver = webdriver.Chrome()

        for job_href in job_hrefs:
            # Visit the job detail page
            driver.get(job_href)

            # Wait for the description to load
            description_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div[class='descriptionstyles__DescriptionBody-sc-13ve12b-4 crOoVX']"))
            )

            # Extract the job description
            job_description = description_element.text
            job_descriptions.append(job_description)

    except Exception as e:
        print(f"An error occurred while scraping descriptions: {str(e)}")

    finally:
        # Close the browser when done with description scraping
        driver.quit()

    # Add descriptions to the DataFrame
    df_job_cards['Job Description'] = job_descriptions

    # Save the data to a CSV file
    df_job_cards.to_csv(f'{job_title}_{location}_job_cards.csv', index=False, mode='a', header=False)

    return df_job_cards

# Example usage:
df = scrape_jobs("Data Engineer", "California", start_page=1, end_page=10)
if df is not None:
    print(df.head())


                                    Job Title       Company Name  \
0                               Data Engineer       Randstad USA   
1                           Big Data Engineer       Randstad USA   
2             Senior Device Software Engineer             AbbVie   
3  Battery Algorithm Data Engineer - Contract  TalentBurst, Inc.   
4                   Prod Data Change Engineer            Entegee   

     Job Location                                           Job Href  \
0   Sunnyvale, CA  https://www.monster.com/job-openings/data-engi...   
1   Sunnyvale, CA  https://www.monster.com/job-openings/big-data-...   
2  Pleasanton, CA  https://www.monster.com/job-openings/senior-de...   
3   Cupertino, CA  https://www.monster.com/job-openings/battery-a...   
4      Irvine, CA  https://www.monster.com/job-openings/prod-data...   

                                     Job Description  
0  job summary:\nDescription:\nDesigns, develops,...  
1  job summary:\nThe Software Engineer position 

In [53]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Initialize data lists
    job_titles, company_names, job_locations, job_hrefs = [], [], [], []

    try:
        for page_number in range(start_page, end_page + 1):
            # Construct the URL with the current page number
            url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page={page_number}"

            # Visit the URL
            driver.get(url)

            # Wait for the job card scroll container to load
            scroll_container = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'infinite-scroll-component__outerdiv'))
            )

            # Scroll down to load more job listings on each page
            for _ in range(5):  # Scroll 5 times to load more listings on each page
                driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", scroll_container)
                time.sleep(2)  # Wait for the page to load

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            # Iterate through the job card elements to extract data
            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                company_names.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                job_locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Get the job href directly
                job_href = job_card.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
                job_hrefs.append(job_href)  # Store the href link

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        # Close the browser when done with job card scraping
        driver.quit()

    # Create a dictionary to store the job card data
    data = {
        'Job Title': job_titles,
        'Company Name': company_names,
        'Job Location': job_locations,
        'Job Href': job_hrefs  # Include the href links
    }

    # Create a DataFrame from the collected job card data
    df_job_cards = pd.DataFrame(data)

    # Initialize data lists for descriptions
    job_descriptions = []

    try:
        # Create a new Chrome browser instance for scraping descriptions
        driver = webdriver.Chrome()

        for job_href in job_hrefs:
            # Visit the job detail page
            driver.get(job_href)

            # Wait for the description to load
            description_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div[class='descriptionstyles__DescriptionBody-sc-13ve12b-4 crOoVX']"))
            )

            # Extract the job description
            job_description = description_element.text
            job_descriptions.append(job_description)

    except Exception as e:
        print(f"An error occurred while scraping descriptions: {str(e)}")

    finally:
        # Close the browser when done with description scraping
        driver.quit()

    # Add descriptions to the DataFrame
    df_job_cards['Job Description'] = job_descriptions

    # Save the data to a CSV file
    df_job_cards.to_csv(f'{job_title}_{location}_job_cards.csv', index=False, mode='a', header=False)

    return df_job_cards

# Input job title and location
job_title = input("Enter the job title: ")
location = input("Enter the location: ")

# Example usage:
df = scrape_jobs(job_title, location, start_page=1, end_page=10)
if df is not None:
    print(df.head())


Enter the job title: software engineer
Enter the location: california
                                    Job Title     Company Name  \
0               Embedded Software Engineer II     Randstad USA   
1                      Java Software Engineer        Vaco, LLC   
2                           Software Engineer  Piper Companies   
3                    Senior Software Engineer  Rumba Solutions   
4  Software Engineer (Linux Operating System)       VANET Inc.   

                 Job Location  \
0              El Segundo, CA   
1             Los Angeles, CA   
2              El Segundo, CA   
3               Riverside, CA   
4  Rancho Santa Margarita, CA   

                                            Job Href  \
0  https://www.monster.com/job-openings/embedded-...   
1  https://www.monster.com/job-openings/java-soft...   
2  https://www.monster.com/job-openings/software-...   
3  https://www.monster.com/job-openings/senior-so...   
4  https://www.monster.com/job-openings/software-...  

In [54]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Initialize data lists
    job_titles, company_names, job_locations, job_hrefs, salaries, date_posted = [], [], [], [], [], []

    try:
        for page_number in range(start_page, end_page + 1):
            # Construct the URL with the current page number
            url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page={page_number}"

            # Visit the URL
            driver.get(url)

            # Wait for the job card scroll container to load
            scroll_container = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'infinite-scroll-component__outerdiv'))
            )

            # Scroll down to load more job listings on each page
            for _ in range(5):  # Scroll 5 times to load more listings on each page
                driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", scroll_container)
                time.sleep(2)  # Wait for the page to load

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            # Iterate through the job card elements to extract data
            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                company_names.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                job_locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Get the job href directly
                job_href = job_card.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
                job_hrefs.append(job_href)  # Store the href link

                # Find salary element, if missing set to "N/A"
                try:
                    salary = job_card.find_element(By.CSS_SELECTOR, "div[data-testid='payTag']").text
                except:
                    salary = "N/A"
                salaries.append(salary)

                # Find date posted element, if missing set to "N/A"
                try:
                    posted = job_card.find_element(By.CSS_SELECTOR, "div[data-test-id='svx-jobview-posted-or-website-div']").text
                    if "Posted" in posted:
                        date_posted.append(posted.split("Posted")[1].strip())
                    else:
                        date_posted.append("N/A")
                except:
                    date_posted.append("N/A")

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        # Close the browser when done with job card scraping
        driver.quit()

    # Create a dictionary to store the job card data
    data = {
        'Job Title': job_titles,
        'Company Name': company_names,
        'Location': job_locations,
        'Salary': salaries,
        'Href': job_hrefs,
        'Date Posted': date_posted
    }

    # Create a DataFrame from the collected job card data
    df_job_cards = pd.DataFrame(data)

    # Initialize data lists for descriptions and scraping date
    job_descriptions, scraping_dates = [], []

    try:
        # Create a new Chrome browser instance for scraping descriptions
        driver = webdriver.Chrome()

        for job_href in job_hrefs:
            # Visit the job detail page
            driver.get(job_href)

            # Wait for the description to load
            description_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div[class='descriptionstyles__DescriptionBody-sc-13ve12b-4 crOoVX']"))
            )

            # Extract the job description
            job_description = description_element.text
            job_descriptions.append(job_description)

            # Get the current date for the scraping date
            scraping_date = time.strftime("%m/%d/%Y")
            scraping_dates.append(scraping_date)

    except Exception as e:
        print(f"An error occurred while scraping descriptions: {str(e)}")

    finally:
        # Close the browser when done with description scraping
        driver.quit()

    # Add descriptions and scraping dates to the DataFrame
    df_job_cards['Job Description'] = job_descriptions
    df_job_cards['Scraping Date'] = scraping_dates

    # Save the data to a CSV file
    df_job_cards.to_csv(f'{job_title}_{location}_job_cards.csv', index=False, mode='a', header=False)

    return df_job_cards

# Input job title and location
job_title = input("Enter the job title: ")
location = input("Enter the location: ")

# Example usage:
df = scrape_jobs(job_title, location, start_page=1, end_page=10)
if df is not None:
    print(df.head())


Enter the job title: data engineer
Enter the location: california
                                           Job Title       Company Name  \
0                                      Data Engineer       Randstad USA   
1                                  Big Data Engineer       Randstad USA   
2         Battery Algorithm Data Engineer - Contract  TalentBurst, Inc.   
3                          Prod Data Change Engineer            Entegee   
4  Sr. Visualization/Data Engineer (THIS ROLE IS ...         TEKsystems   

        Location Salary                                               Href  \
0  Sunnyvale, CA    N/A  https://www.monster.com/job-openings/data-engi...   
1  Sunnyvale, CA    N/A  https://www.monster.com/job-openings/big-data-...   
2  Cupertino, CA    N/A  https://www.monster.com/job-openings/battery-a...   
3     Irvine, CA    N/A  https://www.monster.com/job-openings/prod-data...   
4  Cupertino, CA    N/A  https://www.monster.com/job-openings/sr-visual...   

  Date Posted 

In [55]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Initialize data lists
    job_titles, companies, locations, hrefs, content_list, salary_list, date_posted_list = [], [], [], [], [], [], []

    try:
        for page_number in range(start_page, end_page + 1):
            # Construct the URL with the current page number
            url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page={page_number}"

            # Visit the URL
            driver.get(url)

            # Wait for the job card scroll container to load
            scroll_container = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'infinite-scroll-component__outerdiv'))
            )

            # Scroll down to load more job listings on each page
            for _ in range(5):  # Scroll 5 times to load more listings on each page
                driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", scroll_container)
                time.sleep(2)  # Wait for the page to load

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            # Iterate through the job card elements to extract data
            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                companies.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Get the job href directly
                job_href = job_card.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
                hrefs.append(job_href)  # Store the href link

                # Find salary element, if missing set to "N/A"
                try:
                    salary = job_card.find_element(By.CSS_SELECTOR, "div[data-testid='payTag']").text
                except:
                    salary = "N/A"
                salary_list.append(salary)

                # Find date posted element, if missing set to "N/A"
                try:
                    date_posted = job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailDateRecency']").text
                except:
                    date_posted = "N/A"
                date_posted_list.append(date_posted)

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        # Close the browser when done with job card scraping
        driver.quit()

    # Create a dictionary to store the job card data
    data = {
        'Job Title': job_titles,
        'Company Name': companies,
        'Location': locations,
        'Salary': salary_list,
        'Href': hrefs,
        'Date Posted': date_posted_list,
        'Scraping Date': [time.strftime("%m/%d/%Y")] * len(job_titles)
    }

    # Create a DataFrame from the collected job card data
    df_job_cards = pd.DataFrame(data)

    # Initialize data lists for descriptions
    content_list = []

    try:
        # Create a new Chrome browser instance for scraping descriptions
        driver = webdriver.Chrome()

        for job_href in hrefs:
            # Visit the job detail page
            driver.get(job_href)

            # Wait for the description to load
            description_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div[class='descriptionstyles__DescriptionBody-sc-13ve12b-4 crOoVX']"))
            )

            # Extract the job description
            job_description = description_element.text
            content_list.append(job_description)

    except Exception as e:
        print(f"An error occurred while scraping descriptions: {str(e)}")

    finally:
        # Close the browser when done with description scraping
        driver.quit()

    # Add descriptions to the DataFrame
    df_job_cards['Job Description'] = content_list

    # Save the data to a CSV file
    df_job_cards.to_csv(f'{job_title}_{location}_job_cards.csv', index=False)

    return df_job_cards

# Input job title and location
job_title = input("Enter the job title: ")
location = input("Enter the location: ")

# Example usage:
df = scrape_jobs(job_title, location, start_page=1, end_page=10)
if df is not None:
    print(df.head())


Enter the job title: python developer
Enter the location: california
                         Job Title                 Company Name  \
0           Software Engineer, C++              DCS Corporation   
1               Software Developer  Creative Financial Staffing   
2  NodeJS Senior Software Engineer                      Walmart   
3  Software Engineer II - Contract            TalentBurst, Inc.   
4  PHP Software Developer (HYBRID)                   TEKsystems   

            Location Salary  \
0     Ridgecrest, CA    N/A   
1    Bakersfield, CA    N/A   
2       San Jose, CA    N/A   
3  San Francisco, CA    N/A   
4      Cupertino, CA    N/A   

                                                Href  Date Posted  \
0  https://www.monster.com/job-openings/software-...  12 days ago   
1  https://www.monster.com/job-openings/software-...   2 days ago   
2  https://www.monster.com/job-openings/nodejs-se...    1 day ago   
3  https://www.monster.com/job-openings/software-...   3 days ago

In [56]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Initialize data lists
    job_titles, companies, locations, hrefs, content_list, salary_list, date_posted_list = [], [], [], [], [], [], []

    try:
        for page_number in range(start_page, end_page + 1):
            # Construct the URL with the current page number
            url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page={page_number}"

            # Visit the URL
            driver.get(url)

            # Wait for the job card scroll container to load
            scroll_container = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'infinite-scroll-component__outerdiv'))
            )

            # Scroll down to load more job listings on each page
            for _ in range(5):  # Scroll 5 times to load more listings on each page
                driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", scroll_container)
                time.sleep(2)  # Wait for the page to load

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            # Iterate through the job card elements to extract data
            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                companies.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Get the job href directly
                job_href = job_card.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
                hrefs.append(job_href)  # Store the href link

                # Find salary element, if missing set to "N/A"
                try:
                    salary = job_card.find_element(By.CSS_SELECTOR, "div[data-testid='payTag']").text
                except:
                    salary = "N/A"
                salary_list.append(salary)

                # Find date posted element, if missing set to "N/A"
                try:
                    date_posted = job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailDateRecency']").text
                except:
                    date_posted = "N/A"
                date_posted_list.append(date_posted)

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        # Close the browser when done with job card scraping
        driver.quit()

    # Initialize data lists for descriptions
    content_list = []

    try:
        # Create a new Chrome browser instance for scraping descriptions
        driver = webdriver.Chrome()

        for job_href in hrefs:
            # Visit the job detail page
            driver.get(job_href)

            # Wait for the description to load
            description_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div[class='descriptionstyles__DescriptionBody-sc-13ve12b-4 crOoVX']"))
            )

            # Extract the job description
            job_description = description_element.text
            content_list.append(job_description)

    except Exception as e:
        print(f"An error occurred while scraping descriptions: {str(e)}")

    finally:
        # Close the browser when done with description scraping
        driver.quit()

    # Create a dictionary to store the job card data
    data = {
        'Job Title': job_titles,
        'Company Name': companies,
        'Location': locations,
        'Job Description': content_list,
        'Salary': salary_list,
        'Date Posted': date_posted_list,
        'Scraping Date': [time.strftime("%m-%d-%Y")] * len(job_titles)
    }

    # Create a DataFrame from the collected job card data
    df_job_cards = pd.DataFrame(data)

    # Save the data to a CSV file with the desired column order and date of scraping
    csv_filename = f'{job_title}_{location}_{time.strftime("%m-%d-%Y")}.csv'
    df_job_cards.to_csv(csv_filename, index=False, mode='a', header=False, columns=['Job Title', 'Company Name', 'Location', 'Job Description', 'Salary', 'Date Posted', 'Scraping Date'])

    return df_job_cards

# Input job title and location
job_title = input("Enter the job title: ")
location = input("Enter the location: ")

# Example usage:
df = scrape_jobs(job_title, location, start_page=1, end_page=10)
if df is not None:
    print(df.head())


Enter the job title: software engineer
Enter the location: california
                                 Job Title         Company Name  \
0            Embedded Software Engineer II         Randstad USA   
1          Software Engineer II - Contract    TalentBurst, Inc.   
2                   Software Engineer, C++      DCS Corporation   
3        Staff Software Engineer - Android            Vaco, LLC   
4  Senior Software Engineer - Applications  Gables Search Group   

            Location                                    Job Description  \
0     El Segundo, CA  job summary:\nOur client, a leader in the aero...   
1  San Francisco, CA  Pay Rate: 65.0-66.2 $/Hour\nDevelop an Interna...   
2     Ridgecrest, CA  Salary Range: $64,942 - $110,582\nSeeking a So...   
3      San Diego, CA  Summary:\nAs an Android Engineer on the androi...   
4       Carlsbad, CA  Our Client’s Wireless Team is currently seekin...   

  Salary   Date Posted Scraping Date  
0    N/A    2 days ago    09-15-2023 

In [59]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Initialize data lists
    job_titles, companies, locations, hrefs, content_list, salary_list, date_posted_list = [], [], [], [], [], [], []

    try:
        for page_number in range(start_page, end_page + 1):
            # Construct the URL with the current page number
            url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page={page_number}"

            # Visit the URL
            driver.get(url)

            # Wait for the job card scroll container to load
            scroll_container = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'infinite-scroll-component__outerdiv'))
            )

            # Scroll down to load more job listings on each page
            for _ in range(5):  # Scroll 5 times to load more listings on each page
                driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", scroll_container)
                time.sleep(2)  # Wait for the page to load

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            # Iterate through the job card elements to extract data
            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                companies.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Get the job href directly
                job_href = job_card.find_element(By.CSS_SELECTOR, "h3 a").get_attribute("href")
                hrefs.append(job_href)  # Store the href link

                # Find salary element in the job card, if missing set to "N/A"
                salary_tag = job_card.find_element(By.CSS_SELECTOR, 'span.sc-fKMpNL.dasswI.ds-tag-label')
                salary = salary_tag.text if salary_tag else 'N/A'


                # Find date posted element, if missing set to "N/A"
                try:
                    date_posted = job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailDateRecency']").text
                except:
                    date_posted = "N/A"
                date_posted_list.append(date_posted)

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        # Close the browser when done with job card scraping
        driver.quit()

    # Initialize data lists for descriptions
    content_list = []

    try:
        # Create a new Chrome browser instance for scraping descriptions
        driver = webdriver.Chrome()

        for job_href in hrefs:
            # Visit the job detail page
            driver.get(job_href)

            # Wait for the description to load
            description_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div[class='descriptionstyles__DescriptionBody-sc-13ve12b-4 crOoVX']"))
            )

            # Extract the job description
            job_description = description_element.text
            content_list.append(job_description)

    except Exception as e:
        print(f"An error occurred while scraping descriptions: {str(e)}")

    finally:
        # Close the browser when done with description scraping
        driver.quit()

    # Create a dictionary to store the job card data
    data = {
        'Job Title': job_titles,
        'Company Name': companies,
        'Location': locations,
        'Salary': salary_list,  # Include salary
        'Date Posted': date_posted_list,
        'Job Description': content_list,
        'Scraping Date': [time.strftime("%m-%d-%Y")] * len(job_titles)
    }

    # Create a DataFrame from the collected job card data
    df_job_cards = pd.DataFrame(data)

    # Save the data to a CSV file with the desired column order and date of scraping
    csv_filename = f'Monster_Chrome_{job_title}_{location}_{time.strftime("%m-%d-%Y")}.csv'
    # Check if the CSV file already exists
    try:
        # Attempt to read the existing CSV
        existing_df = pd.read_csv(csv_filename)
        # Append the new data to the existing file, without headers
        df_job_cards.to_csv(csv_filename, mode='a', header=False, index=False)
    except FileNotFoundError:
        # If the file doesn't exist, create it with headers
        df_job_cards.to_csv(csv_filename, index=False, header=True)

    return df_job_cards

# Input job title and location
job_title = input("Enter the job title: ")
location = input("Enter the location: ")

# Example usage:
df = scrape_jobs(job_title, location, end_page=10)
if df is not None:
    print(df.head())


Enter the job title: software engineer
Enter the location: california


ValueError: All arrays must be of the same length

In [60]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Construct the base URL
    base_url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page="

    # Initialize data lists
    job_titles, companies, locations, job_details, salary_list, hrefs, date_posted_list = [], [], [], [], [], [], []

    try:
        for page_number in range(start_page, end_page + 1):
            url = base_url + str(page_number)
            driver.get(url)

            # Wait for job card elements to be present on the page
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")))

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                companies.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Check if job description exists
                job_detail_element = None
                try:
                    job_detail_element = job_card.find_element(By.CSS_SELECTOR, "div[class='descriptionstyles__DescriptionBody-sc-13ve12b-4 crOoVX']")
                except NoSuchElementException:
                    pass
                job_details.append(job_detail_element.text if job_detail_element else 'N/A')

                # Check if salary information exists
                salary_tag = None
                try:
                    salary_tag = job_card.find_element(By.CSS_SELECTOR, 'span.sc-fKMpNL.dasswI.ds-tag-label')
                except NoSuchElementException:
                    pass
                salary_list.append(salary_tag.text if salary_tag else 'N/A')

                # Check if date posted information exists
                date_posted_element = None
                try:
                    date_posted_element = job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailDateRecency']")
                except NoSuchElementException:
                    pass
                date_posted_list.append(date_posted_element.text if date_posted_element else 'N/A')

                hrefs.append(job_card.find_element(By.TAG_NAME, 'a').get_attribute('href'))

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        driver.quit()

    # Create a DataFrame with the collected data
    data = {
        'Job Title': job_titles,
        'Company Name': companies,
        'Location': locations,
        'Job Description': job_details,
        'Salary': salary_list,
        'Href': hrefs,
        'Date Posted': date_posted_list,
        'Scraping Date': [time.strftime("%m-%d-%Y")] * len(job_titles)
    }

    df_job_cards = pd.DataFrame(data)

    # Save the data to a CSV file with the desired column order and date of scraping
    csv_filename = f'Monster_Chrome_{job_title}_{location}_{time.strftime("%m-%d-%Y")}.csv'
    df_job_cards.to_csv(csv_filename, index=False, columns=['Job Title', 'Company Name', 'Location', 'Job Description', 'Salary', 'Date Posted', 'Scraping Date'], header=True)

    return df_job_cards

# Example usage:
job_title = input("Enter the job title: ")
location = input("Enter the location: ")
df = scrape_jobs(job_title, location, start_page=1, end_page=10)
if df is not None:
    print(df.head())


Enter the job title: software engineer
Enter the location: california
                                 Job Title         Company Name  \
0                   Software Engineer, C++      DCS Corporation   
1  Senior Software Engineer - Applications  Gables Search Group   
2        Staff Software Engineer - Android            Vaco, LLC   
3              Software Engineer - 60380-1         Randstad USA   
4           Embedded Software Engineer C++         Randstad USA   

         Location Job Description            Salary  \
0  Ridgecrest, CA             N/A   Actively Hiring   
1    Carlsbad, CA             N/A  $89,000–$136,000   
2   San Diego, CA             N/A   Actively Hiring   
3  El Segundo, CA             N/A   Actively Hiring   
4  El Segundo, CA             N/A   Actively Hiring   

                                                Href   Date Posted  \
0  https://www.monster.com/job-openings/software-...  30+ days ago   
1  https://www.monster.com/job-openings/senior-so...  30

In [62]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Construct the base URL
    base_url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page="

    # Initialize data lists
    job_titles, companies, locations, job_descriptions, salary_list, hrefs, date_posted_list = [], [], [], [], [], [], []

    try:
        for page_number in range(start_page, end_page + 1):
            url = base_url + str(page_number)
            driver.get(url)

            # Wait for job card elements to be present on the page
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")))

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                companies.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Check if job description exists
                job_detail_element = None
                try:
                    job_detail_element = job_card.find_element(By.CSS_SELECTOR, "div.descriptionstyles__DescriptionContainer-sc-13ve12b-0.iCEVUR")
                except NoSuchElementException:
                    pass
                job_descriptions.append(job_detail_element.text if job_detail_element else 'N/A')

                # Check if salary information exists
                salary_tag = None
                try:
                    salary_tag = job_card.find_element(By.CSS_SELECTOR, 'span.sc-fKMpNL.dasswI.ds-tag-label')
                except NoSuchElementException:
                    pass
                salary_list.append(salary_tag.text if salary_tag else 'N/A')

                # Check if date posted information exists
                date_posted_element = None
                try:
                    date_posted_element = job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailDateRecency']")
                except NoSuchElementException:
                    pass
                date_posted_list.append(date_posted_element.text if date_posted_element else 'N/A')

                hrefs.append(job_card.find_element(By.TAG_NAME, 'a').get_attribute('href'))

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        driver.quit()

    # Create a DataFrame with the collected data
    data = {
        'Job Title': job_titles,
        'Company Name': companies,
        'Location': locations,
        'Job Description': job_descriptions,
        'Salary': salary_list,
        'Href': hrefs,
        'Date Posted': date_posted_list,
        'Scraping Date': [time.strftime("%m-%d-%Y")] * len(job_titles)
    }

    df_job_cards = pd.DataFrame(data)

    # Save the data to a CSV file with the desired column order and date of scraping
    csv_filename = f'Monster_Chrome_{job_title}_{location}_{time.strftime("%m-%d-%Y")}.csv'
    df_job_cards.to_csv(csv_filename, mode='a', index=False, columns=['Job Title', 'Company Name', 'Location', 'Job Description', 'Salary', 'Date Posted', 'Scraping Date'], header=True)

    return df_job_cards

# Example usage:
job_title = input("Enter the job title: ")
location = input("Enter the location: ")
df = scrape_jobs(job_title, location, start_page=1, end_page=10)
if df is not None:
    print(df.head())


Enter the job title: software engineer
Enter the location: california
                                 Job Title         Company Name  \
0                   Software Engineer, C++      DCS Corporation   
1        Staff Software Engineer - Android            Vaco, LLC   
2  Senior Software Engineer - Applications  Gables Search Group   
3        Senior Software Engineer (REMOTE)         Randstad USA   
4                        Software Engineer             Actalent   

          Location Job Description           Salary  \
0   Ridgecrest, CA             N/A  Actively Hiring   
1    San Diego, CA             N/A  Actively Hiring   
2     Carlsbad, CA             N/A  Actively Hiring   
3  Los Angeles, CA             N/A  Actively Hiring   
4       Irvine, CA             N/A  Actively Hiring   

                                                Href   Date Posted  \
0  https://www.monster.com/job-openings/software-...  30+ days ago   
1  https://www.monster.com/job-openings/staff-sof...  30

In [63]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time
import re  # Add this import for regular expressions

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Construct the base URL
    base_url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page="

    # Initialize data lists
    job_titles, companies, locations, job_descriptions, salary_list, hrefs, date_posted_list = [], [], [], [], [], [], []

    try:
        for page_number in range(start_page, end_page + 1):
            url = base_url + str(page_number)
            driver.get(url)

            # Wait for job card elements to be present on the page
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")))

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                companies.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Check if job description exists
                job_detail_element = None
                try:
                    job_detail_element = job_card.find_element(By.CSS_SELECTOR, "div.descriptionstyles__DescriptionContainer-sc-13ve12b-0.iCEVUR")
                except NoSuchElementException:
                    pass
                job_descriptions.append(job_detail_element.text if job_detail_element else 'N/A')

                # Check if salary information exists
                salary_tag = None
                try:
                    salary_tag = job_card.find_element(By.CSS_SELECTOR, 'span.sc-fKMpNL.dasswI.ds-tag-label')
                except NoSuchElementException:
                    pass
                salary_text = salary_tag.text if salary_tag else 'N/A'

                # Use regular expressions to extract numerical values from the salary text
                salary_values = re.findall(r'\d+', salary_text)
                if salary_values:
                    # Join numerical values into a single string
                    salary_value = '-'.join(salary_values)
                    salary_list.append(salary_value)
                else:
                    salary_list.append(salary_text)

                # Check if date posted information exists
                date_posted_element = None
                try:
                    date_posted_element = job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailDateRecency']")
                except NoSuchElementException:
                    pass
                date_posted_list.append(date_posted_element.text if date_posted_element else 'N/A')

                hrefs.append(job_card.find_element(By.TAG_NAME, 'a').get_attribute('href'))

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        driver.quit()

    # Create a DataFrame with the collected data
    data = {
        'Job Title': job_titles,
        'Company Name': companies,
        'Location': locations,
        'Job Description': job_descriptions,
        'Salary': salary_list,
        'Date Posted': date_posted_list,
        'Scraping Date': [time.strftime("%m-%d-%Y")] * len(job_titles)
    }

    df_job_cards = pd.DataFrame(data)

    # Save the data to a CSV file with the desired column order and date of scraping
    csv_filename = f'Monster_Chrome_{job_title}_{location}_{time.strftime("%m-%d-%Y")}.csv'
    df_job_cards.to_csv(csv_filename,mode='a', index=False, columns=['Job Title', 'Company Name', 'Location', 'Job Description', 'Salary', 'Date Posted', 'Scraping Date'], header=True)

    return df_job_cards

# Example usage:
job_title = input("Enter the job title: ")
location = input("Enter the location: ")
df = scrape_jobs(job_title, location, start_page=1, end_page=10)
if df is not None:
    print(df.head())


Enter the job title: software engineer
Enter the location: california
                                 Job Title             Company Name  \
0  Senior Software Engineer - Applications      Gables Search Group   
1                   Software Engineer, C++          DCS Corporation   
2                        Software Engineer              Wells Fargo   
3                        Software Engineer                 Actalent   
4             Flight Software Engineer III  Amtec Human Capital Inc   

            Location Job Description           Salary   Date Posted  \
0       Carlsbad, CA             N/A  Actively Hiring   14 days ago   
1     Ridgecrest, CA             N/A  Actively Hiring  30+ days ago   
2  San Francisco, CA             N/A  Actively Hiring    2 days ago   
3         Irvine, CA             N/A  Actively Hiring    4 days ago   
4       Pasadena, CA             N/A  Actively Hiring  30+ days ago   

  Scraping Date  
0    09-15-2023  
1    09-15-2023  
2    09-15-2023  
3   

In [None]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def scrape_jobs(job_title, location, start_page=1, end_page=10):
    # Create a Chrome browser instance
    driver = webdriver.Chrome()

    # Construct the base URL
    base_url = f"https://www.monster.com/jobs/search/?q={job_title}&where={location}&page="

    # Initialize data lists
    job_titles, companies, locations, job_details, salaries, hrefs, date_posted_list = [], [], [], [], [], [], []

    try:
        for page_number in range(start_page, end_page + 1):
            url = base_url + str(page_number)
            driver.get(url)

            # Scroll down to load more job listings (adjust the number of scrolls as needed)
            for _ in range(5):  # Scroll 5 times to load more listings
                driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
                time.sleep(2)  # Wait for the page to load

            # Wait for the job card elements to be present on the page
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")))

            # Find all job card elements on the current page
            job_card_elements = driver.find_elements(By.CSS_SELECTOR, "article[data-testid='svx_jobCard']")

            for job_card in job_card_elements:
                job_titles.append(job_card.find_element(By.CSS_SELECTOR, "h3 a").text)
                companies.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='company']").text)
                locations.append(job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailLocation']").text)

                # Click on the job card to get details
                job_card.click()
                time.sleep(2)  # Wait for the details to load

                try:
                    # Find the description element by class name
                    job_detail_element = driver.find_element(By.CLASS_NAME, "descriptionstyles__DescriptionContainer-sc-13ve12b-0.iCEVUR")
                    job_details.append(job_detail_element.text)
                except:
                    job_details.append("N/A")

                try:
                    # Find the salary element by class name
                    salary_element = driver.find_element(By.CLASS_NAME, "sc-aNeao.eKxONA.sc-ha-dNcR.grFRtm.ds-tag")
                    salaries.append(salary_element.text)
                except:
                    salaries.append("N/A")

                hrefs.append(driver.current_url)  # Store the href link

                # Find the date posted element by data-testid
                date_posted_element = job_card.find_element(By.CSS_SELECTOR, "span[data-testid='jobDetailDateRecency']")
                date_posted_list.append(date_posted_element.text)

                # Go back to the job listings
                driver.back()
                time.sleep(2)  # Wait for the page to load

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        driver.quit()

    # Create a dictionary to store the data
    data = {
        'Job Title': job_titles,
        'Company Name': companies,
        'Location': locations,
        'Job Description': job_details,
        'Salary': salaries,
        'Href': hrefs,
        'Date Posted': date_posted_list,
        'Scraping Date': [time.strftime("%m-%d-%Y")] * len(job_titles)
    }

    # Create a DataFrame from the collected job card data
    df_job_cards = pd.DataFrame(data)

    # Check if the CSV file already exists
    csv_filename = f'Monster_Chrome_{job_title}_{location}_{time.strftime("%m-%d-%Y")}.csv'
    try:
        with open(csv_filename, 'r') as file:
            # Check if headers are present
            header = file.readline().strip()
            if header != 'Job Title,Company Name,Location,Job Description,Salary,Href,Date Posted,Scraping Date':
                # Headers are missing, so add them to the first row
                df_job_cards.to_csv(csv_filename, mode='a', index=False, header=True)
            else:
                # Headers are present, so just append the data
                df_job_cards.to_csv(csv_filename, mode='a', index=False, header=False)
    except FileNotFoundError:
        # File doesn't exist, create it and add headers
        df_job_cards.to_csv(csv_filename, mode='w', index=False, header=True)

    return df_job_cards

# Example usage:
job_title = input("Enter the job title: ")
location = input("Enter the location: ")
df = scrape_jobs(job_title, location, start_page=1, end_page=10)
if df is not None:
    print(df.head())

    