In [6]:
import csv
import requests
from bs4 import BeautifulSoup

def scrape_page(soup, quotes):
    """Collect every quote found on a parsed page into ``quotes``.

    Args:
        soup: parsed page; it is searched for ``<div class="quote">`` elements.
        quotes: list that receives one dictionary per quote. It is extended
            in place, so successive calls accumulate results across pages.

    Returns:
        None. Each appended dictionary holds the keys ``'text'``,
        ``'author'`` and ``'tags'`` (in that order), where ``'tags'`` is the
        tag texts merged into a single "A, B, ..., Z" string.
    """
    for quote_div in soup.find_all('div', class_='quote'):
        # the quote itself and who said it
        quote_text = quote_div.find('span', class_='text').text
        author_name = quote_div.find('small', class_='author').text

        # every <a class="tag"> inside the quote's tags container
        tags_container = quote_div.find('div', class_='tags')
        tag_names = [
            anchor.text
            for anchor in tags_container.find_all('a', class_='tag')
        ]

        # key order matters to callers that write the values out positionally
        record = {
            'text': quote_text,
            'author': author_name,
            'tags': ', '.join(tag_names),
        }
        quotes.append(record)

# the url of the home page of the target website
base_url = 'https://quotes.toscrape.com'

# defining the User-Agent header to use in the GET requests below
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
}

# seconds to wait for the server before giving up, so that a stalled
# connection cannot hang the cell forever
request_timeout = 10

# initializing the variable that will contain
# the list of all quote data
quotes = []

# relative URL of the page to fetch next; the empty string
# targets the home page itself
next_page_relative_url = ''

# following the "Next →" links until a page no longer offers one
while next_page_relative_url is not None:
    # retrieving the page and failing loudly on an HTTP error
    # instead of silently parsing an error page
    page = requests.get(
        base_url + next_page_relative_url,
        headers=headers,
        timeout=request_timeout
    )
    page.raise_for_status()

    # parsing the page with Beautiful Soup and scraping it
    soup = BeautifulSoup(page.text, 'html.parser')
    scrape_page(soup, quotes)

    # looking for the "Next →" HTML element and its link; either one
    # being absent means there is nothing left to scrape
    next_li_element = soup.find('li', class_='next')
    next_link = None
    if next_li_element is not None:
        next_link = next_li_element.find('a', href=True)
    next_page_relative_url = next_link['href'] if next_link is not None else None

# Open the "quotes.csv" file and create it if not present;
# the with-block closes it even if writing fails
with open('quotes.csv', 'w', encoding='utf-8', newline='') as csv_file:
    # initializing the writer object to insert data
    # in the CSV file
    writer = csv.writer(csv_file)

    # writing the header of the CSV file
    writer.writerow(['Text', 'Author', 'Tags'])

    # writing each row of the CSV, naming the keys explicitly so the
    # columns always line up with the header
    for quote in quotes:
        writer.writerow([quote['text'], quote['author'], quote['tags']])


In [5]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from time import sleep

# Set up Chrome options for headless mode.
# The "--headless=new" argument is used instead of `options.headless = True`:
# recent Selenium 4 releases dropped that convenience property, so assigning
# it no longer switches the browser to headless mode.
# NOTE(review): "--headless=new" needs Chrome 109 or later — confirm the
# installed browser version.
options = Options()
options.add_argument("--headless=new")


def _text_or_default(parent, class_name, default):
    """Return the stripped text of the first child with ``class_name``.

    A single ``find_elements`` lookup is used so that a missing element
    yields ``default`` instead of raising, without querying the DOM twice.
    """
    matches = parent.find_elements(By.CLASS_NAME, class_name)
    return matches[0].text.strip() if matches else default


# Initialize the list for job details
job_details = []

# Set up Selenium WebDriver (this will download the correct driver automatically)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

try:
    # Visit Naukri's job portal
    driver.get("https://www.naukri.com/jobs")

    # Scroll down the page to load more jobs (adjust as needed for dynamic loading)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(2)

    # Example scraping logic
    job_elements = driver.find_elements(By.CLASS_NAME, "jobTuple")

    for job_element in job_elements:
        try:
            # Title, company and role are mandatory: a missing element raises
            # and the listing is reported and skipped by the handler below.
            title = job_element.find_element(By.CLASS_NAME, "title").text.strip()
            company = job_element.find_element(By.CLASS_NAME, "subTitle").text.strip()
            # NOTE(review): 'role' is read from the "job-location" element —
            # confirm this is the intended selector.
            role = job_element.find_element(By.CLASS_NAME, "job-location").text.strip()

            job_details.append({
                'title': title,
                'company': company,
                'role': role,
                'package': _text_or_default(job_element, "salary", "Not Provided"),
                'experience': _text_or_default(job_element, "experience", "Not Provided"),
                'location': _text_or_default(job_element, "location", "Not Provided"),
                'review': _text_or_default(job_element, "ratings", "Not Available"),
                'description': _text_or_default(job_element, "job-description", "No description provided")
            })
        except Exception as e:
            # Best effort: one malformed listing must not abort the whole run
            print(f"Error extracting data for a job listing: {e}")
finally:
    # Quit the driver even when scraping fails, so no browser process is left behind
    driver.quit()

# Saving the job details to a CSV file; keys are listed explicitly so the
# columns always line up with the header row
job_fields = ['title', 'company', 'role', 'package', 'experience', 'location', 'review', 'description']
with open('job_details.csv', 'w', encoding='utf-8', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['Job Title', 'Company Name', 'Role', 'Package', 'Experience', 'Location', 'Review', 'Description'])
    for job in job_details:
        writer.writerow([job[field] for field in job_fields])


ModuleNotFoundError: No module named 'selenium'