# Web Scraper

## Overview
This project scrapes email addresses from the `mailto:` links of a specified website and stores them in a CSV file.

## Usage Instructions
1. Install the required libraries:
   ```
   pip install requests beautifulsoup4
   ```
2. Open the `web_scraping_project.ipynb` notebook in a Jupyter Notebook environment.

3. Customize the target_url variable with the URL of the website you want to scrape.

4. Run the cells sequentially.

5. The scraped email addresses will be stored in a CSV file named scraped_emails.csv in the same directory.

## Import necessary libraries

In [None]:
import requests
from bs4 import BeautifulSoup
import csv

## Function to scrape emails from a website

In [None]:
def scrape_emails(url, timeout=10):
    """
    Scrapes email addresses from the ``mailto:`` links of a given website.

    Parameters:
    - url (str): The URL of the website to scrape.
    - timeout (float): Seconds to wait for the server before giving up.

    Returns:
    - list: The unique email addresses found, in order of first appearance,
      with the ``mailto:`` scheme and any ``?subject=...`` query removed.
      An empty list is returned (and the error printed) if the page could
      not be fetched or the server answered with an HTTP error status.
    """
    # Function-scope import: only this function needs it, and it is stdlib.
    from urllib.parse import unquote

    try:
        # Without a timeout, requests can block forever on a silent host.
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of scraping an error page.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    scheme = 'mailto:'
    emails = []
    seen = set()  # preserves first-seen order in `emails` while de-duplicating

    # Extract email addresses (adjust this based on the website structure)
    for link in soup.find_all('a', href=True):
        href = link['href'].strip()

        # The scheme must be at the start (case-insensitively); a plain
        # substring test would also match e.g. share links embedding "mailto:".
        if not href.lower().startswith(scheme):
            continue

        # Drop header fields such as "?subject=Hello" that follow the address.
        recipients = href[len(scheme):].partition('?')[0]

        # One mailto link may list several comma-separated recipients. Split
        # before decoding so a percent-encoded comma stays inside its address.
        for raw_address in recipients.split(','):
            address = unquote(raw_address).strip()
            if address and address not in seen:
                seen.add(address)
                emails.append(address)

    return emails

## Function to write emails to a CSV file

In [None]:
def write_to_csv(emails, filename):
    """
    Writes a list of emails to a CSV file with a single 'Email' column.

    The file is created (or overwritten) as UTF-8 so that non-ASCII
    addresses are written the same way on every platform. I/O and CSV
    errors are reported on stdout instead of being raised.

    Parameters:
    - emails (list): A list of email addresses.
    - filename (str): The name of the CSV file to write.
    """
    try:
        # newline='' lets the csv module control line endings itself;
        # an explicit encoding avoids depending on the platform default.
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow(['Email'])
            # One row per address, each row holding a single cell.
            csv_writer.writerows([email] for email in emails)

    except (OSError, csv.Error) as e:
        print(f"Error writing to CSV: {e}")

    else:
        # Only announce success once the file has been written and closed.
        print(f"Data written to {filename}")


## Main execution

In [None]:
if __name__ == "__main__":
    # Configuration: the page to scan and the CSV file that receives the results.
    target_url = 'https://thenytimes.com'
    output_csv = 'scraped_emails.csv'

    # Collect the addresses from the page, then persist them to disk.
    scraped_emails = scrape_emails(target_url)
    write_to_csv(scraped_emails, output_csv)