<a href="https://colab.research.google.com/github/santosh958/WEBMOJII/blob/main/scrape_b2b_websites_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
!pip install requests beautifulsoup4




In [18]:
import requests
from bs4 import BeautifulSoup
import csv
import os

def safe_get_text(element, default="N/A"):
    """Safe text extraction from a BeautifulSoup element."""
    try:
        return element.text.strip()
    except AttributeError:
        return default

def scrape_pricing(soup):
    """Scrape pricing details from a website."""
    pricing_element = soup.find('div', class_='pricing')  # Adjust this selector based on actual HTML
    pricing = pricing_element.text.strip() if pricing_element else 'Pricing not available'
    return pricing

def scrape_speakers(soup):
    """Scrape speaker details from a website."""
    speakers_section = soup.find_all('div', class_='speaker')  # Adjust this selector based on actual HTML
    speakers = []
    for speaker in speakers_section:
        name = safe_get_text(speaker.find('h3', class_='name'))
        bio = safe_get_text(speaker.find('p', class_='bio'))
        speakers.append({'name': name, 'bio': bio})
    return speakers

def scrape_blake_envelopes(soup):
    """Scrape data from Blake Envelopes website."""
    event_name = safe_get_text(soup.title, "N/A")
    description = safe_get_text(soup.find("meta", {"name": "description"}), "N/A")
    return [event_name, "N/A", "N/A", description, scrape_speakers(soup), "N/A", "N/A", scrape_pricing(soup), "N/A", "N/A"]

def scrape_pixelgrade(soup):
    """Scrape data from Pixelgrade website."""
    event_name = safe_get_text(soup.title, "N/A")
    return [event_name, "N/A", "N/A", "N/A", scrape_speakers(soup), "N/A", "N/A", scrape_pricing(soup), "N/A", "N/A"]

def scrape_dropbox(soup):
    """Scrape data from Dropbox website."""
    event_name = safe_get_text(soup.title, "N/A")
    description = safe_get_text(soup.find("meta", {"name": "description"}), "N/A")
    return [event_name, "N/A", "N/A", description, scrape_speakers(soup), "N/A", "N/A", scrape_pricing(soup), "N/A", "N/A"]

def scrape_olumo(soup):
    """Scrape data from Olumo website."""
    event_name = safe_get_text(soup.title, "N/A")
    return [event_name, "N/A", "N/A", "N/A", scrape_speakers(soup), "N/A", "N/A", scrape_pricing(soup), "N/A", "N/A"]

def scrape_trello(soup):
    """Scrape data from Trello website."""
    event_name = safe_get_text(soup.title, "N/A")
    description = safe_get_text(soup.find("meta", {"name": "description"}), "N/A")
    return [event_name, "N/A", "N/A", description, scrape_speakers(soup), "N/A", "N/A", scrape_pricing(soup), "N/A", "N/A"]

def scrape_website_details(url):
    """Route scraping tasks to appropriate functions based on URL."""
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    if "blake-envelopes" in url:
        return scrape_blake_envelopes(soup)
    elif "pixelgrade" in url:
        return scrape_pixelgrade(soup)
    elif "dropbox" in url:
        return scrape_dropbox(soup)
    elif "olumo" in url:
        return scrape_olumo(soup)
    elif "trello" in url:
        return scrape_trello(soup)
    else:
        return ["N/A", "N/A", "N/A", url, "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A"]

# List of URLs to scrape
urls = [
    "https://www.blake-envelopes.com",
    "https://pixelgrade.com",
    "https://www.dropbox.com/events",
    "https://www.olumo.com",
    "https://trello.com"
]

# Open CSV file to write the data
with open('b2b_websites.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Event Name", "Event Dates", "Location", "Website URL", "Description", "Key Speakers", "Agenda", "Registration Details", "Pricing", "Categories", "Audience Type"])

    # Loop through URLs and write details to CSV
    for url in urls:
        website_details = scrape_website_details(url)
        print(f"Scraped data for {url}: {website_details}")
        writer.writerow([website_details[0], website_details[1], website_details[2], url, website_details[3], website_details[4], website_details[5], website_details[6], website_details[7], website_details[8], website_details[9]])

# Verifying the file creation
if os.path.exists('b2b_websites.csv'):
    print("CSV file has been created successfully.")
else:
    print("CSV file was not created.")




Scraped data for https://www.blake-envelopes.com: ['Envelopes & Mailing Supplies - Postal Packaging - Blake Envelopes', 'N/A', 'N/A', '', [], 'N/A', 'N/A', 'Pricing not available', 'N/A', 'N/A']
Scraped data for https://pixelgrade.com: ['pixelgrade.com | 520: Web server is returning an unknown error', 'N/A', 'N/A', 'N/A', [], 'N/A', 'N/A', 'Pricing not available', 'N/A', 'N/A']
Scraped data for https://www.dropbox.com/events: ['Login - Dropbox', 'N/A', 'N/A', '', [], 'N/A', 'N/A', 'Pricing not available', 'N/A', 'N/A']
Scraped data for https://www.olumo.com: ['Olumo | Olumo Uncovers The Real People Problems', 'N/A', 'N/A', 'N/A', [], 'N/A', 'N/A', 'Pricing not available', 'N/A', 'N/A']
Scraped data for https://trello.com: ['Manage Your Team’s Projects From Anywhere | Trello', 'N/A', 'N/A', '', [], 'N/A', 'N/A', 'Pricing not available', 'N/A', 'N/A']
CSV file has been created successfully.


In [19]:
from google.colab import files

# Assuming 'scrape_b2b_websites.csv' is the file you uploaded
file_path = 'b2b_websites.csv'

# Create a download link
files.download(file_path)





<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>