In [1]:
# Import required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Function to fetch HTML content from a URL
def get_page_content(url):
    try:
        response = requests.get(url)
        if response.status_code == 200:
            return response.text
        else:
            print(f"Failed to retrieve {url}, Status Code: {response.status_code}")
            return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [3]:
# Function to scrape title, URL, and content from GenZMarketing
def scrape_genz_marketing(base_url):
    scraped_data = []
    homepage_content = get_page_content(base_url)
    
    if not homepage_content:
        return scraped_data

    soup = BeautifulSoup(homepage_content, 'html.parser')
    
    # Extract all links from the homepage
    links = soup.find_all('a', href=True)
    for link in links:
        page_url = link['href']
        # Handle relative URLs
        if not page_url.startswith('http'):
            page_url = base_url.rstrip('/') + '/' + page_url.lstrip('/')

        # Fetch content from the linked page
        page_content = get_page_content(page_url)
        if not page_content:
            continue

        page_soup = BeautifulSoup(page_content, 'html.parser')

        # Extract the title and main content
        title = page_soup.title.string if page_soup.title else "No Title"
        content = page_soup.get_text(separator=' ', strip=True)

        # Append data to the list
        scraped_data.append({
            'title': title,
            'url': page_url,
            'content': content
        })

    return scraped_data

In [4]:
# Main script to scrape and save data
base_url = "https://genzmarketing.xyz/"
data = scrape_genz_marketing(base_url)

# Save scraped data to a CSV file
if data:
    df = pd.DataFrame(data)
    output_file = "genz_marketing_data.csv"
    df.to_csv(output_file, index=False)
    print(f"Data scraping completed. File saved as {output_file}")
else:
    print("No data scraped.")

Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to retrieve https://www.linkedin.com/in/dr-shah-siddiqui/, Status Code: 999
Failed to retrieve https://genzmarketing.xyz/javascript:void();, Status Code: 500
Failed to retrieve https://www.linkedin.com/in/jamesbuckley121/, Status Code: 999
Failed to retrieve https://genzmarketing.xyz/javascript:void();, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void();, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void();, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to