In [1]:
# Install required libraries
!pip install requests beautifulsoup4 pandas



In [2]:
# Import required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
# Function to fetch HTML content from a URL
def get_page_content(url):
    try:
        response = requests.get(url)
        if response.status_code == 200:
            return response.text
        else:
            print(f"Failed to retrieve {url}, Status Code: {response.status_code}")
            return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [4]:
# Function to scrape title, URL, and content from GenZMarketing
def scrape_genz_marketing(base_url):
    scraped_data = []
    homepage_content = get_page_content(base_url)
    
    if not homepage_content:
        return scraped_data

    soup = BeautifulSoup(homepage_content, 'html.parser')
    
    # Extract all links from the homepage
    links = soup.find_all('a', href=True)
    for link in links:
        page_url = link['href']
        # Handle relative URLs
        if not page_url.startswith('http'):
            page_url = base_url.rstrip('/') + '/' + page_url.lstrip('/')

        # Fetch content from the linked page
        page_content = get_page_content(page_url)
        if not page_content:
            continue

        page_soup = BeautifulSoup(page_content, 'html.parser')

        # Extract the title and main content
        title = page_soup.title.string if page_soup.title else "No Title"
        content = page_soup.get_text(separator=' ', strip=True)

        # Append data to the list
        scraped_data.append({
            'title': title,
            'url': page_url,
            'content': content
        })

    return scraped_data

In [5]:
# Main script to scrape and save data
base_url = "https://genzmarketing.xyz/"
data = scrape_genz_marketing(base_url)

# Save scraped data to a CSV file
if data:
    df = pd.DataFrame(data)
    output_file = "genz_marketing_data.csv"
    df.to_csv(output_file, index=False)
    print(f"Data scraping completed. File saved as {output_file}")
else:
    print("No data scraped.")

Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to retrieve https://www.linkedin.com/in/dr-shah-siddiqui/, Status Code: 999
Failed to retrieve https://genzmarketing.xyz/javascript:void();, Status Code: 500
Failed to retrieve https://www.linkedin.com/in/jamesbuckley121/, Status Code: 999
Failed to retrieve https://genzmarketing.xyz/javascript:void();, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void();, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void();, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to retrieve https://genzmarketing.xyz/javascript:void(0);, Status Code: 500
Failed to

In [6]:
genz_data = pd.read_csv("genz_marketing_data.csv")
genz_data

Unnamed: 0,title,url,content
0,Grow Your Brand with GenZ Marketing's Full Dig...,https://genzmarketing.xyz,Grow Your Brand with GenZ Marketing's Full Dig...
1,Grow Your Brand with GenZ Marketing's Full Dig...,https://genzmarketing.xyz,Grow Your Brand with GenZ Marketing's Full Dig...
2,| About Us\r\n,https://genzmarketing.xyz/about_us,| About Us Loading... Home About Us Services P...
3,Services | GenZMarketing,https://genzmarketing.xyz/services,Services | GenZMarketing Loading... Home About...
4,| Portfolio\r\n,https://genzmarketing.xyz/portfolio,| Portfolio Loading... Home About Us Services ...
...,...,...,...
78,Blog | GenZMarketing,https://genzmarketing.xyz/blogs,Blog | GenZMarketing Loading... Home About Us ...
79,Contact Us\r\n,https://genzmarketing.xyz/contact_us,Contact Us Loading... Home About Us Services P...
80,Home | Timerni\r\n,https://timerni.com/,Home | Timerni Home Services Projects Contact ...
81,Site Information | Timerni\r\n,https://timerni.com/setting/privacy-policy,Site Information | Timerni Home Services Proje...
