In [15]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from googlesearch import search
import time

# Search terms: the firm's name followed by OR-joined topics of interest
query = "The Digital Fifth fintech OR consulting OR finance OR market trends OR strategy"

# Materialise the search hits (up to 10 requested) so they can be counted
# here and iterated again by the scraping loop further down
search_results = [*search(query, num_results=10)]
print(f"🔍 Found {len(search_results)} relevant websites.")

# Category name -> lowercase trigger keywords.
# Insertion order matters: categorize_text walks this mapping from top to
# bottom and returns the first category with a matching keyword.
categories = {
    "Market Trends": [
        "trend",
        "growth",
        "future",
        "forecast",
    ],
    "Strategy": [
        "strategy",
        "business model",
        "plan",
    ],
    "Finance": [
        "investment",
        "funding",
        "revenue",
        "profit",
    ],
    "Competition": [
        "competitor",
        "market share",
        "industry leader",
    ],
}

def categorize_text(text, category_keywords=None):
    """Return the first category whose keywords occur in ``text``.

    Matching is a case-insensitive substring test: the text is lowercased
    and each keyword is compared as-is, so keywords should be lowercase.
    Because it is a substring test, a keyword such as "plan" also matches
    inside longer words (e.g. "planet"). Categories are tried in the
    mapping's iteration order and the first match wins.

    Args:
        text: Content to classify. ``None`` or an empty string is treated
            as having no match.
        category_keywords: Optional mapping of category name to an iterable
            of keywords. Defaults to the module-level ``categories`` table.

    Returns:
        The name of the first matching category, or ``"General"`` when no
        keyword is found.
    """
    if not text:
        return "General"
    if category_keywords is None:
        category_keywords = categories

    # Lowercase once; the original re-lowercased the text for every keyword.
    lowered = text.lower()
    for category, keywords in category_keywords.items():
        if any(keyword in lowered for keyword in keywords):
            return category
    return "General"

# Rows of [website, tag, text, link, category] collected across all pages
data = []

# Scrape each website
for index, url in enumerate(search_results):
    # Pause between consecutive requests. Doing it here (rather than at the
    # end of the try body) means the delay also applies after a failed
    # fetch, and no time is wasted sleeping after the last URL.
    if index:
        time.sleep(2)  # Prevent hitting rate limits

    # Per-URL best-effort boundary: any failure is reported and the loop
    # moves on, so one bad site does not discard rows already collected.
    try:
        print(f"🌐 Scraping: {url}")

        # Fetch website content
        response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
        response.raise_for_status()

        # When a text/* response declares no charset, requests decodes
        # .text as ISO-8859-1, which garbles UTF-8 pages. Fall back to the
        # encoding detected from the body in that case.
        if "charset" not in response.headers.get("Content-Type", "").lower():
            response.encoding = response.apparent_encoding

        # Parse with BeautifulSoup
        soup = BeautifulSoup(response.text, "html.parser")

        # Extract headings, paragraphs and anchors in document order
        for section in soup.find_all(["h1", "h2", "h3", "p", "a"]):
            text = section.get_text(strip=True)
            tag = section.name
            # Only anchors carry a link; the href is stored exactly as written
            link = section.get("href") if tag == "a" else None
            category = categorize_text(text)
            data.append([url, tag, text, link, category])

    except Exception as e:
        print(f"❌ Error scraping {url}: {e}")

# Convert to DataFrame
df = pd.DataFrame(data, columns=["Website", "Tag", "Content", "Link", "Category"])

# Save to CSV
df.to_csv("consulting_market_analysis.csv", index=False)

print("✅ Scraping and categorization complete! Data saved as consulting_market_analysis.csv.")

🔍 Found 10 relevant websites.
🌐 Scraping: https://thedigitalfifth.com/
🌐 Scraping: https://thedigitalfifth.com/reports/
🌐 Scraping: https://in.linkedin.com/company/thedigitalfifth
🌐 Scraping: https://www.youtube.com/playlist?list=PLekAEhRDBwIH6FvvpmRgepLarFCkJnEeE
🌐 Scraping: https://everfi.com/blog/financial-education/5-financial-services-marketing-trends/
🌐 Scraping: https://www.facebook.com/thedigitalfifth/
🌐 Scraping: https://www.youtube.com/@TheDigitalFifth
🌐 Scraping: https://www.crunchbase.com/organization/the-digital-fifth
❌ Error scraping https://www.crunchbase.com/organization/the-digital-fifth: 403 Client Error: Forbidden for url: https://www.crunchbase.com/organization/the-digital-fifth
🌐 Scraping: https://www.oliverwyman.com/our-expertise/insights/2024/sep/five-key-trends-shaping-digital-banks-future.html
🌐 Scraping: https://www.youtube.com/watch?v=BaF9vbD4dzg
✅ Scraping and categorization complete! Data saved as consulting_market_analysis.csv.
