In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import time
import random
from urllib.parse import urljoin

# Site root; listing URLs and relative tool links are resolved against it.
BASE_URL = "https://www.futurepedia.io"

# Category slugs, in scraping order. Each one is inserted into
# "{BASE_URL}/ai-tools/{slug}?page={n}" by scrape_category().
CATEGORIES = [
    "video-enhancer",
    "video-editing",
    "video-generators",
    "text-to-video",
    "prompt-generators",
    "writing-generators",
    "paraphrasing",
    "storyteller",
    "copywriting",
    "website-builders",
    "marketing",
    "design-generators",
    "image-generators",
    "image-editing",
    "text-to-image",
    "finance",
    "project-management",
    "social-media",
    "workflows",
    "AI-agents",
    "cartoon-generators",
    "portrait-generators",
    "avatars",
    "logo-generator",
    "3D",
    "audio-editing",
    "text-to-speech",
    "music",
    "transcriber",
    "fitness",
    "religion",
    "code-assistant",
    "low-code-no-code",
    "SQL",
    "students",
    "fashion",
    "gift-ideas",
]

# HTTP headers sent with every page request.
HEADERS = {"User-Agent": "Mozilla/5.0"}

# (min, max) bounds in seconds for the random pause before each request.
DELAY = (1.5, 3)

def _parse_tool_card(card, name):
    """Build one output row from a tool card's anchor element."""
    category_name = card.get("data-tool-category", "").strip()
    link = urljoin(BASE_URL, card.get("href", "").strip())

    # NOTE: only a badge containing "Paid" yields "Paid"; every other case,
    # including a card with no badge at all, is recorded as "Free".
    badge = card.find("span", class_="inline-flex")
    pricing = "Paid" if badge and "Paid" in badge.text else "Free"

    desc_tag = card.find("p", class_=lambda x: x and "text-gray-500" in x)
    description = desc_tag.text.strip() if desc_tag else ""

    return {
        "Name": name,
        "Category": category_name,
        "Description": description,
        "Pricing": pricing,
        "Link": link,
    }


def scrape_category(category, timeout=10):
    """Collect the tools listed under one category, page by page.

    Pages are requested in order (``?page=1``, ``?page=2``, ...) with a random
    pause drawn from ``DELAY`` before each request. Paging stops at the first
    network error, the first non-200 response, or the first page that yields
    no tool name that has not already been seen in this category.

    Args:
        category: Category slug inserted into ``{BASE_URL}/ai-tools/{slug}``.
        timeout: Seconds to wait on a request before giving up. Without a
            timeout a stalled connection would block the whole run.

    Returns:
        A list of dicts with the keys ``Name``, ``Category``, ``Description``,
        ``Pricing`` and ``Link``. Tools gathered before an error are kept.
    """
    print(f"\n🔍 Scraping category: {category}")
    tools = []
    page = 1
    seen_names = set()

    while True:
        url = f"{BASE_URL}/ai-tools/{category}?page={page}"
        print(f"📄 Scraping Page {page}: {url}")
        time.sleep(random.uniform(*DELAY))

        # Only the network call is guarded, so bugs in the parsing code
        # surface instead of being reported as a generic scrape error.
        try:
            response = requests.get(url, headers=HEADERS, timeout=timeout)
        except requests.RequestException as e:
            print(f"⚠️ Error: {e}")
            break

        if response.status_code != 200:
            print("❌ Page not found or bad response. Stopping.")
            break

        soup = BeautifulSoup(response.text, "html.parser")
        tool_cards = soup.find_all("a", class_="hover:no-underline")

        new_tools_this_page = 0
        for card in tool_cards:
            name = card.get("data-tool-name", "").strip()
            if not name or name in seen_names:
                continue  # Not a tool card, or already collected.

            seen_names.add(name)
            tools.append(_parse_tool_card(card, name))
            new_tools_this_page += 1

        # A page with nothing new is treated as the end of the listing.
        if new_tools_this_page == 0:
            print("🛑 No new tools found on this page. Stopping.")
            break

        page += 1

    print(f"✅ Total tools collected from {category}: {len(tools)}")
    return tools

def save_to_csv(tools, filename="ai_1000.csv"):
    """Write the collected tool rows to a UTF-8 CSV file.

    The file is opened in write mode, so an existing file of the same name is
    replaced. A header row is written first, followed by one row per tool.

    Args:
        tools: Sized collection of dicts keyed by ``Name``, ``Category``,
            ``Description``, ``Pricing`` and ``Link``.
        filename: Destination path for the CSV file.
    """
    columns = ["Name", "Category", "Description", "Pricing", "Link"]

    # newline="" lets the csv module control line endings itself.
    with open(filename, mode="w", encoding="utf-8", newline="") as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()
        for record in tools:
            sheet.writerow(record)

    print(f"\n✅ Saved {len(tools)} tools to {filename}")

def main():
    """Scrape categories in order and save everything collected to CSV.

    The running total is checked after each whole category, so scraping stops
    once 1000 or more tools have been gathered and the final count may exceed
    1000. Whatever was collected is then written with ``save_to_csv``.
    """
    collected = []

    for slug in CATEGORIES:
        collected += scrape_category(slug)

        if len(collected) < 1000:
            continue

        print("🎯 Collected 1000+ tools. Stopping early.")
        break

    save_to_csv(collected)

# Start the scrape only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


🔍 Scraping category: video-enhancer
📄 Scraping Page 1: https://www.futurepedia.io/ai-tools/video-enhancer?page=1
📄 Scraping Page 2: https://www.futurepedia.io/ai-tools/video-enhancer?page=2
📄 Scraping Page 3: https://www.futurepedia.io/ai-tools/video-enhancer?page=3
🛑 No new tools found on this page. Stopping.
✅ Total tools collected from video-enhancer: 23

🔍 Scraping category: video-editing
📄 Scraping Page 1: https://www.futurepedia.io/ai-tools/video-editing?page=1
📄 Scraping Page 2: https://www.futurepedia.io/ai-tools/video-editing?page=2
📄 Scraping Page 3: https://www.futurepedia.io/ai-tools/video-editing?page=3
📄 Scraping Page 4: https://www.futurepedia.io/ai-tools/video-editing?page=4
📄 Scraping Page 5: https://www.futurepedia.io/ai-tools/video-editing?page=5
📄 Scraping Page 6: https://www.futurepedia.io/ai-tools/video-editing?page=6
📄 Scraping Page 7: https://www.futurepedia.io/ai-tools/video-editing?page=7
📄 Scraping Page 8: https://www.futurepedia.io/ai-tools/video-editing?pa