In [None]:
%pip install --quiet -U langchain-scrapegraph langchain-google-genai pandas matplotlib seaborn

In [13]:
import getpass
import os
import json
import pandas as pd
from typing import List, Dict, Any
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns

In [14]:
if not os.environ.get("SGAI_API_KEY"):
    os.environ["SGAI_API_KEY"] = getpass.getpass("ScrapeGraph AI API key:\n")

if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google API key for Gemini:\n")

In [15]:
from langchain_scrapegraph.tools import (
    SmartScraperTool,
    SearchScraperTool,
    MarkdownifyTool,
    GetCreditsTool,
)
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableConfig, chain
from langchain_core.output_parsers import JsonOutputParser

smartscraper = SmartScraperTool()
searchscraper = SearchScraperTool()
markdownify = MarkdownifyTool()
credits = GetCreditsTool()

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.1,
    convert_system_message_to_human=True
)

In [16]:
class CompetitiveAnalyzer:
    def __init__(self):
        self.results = []
        self.analysis_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    def scrape_competitor_data(self, url: str, company_name: str = None) -> Dict[str, Any]:
        """Scrape comprehensive data from a competitor website"""

        extraction_prompt = """
        Extract the following information from this website:
        1. Company name and tagline
        2. Main products/services offered
        3. Pricing information (if available)
        4. Target audience/market
        5. Key features and benefits highlighted
        6. Technology stack mentioned
        7. Contact information
        8. Social media presence
        9. Recent news or announcements
        10. Team size indicators
        11. Funding information (if mentioned)
        12. Customer testimonials or case studies
        13. Partnership information
        14. Geographic presence/markets served

        Return the information in a structured JSON format with clear categorization.
        If information is not available, mark as 'Not Available'.
        """

        try:
            result = smartscraper.invoke({
                "user_prompt": extraction_prompt,
                "website_url": url,
            })

            markdown_content = markdownify.invoke({"website_url": url})

            competitor_data = {
                "company_name": company_name or "Unknown",
                "url": url,
                "scraped_data": result,
                "markdown_length": len(markdown_content),
                "analysis_date": self.analysis_timestamp,
                "success": True,
                "error": None
            }

            return competitor_data

        except Exception as e:
            return {
                "company_name": company_name or "Unknown",
                "url": url,
                "scraped_data": None,
                "error": str(e),
                "success": False,
                "analysis_date": self.analysis_timestamp
            }

    def analyze_competitor_landscape(self, competitors: List[Dict[str, str]]) -> Dict[str, Any]:
        """Analyze multiple competitors and generate insights"""

        print(f"🔍 Starting competitive analysis for {len(competitors)} companies...")

        for i, competitor in enumerate(competitors, 1):
            print(f"📊 Analyzing {competitor['name']} ({i}/{len(competitors)})...")

            data = self.scrape_competitor_data(
                competitor['url'],
                competitor['name']
            )
            self.results.append(data)

        analysis_prompt = ChatPromptTemplate.from_messages([
            ("system", """
            You are a senior business analyst specializing in competitive intelligence.
            Analyze the scraped competitor data and provide comprehensive insights including:

            1. Market positioning analysis
            2. Pricing strategy comparison
            3. Feature gap analysis
            4. Target audience overlap
            5. Technology differentiation
            6. Market opportunities
            7. Competitive threats
            8. Strategic recommendations

            Provide actionable insights in JSON format with clear categories and recommendations.
            """),
            ("human", "Analyze this competitive data: {competitor_data}")
        ])

        clean_data = []
        for result in self.results:
            if result['success']:
                clean_data.append({
                    'company': result['company_name'],
                    'url': result['url'],
                    'data': result['scraped_data']
                })

        analysis_chain = analysis_prompt | llm | JsonOutputParser()

        try:
            competitive_analysis = analysis_chain.invoke({
                "competitor_data": json.dumps(clean_data, indent=2)
            })
        except:
            analysis_chain_text = analysis_prompt | llm
            competitive_analysis = analysis_chain_text.invoke({
                "competitor_data": json.dumps(clean_data, indent=2)
            })

        return {
            "analysis": competitive_analysis,
            "raw_data": self.results,
            "summary_stats": self.generate_summary_stats()
        }

    def generate_summary_stats(self) -> Dict[str, Any]:
        """Generate summary statistics from the analysis"""
        successful_scrapes = sum(1 for r in self.results if r['success'])
        failed_scrapes = len(self.results) - successful_scrapes

        return {
            "total_companies_analyzed": len(self.results),
            "successful_scrapes": successful_scrapes,
            "failed_scrapes": failed_scrapes,
            "success_rate": f"{(successful_scrapes/len(self.results)*100):.1f}%" if self.results else "0%",
            "analysis_timestamp": self.analysis_timestamp
        }

    def export_results(self, filename: str = None):
        """Export results to JSON and CSV files"""
        if not filename:
            filename = f"competitive_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        with open(f"{filename}.json", 'w') as f:
            json.dump({
                "results": self.results,
                "summary": self.generate_summary_stats()
            }, f, indent=2)

        df_data = []
        for result in self.results:
            if result['success']:
                df_data.append({
                    'Company': result['company_name'],
                    'URL': result['url'],
                    'Success': result['success'],
                    'Data_Length': len(str(result['scraped_data'])) if result['scraped_data'] else 0,
                    'Analysis_Date': result['analysis_date']
                })

        if df_data:
            df = pd.DataFrame(df_data)
            df.to_csv(f"{filename}.csv", index=False)

        print(f"✅ Results exported to {filename}.json and {filename}.csv")

In [17]:
def run_ai_saas_analysis():
    """Run a comprehensive analysis of AI/SaaS competitors"""

    analyzer = CompetitiveAnalyzer()

    ai_saas_competitors = [
        {"name": "OpenAI", "url": "https://openai.com"},
        {"name": "Anthropic", "url": "https://anthropic.com"},
        {"name": "Hugging Face", "url": "https://huggingface.co"},
        {"name": "Cohere", "url": "https://cohere.ai"},
        {"name": "Scale AI", "url": "https://scale.com"},
    ]

    results = analyzer.analyze_competitor_landscape(ai_saas_competitors)

    print("\n" + "="*80)
    print("🎯 COMPETITIVE ANALYSIS RESULTS")
    print("="*80)

    print(f"\n📊 Summary Statistics:")
    stats = results['summary_stats']
    for key, value in stats.items():
        print(f"   {key.replace('_', ' ').title()}: {value}")

    print(f"\n🔍 Strategic Analysis:")
    if isinstance(results['analysis'], dict):
        for section, content in results['analysis'].items():
            print(f"\n   {section.replace('_', ' ').title()}:")
            if isinstance(content, list):
                for item in content:
                    print(f"     • {item}")
            else:
                print(f"     {content}")
    else:
        print(results['analysis'])

    analyzer.export_results("ai_saas_competitive_analysis")

    return results

In [18]:
def run_ecommerce_analysis():
    """Analyze e-commerce platform competitors"""

    analyzer = CompetitiveAnalyzer()

    ecommerce_competitors = [
        {"name": "Shopify", "url": "https://shopify.com"},
        {"name": "WooCommerce", "url": "https://woocommerce.com"},
        {"name": "BigCommerce", "url": "https://bigcommerce.com"},
        {"name": "Magento", "url": "https://magento.com"},
    ]

    results = analyzer.analyze_competitor_landscape(ecommerce_competitors)
    analyzer.export_results("ecommerce_competitive_analysis")

    return results

In [19]:
@chain
def social_media_monitoring_chain(company_urls: List[str], config: RunnableConfig):
    """Monitor social media presence and engagement strategies of competitors"""

    social_media_prompt = ChatPromptTemplate.from_messages([
        ("system", """
        You are a social media strategist. Analyze the social media presence and strategies
        of these companies. Focus on:
        1. Platform presence (LinkedIn, Twitter, Instagram, etc.)
        2. Content strategy patterns
        3. Engagement tactics
        4. Community building approaches
        5. Brand voice and messaging
        6. Posting frequency and timing
        Provide actionable insights for improving social media strategy.
        """),
        ("human", "Analyze social media data for: {urls}")
    ])

    social_data = []
    for url in company_urls:
        try:
            result = smartscraper.invoke({
                "user_prompt": "Extract all social media links, community engagement features, and social proof elements",
                "website_url": url,
            })
            social_data.append({"url": url, "social_data": result})
        except Exception as e:
            social_data.append({"url": url, "error": str(e)})

    chain = social_media_prompt | llm
    analysis = chain.invoke({"urls": json.dumps(social_data, indent=2)}, config=config)

    return {
        "social_analysis": analysis,
        "raw_social_data": social_data
    }

In [20]:
def check_credits():
    """Check available credits"""
    try:
        credits_info = credits.invoke({})
        print(f"💳 Available Credits: {credits_info}")
        return credits_info
    except Exception as e:
        print(f"⚠️  Could not check credits: {e}")
        return None

In [12]:
if __name__ == "__main__":
    print("🚀 Advanced Competitive Analysis Tool with Gemini AI")
    print("="*60)

    check_credits()

    print("\n🤖 Running AI/SaaS Competitive Analysis...")
    ai_results = run_ai_saas_analysis()

    run_additional = input("\n❓ Run e-commerce analysis as well? (y/n): ").lower().strip()
    if run_additional == 'y':
        print("\n🛒 Running E-commerce Platform Analysis...")
        ecom_results = run_ecommerce_analysis()

    print("\n✨ Analysis complete! Check the exported files for detailed results.")

Google API key for Gemini:
··········
🚀 Advanced Competitive Analysis Tool with Gemini AI
💳 Available Credits: {'remaining_credits': 8, 'total_credits_used': 42}

🤖 Running AI/SaaS Competitive Analysis...
🔍 Starting competitive analysis for 5 companies...
📊 Analyzing OpenAI (1/5)...
📊 Analyzing Anthropic (2/5)...
📊 Analyzing Hugging Face (3/5)...
📊 Analyzing Cohere (4/5)...
📊 Analyzing Scale AI (5/5)...





🎯 COMPETITIVE ANALYSIS RESULTS

📊 Summary Statistics:
   Total Companies Analyzed: 5
   Successful Scrapes: 0
   Failed Scrapes: 5
   Success Rate: 0.0%
   Analysis Timestamp: 2025-06-02 14:29:15

🔍 Strategic Analysis:

   Analysis:
     {'competitorData': [], 'insights': {'marketPositioning': {'summary': 'No competitor data provided.  Unable to perform market positioning analysis.  Requires competitor data including brand positioning statements, target market descriptions, and perceived value propositions.', 'recommendations': ['Gather competitor data on brand positioning, target market, and value proposition.  Conduct surveys and market research to understand customer perceptions.']}, 'pricingStrategy': {'summary': 'No competitor data provided. Unable to compare pricing strategies. Requires competitor pricing data for various products/services.', 'recommendations': ['Gather competitor pricing data for comparable offerings. Analyze pricing models (value-based, cost-plus, competitive)