In [None]:
# imports
import asyncio
import json
import os
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from playwright.sync_api import sync_playwright

# Use the latest tokenizer we discussed earlier
import tiktoken
# Module-level tiktoken encoding object for the "o200k_base" vocabulary.
# NOTE(review): `encoding` is not referenced by any other visible cell, and the model
# configured below is a local qwen model, so token counts from this encoding would
# presumably only approximate that model's tokenization — confirm it is still needed.
encoding = tiktoken.get_encoding("o200k_base")

In [None]:
# Load variables from a local .env file (overriding anything already set in the
# process environment) and read the OpenAI key, which may legitimately be absent.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Informational sanity check only: it prints a hint and never stops the notebook.
if api_key and api_key.startswith('sk-proj-') and len(api_key)>18:
  print("API key looks good so far")
else: 
  print("There might be a problem with your API key? Please visit the troubleshooting notebook!")

# Model name handed to CompetitorAnalyst; call_llm sends it to the local
# OpenAI-compatible endpoint on localhost:11434.
MODEL = 'qwen2.5:3b'

# OpenAI() with no arguments raises when OPENAI_API_KEY is unset, which would abort
# "Run All" right after the warning above even though the analysis below only uses
# the local endpoint. Keep the original client when a key exists; otherwise fall
# back to the same local endpoint the analyst uses.
openai = OpenAI() if api_key else OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

In [None]:
class CompetitorAnalyst:
    """Scrapes competitor websites and asks an LLM to turn the text into a sales battlecard.

    Pages are fetched with ``requests`` first and re-rendered with Playwright when the
    static HTML carries too little text. The LLM is reached through an
    OpenAI-compatible chat-completions endpoint (by default a local one on port 11434).
    """

    # Pages whose extracted text is shorter than this are treated as JS-rendered shells.
    MIN_STATIC_TEXT_CHARS = 800
    # Number of scraped links offered to the LLM when it selects deep links.
    MAX_LINKS_IN_PROMPT = 20
    # Number of deep links requested from, and accepted from, the LLM.
    MAX_DEEP_LINKS = 3
    # Per-competitor character budget inside the battlecard prompt.
    MAX_DATA_CHARS = 4000

    def __init__(self, model="qwen2.5:3b", base_url="http://localhost:11434/v1", api_key="ollama"):
        """Store the model name and endpoint settings; no network access happens here.

        Args:
            model: Model identifier sent with every chat-completion request.
            base_url: Base URL of the OpenAI-compatible endpoint.
            api_key: API key passed to the client.
        """
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
        # Created lazily by call_llm so the client is built once, on first use.
        self._client = None

    def smart_fetch(self, url):
        """Return the HTML of ``url``, preferring a plain HTTP GET over a browser render.

        The static response is returned when its extracted text has at least
        ``MIN_STATIC_TEXT_CHARS`` characters. If the static fetch fails or the text is
        shorter than that, the page is rendered once with Playwright.

        Raises:
            Whatever ``scrape_with_playwright`` raises when the fallback also fails.
        """
        try:
            res = requests.get(url, headers=self.headers, timeout=10)
            text_content = BeautifulSoup(res.text, 'html.parser').get_text()
            if len(text_content) >= self.MIN_STATIC_TEXT_CHARS:
                return res.text
        except Exception as exc:
            # Best effort: any static-fetch failure falls back to the browser.
            # `Exception` (not a bare except) lets Ctrl-C / kernel interrupts through.
            print(f"Static fetch failed for {url} ({exc!r}); falling back to Playwright")
        # The fallback runs outside the try block so a Playwright failure is not
        # caught and retried a second time.
        return self.scrape_with_playwright(url)

    def scrape_with_playwright(self, url):
        """Render ``url`` in headless Chromium and return the resulting HTML."""
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop is running in this thread: use the sync API directly.
            return self._render_page(url)
        # A loop is already running in this thread (e.g. a Jupyter kernel), where
        # Playwright's sync API refuses to start. Render on a worker thread that has
        # no running loop and wait for its result.
        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(self._render_page, url).result()

    def _render_page(self, url):
        """Open ``url`` in a fresh headless Chromium instance and return ``page.content()``."""
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(url, wait_until="networkidle")
                return page.content()
            finally:
                # Close the browser even when navigation fails or times out.
                browser.close()

    def get_links_and_content(self, url):
        """Fetch ``url`` and return ``(text, links)``.

        Returns:
            A tuple of the page text (``<script>``/``<style>`` removed, one fragment per
            line) and a list of ``{"text", "url"}`` dicts with absolute, de-duplicated
            http(s) URLs in document order.
        """
        html = self.smart_fetch(url)
        soup = BeautifulSoup(html, 'html.parser')

        # Drop non-content tags before extracting text.
        for tag in soup(["script", "style"]):
            tag.decompose()

        links = []
        seen = set()
        for a in soup.find_all('a', href=True):
            absolute = urljoin(url, a['href'])
            # mailto:, tel:, javascript: and repeated links only waste the limited
            # number of prompt slots that filter_links has.
            if not absolute.startswith(("http://", "https://")) or absolute in seen:
                continue
            seen.add(absolute)
            links.append({"text": a.text.strip(), "url": absolute})

        return soup.get_text(separator='\n', strip=True), links

    def filter_links(self, company_name, links):
        """Ask the LLM for the most relevant links (pricing, features, about).

        Args:
            company_name: Name interpolated into the prompt.
            links: List of ``{"text", "url"}`` dicts; only the first
                ``MAX_LINKS_IN_PROMPT`` are sent.

        Returns:
            ``{"links": [{"type": str, "url": str}, ...]}`` with at most
            ``MAX_DEEP_LINKS`` entries. The list is empty when the reply holds no
            usable JSON.
        """
        prompt = f"""
        You are a research assistant. Provided is a list of links from {company_name}'s website.
        Pick the {self.MAX_DEEP_LINKS} most relevant links for understanding their product features, pricing, and company mission.
        Return ONLY a JSON object with this structure: 
        {{"links": [{{"type": "pricing/features/about", "url": "URL"}}]}}
        
        Links: {json.dumps(links[:self.MAX_LINKS_IN_PROMPT])} 
        """
        response = self.call_llm(prompt)
        selection = self._parse_link_selection(response)
        return {"links": selection[:self.MAX_DEEP_LINKS]}

    @staticmethod
    def _parse_link_selection(response):
        """Extract well-formed ``{"type", "url"}`` entries from a raw LLM reply."""
        if not isinstance(response, str):
            return []
        # Models often wrap JSON in chatter; keep the outermost {...} span only.
        start, end = response.find("{"), response.rfind("}")
        if start == -1 or end <= start:
            return []
        try:
            payload = json.loads(response[start:end + 1])
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return []
        candidates = payload.get("links") if isinstance(payload, dict) else None
        if not isinstance(candidates, list):
            return []

        selection = []
        for item in candidates:
            if not isinstance(item, dict):
                continue
            link_url = item.get("url")
            if not isinstance(link_url, str) or not link_url.strip():
                continue
            # Callers index both keys, so always provide them.
            selection.append({"type": str(item.get("type") or "page"), "url": link_url.strip()})
        return selection

    def call_llm(self, prompt, system_prompt="You are a helpful assistant."):
        """Send one system + user message pair to the model and return the reply text.

        Returns:
            The first choice's message content, or ``""`` when the content is empty/None.
        """
        # getattr keeps this working for instances created without __init__.
        client = getattr(self, "_client", None)
        if client is None:
            client = OpenAI(
                base_url=getattr(self, "base_url", "http://localhost:11434/v1"),
                api_key=getattr(self, "api_key", "ollama"),
            )
            self._client = client
        res = client.chat.completions.create(
            model=self.model,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
        )
        return res.choices[0].message.content or ""

    def generate_battlecard(self, comp1_name, comp1_data, comp2_name, comp2_data):
        """Build the comparison prompt for two competitors and return the LLM's Markdown reply.

        Each competitor's data is truncated to ``MAX_DATA_CHARS`` characters; ``None``
        or empty data is sent as an empty string.
        """
        system_message = "You are a Strategic Competition Consultant."
        comp1_excerpt = (comp1_data or "")[:self.MAX_DATA_CHARS]
        comp2_excerpt = (comp2_data or "")[:self.MAX_DATA_CHARS]
        user_prompt = f"""
        Analyze these two competitors: {comp1_name} and {comp2_name}.
        
        DATA FOR {comp1_name}:
        {comp1_excerpt}
        
        DATA FOR {comp2_name}:
        {comp2_excerpt}
        
        Create a Sales Battlecard in Markdown including:
        1. **Side-by-Side Comparison Table** (Pricing, Target Audience, Key Tech).
        2. **Gaps & Strengths**: Where does {comp1_name} beat {comp2_name}?
        3. **The 'Killer' Question**: 3 questions a salesperson should ask a prospect to expose {comp2_name}'s weaknesses.
        """
        return self.call_llm(user_prompt, system_message)

In [None]:
# Single shared analyst instance, bound to the model configured in the setup cell
analyst = CompetitorAnalyst(MODEL)

def run_research(name, url):
    """Collect research text for one company: its home page plus LLM-selected deep links.

    Uses the module-level ``analyst`` to scrape ``url``, lets it pick relevant
    sub-pages, then appends each sub-page's cleaned text under a
    ``--- <type> ---`` heading.

    Args:
        name: Company name, used for logging and in the link-selection prompt.
        url: Home page URL; also the base for resolving relative deep links.

    Returns:
        The home-page text followed by one section per deep link that was fetched.
    """
    print(f"--- Researching {name} ---")
    base_text, all_links = analyst.get_links_and_content(url)
    relevant = analyst.filter_links(name, all_links)

    sections = [base_text]
    visited = {url}  # never re-fetch the home page or the same deep link twice
    for link in relevant.get('links') or []:
        # The selection comes from an LLM, so tolerate malformed entries.
        if not isinstance(link, dict) or not link.get('url'):
            continue
        link_url = urljoin(url, link['url'])  # the model may return relative paths
        if link_url in visited:
            continue
        visited.add(link_url)

        print(f"Following deep link: {link_url}")
        try:
            # Same cleaning as the home page: script/style content is removed.
            page_text, _ = analyst.get_links_and_content(link_url)
        except Exception as exc:
            # One unreachable sub-page should not discard the rest of the research.
            print(f"Skipping {link_url}: {exc}")
            continue
        sections.append(f"--- {link.get('type', 'page')} ---\n{page_text}")

    return "\n\n".join(sections)

# Step 1: gather research text for each competitor (home page plus selected deep links)
data_notion = run_research(name="Notion", url="https://www.notion.so")
data_linear = run_research(name="Linear", url="https://linear.app")

# Step 2: have the LLM turn both research dumps into a Markdown battlecard
battlecard = analyst.generate_battlecard(
    comp1_name="Notion",
    comp1_data=data_notion,
    comp2_name="Linear",
    comp2_data=data_linear,
)

In [None]:
# Step 3: render the generated battlecard as formatted Markdown in the cell output
display(Markdown(data=battlecard))