<a href="https://colab.research.google.com/github/kareemullah1234/AI_Agent_content/blob/main/Agent_search_Tavily_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q tavily-python duckduckgo-search beautifulsoup4 requests pygments

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━[0m [32m3.0/3.3 MB[0m [31m89.6 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m3.3/3.3 MB[0m [31m80.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m44.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
#!/usr/bin/env python
# coding: utf-8

# # 🌐 Lesson 3: Normal Search vs Agentic Search
# A comparison of traditional web scraping vs AI-powered agentic search using Tavily

import os
import re
import json
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from tavily import TavilyClient
from pygments import highlight, lexers, formatters

# 🔑 Hardcoded Tavily API Key
TAVILY_API_KEY = ""
os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY  # Set for Tavily client

# Initialize Tavily client
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)



In [19]:


def normal_search(query, max_results=3):
    """
    Perform a normal search using DuckDuckGo and return URLs.
    This simulates how a traditional scraper works.
    """
    try:
        with DDGS() as ddgs:
            results = ddgs.text(query, max_results=max_results)

        if not results:
            print("⚠️ No results returned from DuckDuckGo.")
            return []

        urls = [r["href"].strip() for r in results if r.get("href")]
        if not urls:
            print("⚠️ No valid URLs found in results.")
        return urls

    except Exception as e:
        print(f"⚠️ DuckDuckGo search failed: {e}")
        return []

# ========================
# 🧩 Helper: Scrape Webpage Content
# ========================
def scrape_content(url):
    """
    Scrape text from a webpage (like a normal bot would).
    This is part of the 'normal search' pipeline.
    """
    if not url:
        return "No URL provided."

    headers = {'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)'}
    try:
        response = requests.get(url, headers=headers, timeout=5)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract visible text from headings and paragraphs
        text_elements = soup.find_all(['p', 'h1', 'h2', 'h3', 'span'])
        content = ' '.join([elem.get_text(strip=True) for elem in text_elements])
        content = re.sub(r'\s+', ' ', content)  # Normalize whitespace
        return content[:5000]  # Limit length
    except Exception as e:
        return f"Scraping failed: {str(e)}"


# ========================
# 🧠 Agentic Search (Tavily) - Smart, AI-Powered
# ========================
def agentic_search(query):
    try:
        result = tavily_client.search(
            query=query,
            include_answer=True,
            include_sources=True,
            max_results=3
        )
        # Ensure sources is a list, even if missing
        if "sources" not in result:
            result["sources"] = []
        return result
    except Exception as e:
        return {"error": f"Tavily search failed: {str(e)}"}


In [20]:


# ========================
# 🎯 Example Query
# ========================
city = "San Francisco"
query = f"What is the current weather in {city}? Should I travel there today?"

print("🔍 QUERY:", query)
print("\n" + "="*80)

🔍 QUERY: What is the current weather in San Francisco? Should I travel there today?



Normal Search (Traditional Web Scraping)

In [22]:
print("🌐 NORMAL SEARCH: DuckDuckGo + Scraping")
print("="*80)

# Step 1: Search and get URLs
urls = normal_search(query, max_results=1)

if not urls:
    print("❌ No URLs found. Cannot proceed with scraping.")
else:
    print("🔗 Found URL:", urls[0])

    # Step 2: Scrape content
    raw_content = scrape_content(urls[0])
    print("\n📄 Raw scraped content (first 500 chars):")
    print(raw_content[:500] + "...\n")

    # Step 3: Try to extract useful info (manual parsing)
    if len(raw_content) > 10:  # Check if we got *some* content
        print("✅ Successfully scraped content.")
        if "weather" in raw_content.lower() or "forecast" in raw_content.lower():
            print("🌤️  Found weather-related keywords.")
        else:
            print("⚠️  No clear weather info found in the content.")
    else:
        print("❌ Scraped content is empty or invalid.")

print("\n⚠️ Limitations of Normal Search:")
print("  • Hard to parse dynamic content (e.g., JavaScript-heavy sites)")
print("  • May miss key facts due to poor layout parsing")
print("  • No summarization — you get raw text")
print("  • Fragile: breaks if site changes its HTML structure")
print("  • No semantic understanding of relevance")

🌐 NORMAL SEARCH: DuckDuckGo + Scraping
🔗 Found URL: https://forums.att.com/conversations/wireless-account/need-help/62cc8d3461d3c72dc2899213


  with DDGS() as ddgs:



📄 Raw scraped content (first 500 chars):
Menu Menu Deals Wireless Internet Accessories Prepaid Search Support Account Start of main content We're still here for you The AT&T Community Forums have closed. Try these digital resources to find what you’re looking for. How can we help you today? Get the myAT&T app Access all your account info in one place: View and pay your bill Make and confirm payments Shop and upgrade your device TextmyATTto556699for the link. Message and data rates may apply. Use the Smart Home Manager app Manage your W...

✅ Successfully scraped content.
⚠️  No clear weather info found in the content.

⚠️ Limitations of Normal Search:
  • Hard to parse dynamic content (e.g., JavaScript-heavy sites)
  • May miss key facts due to poor layout parsing
  • No summarization — you get raw text
  • Fragile: breaks if site changes its HTML structure
  • No semantic understanding of relevance


In [17]:
print("\n\n🧠 AGENIC SEARCH: Tavily AI Search")
print("="*80)

# Perform agentic search
result = agentic_search(query)

if "error" in result:
    print(result["error"])
else:
    print("✅ AI-Generated Answer:")
    print(result["answer"])

    print("\n📚 Sources:")
    for i, source in enumerate(result["sources"], 1):
        print(f"  {i}. {source}")

    print("\n🔍 Raw Results (for debugging):")
    # Pretty-print JSON with syntax highlighting
    formatted_json = json.dumps(result, indent=4)
    colorful_json = highlight(formatted_json, lexers.JsonLexer(), formatters.TerminalFormatter())
    print(colorful_json)



🧠 AGENIC SEARCH: Tavily AI Search
✅ AI-Generated Answer:
Today in San Francisco, it's partly cloudy with a temperature of 62°F. Travel is feasible, but check for any sudden weather changes.

📚 Sources:

🔍 Raw Results (for debugging):
{[37m[39;49;00m
[37m    [39;49;00m[94m"query"[39;49;00m:[37m [39;49;00m[33m"What is the current weather in San Francisco? Should I travel there today?"[39;49;00m,[37m[39;49;00m
[37m    [39;49;00m[94m"follow_up_questions"[39;49;00m:[37m [39;49;00m[34mnull[39;49;00m,[37m[39;49;00m
[37m    [39;49;00m[94m"answer"[39;49;00m:[37m [39;49;00m[33m"Today in San Francisco, it's partly cloudy with a temperature of 62\u00b0F. Travel is feasible, but check for any sudden weather changes."[39;49;00m,[37m[39;49;00m
[37m    [39;49;00m[94m"images"[39;49;00m:[37m [39;49;00m[],[37m[39;49;00m
[37m    [39;49;00m[94m"results"[39;49;00m:[37m [39;49;00m[[37m[39;49;00m
[37m        [39;49;00m{[37m[39;49;00m
[37m            [39;

In [7]:
print("\n" + "="*80)
print("🎯 COMPARISON: Normal vs Agentic Search")
print("="*80)

print("""
🟢 Agentic Search (Tavily) Benefits:
   • Returns a direct, natural language answer
   • Synthesizes info from multiple sources
   • Includes citations (sources)
   • Handles ambiguity and intent
   • No scraping or parsing needed
   • Works even if websites change

🔴 Normal Search (Scraping) Drawbacks:
   • Returns raw HTML/text — you must parse it
   • Fragile: breaks when site layout changes
   • Requires manual logic to extract facts
   • No summarization or reasoning
   • Hard to scale across topics

💡 Conclusion:
Agentic search turns the web into a **knowledge engine**.
Normal search just gives you links — you do all the work.
""")

print("✅ Try changing the query and re-running!")


🎯 COMPARISON: Normal vs Agentic Search

🟢 Agentic Search (Tavily) Benefits:
   • Returns a direct, natural language answer
   • Synthesizes info from multiple sources
   • Includes citations (sources)
   • Handles ambiguity and intent
   • No scraping or parsing needed
   • Works even if websites change

🔴 Normal Search (Scraping) Drawbacks:
   • Returns raw HTML/text — you must parse it
   • Fragile: breaks when site layout changes
   • Requires manual logic to extract facts
   • No summarization or reasoning
   • Hard to scale across topics

💡 Conclusion:
Agentic search turns the web into a **knowledge engine**.
Normal search just gives you links — you do all the work.

✅ Try changing the query and re-running!
