# Lesson 3: Agentic Search

In [28]:
# libraries
from dotenv import load_dotenv
import os
from tavily import TavilyClient

# read the .env file so its variables are available via the environment
# (return value is discarded on purpose)
_ = load_dotenv()

# build the Tavily client from the key stored under TAVILY_API_KEY
client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

In [29]:
# send the question to Tavily and request an answer field in the response
question = "What is in Nvidia's new Blackwell GPU?"
result = client.search(question, include_answer=True)

# display the answer (last expression of the cell)
result["answer"]


'The new Nvidia Blackwell architecture is set to power the RTX 50-series graphics cards. It features six-generation improvements in real-time generative AI capabilities and offers up to 25x less cost and energy consumption compared to its predecessor. The Blackwell B200 GPU specifically delivers up to 20 petaflops of compute performance and significant enhancements over its predecessor, making it a powerful option for AI supercomputers.'

## Regular search

In [30]:
# choose location (try to change to your own city!)
city = "San Francisco"

# search prompt: starts with a newline, and every line keeps its
# four-space indent and trailing newline
query = (
    "\n"
    f"    what is the current weather in {city}?\n"
    "    Should I travel there today?\n"
    '    "weather.com"\n'
)

> Note: `search` was modified to fall back to a fixed list of weather.com URLs if the DuckDuckGo request raises an exception. High volumes of student traffic sometimes cause rate-limit exceptions.

In [31]:
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
import re

# module-level DuckDuckGo client; search() calls its text() method
ddg = DDGS()

def search(query, max_results=6):
    """Return the result URLs DuckDuckGo gives for ``query``.

    Args:
        query: Search string passed to ``ddg.text``.
        max_results: Passed through to ``ddg.text`` as ``max_results``.

    Returns:
        A list with the ``href`` value of every result. If the lookup or the
        extraction raises for any reason (for example a rate limit), the
        error is printed and a fixed list of two weather.com URLs is
        returned instead, so the function itself never raises.
    """
    try:
        hits = ddg.text(query, max_results=max_results)
        return [hit["href"] for hit in hits]
    except Exception as exc:
        # Best effort by design, but report the actual error so a failure
        # is not mistaken for a live result.
        print(f"returning previous results due to exception reaching ddg: {exc!r}")
        # cover case where DDG rate limits due to high deeplearning.ai volume
        return [
            "https://weather.com/weather/today/l/USCA0987:1:US",
            "https://weather.com/weather/hourbyhour/l/54f9d8baac32496f6b5497b4bf7a277c3e2e6cc5625de69680e6169e7e38e9a8",
        ]


# list every URL found for the query, one per line
for link in search(query):
    print(link)

https://weather.com/weather/tenday/l/San Francisco CA USCA0987:1:US
https://weather.com/weather/today/l/San+Francisco+CA+USCA0987:1:US


In [32]:
def scrape_weather_info(url, timeout=10):
    """Fetch ``url`` and return its parsed HTML.

    Args:
        url: Address of the page to download. A falsy value skips the request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        A ``BeautifulSoup`` object built from the response text on success.
        On failure a plain message string is returned instead:
        "Weather information could not be found." when ``url`` is falsy, or
        "Failed to retrieve the webpage." when the request raises a
        ``requests.RequestException`` (including timeouts) or the response
        status is not 200. Callers must check for the string case.
    """
    if not url:
        return "Weather information could not be found."

    # fetch data; without a timeout requests may wait on a stalled server
    # indefinitely
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # report network-level failures the same way as a bad status code
        return "Failed to retrieve the webpage."
    if response.status_code != 200:
        return "Failed to retrieve the webpage."

    # parse result
    return BeautifulSoup(response.text, 'html.parser')


> Note: This produces a long output. After looking at it briefly, you may want to right-click the cell and clear its output so you don't have to scroll past it.

In [33]:
# use DuckDuckGo to find websites and take the first result
urls = search(query)
if not urls:
    # an empty result list would otherwise surface as a bare IndexError
    raise RuntimeError("search returned no URLs to scrape")
url = urls[0]

# scrape first website
soup = scrape_weather_info(url)
if isinstance(soup, str):
    # scrape_weather_info signals failure by returning a message string;
    # stop here with that message instead of failing later on `soup.body`
    raise RuntimeError(f"could not scrape {url}: {soup}")

print(f"Website: {url}\n\n")
print(str(soup.body)[:50]) # limit long outputs

Website: https://weather.com/weather/tenday/l/San Francisco CA USCA0987:1:US


<body><div class="appWrapper DaybreakLargeScreen L


In [34]:
# gather the text of every heading and paragraph, one entry per line
weather_data = "\n".join(
    tag.get_text(" ", strip=True)
    for tag in soup.find_all(['h1', 'h2', 'h3', 'p'])
)

# collapse every run of whitespace (including the newlines just added)
# into a single space
weather_data = re.sub(r'\s+', ' ', weather_data)

print(f"Website: {url}\n\n")
print(weather_data)

Website: https://weather.com/weather/tenday/l/San Francisco CA USCA0987:1:US


recents Specialty Forecasts 10 Day Weather - San Francisco, CA Tonight Fri 28 | Night Some clouds. Low 53F. Winds SW at 5 to 10 mph. Sat 29 Sat 29 | Day Partly cloudy skies. High near 70F. Winds WSW at 10 to 20 mph. Sat 29 | Night Clear. Low around 55F. Winds W at 10 to 20 mph. Sun 30 Sun 30 | Day A mainly sunny sky. High 74F. Winds W at 15 to 25 mph. Sun 30 | Night Mostly clear. Low 54F. Winds WSW at 10 to 20 mph. Mon 01 Mon 01 | Day Mainly sunny. High 73F. Winds WSW at 10 to 20 mph. Mon 01 | Night A mostly clear sky. Low near 55F. Winds WSW at 10 to 15 mph. Tue 02 Tue 02 | Day Sunny skies. High 77F. Winds W at 10 to 20 mph. Tue 02 | Night Clear skies. Low near 55F. Winds WSW at 10 to 15 mph. Wed 03 Wed 03 | Day Sunny skies. High 79F. Winds W at 10 to 20 mph. Wed 03 | Night A mostly clear sky. Low near 55F. Winds W at 10 to 15 mph. Thu 04 Thu 04 | Day A mainly sunny sky. High 76F. Winds W at 10 to 20 mph. T

## Agentic Search

In [35]:
# query Tavily, asking for a single result
result = client.search(query, max_results=1)

# take the content field of the first result and print it
first_hit = result["results"][0]
data = first_hit["content"]

print(data)

{'location': {'name': 'San Francisco', 'region': 'California', 'country': 'United States of America', 'lat': 37.78, 'lon': -122.42, 'tz_id': 'America/Los_Angeles', 'localtime_epoch': 1719646515, 'localtime': '2024-06-29 0:35'}, 'current': {'last_updated_epoch': 1719646200, 'last_updated': '2024-06-29 00:30', 'temp_c': 13.3, 'temp_f': 55.9, 'is_day': 0, 'condition': {'text': 'Partly cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/night/116.png', 'code': 1003}, 'wind_mph': 9.4, 'wind_kph': 15.1, 'wind_degree': 300, 'wind_dir': 'WNW', 'pressure_mb': 1010.0, 'pressure_in': 29.81, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 84, 'cloud': 25, 'feelslike_c': 12.0, 'feelslike_f': 53.5, 'windchill_c': 10.8, 'windchill_f': 51.4, 'heatindex_c': 12.3, 'heatindex_f': 54.1, 'dewpoint_c': 10.0, 'dewpoint_f': 50.0, 'vis_km': 16.0, 'vis_miles': 9.0, 'uv': 1.0, 'gust_mph': 14.8, 'gust_kph': 23.9}}


In [37]:
import ast
import json
from pygments import highlight, lexers, formatters

# parse the payload: accept strict JSON as-is; otherwise treat it as a
# Python-literal dict (single-quoted keys and strings). A blanket swap of
# ' for " would corrupt any value that contains an apostrophe.
try:
    parsed_json = json.loads(data)
except json.JSONDecodeError:
    # literal_eval only evaluates literals, so it does not execute code
    parsed_json = ast.literal_eval(data)

# pretty print JSON with syntax highlighting
formatted_json = json.dumps(parsed_json, indent=4)
colorful_json = highlight(formatted_json,
                          lexers.JsonLexer(),
                          formatters.TerminalFormatter())

print(colorful_json)


{[37m[39;49;00m
[37m    [39;49;00m[94m"location"[39;49;00m:[37m [39;49;00m{[37m[39;49;00m
[37m        [39;49;00m[94m"name"[39;49;00m:[37m [39;49;00m[33m"San Francisco"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"region"[39;49;00m:[37m [39;49;00m[33m"California"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"country"[39;49;00m:[37m [39;49;00m[33m"United States of America"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lat"[39;49;00m:[37m [39;49;00m[34m37.78[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lon"[39;49;00m:[37m [39;49;00m[34m-122.42[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"tz_id"[39;49;00m:[37m [39;49;00m[33m"America/Los_Angeles"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime_epoch"[39;49;00m:[37m [39;49;00m[34m1719646515[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime"[39;49;00m:[37m [39;49;00m[33m"2024-06-29 0:35"[39;49;00m

<img src="./google_sample.png" width="800" height="600">