# Lesson 3: Agentic Search

In [1]:
# libraries
from dotenv import load_dotenv
import os
from tavily import TavilyClient

# pull environment variables in from the .env file
_ = load_dotenv()

# build the Tavily client using the API key found in the environment
client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

In [2]:
# send the question to Tavily and ask it to include a generated answer
result = client.search(
    "What is in Nvidia's new Blackwell GPU?",
    include_answer=True,
)

# display the answer field of the response
result["answer"]


"The new Nvidia Blackwell B200 GPU features 208 billion transistors and is considered the company's most powerful single-chip GPU. Nvidia claims that this chip can reduce AI inference operating costs and energy consumption by up to 25 times compared to its predecessor, the H100. Several major organizations, including Amazon Web Services, Google, Meta, Microsoft, and others, are expected to adopt the Blackwell platform for their AI needs."

## Regular search

In [3]:
# location to ask about (swap in your own city!)
city = "San Francisco"

# free-text question sent to the search engines; the city is interpolated
# into the first line
query = f"""
    what is the current weather in {city}?
    Should I travel there today?
    "weather.com"
"""

> Note: The `search` function below was modified to return a fixed list of expected results if an exception occurs. High volumes of student traffic sometimes cause rate-limit exceptions.

In [4]:
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
import re

# DuckDuckGo search client; search() below calls its text() method
ddg = DDGS()

def search(query, max_results=6):
    """Return the result URLs DuckDuckGo gives for ``query``.

    Args:
        query: Search string passed to ``ddg.text``.
        max_results: Maximum number of results to request (default 6).

    Returns:
        A list holding the ``"href"`` value of each result. If any exception
        is raised while searching, a fixed list of two weather.com URLs is
        returned instead, so the cells that follow still have something to
        work with.
    """
    try:
        results = ddg.text(query, max_results=max_results)
        return [i["href"] for i in results]
    except Exception as e:
        # Deliberate best-effort fallback, but say what was swallowed so a
        # rate limit can be told apart from a genuine bug.
        print(
            "returning previous results due to exception reaching ddg. "
            f"({type(e).__name__}: {e})"
        )
        # cover case where DDG rate limits due to high deeplearning.ai volume
        return [
            "https://weather.com/weather/today/l/USCA0987:1:US",
            "https://weather.com/weather/hourbyhour/l/54f9d8baac32496f6b5497b4bf7a277c3e2e6cc5625de69680e6169e7e38e9a8",
        ]


# print each URL that search() returns for the weather query, one per line
for i in search(query):
    print(i)

https://weather.com/weather/tenday/l/San Francisco CA USCA0987:1:US
https://weather.com/weather/today/l/San+Francisco+CA+USCA0987:1:US
https://weather.com/weather/today/l/San+Leandro+CA?canonicalCityId=13e87335b06bc090c9e37c3d432085070933292de8335f23aaff4ac2e7a3c335
https://weather.com/weather/today/l/Indian+Wells+CA?canonicalCityId=da16162382652f9e2235bf0841b71db5f0cfce94ae6afdfe6ca0e80721955ca8
https://weather.com/weather/today/l/Taos+NM?canonicalCityId=ec3043a54c3a463ef2a6eef522e0260d77b834f78572fdf191bdb0896e1f4f02


In [5]:
def scrape_weather_info(url, timeout=10):
    """Download a web page and return it parsed with BeautifulSoup.

    Args:
        url: Address of the page to fetch. A falsy value (``None`` or an
            empty string) returns early without any network access.
        timeout: Seconds to wait for the server before giving up
            (default 10). Passed straight to ``requests.get``.

    Returns:
        A ``BeautifulSoup`` object built from the response text on success.
        On failure a plain ``str`` message is returned instead:
        ``"Weather information could not be found."`` when ``url`` is falsy,
        or ``"Failed to retrieve the webpage."`` when the request raises,
        times out, or answers with a status code other than 200.
    """
    if not url:
        return "Weather information could not be found."

    # fetch data
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        # bounded wait so a stalled server cannot hang the notebook forever
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # connection errors and timeouts are reported the same way as a
        # bad status code, matching the function's string-on-failure style
        return "Failed to retrieve the webpage."
    if response.status_code != 200:
        return "Failed to retrieve the webpage."

    # parse result
    return BeautifulSoup(response.text, 'html.parser')


> Note: This produces a long output. After looking at it briefly, you may want to right-click the cell and clear its output so you don't have to scroll past it.

In [None]:
# use DuckDuckGo to find websites and take the first result
# (None when the search comes back empty; scrape_weather_info accepts that)
urls = search(query)
url = urls[0] if urls else None

# scrape first website; on failure this is an error string, not a parsed page
soup = scrape_weather_info(url)

print(f"Website: {url}\n\n")
if isinstance(soup, str):
    # error message from scrape_weather_info: a str has no .body to print
    print(soup)
else:
    print(str(soup.body)[:50000]) # limit long outputs

In [7]:
# gather the text of every heading and paragraph tag on the page
text_chunks = [
    tag.get_text(" ", strip=True)
    for tag in soup.find_all(['h1', 'h2', 'h3', 'p'])
]

# join the chunks, then collapse each run of whitespace (including the
# joining newlines) into a single space
weather_data = re.sub(r'\s+', ' ', "\n".join(text_chunks))

print(f"Website: {url}\n\n")
print(weather_data)

Website: https://weather.com/weather/tenday/l/San Francisco CA USCA0987:1:US


recents Specialty Forecasts 10 Day Weather - San Francisco, CA Today Sun 09 | Day Sunshine and clouds mixed. High 63F. Winds WSW at 15 to 25 mph. Sun 09 | Night Partly cloudy skies this evening will become overcast overnight. Low 53F. Winds SW at 10 to 20 mph. Mon 10 Mon 10 | Day Intervals of clouds and sunshine. High 67F. Winds SW at 10 to 20 mph. Mon 10 | Night Partly cloudy. Low 52F. Winds SW at 10 to 15 mph. Tue 11 Tue 11 | Day Partly cloudy skies. High 73F. Winds WSW at 10 to 20 mph. Tue 11 | Night Clear to partly cloudy. Low 54F. Winds SW at 10 to 15 mph. Wed 12 Wed 12 | Day Partly cloudy skies. High 67F. Winds SW at 15 to 25 mph. Wed 12 | Night Partly cloudy. Low 52F. Winds SW at 10 to 20 mph. Thu 13 Thu 13 | Day Partly cloudy skies. High 64F. Winds SW at 10 to 20 mph. Thu 13 | Night Partly cloudy skies. Low 52F. Winds WSW at 10 to 20 mph. Fri 14 Fri 14 | Day Generally sunny despite a few afternoon cl

## Agentic Search

In [8]:
# ask Tavily for a single result for the weather query
result = client.search(query, max_results=1)

# take the content field of that one result and show it
first_hit = result["results"][0]
data = first_hit["content"]

print(data)

{'location': {'name': 'San Francisco', 'region': 'California', 'country': 'United States of America', 'lat': 37.78, 'lon': -122.42, 'tz_id': 'America/Los_Angeles', 'localtime_epoch': 1717962064, 'localtime': '2024-06-09 12:41'}, 'current': {'last_updated_epoch': 1717961400, 'last_updated': '2024-06-09 12:30', 'temp_c': 16.7, 'temp_f': 62.1, 'is_day': 1, 'condition': {'text': 'Partly cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/day/116.png', 'code': 1003}, 'wind_mph': 6.9, 'wind_kph': 11.2, 'wind_degree': 240, 'wind_dir': 'WSW', 'pressure_mb': 1015.0, 'pressure_in': 29.96, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 67, 'cloud': 25, 'feelslike_c': 16.7, 'feelslike_f': 62.1, 'windchill_c': 13.2, 'windchill_f': 55.8, 'heatindex_c': 14.6, 'heatindex_f': 58.2, 'dewpoint_c': 9.8, 'dewpoint_f': 49.6, 'vis_km': 16.0, 'vis_miles': 9.0, 'uv': 4.0, 'gust_mph': 14.2, 'gust_kph': 22.9}}


In [9]:
import ast
import json
from pygments import highlight, lexers, formatters


def _parse_content(text):
    """Convert a result's content string into Python data.

    Tries three readings in order and returns the first that works:
    strict JSON, a Python literal (for example a single-quoted dict), and
    finally JSON after swapping every single quote for a double quote.
    The last step is the original approach; used on its own it corrupts
    any value that contains an apostrophe.

    Raises:
        json.JSONDecodeError: if none of the three readings succeeds.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass
    try:
        # literal_eval only accepts literal syntax, so no code is executed
        return ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError):
        return json.loads(text.replace("'", '"'))


# parse JSON
parsed_json = _parse_content(data)

# pretty print JSON with syntax highlighting
formatted_json = json.dumps(parsed_json, indent=4)
colorful_json = highlight(formatted_json,
                          lexers.JsonLexer(),
                          formatters.TerminalFormatter())

print(colorful_json)


{[37m[39;49;00m
[37m    [39;49;00m[94m"location"[39;49;00m:[37m [39;49;00m{[37m[39;49;00m
[37m        [39;49;00m[94m"name"[39;49;00m:[37m [39;49;00m[33m"San Francisco"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"region"[39;49;00m:[37m [39;49;00m[33m"California"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"country"[39;49;00m:[37m [39;49;00m[33m"United States of America"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lat"[39;49;00m:[37m [39;49;00m[34m37.78[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lon"[39;49;00m:[37m [39;49;00m[34m-122.42[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"tz_id"[39;49;00m:[37m [39;49;00m[33m"America/Los_Angeles"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime_epoch"[39;49;00m:[37m [39;49;00m[34m1717962064[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime"[39;49;00m:[37m [39;49;00m[33m"2024-06-09 12:41"[39;49;00m

<img src="./google_sample.png" width="800" height="600">