In [2]:
import os
from openai import OpenAI
from serpapi import GoogleSearch
from dotenv import find_dotenv, load_dotenv
import requests

In [15]:
# Find a .env file and load it so the os.getenv lookups below can see its values.
env_path = find_dotenv()
load_dotenv(dotenv_path=env_path)

True

In [16]:
# Credentials and IDs read from the environment; each is None when the variable is unset.
SERPAPI_KEY = os.environ.get("SERPAPI_KEY")
# The OpenAI key lives under a differently named environment variable.
OPENAI_API_KEY = os.environ.get("OPENAI_RANDY_KEY")
# `cx` value and API key sent to the Google Custom Search endpoint.
GOOGLE_CX = os.environ.get("GOOGLE_CX")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

In [17]:
def get_search_results(query, api_key=None, num=10):
    """Run a Google search through SerpAPI and return the organic results.

    Args:
        query: Search string sent as the ``q`` parameter.
        api_key: SerpAPI key. When omitted, the module-level ``SERPAPI_KEY``
            is looked up at call time, so re-running the credentials cell
            takes effect without redefining this function.
        num: Number of results requested from the engine (default 10).

    Returns:
        A list of ``(title, snippet, link)`` tuples, one per entry in the
        response's ``organic_results``. A field missing from an entry is
        returned as an empty string instead of raising ``KeyError``. The
        list is empty when the response has no ``organic_results`` key.
    """
    if api_key is None:
        api_key = SERPAPI_KEY

    params = {
        "engine": "google",
        "q": query,
        "api_key": api_key,
        "num": num,
    }
    payload = GoogleSearch(params).get_dict()

    # Not every organic result carries every field, so read them leniently.
    return [
        (entry.get("title", ""), entry.get("snippet", ""), entry.get("link", ""))
        for entry in payload.get("organic_results", [])
    ]

In [7]:
# Search query shared by the search and ranking cells that follow.
query = "list of hospitals in Ontario"

In [None]:
# Fetch the organic Google results for `query` through SerpAPI.
results = get_search_results(query)

In [21]:
# Display the (title, snippet, link) tuples returned by the search.
results

[('General hospital locations',
  'General hospital locations ; Brampton. William Osler Health System ; Huntsville. Muskoka Algonquin Healthcare ; Markham. Oak Valley Health ; Midland.',
  'http://www.ontario.ca/page/general-hospital-locations'),
 ('List of hospitals in Canada',
  "Ontario · Children's Hospital of Eastern Ontario (CHEO) · Montfort Hospital · National Defence Medical Centre · The Ottawa Hospital · Civic Hospital · General ...",
  'https://en.wikipedia.org/wiki/List_of_hospitals_in_Canada'),
 ('A Comprehensive List of Ontario Hospitals',
  'A comprehensive list of the hospitals in Ontario province. Provided by the law offices of Greg Monforton & Partners.',
  'https://www.gregmonforton.com/windsor/personal-injury-lawyer/hospitals-in-ontario.html'),
 ('Classification of hospitals',
  'Hospitals are classified as general hospitals, convalescent hospitals, hospitals for chronic patients, active treatment teaching psychiatric hospitals.',
  'https://www.ontario.ca/page/class

In [None]:
def rank_links_llm(query, search_results):
    """Ask an OpenAI chat model to score search results for relevance to a query.

    Args:
        query: The search query the results are judged against.
        search_results: Iterable of ``(title, snippet, link)`` tuples.

    Returns:
        The text content of the model's first choice, returned unparsed.
    """
    llm = OpenAI(api_key=OPENAI_API_KEY)

    instruction = (
        "You are an AI tasked with selecting which search results are most useful "
        f"for the query: '{query}'. Score each entry from 0-10 for relevance.\n\n"
    )
    # One numbered section per result so the model can refer to entries by index.
    entries = [
        f"[{position}]\nTitle: {title}\nSnippet: {snippet}\nLink: {link}\n\n"
        for position, (title, snippet, link) in enumerate(search_results)
    ]

    completion = llm.chat.completions.create(
        model="gpt-4.1-nano",
        messages=[{"role": "user", "content": instruction + "".join(entries)}],
        temperature=0.2,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


In [36]:
# Have the LLM score each search result for relevance to `query`; the reply is free text.
results_llm = rank_links_llm(query, results)

In [38]:
# print() so the newlines in the model's reply render as line breaks.
print(results_llm)

Here's the relevance score for each search result based on their usefulness for the query "list of hospitals in Ontario":

[0] Title: General hospital locations  
Score: 8  
Reasoning: Provides specific locations within Ontario, listing several hospitals and their cities, directly relevant.

[1] Title: List of hospitals in Canada  
Score: 6  
Reasoning: Covers all of Canada, including Ontario, but less specific to Ontario alone.

[2] Title: A Comprehensive List of Ontario Hospitals  
Score: 9  
Reasoning: Explicitly mentions a comprehensive list of Ontario hospitals, highly relevant.

[3] Title: Classification of hospitals  
Score: 4  
Reasoning: Focuses on types of hospitals, not a list, so less directly useful.

[4] Title: Ontario Hospital Association OHA Members  
Score: 7  
Reasoning: Lists member hospitals in Ontario, relevant but may not be comprehensive.

[5] Title: LIST OF ONTARIO HOSPITALS & CLINICS  
Score: 9  
Reasoning: Directly states it is a list of Ontario hospitals and 

Iteration and Criteria

In [1]:
# Debug check that GOOGLE_CX is set in the environment.
# NOTE(review): needs the imports cell to run first — the recorded NameError
# comes from `os` not being imported yet when this cell was executed.
print(os.getenv("GOOGLE_CX"))

NameError: name 'os' is not defined

In [19]:
def google_search(query, num=5, timeout=10):
    """Query the Google Custom Search JSON API and return the result items.

    Args:
        query: Search string sent as the ``q`` parameter.
        num: Number of results to request (default 5; up to 10 per request).
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up, so a stalled connection cannot hang the notebook.

    Returns:
        A list of ``(title, snippet, link)`` tuples, one per entry in the
        response's ``items``. A field missing from an entry is returned as
        an empty string. The list is empty when the response has no
        ``items`` key.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
            Previously such responses were silently turned into ``[]``.
    """
    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        "key": GOOGLE_API_KEY,
        "cx": GOOGLE_CX,
        "q": query,
        "num": num,  # Up to 10 per request
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Surface bad credentials / quota errors instead of returning an empty list.
    response.raise_for_status()
    payload = response.json()

    return [
        (item.get("title", ""), item.get("snippet", ""), item.get("link", ""))
        for item in payload.get("items", [])
    ]

In [20]:
# Smoke-test the Custom Search helper with the same `query` used for the SerpAPI search.
print(google_search(query))

[('General hospital locations | ontario.ca', 'Apr 18, 2023 ... Brampton. William Osler Health System ; Huntsville. Muskoka Algonquin Healthcare ; Markham. Oak Valley Health ; Midland. Georgian Bay General\xa0...', 'http://www.ontario.ca/page/general-hospital-locations'), ('List of hospitals in Canada - Wikipedia', "Ontario · Children's Hospital of Eastern Ontario (CHEO) · Montfort Hospital · National Defence Medical Centre · The Ottawa Hospital · Civic Hospital · General\xa0...", 'https://en.wikipedia.org/wiki/List_of_hospitals_in_Canada'), ('Classification of hospitals | ontario.ca', 'Apr 26, 2023 ... Kingston. Kingston Health Sciences Centre. Kingston General Hospital ; London. London Health Sciences Centre. Victoria Hospital ; Ottawa. The\xa0...', 'https://www.ontario.ca/page/classification-hospitals'), ('Mount Sinai Hospital | Sinai Health', 'Physician directory. Find a specific physician who provides care at ... Toronto, Ontario M5G 1X5. View on Google Maps. Contact. 416-596-4200.

In [25]:
def plan_research_task(task_description: str) -> str:
    """Ask GPT-4 to break a research goal into step-by-step subtasks.

    Args:
        task_description: The research goal to plan for.

    Returns:
        The raw text of the model's first choice.
    """
    llm = OpenAI(api_key=OPENAI_API_KEY)

    system_prompt = (
        "You are an expert research assistant. Given a research goal, break it into "
        "subtasks that mimic how a human would plan and verify findings step by step."
    )
    user_prompt = (
        f"The research goal is: {task_description}. What are the logical subtasks "
        "we should execute to complete this accurately and efficiently?"
    )

    completion = llm.chat.completions.create(
        model="gpt-4",
        messages=[
            dict(role="system", content=system_prompt),
            dict(role="user", content=user_prompt),
        ],
        temperature=0.3,
    )
    return completion.choices[0].message.content

In [26]:
def generate_search_queries(task_description: str) -> str:
    """Ask GPT-4 for a handful of search-engine queries covering a task.

    Args:
        task_description: The task the queries should gather information for.

    Returns:
        The raw text of the model's first choice. The reply is not split
        into individual queries here.
    """
    llm = OpenAI(api_key=OPENAI_API_KEY)
    conversation = [
        dict(
            role="system",
            content="You are a helpful assistant skilled in crafting efficient search engine queries.",
        ),
        dict(
            role="user",
            content=(
                f"Based on the task: '{task_description}', provide 5-7 optimized Google-style "
                "search queries to gather relevant information. "
                "Prioritize variety, credibility, and relevance."
            ),
        ),
    ]
    reply = llm.chat.completions.create(model="gpt-4", messages=conversation, temperature=0.3)
    return reply.choices[0].message.content

In [27]:
def rank_links_with_criteria(search_results, query):
    """Ask GPT-4 to score search results against a five-part rubric.

    The rubric (relevance, authority, specificity, recency, extractability,
    each 0-5) is spelled out in the prompt, followed by one numbered entry
    per search result.

    Args:
        search_results: Iterable of ``(title, snippet, link)`` tuples.
        query: The search query the results are judged against.

    Returns:
        The raw text of the model's first choice.
    """
    llm = OpenAI(api_key=OPENAI_API_KEY)

    # Leading and trailing empty strings reproduce the blank lines around the rubric.
    rubric_lines = [
        "",
        "You are evaluating search engine results to decide which are worth clicking. Use the following scoring criteria for each result:",
        "",
        "1. Relevance (0–5): Does the title/snippet clearly match the query?",
        "2. Authority (0–5): Is it from a trusted source (e.g., .gov, .edu, hospital website)?",
        "3. Specificity (0–5): Is it highly specific or just loosely related?",
        "4. Recency (0–5): Is the info from the last 2–3 years?",
        "5. Extractability (0–5): Does it look like it contains lists or structured info?",
        "",
        f"Query: '{query}'",
        "",
        "Score the following results:",
        "",
    ]
    listing = "".join(
        f"\n[{position}] Title: {title}\nSnippet: {snippet}\nLink: {link}\n"
        for position, (title, snippet, link) in enumerate(search_results)
    )
    scoring_request = "\n".join(rubric_lines) + listing

    completion = llm.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a meticulous researcher scoring search results."},
            {"role": "user", "content": scoring_request},
        ],
        temperature=0.2,
    )
    return completion.choices[0].message.content


In [28]:
def _unwrap_quotes(text: str) -> str:
    """Drop one pair of quotes wrapping the whole string; leave inner quoted phrases alone."""
    wrapped = (
        len(text) >= 2
        and text[0] == text[-1]
        and text[0] in "\"'"
        and text[0] not in text[1:-1]
    )
    return text[1:-1].strip() if wrapped else text


def _first_query(raw_queries: str) -> str:
    """Pick the first usable search query out of the model's free-text reply.

    The reply is plain text, so its first line may be a preamble sentence or
    a list item such as ``1. "some query"``. The first line that starts with
    a list marker (``1.``, ``1)``, ``-``, ``*``, ``•``) wins, with the marker
    and any wrapping quotes removed. If no line has a marker, the first
    non-empty line is used. Returns "" for an empty reply.
    """
    fallback = ""
    for line in raw_queries.splitlines():
        candidate = line.strip()
        if not candidate:
            continue
        marker, _, rest = candidate.partition(" ")
        is_numbered = marker[-1] in ".)" and marker[:-1].isdigit()
        if rest and (is_numbered or marker in ("-", "*", "•")):
            return _unwrap_quotes(rest.strip())
        if not fallback:
            fallback = _unwrap_quotes(candidate)
    return fallback


task = "Find a list of hospitals in Ontario"

print(" Planning subtasks...")
subtasks = plan_research_task(task)
print(subtasks)

print("\nGenerating search queries...")
queries = generate_search_queries(task)
print(queries)

# Use just the first query for demo purposes. The reply is free text, so strip
# list numbering and wrapping quotes rather than sending the raw first line.
print("\nPerforming search...")
top_query = _first_query(queries)
results = google_search(top_query)

print("\n Ranking results...")
scores = rank_links_with_criteria(results, top_query)
print(scores)

 Planning subtasks...
1. Define Research Scope: Clearly define the scope of the research. Are we looking for all types of hospitals, or just specific types like public hospitals, private hospitals, or specialized hospitals?

2. Identify Reliable Sources: Identify reliable sources of information such as government health websites, hospital directories, and healthcare databases.

3. Preliminary Search: Conduct a preliminary search to get an overview of the information available.

4. Detailed Search: Carry out a detailed search from the identified sources. This could involve visiting each source and extracting relevant information.

5. Data Extraction: Extract necessary information like the name of the hospital, location, contact details, and type of hospital. 

6. Data Organization: Organize the extracted data in a systematic manner. This could be in the form of a spreadsheet or a database with columns for each piece of information.

7. Verification: Cross-check the information from mult