In [None]:
# TODO: define the Neo4j retriever

In [1]:
# Assistant welcome banner. This is a bare string expression: it is evaluated
# and discarded, so nothing is bound to a name or displayed by this cell.
'''
Welcome to Bird Watchers' Emporium! I can help you with anything and everything you'll need to start finding and identifying cool birds in your area or abroad! 
Need help identifying what kind of birds you want to see? I can accept descriptions, locations, or other specifications so we can narrow down your choices.
Once we've decided what birds you want to add to your field list, I can help you plan your trip and find any necessary equipment you'll need to begin your bird watching adventure!
'''

# Sample user queries; other cells pick one of these by list index.
questions = [
	'What are the closest blue birds near me?',
	'What do I need to go to see an Ostrich?',
	'What are some easy birds to start off with in my area?',
	'How many red birds are there in Australia?',
	'What equipment will I need to start bird watching?',
]

In [2]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

# Planner model. temperature=1 keeps sampling stochastic, so the repeated
# calls below can return different candidate plans for the same input.
planning_llm = ChatOllama(
	model='gpt-oss:20b',
	temperature=1,
	base_url='http://127.0.0.1:11434'
)

# System message: the planner's role, planning criteria and output format.
# Human message: filled with {query} and {tools} at invoke time.
planning_prompt = ChatPromptTemplate.from_messages([
	('system',
		'You are a planning assistant for an autonomous agent for Bird Watchers\' Emporium, a one-stop shop for anything and everything related to bird watching. '
		'As input, you will receive a user\'s QUERY, as well as a list of available TOOLS at your disposal. '
		'Each TOOL will be an overview of a capability/function that can be used to get more information or complete tasks. '
		'Given this information, think through each step that the agent could execute, using the available tools, to address the query. Use the following criteria when designing your plan:\n'
		'-- Each step should be observable and actionable; avoid speculation.\n'
		'-- You do not need to go into too much detail on how exactly each tool will be queried, the agent will do that for you.\n'
		'-- Focus on high-level steps to gather the information you will need in order to complete the given task.\n'
		'-- Separate out functionality as much as possible, but avoid redundancies.\n'
		'Return your answer as a series of bullet points. DO NOT LIST SUBSTEPS. Do not explain.'
	),
	('human',
		'QUERY: {query}\n'
		'TOOLS:\n{tools}\n\n'
	)
])

# Inputs that are identical for every attempt are built once, outside the loop.
query = questions[3]
available_tools = '\n'.join([
	'-- Graph database of birds, their facts (with semantic embeddings), their taxonomy, and their images. ',
	'-- Images of birds, indexed by vector embeddings for similarity search, with captions. ',
	'-- Flights API for trip planning. ',
	'-- Ecommerce listings for gear and equipment. ',
	'-- Hotel and transportation bookings. ',
	'-- Ask user for their location. ',
])
planning_chain = planning_prompt | planning_llm

plan_tries = 5
# Rebound to a fresh list on every run, so re-executing the cell does not
# accumulate plans from a previous execution.
plans = []

for ind in range(plan_tries):
	plan = planning_chain.invoke({
		'query': query,
		'tools': available_tools
	})
	plans.append(plan.content)
	print(f'PLAN {ind} ---- ')
	print(plan.content)

PLAN 0 ---- 
- Query the graph database for all bird species recorded in Australia.  
- Filter those species whose color attributes indicate a dominant red plumage.  
- Count the number of distinct species that match the red‑plumage filter.  
- Return that count to the user as the answer to the query.
PLAN 1 ---- 
- Query the graph database for all bird species recorded in Australia that have a documented red plumage attribute.  
- Count the distinct species returned by that query to obtain the total number of red birds.  
- Optionally retrieve a sample of images for each species from the image tool to verify coloration.  
- Compile the final count and a brief list of representative species to present to the user.
PLAN 2 ---- 
- Ask the user to confirm that they want the count of red‑coloured bird species that are found in Australia.  
- Query the bird graph database for all species whose color attribute includes “red” and whose range includes Australia.  
- Count the distinct species 

In [30]:
import random
from collections import defaultdict
from itertools import combinations

# Round-robin: every unordered pair of plan indices is compared exactly once.
pairs = list(combinations(range(len(plans)), 2))
# plan index -> number of pairwise wins. A plan that never wins gets no entry.
plan_tallies = defaultdict(int)

# Judge model; same model as the planner but with a low temperature.
grading_llm = ChatOllama(
	model='gpt-oss:20b',
	temperature=0.2,
	base_url='http://127.0.0.1:11434'
)

# NOTE(review): the criteria below are concatenated without newlines, so the
# model receives them as one run-on line. Left as-is to keep the prompt
# unchanged.
grading_prompt = ChatPromptTemplate.from_messages([
	('system',
		'You are a plan comparing agent. Your goal is to assess which plan (out of two plans) is better for answering/completing a particular query. '
		'You will accept the QUERY, PLAN 1, and PLAN 2 as input. You will return the integer 1 if PLAN 1 is better than PLAN 2, and return 2 if PLAN 2 is better. '
		'Evaluate the plans based on the following criteria: '
		'-- A plan should be thorough, reasonably accounting for any edge cases that affect the quality of the end result. '
		'-- A plan should avoid unneccessary steps and redundancies. '
		'-- A plan should be able to be reasonably executed. '
		'In your answer, ONLY return 1 or 2. DO NOT EXPLAIN YOUR ANSWER.'
	),
	('human',
		'QUERY: {query}\n'
		'PLAN 1:\n\n{plan1}\n\n'
		'PLAN 2:\n\n{plan2}\n\n'
	)
])

# Loop-invariant pieces, built once.
grading_chain = grading_prompt | grading_llm
# NOTE(review): this must be the query the plans were generated for.
query = questions[3]

for pair in pairs:
	# Randomly choose which plan of the pair is shown as "PLAN 1" --
	# presumably to offset any positional preference of the judge.
	first_option = random.randint(0, 1)
	second_option = 1 - first_option
	print(f'GRADING PLAN {pair[first_option]} AGAINST {pair[second_option]}')
	response = grading_chain.invoke({
		'query': query,
		'plan1': plans[pair[first_option]],
		'plan2': plans[pair[second_option]]
	})
	# Strip surrounding whitespace/newlines: a reply such as '1\n' is a valid
	# vote and must not be rejected as badly formatted.
	verdict = response.content.strip()
	if verdict == '1':
		print(f'PLAN {pair[first_option]} WON')
		plan_tallies[pair[first_option]] += 1
	elif verdict == '2':
		print(f'PLAN {pair[second_option]} WON')
		plan_tallies[pair[second_option]] += 1
	else:
		# Neither plan is credited for this comparison.
		print('INCORRECT FORMAT')

print(plan_tallies)

GRADING PLAN 0 AGAINST 1
PLAN 1 WON
GRADING PLAN 0 AGAINST 2
PLAN 2 WON
GRADING PLAN 0 AGAINST 3
PLAN 0 WON
GRADING PLAN 4 AGAINST 0
PLAN 4 WON
GRADING PLAN 2 AGAINST 1
PLAN 1 WON
GRADING PLAN 1 AGAINST 3
PLAN 1 WON
GRADING PLAN 4 AGAINST 1
PLAN 1 WON
GRADING PLAN 2 AGAINST 3
PLAN 2 WON
GRADING PLAN 4 AGAINST 2
PLAN 2 WON
GRADING PLAN 4 AGAINST 3
PLAN 3 WON
defaultdict(<class 'int'>, {1: 4, 2: 3, 0: 1, 4: 1, 3: 1})


In [35]:
# If no comparison produced a valid verdict, plan_tallies is empty and max()
# would fail with an unhelpful "empty sequence" error; fail with a clear one.
if not plan_tallies:
	raise ValueError('No plan received a valid vote; re-run the grading cell before selecting a plan.')

# Plan with the most pairwise wins. On a tie, max() returns the first
# top-scoring key in the dict's iteration order.
best_plan = plans[max(plan_tallies, key=plan_tallies.get)]
# One entry per non-blank line, trimmed, so blank lines in the model output
# are not numbered as empty steps.
steps = [line.strip() for line in best_plan.splitlines() if line.strip()]

for step_number, step in enumerate(steps, start=1):
	print(f'STEP {step_number}: {step}')

STEP 1: - Query the graph database for all bird species that are native to Australia and have a “red” coloration attribute.
STEP 2: - Retrieve the unique list of species from that query.
STEP 3: - Count the number of species in the list to give a species‑level total.
STEP 4: - If the graph database also stores population estimates for each species, fetch those estimates and sum them to approximate the total number of individual red birds in Australia.
STEP 5: - Return the species count (and, if available, the summed individual estimate) to the user, noting any uncertainty or data gaps.
