# Module objectives
- Create Retrieval Tools
    - skills search
    - finding similar people
    - aggregation queries
- use LLMs and Agents to answer multi-part questions with GraphRAG

In [1]:
# Uncomment to install the notebook's dependencies.
# Note: the `dotenv` module imported below is published on PyPI as `python-dotenv`,
# and `langgraph` is required by the agent section of this notebook.
#%pip install graphdatascience neo4j python-dotenv openai langchain-openai langgraph

## Setup

Import our usual suspects

In [2]:
import os
import pandas as pd
from dotenv import load_dotenv
from graphdatascience import GraphDataScience
from neo4j import Query, GraphDatabase, RoutingControl, Result

Load env variables

In [3]:
# Load settings from ws.env; override=True lets the file win over already-exported variables.
load_dotenv('ws.env', override=True)

# Neo4j connection settings
HOST = os.getenv('NEO4J_URI')
USERNAME = os.getenv('NEO4J_USERNAME')
PASSWORD = os.getenv('NEO4J_PASSWORD')
DATABASE = os.getenv('NEO4J_DATABASE', 'neo4j')

# AI settings
LLM = os.getenv('LLM')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Fail fast with a readable message instead of the opaque
# "TypeError: str expected, not NoneType" that `os.environ[...] = None` would raise,
# or a late driver error when the URI is missing.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (('NEO4J_URI', HOST), ('OPENAI_API_KEY', OPENAI_API_KEY))
    if not setting_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required settings (expected in ws.env or the environment): "
        + ", ".join(_missing_settings)
    )

# Make sure the key is visible to libraries that read it from the process environment.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

Connect to neo4j db

In [4]:
# Create the shared Neo4j driver from the settings loaded above, then check
# right away that the configured database can be reached.
driver = GraphDatabase.driver(HOST, auth=(USERNAME, PASSWORD))
driver.verify_connectivity(database=DATABASE)

  experimental_warn(


### Schema
Keeping this in case we need to add in more indexes

In [5]:
# Schema DDL. Every statement uses `if not exists`, so re-running this cell is safe.
schema_statements = [
    'create constraint if not exists for (n:Person) require (n.email) is node key',
    'create constraint if not exists for (n:Skill) require (n.name) is node key',
    'create vector index ada_v if not exists for (n:Skill) on (n.embedding)'
]
for statement in schema_statements:
    driver.execute_query(statement, routing_=RoutingControl.WRITE, database_=DATABASE)

# List the indexes that now exist so the result can be inspected as a DataFrame.
schema_result_df = driver.execute_query(
    'show indexes',
    result_transformer_=lambda result: result.to_df(),
    routing_=RoutingControl.READ,
    database_=DATABASE,
)
schema_result_df.head(100)


Unnamed: 0,id,name,state,populationPercent,type,entityType,labelsOrTypes,properties,indexProvider,owningConstraint,lastRead,readCount
0,6,ada_v,ONLINE,100.0,VECTOR,NODE,[Skill],[embedding],vector-2.0,,2025-03-11T06:58:49.744000000+00:00,91
1,4,constraint_63bf11a1,ONLINE,100.0,RANGE,NODE,[Skill],[name],range-1.0,constraint_63bf11a1,2025-03-11T06:55:00.061000000+00:00,594
2,2,constraint_d3bfd313,ONLINE,100.0,RANGE,NODE,[Person],[email],range-1.0,constraint_d3bfd313,2025-03-11T05:01:11.830000000+00:00,302
3,0,index_343aff4e,ONLINE,100.0,LOOKUP,NODE,,,token-lookup-1.0,,2025-03-11T06:58:49.744000000+00:00,36
4,1,index_f7700477,ONLINE,100.0,LOOKUP,RELATIONSHIP,,,token-lookup-1.0,,,0


## Basic search

In [6]:
# Skill names to search for; this cell matches them against Skill.name exactly.
skills = ['Contineous Delivery', 'Cloud Native', 'Security']

# Rank people by the number of requested skills they know and list all of their skills.
driver.execute_query(
    '''
    match (p:Person)-[:KNOWS]->(s:Skill)
    where s.name in $skills
    return 
        count(*) as rank, 
        p.email as email, 
        p.name as person_name, 
        collect{ match (p)-[:KNOWS]->(anySkill) return anySkill.name } as skills
    order by rank desc limit 10
    ''',
    skills=skills,
    result_transformer_=lambda result: result.to_df(),
    routing_=RoutingControl.READ,
    database_=DATABASE,
).head(10)

Unnamed: 0,rank,email,person_name,skills
0,1,lucy.turner@test.org,Lucy Turner,"[Security, Express.js, Big Data, Scala, Docker]"
1,1,sophie.jackson@test.org,Sophie Jackson,"[Security, Pandas, Linux, Angular]"
2,1,mia.nelson@test.org,Mia Nelson,"[Security, WordPress, Big Data, Swift, AWS]"
3,1,david.lopez@test.org,David Lopez,"[Security, WordPress, PHP]"
4,1,thomas.brown@test.org,Thomas Brown,"[Security, R, Java, Docker]"
5,1,isabella.allen@test.org,Isabella Allen,"[Security, Scala, Cloud Architecture]"
6,1,olivia.johnson@test.org,Olivia Johnson,"[Security, Angular, CI/CD]"
7,1,amelia.davis@test.org,Amelia Davis,"[Security, PyTorch, Java, HTML5, Docker]"
8,1,emily.phillips@test.org,Emily Phillips,"[Security, Vue.js, PHP, Kubernetes, Data Visua..."
9,1,thomas.nelson@test.org,Thomas Nelson,"[Security, Pandas, Go]"


## Vector index search

In [10]:
# Import langchain open ai
from langchain_openai import OpenAIEmbeddings, ChatOpenAI


In [11]:
# Embedding client reused by the vector searches and the retrieval tool below.
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
# Embed the requested skill names so they can be compared against Skill.embedding.
v_skills = embeddings.embed_documents(skills)

### Strategy 1
For each search vector `v`, we get the approximate top 10 nearest nodes and keep the first 3 returned. We then put them together in a list (`skill_list`) and apply the same ranking as before (number of matching skills).

In [12]:
# For every embedded skill, query the `ada_v` vector index for 10 neighbours and keep
# the first 3; then rank people by how many of the collected skills they know.
driver.execute_query(
    '''
    unwind $v_skills as v
    call db.index.vector.queryNodes('ada_v', 10, toFloatList(v)) yield node
    with v, collect(node)[0..3] as top3
    unwind top3 as s
    with collect(s) as skill_list 
    match (p:Person)-[:KNOWS]->(s)
    where s in skill_list
    return 
        count(*) as rank, 
        p.email as email, 
        p.name as person_name, 
        collect{ match (p)-[:KNOWS]->(anySkill) return anySkill.name } as skills
    order by rank desc limit 10
    ''',
    v_skills=v_skills,
    result_transformer_=lambda result: result.to_df(),
    routing_=RoutingControl.READ,
    database_=DATABASE,
).head(10)

Unnamed: 0,rank,email,person_name,skills
0,3,david.rodriguez@test.org,David Rodriguez,"[Scrum, Azure, Cypher, Spark]"
1,2,olivia.johnson@test.org,Olivia Johnson,"[Security, Angular, CI/CD]"
2,2,andrew.martin@test.org,Andrew Martin,"[R, Java, Cloud Architecture, Testing, Node.js]"
3,2,joseph.lopez@test.org,Joseph Lopez,"[Linux, System Design, CI/CD, Django, TypeScri..."
4,2,natalie.miller@test.org,Natalie Miller,"[Go, Express.js, Azure, Testing, Machine Learn..."
5,2,andrew.anderson@test.org,Andrew Anderson,"[DevOps, Java, Testing, Node.js, Data Visualiz..."
6,2,isabella.allen@test.org,Isabella Allen,"[Security, Scala, Cloud Architecture]"
7,2,matthew.scott@test.org,Matthew Scott,"[Scrum, Azure, Cypher]"
8,2,isabella.jones@test.org,Isabella Jones,"[Scrum, TensorFlow, Cloud Architecture, ReactJS]"
9,2,sophia.walker@test.org,Sophia Walker,"[Scrum, DevOps, Django, C++]"


### Strategy 2
We use the same strategy as before to find semantically similar skills, but we then sort the results by cosine similarity.

In [13]:
# Same candidate skills as strategy 1, but each person is scored by summing the cosine
# similarity between every search vector and every matched skill embedding.
driver.execute_query(
    '''
    unwind $v_skills as v
    call db.index.vector.queryNodes('ada_v', 10, toFloatList(v)) yield node
    with v, collect(node)[0..3] as top3
    unwind top3 as s
    with collect(s) as skill_list 
    match (p:Person)-[:KNOWS]->(s)
    where s in skill_list
    with p, sum(reduce(res=0.0, x in $v_skills | res + vector.similarity.cosine(x,s.embedding))) as score
    return 
        score as rank, 
        p.email as email, 
        p.name as person_name, 
        collect{ match (p)-[:KNOWS]->(anySkill) return anySkill.name } as skills
    order by rank desc limit 10
    ''',
    v_skills=v_skills,
    result_transformer_=lambda result: result.to_df(),
    routing_=RoutingControl.READ,
    database_=DATABASE,
).head(10)

Unnamed: 0,rank,email,person_name,skills
0,8.097379,david.rodriguez@test.org,David Rodriguez,"[Scrum, Azure, Cypher, Spark]"
1,5.481543,isabella.allen@test.org,Isabella Allen,"[Security, Scala, Cloud Architecture]"
2,5.475316,olivia.johnson@test.org,Olivia Johnson,"[Security, Angular, CI/CD]"
3,5.417244,andrew.anderson@test.org,Andrew Anderson,"[DevOps, Java, Testing, Node.js, Data Visualiz..."
4,5.414759,andrew.martin@test.org,Andrew Martin,"[R, Java, Cloud Architecture, Testing, Node.js]"
5,5.411052,sophia.walker@test.org,Sophia Walker,"[Scrum, DevOps, Django, C++]"
6,5.411007,natalie.miller@test.org,Natalie Miller,"[Go, Express.js, Azure, Testing, Machine Learn..."
7,5.408567,sophie.perez@test.org,Sophie Perez,"[Scrum, Angular, Cloud Architecture]"
8,5.408567,isabella.jones@test.org,Isabella Jones,"[Scrum, TensorFlow, Cloud Architecture, ReactJS]"
9,5.408532,joseph.lopez@test.org,Joseph Lopez,"[Linux, System Design, CI/CD, Django, TypeScri..."


### Strategy 3
We can also look at community membership (from our earlier Leiden community detection). We could, for instance, imagine the user wanting to explore the community that looks most relevant.


In [14]:
# Same scoring as strategy 2; the top 10 people are then returned together with their
# `segmentId` (as `community`) and ordered by community instead of by score.
driver.execute_query(
    '''
    unwind $v_skills as v
    call db.index.vector.queryNodes('ada_v', 10, toFloatList(v)) yield node
    with v, collect(node)[0..3] as top3
    unwind top3 as s
    with collect(s) as skill_list 
    match (p:Person)-[:KNOWS]->(s)
    where s in skill_list
    with p, sum(reduce(res=0.0, x in $v_skills | res + vector.similarity.cosine(x,s.embedding))) as score
    with
        p.segmentId as community,
        score as rank, 
        p.email as email, 
        p.name as person_name, 
        collect{ match (p)-[:KNOWS]->(anySkill) return anySkill.name order by anySkill.name} as skills
    order by rank desc limit 10
    return 
        community,
        rank,
        email,
        person_name,
        skills
    order by community
    ''',
    v_skills=v_skills,
    result_transformer_=lambda result: result.to_df(),
    routing_=RoutingControl.READ,
    database_=DATABASE,
).head(10)

Unnamed: 0,community,rank,email,person_name,skills
0,0,5.481543,isabella.allen@test.org,Isabella Allen,"[Cloud Architecture, Scala, Security]"
1,0,5.475316,olivia.johnson@test.org,Olivia Johnson,"[Angular, CI/CD, Security]"
2,1,8.097379,david.rodriguez@test.org,David Rodriguez,"[Azure, Cypher, Scrum, Spark]"
3,1,5.411052,sophia.walker@test.org,Sophia Walker,"[C++, DevOps, Django, Scrum]"
4,1,5.411007,natalie.miller@test.org,Natalie Miller,"[Azure, Express.js, Go, Machine Learning, Test..."
5,1,5.408567,sophie.perez@test.org,Sophie Perez,"[Angular, Cloud Architecture, Scrum]"
6,1,5.408567,isabella.jones@test.org,Isabella Jones,"[Cloud Architecture, ReactJS, Scrum, TensorFlow]"
7,2,5.408532,joseph.lopez@test.org,Joseph Lopez,"[CI/CD, Django, Linux, ReactJS, System Design,..."
8,3,5.417244,andrew.anderson@test.org,Andrew Anderson,"[Data Visualization, DevOps, Java, Node.js, Te..."
9,3,5.414759,andrew.martin@test.org,Andrew Martin,"[Cloud Architecture, Java, Node.js, R, Testing]"


## Agents w/ GraphRAG

### Retrieval Tools

In [15]:
from typing import List, Optional
from pydantic import BaseModel, Field, validator
import functools
from langchain_core.tools import tool

class Skill(BaseModel):
    """
    Represents a professional skill or knowledge of a person.
    """
    # The docstring and the field description are presumably exposed to the LLM as part
    # of the tool argument schema, so their wording matters (typos fixed here).
    name: str = Field(..., description="Shortened name of the skill")

@tool
def retrieve_persons_given_set_of_skills(skills: List[Skill]) -> pd.DataFrame:
    """
    Given a list of skills this function will retrieve top 10 relevant persons. 
    If the question contains multiple skills, only call the function once using the set of
    skills as the argument for the function
    Returns a top k, not all persons - do not use for aggregation questions. 
    """
    # The docstring above is kept verbatim: it presumably doubles as the tool
    # description shown to the LLM, so rewording it could change agent behaviour.

    # Reduce the structured arguments to plain skill names and echo them for the reader.
    skill_names = [skill.name for skill in skills]
    print(skill_names)

    # One embedding per requested skill name.
    skill_vectors = embeddings.embed_documents(skill_names)

    # For each vector: take 3 of the 10 nearest Skill nodes from the `ada_v` index, find
    # the people who know them, score by summed cosine similarity, keep the top 10 and
    # return them ordered by community (segmentId).
    cypher = ''' 
            unwind $v_skills as v
            call db.index.vector.queryNodes('ada_v', 10, toFloatList(v)) yield node
            with v, collect(node)[0..3] as top3
            unwind top3 as s
            with collect(s) as skill_list 
            match (p:Person)-[:KNOWS]->(s)
            where s in skill_list
            with p, sum(reduce(res=0.0, x in $v_skills | res + vector.similarity.cosine(x,s.embedding))) as score
            with
                p.segmentId as community,
                score as rank, 
                p.email as email, 
                p.name as person_name, 
                collect{ match (p)-[:KNOWS]->(anySkill) return anySkill.name order by anySkill.name} as skills
            order by rank desc limit 10
            return 
                community,
                rank,
                email,
                person_name,
                skills
            order by community
        '''
    return driver.execute_query(
        cypher,
        v_skills=skill_vectors,
        result_transformer_=lambda result: result.to_df(),
        routing_=RoutingControl.READ,
        database_=DATABASE,
    )

In [16]:
# NOTE: a later cell in this notebook defines a tool with this same name again; the
# definition that runs last is the one that stays bound.
@tool
def retrieve_people_with_similar_skills(person_name: str, limit: int = 5) -> pd.DataFrame:
    """Find people with similar skills to the provided person"""
    # `limit` is annotated as int so the generated tool schema asks for an integer;
    # Cypher's LIMIT rejects non-integer values.
    #
    # Overlap query: for the person named `person_name`, find other people that KNOW at
    # least one of the same Skill nodes. `percentSharedSkills` is the number of shared
    # skills divided by the number of skills the reference person knows.
    return driver.execute_query(
        ''' 
        match (p1:Person{name:$name})-[:KNOWS]->(s:Skill)<-[:KNOWS]-(p2)
        where p1 <> p2
        with p2.name as name, 
            p2.segmentId AS segmentId,
            count{(p1)-[:KNOWS]->()} as p1Skills,
            toFloat(count(s)) as sharedSkills,
            collect(s.name) as sharedSkillsList
        return name, segmentId, sharedSkills/p1Skills AS percentSharedSkills, sharedSkillsList
        ORDER BY sharedSkills DESC LIMIT $limit
        ''',
        database_=DATABASE,
        name=person_name,
        limit=limit,
        routing_=RoutingControl.READ,
        result_transformer_= lambda r: r.to_df()
    )

# Tools are invoked through `.invoke` with a dict of arguments; calling the tool object
# directly is deprecated in LangChain and emits a deprecation warning.
retrieve_people_with_similar_skills.invoke({'person_name': 'Isabella Allen'})

  retrieve_people_with_similar_skills('Isabella Allen')


Unnamed: 0,name,segmentId,percentSharedSkills,sharedSkillsList
0,Lucy Turner,0,0.666667,"[Security, Scala]"
1,Sophie Jackson,0,0.333333,[Security]
2,Mia Nelson,0,0.333333,[Security]
3,David Lopez,0,0.333333,[Security]
4,Thomas Nelson,0,0.333333,[Security]


In [18]:
# NOTE: this cell re-declares the tool already defined in an earlier cell under the same
# name; this later definition is the one bound when the `tools` list is built below.
@tool
def retrieve_people_with_similar_skills(person_name: str, limit: int = 5) -> pd.DataFrame:
    """Find people with similar skills to the provided person"""
    # `limit` is annotated as int so the generated tool schema asks for an integer;
    # Cypher's LIMIT rejects non-integer values.
    #
    # Overlap query: for the person named `person_name`, find other people that KNOW at
    # least one of the same Skill nodes. `percentSharedSkills` is the number of shared
    # skills divided by the number of skills the reference person knows.
    return driver.execute_query(
        ''' 
        match (p1:Person{name:$name})-[:KNOWS]->(s:Skill)<-[:KNOWS]-(p2)
        where p1 <> p2
        with p2.name as name, 
            p2.segmentId AS segmentId,
            count{(p1)-[:KNOWS]->()} as p1Skills,
            toFloat(count(s)) as sharedSkills,
            collect(s.name) as sharedSkillsList
        return name, segmentId, sharedSkills/p1Skills AS percentSharedSkills, sharedSkillsList
        ORDER BY sharedSkills DESC LIMIT $limit
        ''',
        database_=DATABASE,
        name=person_name,
        limit=limit,
        routing_=RoutingControl.READ,
        result_transformer_= lambda r: r.to_df()
    )

# Tools are invoked through `.invoke` with a dict of arguments; calling the tool object
# directly is deprecated in LangChain and emits a deprecation warning.
retrieve_people_with_similar_skills.invoke({'person_name': 'Isabella Allen'})

Unnamed: 0,name,segmentId,percentSharedSkills,sharedSkillsList
0,Lucy Turner,0,0.666667,"[Security, Scala]"
1,Sophie Jackson,0,0.333333,[Security]
2,Mia Nelson,0,0.333333,[Security]
3,David Lopez,0,0.333333,[Security]
4,Thomas Nelson,0,0.333333,[Security]


In [20]:
from langchain_core.prompts import PromptTemplate

# Prompt used to turn a natural-language question into a single Cypher statement.
# It has two placeholders: {schema} (graph description) and {question} (user input).
t2c_prompt =  PromptTemplate.from_template("""
Task: Generate a Cypher statement for querying a Neo4j graph database from a user input. 
- Do not include triple backticks ``` or ```cypher or any additional text except the generated Cypher statement in your response.
- Do not use any properties or relationships not included in the schema.

Schema:
{schema}

#User Input
{question}


Cypher query:
""")

# Hand-written description of the graph (node labels, properties, relationship pattern).
# It is the text substituted for {schema} when the prompt is invoked.
annotated_schema = '''
Nodes:
  Person:
    description: "A person in our talent pool."
    properties:
      name:
        type: "string"
        description: "The full name of the person. serves as a unique identifier."
      email:
        type: "string"
        description: "The email address of the person."
      segmentId:
        type: "integer"
        description: "The talent segment for the person.  People in the same talent segment share similar skills."
  Skill:
    description: "A professional skill."
    properties:
      name:
        type: "string"
        description: "The unique name of the skill."
Relationships:
    KNOWS:
        description: "A person knowing a skill."
        query_pattern: "(:Person)-[:KNOWS]->(:Skill)"
'''

# Chat model dedicated to Cypher generation (separate from the agent's own model).
t2c_llm = ChatOpenAI(model='gpt-4', temperature=0)

def _strip_code_fences(text: str) -> str:
    """Return `text` without a surrounding Markdown code fence (``` or ```cypher)."""
    cleaned = text.strip()
    if cleaned.startswith('```'):
        cleaned = cleaned[3:]
        if cleaned.endswith('```'):
            cleaned = cleaned[:-3]
        # Drop an optional language tag that directly follows the opening fence.
        if cleaned[:6].lower() == 'cypher':
            cleaned = cleaned[6:]
    return cleaned.strip()


@tool
def perform_aggregation_query(question: str) -> pd.DataFrame:
    """
    perform an aggregation query on the Neo4j graph database and obtain the results.
    """
    # The docstring above is kept verbatim: it presumably doubles as the tool
    # description shown to the LLM.

    # Ask the text-to-Cypher model for a statement that answers `question`.
    prompt = t2c_prompt.invoke({'schema': annotated_schema, 'question': question})
    # The prompt forbids code fences, but models do not always comply; a fenced reply
    # would otherwise be sent to Neo4j as-is and fail with a syntax error.
    query = _strip_code_fences(t2c_llm.invoke(prompt).content)
    print(f"executing Cypher query:\n{query}")

    # NOTE(security): `query` is model-generated from user input and executed verbatim.
    # It is sent with READ routing; confirm the server rejects writes in that mode, or
    # connect with a read-only database user, before exposing this to untrusted users.
    return driver.execute_query(
        query,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        result_transformer_=lambda r: r.to_df()
    )

# Tools are invoked through `.invoke` with a dict of arguments; calling the tool object
# directly is deprecated in LangChain.
perform_aggregation_query.invoke({'question': 'describe talent segments by skills'})

executing Cypher query:
MATCH (p:Person)-[:KNOWS]->(s:Skill)
RETURN p.segmentId as Talent_Segment, collect(s.name) as Skills


Unnamed: 0,Talent_Segment,Skills
0,4.0,"[API Design, API Design, API Design, API Desig..."
1,2.0,"[API Design, AWS, Angular, Angular, Angular, A..."
2,0.0,"[AWS, AWS, Angular, Angular, Angular, Big Data..."
3,3.0,"[AWS, AWS, Angular, Blockchain, Blockchain, Bl..."
4,1.0,"[AWS, Angular, Angular, Azure, Azure, Azure, B..."
5,,"[Cypher, Cypher Queries, Graph Technology, Jav..."


In [21]:
# Tools are invoked through `.invoke` with a dict of arguments; calling the tool object
# directly is deprecated in LangChain.
perform_aggregation_query.invoke(
    {'question': 'how many people share skills with Isabella Allen, and what are the skills'}
)

executing Cypher query:
MATCH (p:Person {name: "Isabella Allen"})-[:KNOWS]->(s:Skill)<-[:KNOWS]-(other:Person)
RETURN COUNT(DISTINCT other) AS NumberOfPeople, COLLECT(DISTINCT s.name) AS SharedSkills


Unnamed: 0,NumberOfPeople,SharedSkills
0,29,"[Security, Scala, Cloud Architecture]"


### Instantiate LLM and Bind Tools

In [22]:
from langchain_core.messages import HumanMessage

# Chat model used by the agent (distinct from `t2c_llm`, which only writes Cypher).
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0,
    openai_api_base="https://api.openai.com/v1",
)

# Smoke test: send a single greeting and show the text of the reply.
response = llm.invoke([HumanMessage(content="hi!")])
response.content

'Hello! How can I assist you today?'

In [23]:
# Every retrieval tool the model is allowed to call.
tools = [retrieve_persons_given_set_of_skills, retrieve_people_with_similar_skills, perform_aggregation_query]

# A model handle that knows the tool schemas and can therefore emit tool calls.
llm_with_tools = llm.bind_tools(tools)

In [24]:
# Ask a question that should be answered with a tool and inspect what the model returns.
response = llm_with_tools.invoke(
    [HumanMessage(content="Who has front end development skills")]
)

print(
    f"ContentString: {response.content}",
    f"ToolCalls: {response.tool_calls}",
    sep="\n",
)

ContentString: 
ToolCalls: [{'name': 'retrieve_persons_given_set_of_skills', 'args': {'skills': [{'name': 'front end development'}]}, 'id': 'call_og0DJivQCFmtcgcV4vL2zxrY', 'type': 'tool_call'}]


We can see that there's now no text content, but there is a tool call! It wants us to call the `retrieve_persons_given_set_of_skills` tool.
This isn't calling that tool yet - it's just telling us to. In order to actually call it, we'll want to create our agent.

### Running Agents with LangGraph

In [25]:
from langgraph.prebuilt import create_react_agent

# Prebuilt ReAct-style agent: the model decides which tool to call and the graph runs it.
agent_executor = create_react_agent(model=llm, tools=tools)

In [26]:
# A plain greeting needs no tool, so the agent should answer directly.
response = agent_executor.invoke(
    {"messages": [HumanMessage(content="hi!")]}
)

# Show the resulting message list.
response["messages"]

[HumanMessage(content='hi!', additional_kwargs={}, response_metadata={}, id='21f695ee-b9a0-4583-8de6-5c5ec5a45f4c'),
 AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 204, 'total_tokens': 215, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_eb9dce56a8', 'finish_reason': 'stop', 'logprobs': None}, id='run-57e9ee4e-3b4d-47de-ada4-e64de03a25cc-0', usage_metadata={'input_tokens': 204, 'output_tokens': 11, 'total_tokens': 215, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]

In [27]:
# Streaming steps
for step in agent_executor.stream(
        {"messages": [HumanMessage(content="find a senior java developer that also knows cypher")]},
        stream_mode="values",
):
    step["messages"][-1].pretty_print()


find a senior java developer that also knows cypher
Tool Calls:
  retrieve_persons_given_set_of_skills (call_Z7HDTgomeySQir5MPbodEl7i)
 Call ID: call_Z7HDTgomeySQir5MPbodEl7i
  Args:
    skills: [{'name': 'java'}, {'name': 'cypher'}]
['java', 'cypher']
Name: retrieve_persons_given_set_of_skills

   community      rank                     email      person_name  \
0        0.0  1.878038     amelia.davis@test.org     Amelia Davis   
1        0.0  1.878038     thomas.brown@test.org     Thomas Brown   
2        1.0  1.878038       david.hill@test.org       David Hill   
3        3.0  1.878038   charles.taylor@test.org   Charles Taylor   
4        3.0  1.878038  andrew.anderson@test.org  Andrew Anderson   
5        3.0  1.878038  charlotte.adams@test.org  Charlotte Adams   
6        3.0  1.878038    andrew.martin@test.org    Andrew Martin   
7        4.0  1.878038    joseph.martin@test.org    Joseph Martin   
8        4.0  1.878038    thomas.garcia@test.org    Thomas Garcia   
9        NaN

### Conversation with Agent

In [51]:
# Conversation history carried from one turn to the next.
messages = []
for step in agent_executor.stream(
        {"messages": [HumanMessage(content="Who has front end development skills?")]},
        stream_mode="values",
):
    step["messages"][-1].pretty_print()
    # With stream_mode="values" every step carries the full message list so far, so the
    # history is replaced rather than extended (extending would append duplicates of the
    # whole conversation at every step).
    messages = step["messages"]


Who has front end development skills?
Tool Calls:
  retrieve_persons_given_set_of_skills (call_aRgoOUQ2uLACWUK9rIkM6K3o)
 Call ID: call_aRgoOUQ2uLACWUK9rIkM6K3o
  Args:
    skills: [{'name': 'front end development'}]
['front end development']
Name: retrieve_persons_given_set_of_skills

   community      rank                       email        person_name  \
0          0  0.913499    amelia.phillips@test.org    Amelia Phillips   
1          0  0.913499          ava.white@test.org          Ava White   
2          0  0.913499     peter.martinez@test.org     Peter Martinez   
3          2  0.913499         ryan.jones@test.org         Ryan Jones   
4          2  0.913499      natalie.brown@test.org      Natalie Brown   
5          2  0.913499      brian.jackson@test.org      Brian Jackson   
6          2  0.913499  william.rodriguez@test.org  William Rodriguez   
7          2  0.912831       daniel.smith@test.org       Daniel Smith   
8          3  0.913499       john.johnson@test.org     

In [52]:
for step in agent_executor.stream(
        {"messages": messages + [HumanMessage(content="Amanda Phillips looks like a good fit but she is leaving.  Who are some good replacements?")]},
        stream_mode="values",
):
    step["messages"][-1].pretty_print()
    # With stream_mode="values" every step carries the full message list so far, so the
    # history is replaced rather than extended (extending would append duplicates of the
    # whole conversation at every step).
    messages = step["messages"]


Amanda Phillips looks like a good fit but she is leaving.  Who are some good replacements?
Tool Calls:
  retrieve_people_with_similar_skills (call_h7EJXm25bCYA3zVXDwnc1Vaf)
 Call ID: call_h7EJXm25bCYA3zVXDwnc1Vaf
  Args:
    person_name: Amelia Phillips
Name: retrieve_people_with_similar_skills

           name  segmentId  percentSharedSkills               sharedSkillsList
0  Kevin Garcia          4                  0.4              [Angular, Docker]
1   Lucy Turner          0                  0.4                [Scala, Docker]
2   Elena Young          3                  0.4               [Angular, Scala]
3    Ryan Jones          2                  0.4  [Angular, Project Management]
4     Ava White          0                  0.4    [Scala, Project Management]

Here are some potential replacements for Amelia Phillips, based on similar skills:

1. **Kevin Garcia**
   - Shared Skills: Angular, Docker

2. **Lucy Turner**
   - Shared Skills: Scala, Docker

3. **Elena Young**
   - Shared S

In [53]:
for step in agent_executor.stream(
        {"messages": messages + [HumanMessage(content="Angular and Docker is good. How many people have these skills? what are their names?")]},
        stream_mode="values",
):
    step["messages"][-1].pretty_print()
    # With stream_mode="values" every step carries the full message list so far, so the
    # history is replaced rather than extended (extending would append duplicates of the
    # whole conversation at every step).
    messages = step["messages"]


Angular and Docker is good. How many people have these skills? what are their names?
Tool Calls:
  perform_aggregation_query (call_up6Bg3kp8qeWhdtraKruQWGo)
 Call ID: call_up6Bg3kp8qeWhdtraKruQWGo
  Args:
    question: How many people have both Angular and Docker skills?
executing Cypher query:
MATCH (p:Person)-[:KNOWS]->(s:Skill)
WHERE s.name IN ['Angular', 'Docker']
WITH p, COUNT(s) AS skillCount
WHERE skillCount = 2
RETURN COUNT(p)
Name: perform_aggregation_query

   COUNT(p)
0         2
Tool Calls:
  retrieve_persons_given_set_of_skills (call_l4pb3GBVxxc4b7neONNkfax1)
 Call ID: call_l4pb3GBVxxc4b7neONNkfax1
  Args:
    skills: [{'name': 'Angular'}, {'name': 'Docker'}]
['Angular', 'Docker']
Name: retrieve_persons_given_set_of_skills

   community      rank                      email       person_name  \
0          0  5.613126   amelia.phillips@test.org   Amelia Phillips   
1          0  3.718535       lucy.turner@test.org       Lucy Turner   
2          0  1.894591    sophie.jackso

In [54]:
for step in agent_executor.stream(
        {"messages": messages + [HumanMessage(content="Can you describe my talent pool by segments and most common skills?  What sorts of contracts are we well suited for? and areas for growth given industry trends?")]},
        stream_mode="values",
):
    step["messages"][-1].pretty_print()
    # With stream_mode="values" every step carries the full message list so far, so the
    # history is replaced rather than extended (extending would append duplicates of the
    # whole conversation at every step).
    messages = step["messages"]


Can you describe my talent pool by segments and most common skills?  What sorts of contracts are we well suited for? and areas for growth given industry trends?
Tool Calls:
  perform_aggregation_query (call_WroxMa8aXrPAiTrFfVnkbFRP)
 Call ID: call_WroxMa8aXrPAiTrFfVnkbFRP
  Args:
    question: Describe the talent pool by segments and most common skills.
  perform_aggregation_query (call_8SkrnOXstW4YvPYZQrFr3qZd)
 Call ID: call_8SkrnOXstW4YvPYZQrFr3qZd
  Args:
    question: What sorts of contracts is the talent pool well suited for?
  perform_aggregation_query (call_eMNc54HMNHGbWyz0PzHo9EDc)
 Call ID: call_eMNc54HMNHGbWyz0PzHo9EDc
  Args:
    question: What are the areas for growth given industry trends?
executing Cypher query:
MATCH (p:Person)-[:KNOWS]->(s:Skill) 
RETURN s.name AS Skill, COUNT(p) AS NumberOfPeopleWithSkill
ORDER BY NumberOfPeopleWithSkill DESC
executing Cypher query:
MATCH (p:Person)-[:KNOWS]->(s:Skill)
RETURN s.name AS Skill, COUNT(p) AS NumberOfPeopleWithSkill
ORDER