In [38]:
#get env setup
import getpass
import os
from dotenv import load_dotenv

#get env setup
load_dotenv('nb.env', override=True)

if not os.environ.get('NEO4J_URI'):
    os.environ['NEO4J_URI'] = getpass.getpass('NEO4J_URI:\n')
if not os.environ.get('NEO4J_USERNAME'):
    os.environ['NEO4J_USERNAME'] = getpass.getpass('NEO4J_USERNAME:\n')
if not os.environ.get('NEO4J_PASSWORD'):
    os.environ['NEO4J_PASSWORD'] = getpass.getpass('NEO4J_PASSWORD:\n')

NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')

In [87]:
import json
from person import Person, SkillName

#read json models back
with open('extracted-people-data.json', 'r') as file:
    people_json = json.load(file)
people = [Person(**person) for person in people_json]
people[0]

Person(id='5BiANRmk', name='Alex Thompson', email='alex.thompson@email.com', current_title='Junior Mobile Developer', department=<Department.ENGINEERING: 'Engineering'>, level=<Level.JUNIOR: 'Junior'>, hire_date=None, skills=[HasSkill(skill=Skill(name=<SkillName.JAVASCRIPT: 'JavaScript'>), proficiency=2, years_experience=2, context='Used in mobile app development and personal projects', is_primary=False)], accomplishments=[Accomplishment(type=<AccomplishmentType.BUILT: 'BUILT'>, thing=Thing(type=<WorkType.PRODUCT: 'PRODUCT'>, domain=<Domain.MOBILE: 'MOBILE'>), impact_description='Developed a banking mobile app using Swift during a 6-month internship, implemented secure authentication.', year=2024, role='Junior Mobile Developer', duration='6 months', team_size=None, context='FinTech Mobile internship'), Accomplishment(type=<AccomplishmentType.BUILT: 'BUILT'>, thing=Thing(type=<WorkType.PRODUCT: 'PRODUCT'>, domain=<Domain.MOBILE: 'MOBILE'>), impact_description='Built a social media iOS a

In [88]:
from neo4j import GraphDatabase

# load into People nodes in Neo4j

#instantiate driver
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

#test neo4j connection
driver.execute_query("MATCH(n) RETURN count(n)")

EagerResult(records=[<Record count(n)=10>], summary=<neo4j._work.summary.ResultSummary object at 0x10dbc5010>, keys=['count(n)'])

In [89]:
from neo4j import RoutingControl

#create uniqueness constraint if not exists
driver.execute_query(
    'CREATE CONSTRAINT IF NOT EXISTS FOR (n:Person) REQUIRE (n.id) IS NODE KEY',
    #database_=DATABASE,
    routing_=RoutingControl.WRITE
)

driver.execute_query(
    'CREATE CONSTRAINT IF NOT EXISTS FOR (n:Skill) REQUIRE (n.name) IS NODE KEY',
    #database_=DATABASE,
    routing_=RoutingControl.WRITE
)

driver.execute_query(
    'CREATE CONSTRAINT IF NOT EXISTS FOR (n:Thing) REQUIRE (n.name) IS NODE KEY',
    #database_=DATABASE,
    routing_=RoutingControl.WRITE
)

driver.execute_query(
    'CREATE CONSTRAINT IF NOT EXISTS FOR (n:Domain) REQUIRE (n.name) IS NODE KEY',
    #database_=DATABASE,
    routing_=RoutingControl.WRITE
)

driver.execute_query(
    'CREATE CONSTRAINT IF NOT EXISTS FOR (n:WorkType) REQUIRE (n.name) IS NODE KEY',
    #database_=DATABASE,
    routing_=RoutingControl.WRITE
)


EagerResult(records=[], summary=<neo4j._work.summary.ResultSummary object at 0x10d71d390>, keys=[])

In [90]:
# merge people
def chunks(xs, n=10):
    n = max(1, n)
    return [xs[i:i + n] for i in range(0, len(xs), n)]

for chunk in chunks(people_json):
    records = driver.execute_query(
        """
        UNWIND $records AS rec
        MERGE(person:Person {id:rec.id})
        SET person.name = rec.name,
            person.email = rec.email,
            person.current_title = rec.current_title,
            person.department = rec.department,
            person.level = rec.level,
            person.years_experience = rec.years_experience,
            person.location = rec.location
        RETURN count(rec) AS records_upserted
        """,
        #database_=DATABASE,
        routing_=RoutingControl.WRITE,
        result_transformer_= lambda r: r.data(),
        records = chunk
    )
    print(records)

[{'records_upserted': 10}]


In [91]:
# merge accomplishments
skills = []
accomplishments = []
for person in people_json:

    # extend skills list
    tmp_skills = person['skills'].copy()
    for skill in tmp_skills:
        skill['personId'] = person['id']
    skills.extend(tmp_skills)

    # extend accomplishments list
    tmp_accomplishments = person['accomplishments'].copy()
    for accomplishment in tmp_accomplishments:
        accomplishment['personId'] = person['id']
    accomplishments.extend(tmp_accomplishments)



In [92]:
skills[:3]

[{'skill': {'name': 'JavaScript'},
  'proficiency': 2,
  'years_experience': 2,
  'context': 'Used in mobile app development and personal projects',
  'is_primary': False,
  'personId': '5BiANRmk'},
 {'skill': {'name': 'Python'},
  'proficiency': 4,
  'years_experience': 4,
  'context': 'Used for security automation and scripting',
  'is_primary': False,
  'personId': 'MhzMrjwz'},
 {'skill': {'name': 'SQL'},
  'proficiency': 3,
  'years_experience': 3,
  'context': 'Used for analytics and market research analysis',
  'is_primary': True,
  'personId': 'ZIMWCRHs'}]

In [93]:
accomplishments[:2]

[{'type': 'BUILT',
  'thing': {'name': 'banking_app_5BiANRmk',
   'type': 'PRODUCT',
   'domain': 'MOBILE'},
  'impact_description': 'Developed a banking mobile app using Swift during a 6-month internship, implemented secure authentication.',
  'year': 2024,
  'role': 'Junior Mobile Developer',
  'duration': '6 months',
  'team_size': None,
  'context': 'FinTech Mobile internship',
  'personId': '5BiANRmk'},
 {'type': 'BUILT',
  'thing': {'name': 'social_media_ios_app_5BiANRmk',
   'type': 'PRODUCT',
   'domain': 'MOBILE'},
  'impact_description': 'Built a social media iOS application as a capstone project, supporting photo sharing and messaging.',
  'year': 2023,
  'role': 'Mobile Development Intern',
  'duration': 'Summer 2023',
  'team_size': None,
  'context': 'Social Media Startup internship',
  'personId': '5BiANRmk'}]

In [94]:
for chunk in chunks(skills):
    records = driver.execute_query(
        """
        UNWIND $records AS rec
        MATCH(person:Person {id:rec.personId})
        MERGE(skill:Skill {name:rec.skill.name})
        MERGE(person)-[r:KNOWS]->(skill)
        SET r.proficiency = rec.proficiency,
            r.years_experience = rec.years_experience,
            r.context  = rec.context,
            r.is_primary = rec.is_primary
        RETURN count(rec) AS records_upserted
        """,
        #database_=DATABASE,
        routing_=RoutingControl.WRITE,
        result_transformer_= lambda r: r.data(),
        records = chunk
    )
    print(records)

[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 2}]


In [95]:
for chunk in chunks(accomplishments):
    records = driver.execute_query(
        """
        UNWIND $records AS rec

        //match people
        MATCH(person:Person {id:rec.personId})

        //merge accomplishments
        MERGE(thing:Thing {name:rec.thing.name})
        MERGE(person)-[r:$(rec.type)]->(thing)
        SET r.impact_description = rec.impact_description,
            r.year = rec.year,
            r.role  = rec.role,
            r.duration = rec.duration,
            r.team_size = rec.team_size,
            r.context  = rec.context

        //merge domain and work type
        MERGE(Domain:Domain {name:rec.thing.domain})
        MERGE(thing)-[:IN]->(Domain)
        MERGE(WorkType:WorkType {name:rec.thing.type})
        MERGE(thing)-[:OF]->(WorkType)

        RETURN count(rec) AS records_upserted
        """,
        #database_=DATABASE,
        routing_=RoutingControl.WRITE,
        result_transformer_= lambda r: r.data(),
        records = chunk
    )
    print(records)

[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 6}]


In [82]:
from person import Domain, WorkType, SkillName

#Addotional Scvhema Info
[i.value for i in Domain]
[i.value for i in WorkType]
[i.value for i in SkillName]


['Machine Learning',
 'Deep Learning',
 'Natural Language Processing',
 'Computer Vision',
 'Data Science',
 'Statistics',
 'Python',
 'JavaScript',
 'Java',
 'C++',
 'R',
 'SQL',
 'Data Engineering',
 'Cloud Architecture',
 'AWS',
 'Docker',
 'Kubernetes',
 'Product Strategy',
 'Product Management',
 'Data Analysis',
 'Business Intelligence',
 'Leadership',
 'Team Management',
 'Communication',
 'Project Management',
 'Adobe Photoshop',
 'Social Media Marketing',
 'Accounting',
 'Legal Research']

In [99]:
# build adk agent with neo4j mcp
from google.adk.models.lite_llm import LiteLlm
from google.adk.agents import Agent
from google.adk.tools.mcp_tool.mcp_toolset import MCPToolset, StdioServerParameters

database_agent = Agent(
    name="graph_database_agent",
    # model="gemini-2.0-flash-exp",
    model=LiteLlm(model="openai/gpt-4.1"),
    # model=LiteLlm(model="anthropic/claude-sonnet-4-20250514"),
    description="""
    Agent to access knowledge graph stored in graph database
    """,
    instruction=f"""
      You are an Neo4j graph database and Cypher query expert, that must use the database schema with a user question and repeatedly generate valid cypher statements
      to execute on the database and answer the user's questions in a friendly manner in natural language. You can also directly return a graph schema when requested.

      For generating queries:
      If in doubt the database schema is always prioritized when it comes to nodes-types (labels) or relationship-types or property names, never take the user's input at face value.
      If the user requests also render tables, charts or other artifacts with the query results.
      Always validate the correct node-labels at the end of a relationship based on the schema.

      If a query fails or doesn't return data, use the error response 3 times to try to fix the generated query and re-run it, don't return the error to the user.
      If you cannot fix the query, explain the issue to the user and apologize.

      Fetch the graph database schema first and keep it in session memory to access later for query generation. Also keep in mind the below standardized property values:
      - For Domain.name The standard values are {[i.value for i in Domain]}
      - For WorkType.name The standard values are {[i.value for i in WorkType]}
      - For Skill.name The standard values are {[i.value for i in SkillName]}

      Keep results of previous executions in session memory and access if needed, for instance ids or other attributes of nodes to find them again
      removing the need to ask the user. This also allows for generating shorter, more focused and less error-prone queries
      to for drill downs, sequences and loops.
      If possible resolve names to primary keys or ids and use those for looking up entities.
      The schema always indicates *outgoing* relationship-types from an entity to another entity, the graph patterns read like english language.
      `company has supplier` would be the pattern `(o:Organization)-[:HAS_SUPPLIER]->(s:Organization)`

      To get the schema of a database use the `get_schema` tool without parameters. Store the response of the schema tool in session context
      to access later for query generation.

      To answer a user question generate one or more Cypher statements based on the database schema and the parts of the user question.
      If necessary resolve categorical attributes (like names, countries, industries, publications) first by retrieving them for a set of entities to translate from the user's request.
      Use the `read_neo4j_cypher` tool repeatedly with the Cypher statements, you MUST generate statements that use named query parameters with `$parameter` style names
      and MUST pass them as a second dictionary parameter to the tool, even if empty.
      Parameter data can come from the users requests, prior query results or additional lookup queries.
      After the data for the question has been sufficiently retrieved, pass the data and control back to the parent agent.
    """,
    tools=[MCPToolset(
        connection_params=StdioServerParameters(
            command='uvx',
            args=[
                "mcp-neo4j-cypher",
            ],
            env={ k: os.environ[k] for k in ["NEO4J_URI","NEO4J_USERNAME","NEO4J_PASSWORD"] }
        ),
        tool_filter=['get_neo4j_schema','read_neo4j_cypher']
    )]
)

from google.adk.runners import InMemoryRunner
from google.genai.types import Part, UserContent

APP_NAME = 'Database Agent'
USER_ID = 'Zach Blumenfeld'


runner = InMemoryRunner(app_name=APP_NAME, agent=database_agent)

session = await runner.session_service.create_session( app_name=runner.app_name, user_id=USER_ID)

async def run_prompt(new_message: str):
  content = UserContent(parts=[Part(text=new_message)])
  result = None
  async for event in runner.run_async(user_id=session.user_id, session_id=session.id, new_message=content):
    for part in event.content.parts:
      print(part.text, part.function_call, part.function_response)
      if part.text:
        result = part.text
  return result

In [100]:
# ask some questions
from IPython.display import Markdown, display

res = await run_prompt('How many people have Python Skills?')
print("\n\n\n\nFinal Response:")
display(Markdown(res))

None id='call_7us8bB1ETdVlQMzjWtKOLJCp' args={} name='get_neo4j_schema' None
None None will_continue=None scheduling=None id='call_7us8bB1ETdVlQMzjWtKOLJCp' name='get_neo4j_schema' response={'result': CallToolResult(meta=None, content=[TextContent(type='text', text='[{"label": "Person", "attributes": {"id": "STRING indexed", "current_title": "STRING", "text": "STRING", "level": "STRING", "location": "STRING", "email": "STRING", "department": "STRING", "name": "STRING", "years_experience": "INTEGER", "embedding": "LIST"}, "relationships": {"BUILT": "Thing", "WON": "Thing", "SHIPPED": "Thing", "KNOWS": "Skill", "PUBLISHED": "Thing", "OPTIMIZED": "Thing", "LED": "Thing", "MANAGED": "Thing"}}, {"label": "Skill", "attributes": {"name": "STRING indexed"}, "relationships": {}}, {"label": "Thing", "attributes": {"name": "STRING indexed"}, "relationships": {"IN": "Domain", "OF": "WorkType"}}, {"label": "Domain", "attributes": {"name": "STRING indexed"}, "relationships": {}}, {"label": "WorkType

There are 8 people who have Python skills in the database. If you need more details or want to know who they are, let me know!

In [102]:
# ask some questions
from IPython.display import Markdown, display

res = await run_prompt("Who should be on our new AI tiger team where we will use Google ADK and Langchain to make a chatbot? What Are the Skill Gaps?")
print("\n\n\n\nFinal Response:")
display(Markdown(res))

None id='call_KXGvti7S8JJQcx262QFNAxBo' args={'query': "MATCH (p:Person)-[:KNOWS]->(s:Skill) \nWHERE s.name IN ['Machine Learning','Deep Learning','Natural Language Processing','Computer Vision','Data Science','Statistics','Python', 'JavaScript', 'SQL'] \nRETURN p.name AS person, collect(s.name) AS relevant_skills \nORDER BY size(relevant_skills) DESC"} name='read_neo4j_cypher' None
None None will_continue=None scheduling=None id='call_KXGvti7S8JJQcx262QFNAxBo' name='read_neo4j_cypher' response={'result': CallToolResult(meta=None, content=[TextContent(type='text', text='[{"person": "Dr. Amanda Foster", "relevant_skills": ["Machine Learning", "Deep Learning", "Natural Language Processing", "Computer Vision", "Python"]}, {"person": "Sarah Chen", "relevant_skills": ["Machine Learning", "Deep Learning", "Natural Language Processing", "Computer Vision", "Python"]}, {"person": "Lisa Wang", "relevant_skills": ["Machine Learning", "Statistics", "Python", "SQL"]}, {"person": "Monica Garcia", "r

To assemble an AI tiger team for a chatbot project using Google ADK and Langchain, here are the best candidates based on your team's current skills:

Best Candidates:
- Dr. Amanda Foster: Machine Learning, Deep Learning, Natural Language Processing, Computer Vision, Python
- Sarah Chen: Machine Learning, Deep Learning, Natural Language Processing, Computer Vision, Python
- Lisa Wang: Machine Learning, Statistics, Python, SQL
- Monica Garcia: Python, SQL
- Jennifer Park: Python, SQL
- James Mitchell: Python, JavaScript
- Marcus Rodriguez: JavaScript, SQL

Additional Skills Useful for This Project:
- Python: Highly relevant for Langchain, Chatbot building, and Google ADK.
- Natural Language Processing: Essential for chatbot logic and AI understanding.
- JavaScript: Useful for integrating chatbots into web platforms.
- SQL: Helps with backend and data storage aspects.

Skill Gaps:
- None of the current team members have explicit experience with "Data Science" (which could help with analytics and model optimization).
- Only two people have both “Natural Language Processing” and strong Python skills (Dr. Amanda Foster and Sarah Chen).
- Deep Learning, Computer Vision, and NLP are limited to just those two (Amanda & Sarah).
- Some web stack experience (JavaScript) is present but spread between very few people, and only Marcus Rodriguez and James Mitchell have this skill (James also has Python).
- No one specifically covers “Google ADK” or “Langchain” as named skills—but related expertise is present.

Core Recommendations:
- Core AI/ML/NLP: Dr. Amanda Foster, Sarah Chen (strong for chatbot and AI logic)
- Backend/data: Lisa Wang, Monica Garcia, Jennifer Park (Python, SQL)
- Frontend/integration: James Mitchell, Marcus Rodriguez (JavaScript; James also has Python)

To round out your AI tiger team, focus on:
- Closing knowledge gaps in Data Science, as well as training/experience in Google ADK and Langchain tools.
- Strengthening JavaScript and SQL expertise for seamless integration and data operations.
- Possibly recruiting or upskilling someone with explicit chatbot platform and cloud architecture experience.

Let me know if you’d like a breakdown table or skill chart for a clearer view!

In [104]:
res = await run_prompt("Who shares the m ost similar types of accomplishments to our PhDs but isn't themselves a PhD? this may take multiple hops out to workType.  You didn't see work type before or Domain how can I make that clearer fgor you in the schema so you get it on first try?")
print("\n\n\n\nFinal Response:")
display(Markdown(res))

Thank you for your feedback! Your question makes perfect sense, and you’re absolutely right to highlight WorkType and Domain as key dimensions for categorizing accomplishments—sometimes relationships across several hops are critical for deeper similarity analysis.

### How to Make WorkType and Domain More Apparent in the Schema
To help agents and users (like myself) immediately see that accomplishments should be compared across their type/category (WorkType) and field (Domain), you can:

1. **Explicitly highlight relationships in the schema output.**  
   Show patterns such as:
   - `(Person)-[:SHIPPED]->(Thing)-[:OF]->(WorkType)`
   - `(Person)-[:PUBLISHED]->(Thing)-[:IN]->(Domain)`

2. **Annotate “accomplishment” nodes with their context.**  
   For example, have documentation/legend in the schema clarifying that "Thing" nodes represent accomplishments (papers, inventions, awards, etc.), and always clarify that each “Thing” has an “OF” (WorkType) and “IN” (Domain) relationship.

3. *

Thank you for your feedback! Your question makes perfect sense, and you’re absolutely right to highlight WorkType and Domain as key dimensions for categorizing accomplishments—sometimes relationships across several hops are critical for deeper similarity analysis.

### How to Make WorkType and Domain More Apparent in the Schema
To help agents and users (like myself) immediately see that accomplishments should be compared across their type/category (WorkType) and field (Domain), you can:

1. **Explicitly highlight relationships in the schema output.**  
   Show patterns such as:
   - `(Person)-[:SHIPPED]->(Thing)-[:OF]->(WorkType)`
   - `(Person)-[:PUBLISHED]->(Thing)-[:IN]->(Domain)`

2. **Annotate “accomplishment” nodes with their context.**  
   For example, have documentation/legend in the schema clarifying that "Thing" nodes represent accomplishments (papers, inventions, awards, etc.), and always clarify that each “Thing” has an “OF” (WorkType) and “IN” (Domain) relationship.

3. **Group edge descriptions by context.**  
   When listing relationships, explicitly group them as:
   ```
   Person:
   - ...standard attributes...
   - Relationships:
     - BUILT, WON, SHIPPED, PUBLISHED, OPTIMIZED, LED, MANAGED -> Thing (Accomplishments)
   
   Thing (Accomplishment):
     - name
     - IN -> Domain
     - OF -> WorkType
   ```
   (Where "Thing" is always associated with both a WorkType and Domain.)

4. **Include inline pattern examples.**  
   Add a schema comment like:
   ```
   // Typical accomplishment path:
   // (Person)-[:PUBLISHED]->(Thing)-[:OF]->(WorkType)-[:IN]->(Domain)
   ```

### Example of Clearer Schema Documentation:

Here’s how you might clarify the intended traversals and structure:

```
Person:
  - id, name, ...
  - Relationships:
    - ... (various verb relationships) ... -> Thing

Thing:
  - name
  - Relationships:
    - IN -> Domain      // e.g., "AI", "DATABASE"
    - OF -> WorkType    // e.g., "RESEARCH", "PRODUCT"

Domain:
  - name

WorkType:
  - name

// Typical accomplishment traversal example:
// (Person)-[:PUBLISHED]->(Thing)-[:OF]->(WorkType {name: "RESEARCH"})
```

---

Would you like me to now try to answer your original question, taking WorkType and Domain into account by traversing from Person to Thing to WorkType and Domain? If so, I’ll proceed with a more sophisticated query for you!

In [105]:
res = await run_prompt("Yes please try answering the question now")
print("\n\n\n\nFinal Response:")
display(Markdown(res))

None id='call_rTDY7ZP076FaM30MfNa6JsmI' args={'query': "// Step 1: Find Dr./PhD accomplishments' work types and domains\nMATCH (phd:Person) \nWHERE toLower(phd.name) STARTS WITH 'dr.' OR toLower(phd.current_title) CONTAINS 'phd'\nMATCH (phd)-[:BUILT|:WON|:SHIPPED|:PUBLISHED|:OPTIMIZED|:LED|:MANAGED]->(t:Thing)\nOPTIONAL MATCH (t)-[:OF]->(wt:WorkType)\nOPTIONAL MATCH (t)-[:IN]->(d:Domain)\nWITH phd, collect(DISTINCT {workType: wt.name, domain: d.name}) AS phd_accomplishment_types\nRETURN phd.name AS phd_name, phd_accomplishment_types"} name='read_neo4j_cypher' None
None None will_continue=None scheduling=None id='call_rTDY7ZP076FaM30MfNa6JsmI' name='read_neo4j_cypher' response={'result': CallToolResult(meta=None, content=[TextContent(type='text', text='[{"phd_name": "Dr. Amanda Foster", "phd_accomplishment_types": [{"domain": "AI", "workType": "CODE"}, {"domain": "AI", "workType": "TEAM"}, {"domain": "AI", "workType": "PRODUCT"}, {"domain": "AI", "workType": "RESEARCH"}, {"domain": "AI"

Sarah Chen shares the most similar types of accomplishments to your PhDs (specifically Dr. Amanda Foster) but does not appear to be a PhD herself.

Sarah Chen's tracked accomplishments match all the main work types and AI domain areas found with Dr. Amanda Foster, including:
- CODE (AI)
- TEAM (AI)
- PRODUCT (AI)
- RESEARCH (AI)
- AWARD (AI)

Additionally, Sarah also has unique experience with SYSTEM (AI) type accomplishments.

This makes Sarah Chen the closest peer to your PhDs in terms of the kinds of achievements and impact areas demonstrated.

If you’d like to see a full side-by-side table or see who comes next after Sarah, just let me know!