In [4]:
import getpass
import os
from dotenv import load_dotenv

# Load settings from the local env file; override=True lets the file's values
# replace variables that are already set in the process environment.
load_dotenv('nb.env', override=True)

# Every Neo4j setting the notebook needs. Any that is still unset (or empty)
# after loading the env file is requested interactively via a hidden prompt.
_NEO4J_SETTINGS = ('NEO4J_URI', 'NEO4J_USERNAME', 'NEO4J_PASSWORD')
for _setting in _NEO4J_SETTINGS:
    if not os.environ.get(_setting):
        os.environ[_setting] = getpass.getpass(_setting + ':\n')

# Expose the resolved settings as module-level names for the cells below.
NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD = (
    os.getenv(_setting) for _setting in _NEO4J_SETTINGS
)

In [5]:
import pandas as pd

def _end_year(end_date):
    """Return the first four characters of ``end_date``, or None when it is "present"."""
    if end_date.lower() == "present":
        return None
    return end_date[:4]


# Read the structured HRIS project-assignment table and derive helper columns.
# The keyword order matters: ``assign`` evaluates them one after another, so
# ``in_progress`` still sees the missing end dates, while ``duration`` and
# ``year`` see the already-filled "present" marker.
df = pd.read_csv('hris-tables/project-assignments.csv').assign(
    in_progress=lambda table: table['end_date'].isna(),
    end_date=lambda table: table['end_date'].fillna("present"),
    duration=lambda table: table['start_date'] + " - " + table['end_date'],
    year=lambda table: table['end_date'].apply(_end_year),
)
df.head()

Unnamed: 0,project_name,role_title,allocation_percentage,start_date,end_date,project_domain,project_type,accomplishment_type,person_id,in_progress,duration,year
0,Supply Chain Optimization Engine,AI Consultant,30.0,2018-03-01,2018-11-30,AI,PRODUCT,BUILT,xRPBlhk9,False,2018-03-01 - 2018-11-30,2018.0
1,Automated Content Moderation,Senior ML Engineer,60.0,2020-02-01,2021-08-31,AI,PRODUCT,BUILT,xRPBlhk9,False,2020-02-01 - 2021-08-31,2021.0
2,Edge Computing AI Platform,Principal Architect,80.0,2022-01-01,2023-12-31,AI,INFRASTRUCTURE,BUILT,xRPBlhk9,False,2022-01-01 - 2023-12-31,2023.0
3,Quantum-Classical Hybrid Research,Research Lead,40.0,2024-01-01,present,AI,RESEARCH,LED,xRPBlhk9,True,2024-01-01 - present,
4,Employee Self-Service Portal,Full-Stack Developer,90.0,2020-06-01,2021-12-31,WEB,PRODUCT,BUILT,kkkMTAId,False,2020-06-01 - 2021-12-31,2021.0


In [6]:
from neo4j import GraphDatabase

# Open a Neo4j driver using the connection settings resolved earlier in the
# notebook (NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD).
neo4j_auth = (NEO4J_USERNAME, NEO4J_PASSWORD)
driver = GraphDatabase.driver(NEO4J_URI, auth=neo4j_auth)

# Connectivity smoke test: count all nodes. Left as the last expression so the
# notebook displays the query result.
driver.execute_query("MATCH(n) RETURN count(n)")

EagerResult(records=[<Record count(n)=312>], summary=<neo4j._work.summary.ResultSummary object at 0x117436110>, keys=['count(n)'])

In [7]:
from neo4j import RoutingControl


#load structured data
def chunks(xs, n=10):
    """Split ``xs`` into consecutive slices of at most ``n`` items.

    Args:
        xs: A sliceable sequence (it must support ``len`` and slicing).
        n: Maximum slice length. Values below 1 are treated as 1.

    Returns:
        A list of slices of ``xs`` in original order; the final slice may be
        shorter than ``n``. An empty ``xs`` yields an empty list.
    """
    step = max(1, n)
    batches = []
    for start in range(0, len(xs), step):
        batches.append(xs[start:start + step])
    return batches

# Cypher run once per batch. For every row it matches the existing Person by
# id, merges the Thing named after the project, merges a relationship whose
# type comes from the row's accomplishment_type, and links the Thing to its
# Domain and WorkType. Rows whose person_id matches no Person produce no
# output row, so they are not included in records_upserted.
upsert_query = """
        UNWIND $records AS rec

        //match people
        MATCH(person:Person {id:rec.person_id})

        //merge accomplishments
        MERGE(thing:Thing {name:rec.project_name})
        MERGE(person)-[r:$(rec.accomplishment_type)]->(thing)
        SET r.year = rec.year,
            r.role  = rec.role_title,
            r.duration = rec.duration,
            thing.in_progress = rec.in_progress,
            thing.internal_project=true

        //merge domain and work type
        MERGE(Domain:Domain {name:rec.project_domain})
        MERGE(thing)-[:IN]->(Domain)
        MERGE(WorkType:WorkType {name:rec.project_type})
        MERGE(thing)-[:OF]->(WorkType)

        RETURN count(rec) AS records_upserted
        """

# Send the frame to Neo4j in batches (default batch size of `chunks`) and
# print the per-batch upsert count. No database_ argument is passed.
for chunk in chunks(df.to_dict(orient='records')):
    records = driver.execute_query(
        upsert_query,
        routing_=RoutingControl.WRITE,
        result_transformer_=lambda result: result.data(),
        records=chunk,
    )
    print(records)

[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]


In [9]:
# build adk agent with neo4j mcp
from person import Domain, WorkType, SkillName
from google.adk.models.lite_llm import LiteLlm
from google.adk.agents import Agent
from google.adk.tools.mcp_tool.mcp_toolset import MCPToolset, StdioServerParameters

# Agent that answers questions by querying the Neo4j knowledge graph through
# the `mcp-neo4j-cypher` MCP server (launched over stdio with `uvx`).
# Only the two tools listed in `tool_filter` are exposed to the model, so the
# instruction text must refer to them by exactly those names.
database_agent = Agent(
    name="graph_database_agent",
    # model="gemini-2.0-flash-exp",
    model=LiteLlm(model="openai/gpt-4.1"),
    # model=LiteLlm(model="anthropic/claude-sonnet-4-20250514"),
    description="""
    Agent to access knowledge graph stored in graph database
    """,
    # f-string: the standard Domain / WorkType / SkillName values are
    # interpolated from the enums imported from `person`.
    instruction=f"""
      You are an Neo4j graph database and Cypher query expert, that must use the database schema with a user question and repeatedly generate valid cypher statements
      to execute on the database and answer the user's questions in a friendly manner in natural language. You can also directly return a graph schema when requested.

      For generating queries:
      If in doubt the database schema is always prioritized when it comes to nodes-types (labels) or relationship-types or property names, never take the user's input at face value.
      If the user requests also render tables, charts or other artifacts with the query results.
      Always validate the correct node-labels at the end of a relationship based on the schema.

      If a query fails or doesn't return data, use the error response 3 times to try to fix the generated query and re-run it, don't return the error to the user.
      If you cannot fix the query, explain the issue to the user and apologize.

      Fetch the graph database schema first and keep it in session memory to access later for query generation. Also keep in mind the below standardized property values:
      - For Domain.name The standard values are {[i.value for i in Domain]}
      - For WorkType.name The standard values are {[i.value for i in WorkType]}
      - For Skill.name The standard values are {[i.value for i in SkillName]}

      Keep results of previous executions in session memory and access if needed, for instance ids or other attributes of nodes to find them again
      removing the need to ask the user. This also allows for generating shorter, more focused and less error-prone queries
      to for drill downs, sequences and loops.
      If possible resolve names to primary keys or ids and use those for looking up entities.
      The schema always indicates *outgoing* relationship-types from an entity to another entity, the graph patterns read like english language.
      `company has supplier` would be the pattern `(o:Organization)-[:HAS_SUPPLIER]->(s:Organization)`

      To get the schema of a database use the `get_neo4j_schema` tool without parameters. Store the response of the schema tool in session context
      to access later for query generation.

      To answer a user question generate one or more Cypher statements based on the database schema and the parts of the user question.
      If necessary resolve categorical attributes (like names, countries, industries, publications) first by retrieving them for a set of entities to translate from the user's request.
      Use the `read_neo4j_cypher` tool repeatedly with the Cypher statements, you MUST generate statements that use named query parameters with `$parameter` style names
      and MUST pass them as a second dictionary parameter to the tool, even if empty.
      Parameter data can come from the users requests, prior query results or additional lookup queries.
      After the data for the question has been sufficiently retrieved, pass the data and control back to the parent agent.
    """,
    tools=[MCPToolset(
        connection_params=StdioServerParameters(
            command='uvx',
            args=[
                "mcp-neo4j-cypher",
            ],
            # Forward only the Neo4j connection settings to the MCP server process.
            env={ k: os.environ[k] for k in ["NEO4J_URI","NEO4J_USERNAME","NEO4J_PASSWORD"] }
        ),
        # Read-only surface: schema inspection and read queries.
        tool_filter=['get_neo4j_schema','read_neo4j_cypher']
    )]
)

from google.adk.runners import InMemoryRunner
from google.genai.types import Part, UserContent

APP_NAME = 'Database Agent'
USER_ID = 'Zach Blumenfeld'


runner = InMemoryRunner(app_name=APP_NAME, agent=database_agent)

session = await runner.session_service.create_session( app_name=runner.app_name, user_id=USER_ID)

async def run_prompt(new_message: str):
  """Send one user message to the agent and return its last text reply.

  Streams events from the notebook-level ``runner`` within the notebook-level
  ``session``. For each part of each event it prints the part's text,
  function call and function response, which serves as a trace of the
  agent's tool usage.

  Args:
    new_message: The user's message text.

  Returns:
    The text of the last part that carried non-empty text, or None when no
    event contained any text.
  """
  content = UserContent(parts=[Part(text=new_message)])
  result = None
  async for event in runner.run_async(user_id=session.user_id, session_id=session.id, new_message=content):
    # An event's content, and a content's parts, are optional; skip events
    # that carry nothing instead of failing on a None attribute/iteration.
    parts = event.content.parts if event.content else None
    for part in parts or []:
      print(part.text, part.function_call, part.function_response)
      if part.text:
        result = part.text
  return result

In [10]:
# ask some questions
from IPython.display import Markdown, display

res = await run_prompt("Who should be on our new AI tiger team where we will use Google ADK and Langchain to make a chatbot? What Are the Skill Gaps? Take into account who has successfully delivered other AI/Data Projects together")
print("\n\n\n\nFinal Response:")
display(Markdown(res))

None id='call_9rSVk7MQrQKNp1t0dnLuXLrc' args={} name='get_neo4j_schema' None
None None will_continue=None scheduling=None id='call_9rSVk7MQrQKNp1t0dnLuXLrc' name='get_neo4j_schema' response={'result': CallToolResult(meta=None, content=[TextContent(type='text', text='[{"label": "Person", "attributes": {"id": "STRING indexed", "current_title": "STRING", "text": "STRING", "level": "STRING", "location": "STRING", "email": "STRING", "department": "STRING", "name": "STRING", "years_experience": "INTEGER", "embedding": "LIST"}, "relationships": {"BUILT": "Thing", "WON": "Thing", "SHIPPED": "Thing", "KNOWS": "Skill", "PUBLISHED": "Thing", "OPTIMIZED": "Thing", "LED": "Thing", "MANAGED": "Thing"}}, {"label": "Skill", "attributes": {"name": "STRING indexed"}, "relationships": {}}, {"label": "Thing", "attributes": {"in_progress": "BOOLEAN", "name": "STRING indexed", "internal_project": "BOOLEAN"}, "relationships": {"IN": "Domain", "OF": "WorkType"}}, {"label": "Domain", "attributes": {"name": "ST

Here's an initial answer based on your request and current skill coverage:

## Top Candidates for the AI Tiger Team
Based on previous successful collaboration on AI/Data projects and relevant skills (including Python, Data Engineering, ML/DL/NLP, plus DevOps/Cloud expertise partial for running on Google ADK), these people have worked together delivering projects:

| Name                | Title                       | Key AI Skills                                         | Delivered Projects w/ Each Other                                      |
|---------------------|----------------------------|-------------------------------------------------------|-----------------------------------------------------------------------|
| Jennifer Park       | Data Engineering Manager    | Python, SQL, AWS, Data Engineering, Mgmt, Leadership  | "Supply Chain Optimization Engine", "Customer 360 Platform", "Data Warehouse Migration" (with Monica, Sarah, Robert) |
| Sarah Chen          | Senior AI Engineer          | Python, ML, NLP, Deep Learning, Docker, CV, AWS       | "Supply Chain Optimization Engine", "Edge Computing AI Platform"      |
| Robert Johnson      | Security Engineer           | Python                                                | "Supply Chain Optimization Engine"                                    |
| Monica Garcia       | Senior Database Admin       | Python, SQL, Data Engineering, Team Management         | "Customer 360 Platform", "Data Warehouse Migration"                  |
| Dr. Amanda Foster   | Principal Research Scientist| Python, ML, DL, NLP, Computer Vision                  | "Edge Computing AI Platform"                                         |
| David Kim           | DevOps Platform Engineer    | Python, AWS, Docker, Data Eng, Cloud Architecture     | "Edge Computing AI Platform"                                         |

These individuals have repeatedly shipped AI/Data projects together, increasing your likelihood of speedy effective teaming.

## Skill Gaps for Google ADK and Langchain
- There are **no explicit "Google ADK" or "Langchain" skills** among the current skills database. The closest matches are strong backgrounds in Python, Cloud Architecture (esp. AWS), Data Engineering, Machine Learning, and NLP.
- If your chatbot project involves new Google ADK APIs/environments or requires intensive Langchain knowledge, you should plan **upskilling** or onboarding **external specialists** for those specific tools/frameworks.

## Recommendations
- Form your core tiger team with people above for best collaboration/productivity.
- Invest in rapid training/enablement for Google ADK and Langchain for the team.
- Consider short-term consultancy or pairing with experts in those frameworks until in-house expertise ramps up.

---

**Let me know if you want a detailed social/project network graph, skill charts, or tailored training plans for closing the skill gap!**

In [11]:
res = await run_prompt("Do we have any product managers to help scope maybe?")
print("\n\n\n\nFinal Response:")
display(Markdown(res))

None id='call_NELwnrPFLv7IYTJxhiJMKKzd' args={'query': "MATCH (p:Person) WHERE toLower(p.current_title) CONTAINS 'product manager'\nOPTIONAL MATCH (p)-[:KNOWS]->(s:Skill)\nRETURN p.id, p.name, p.current_title, COLLECT(s.name) AS skills"} name='read_neo4j_cypher' None
None None will_continue=None scheduling=None id='call_NELwnrPFLv7IYTJxhiJMKKzd' name='read_neo4j_cypher' response={'result': CallToolResult(meta=None, content=[TextContent(type='text', text='[{"p.id": "Y7Dbiku6", "p.name": "Rachel Thompson", "p.current_title": "AI Product Manager", "skills": ["Python", "Data Analysis", "Machine Learning", "SQL", "Project Management", "Computer Vision", "Leadership", "Product Strategy", "Product Management"]}, {"p.id": "ecjfjrSQ", "p.name": "Omar Ibrahim", "p.current_title": "Technical Product Manager", "skills": ["Python", "Data Analysis", "SQL", "JavaScript", "Project Management", "Team Management", "Business Intelligence", "Product Strategy", "Product Management"]}]', annotations=None)],

Yes, you have product managers who can help with project scoping:

1. **Rachel Thompson** — AI Product Manager
   - Skills: Python, Data Analysis, Machine Learning, Product Management, Product Strategy, Project Management, SQL, Leadership, Computer Vision

2. **Omar Ibrahim** — Technical Product Manager
   - Skills: Python, Data Analysis, Product Management, Product Strategy, Business Intelligence, Project Management, SQL, JavaScript, Team Management

Both Rachel and Omar have a strong background in product management as well as technical understanding. Their skills in AI, Data Analysis, and leadership make them well-suited to help define and scope your chatbot project.

Would you like to see which projects they’ve delivered, or identify who of the core tiger team has already worked with them before?