# Module objectives
- Exact lookups
- Semantic search/vector index
- Semantic/similar skill expansion
- The R in GraphRAG (and maybe agents)

In [None]:
!pip install graphdatascience neo4j dotenv

# Setup

Import our usual suspects

In [2]:
import os
import pandas as pd
from dotenv import load_dotenv
from graphdatascience import GraphDataScience
from neo4j import Query, GraphDatabase, RoutingControl, Result

Load env variables

In [3]:
# Read workshop settings from ws.env. override=True lets values from the file
# replace variables that already exist in the process environment.
load_dotenv('ws.env', override=True)

# Neo4j connection settings
HOST = os.getenv('HOST')
USERNAME = os.getenv('USERNAME')
PASSWORD = os.getenv('PASSWORD')
DATABASE = os.getenv('DATABASE')

# AI settings
LLM = os.getenv('LLM')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# os.environ only accepts str values: assigning None (key missing from ws.env)
# would raise TypeError, so export the key only when one was configured.
if OPENAI_API_KEY is not None:
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

Connect to the Neo4j database

In [4]:
# Create the shared driver from the settings loaded above, then check right
# away that the configured database is reachable.
driver = GraphDatabase.driver(HOST, auth=(USERNAME, PASSWORD))
driver.verify_connectivity(database=DATABASE)

  experimental_warn(


## Schema
Kept here in case we need to add more constraints or indexes later.

In [5]:
# Node key constraints for Person and Skill. Each statement uses
# `if not exists`, so the cell can be re-run without erroring on duplicates.
schema_statements = [
    'create constraint if not exists for (n:Person) require (n.email) is node key',
    'create constraint if not exists for (n:Skill) require (n.name) is node key',
]
# Schema changes are writes, so route every statement to a writer.
for statement in schema_statements:
    driver.execute_query(statement, database_=DATABASE, routing_=RoutingControl.WRITE)

# Read back every constraint currently defined and show it as a DataFrame
schema_result_df = driver.execute_query(
    'show constraints',
    database_=DATABASE,
    routing_=RoutingControl.READ,
    result_transformer_=Result.to_df,
)
schema_result_df.head(100)


Unnamed: 0,id,name,type,entityType,labelsOrTypes,properties,ownedIndex,propertyType
0,5,constraint_63bf11a1,NODE_KEY,NODE,[Skill],[name],constraint_63bf11a1,
1,3,constraint_d3bfd313,NODE_KEY,NODE,[Person],[email],constraint_d3bfd313,


# Basic search

In [9]:
# Exact-match lookup: rank people by how many of the requested skills they know.
# `rank` is the number of matched skills per person; `skills` lists everything
# that person knows (not only the matched ones).
# NOTE(review): 'Contineous Delivery' is reproduced verbatim from the original
# cell; presumably it shows that exact matching misses near-miss spellings — confirm.
driver.execute_query(
    '''
    match (p:Person)-[:KNOWS]->(s:Skill)
    where s.name in $skills
    return 
        count(*) as rank, 
        p.email as email, 
        p.name as person_name, 
        collect{ match (p)-[:KNOWS]->(anySkill) return anySkill.name } as skills
    order by rank desc limit 10
    ''',
    parameters_={'skills': ['Contineous Delivery', 'Cloud Native', 'Security']},
    routing_=RoutingControl.READ,
    database_=DATABASE,
    result_transformer_=Result.to_df,
).head(10)

Unnamed: 0,rank,email,person_name,skills
0,1,lucy.turner@test.org,Lucy Turner,"[Security, Express.js, Big Data, Scala, Docker]"
1,1,sophie.jackson@test.org,Sophie Jackson,"[Security, Pandas, Linux, Angular]"
2,1,mia.nelson@test.org,Mia Nelson,"[Security, WordPress, Big Data, Swift, AWS]"
3,1,david.lopez@test.org,David Lopez,"[Security, WordPress, PHP]"
4,1,thomas.brown@test.org,Thomas Brown,"[Security, R, Java, Docker]"
5,1,isabella.allen@test.org,Isabella Allen,"[Security, Scala, Cloud Architecture]"
6,1,olivia.johnson@test.org,Olivia Johnson,"[Security, Angular, CI/CD]"
7,1,amelia.davis@test.org,Amelia Davis,"[Security, PyTorch, Java, HTML5, Docker]"
8,1,emily.phillips@test.org,Emily Phillips,"[Security, Vue.js, PHP, Kubernetes, Data Visua..."
9,1,thomas.nelson@test.org,Thomas Nelson,"[Security, Pandas, Go]"
