In [1]:
import getpass
import os
from dotenv import load_dotenv

# Pull connection settings from nb.env; override=True lets the file's values
# replace anything already present in the process environment.
load_dotenv('nb.env', override=True)

# Prompt interactively for any Neo4j setting that is still unset or empty,
# then store the answer back into the environment.
for setting in ('NEO4J_URI', 'NEO4J_USERNAME', 'NEO4J_PASSWORD'):
    if not os.environ.get(setting):
        os.environ[setting] = getpass.getpass(f'{setting}:\n')

# Expose the resolved settings as notebook-level constants for later cells.
NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD = (
    os.getenv(key) for key in ('NEO4J_URI', 'NEO4J_USERNAME', 'NEO4J_PASSWORD')
)

In [19]:
import pandas as pd

# Load the structured HRIS project-assignment table.
df = pd.read_csv('hris-tables/project-assignments.csv')

# A missing end_date marks an assignment that is still running; capture that
# flag before the gap is replaced with the literal label "present".
still_running = df['end_date'].isna()
end_labels = df['end_date'].fillna("present")

# Derived columns:
#   in_progress - True where the original end_date was missing
#   end_date    - original value, or "present" when it was missing
#   duration    - "<start_date> - <end_date>" display string
#   year        - first four characters of end_date, or None for "present"
df = df.assign(
    in_progress=still_running,
    end_date=end_labels,
    duration=df['start_date'] + " - " + end_labels,
    year=end_labels.apply(
        lambda end: None if end.lower() == "present" else end[:4]
    ),
)
df.head()

Unnamed: 0,project_name,role_title,allocation_percentage,start_date,end_date,project_domain,project_type,accomplishment_type,person_id,in_progress,duration,year
0,Supply Chain Optimization Engine,AI Consultant,30.0,2018-03-01,2018-11-30,AI,PRODUCT,BUILT,xRPBlhk9,False,2018-03-01 - 2018-11-30,2018.0
1,Automated Content Moderation,Senior ML Engineer,60.0,2020-02-01,2021-08-31,AI,PRODUCT,BUILT,xRPBlhk9,False,2020-02-01 - 2021-08-31,2021.0
2,Edge Computing AI Platform,Principal Architect,80.0,2022-01-01,2023-12-31,AI,INFRASTRUCTURE,BUILT,xRPBlhk9,False,2022-01-01 - 2023-12-31,2023.0
3,Quantum-Classical Hybrid Research,Research Lead,40.0,2024-01-01,present,AI,RESEARCH,LED,xRPBlhk9,True,2024-01-01 - present,
4,Employee Self-Service Portal,Full-Stack Developer,90.0,2020-06-01,2021-12-31,WEB,PRODUCT,BUILT,kkkMTAId,False,2020-06-01 - 2021-12-31,2021.0


In [20]:
from neo4j import GraphDatabase

# Open a Neo4j driver using the URI and credentials resolved in the
# environment-setup cell; later cells reuse this `driver` object.
neo4j_credentials = (NEO4J_USERNAME, NEO4J_PASSWORD)
driver = GraphDatabase.driver(NEO4J_URI, auth=neo4j_credentials)

# Smoke-test the connection by counting every node in the graph; the query
# result is the cell's displayed output.
driver.execute_query("MATCH(n) RETURN count(n)")

EagerResult(records=[<Record count(n)=115>], summary=<neo4j._work.summary.ResultSummary object at 0x10b7d3e90>, keys=['count(n)'])

In [22]:
from neo4j import RoutingControl


#load structured data
def chunks(xs, n=10):
    """Split a sliceable sequence into consecutive batches.

    Args:
        xs: Any object supporting ``len()`` and slicing (list, str, ...).
        n: Desired batch size; values below 1 are treated as 1.

    Returns:
        A list of slices of ``xs`` in original order. Every slice has ``n``
        items except possibly the last, which holds the remainder. An empty
        ``xs`` yields an empty list.
    """
    size = max(1, n)
    batches = []
    for start in range(0, len(xs), size):
        batches.append(xs[start:start + size])
    return batches

# Cypher run once per batch of assignment rows (bound to $records):
#   * MATCH the existing Person by id -- rows with no matching Person are
#     dropped by the MATCH and therefore not counted in the result.
#   * MERGE the Thing (project) by name and a relationship whose type is taken
#     dynamically from the row's accomplishment_type, then set year/role/
#     duration on the relationship and in_progress/internal_project on the Thing.
#   * MERGE the Domain and WorkType nodes and link the Thing to each.
#   * RETURN how many rows made it through as `records_upserted`.
ASSIGNMENT_UPSERT_CYPHER = """
        UNWIND $records AS rec

        //match people
        MATCH(person:Person {id:rec.person_id})

        //merge accomplishments
        MERGE(thing:Thing {name:rec.project_name})
        MERGE(person)-[r:$(rec.accomplishment_type)]->(thing)
        SET r.year = rec.year,
            r.role  = rec.role_title,
            r.duration = rec.duration,
            thing.in_progress = rec.in_progress,
            thing.internal_project=true

        //merge domain and work type
        MERGE(Domain:Domain {name:rec.project_domain})
        MERGE(thing)-[:IN]->(Domain)
        MERGE(WorkType:WorkType {name:rec.project_type})
        MERGE(thing)-[:OF]->(WorkType)

        RETURN count(rec) AS records_upserted
        """


def _rows_as_dicts(result):
    """Return the query result's rows via ``result.data()`` (a list of dicts)."""
    return result.data()


# One dict per DataFrame row, keyed by column name.
assignment_rows = df.to_dict(orient='records')

# Send the rows in batches of chunks()'s default size, printing the per-batch
# summary. No database_ argument is passed, so the driver's default applies.
# `chunk` and `records` keep their original names because, at notebook top
# level, they remain visible in the kernel namespace after the loop.
for chunk in chunks(assignment_rows):
    records = driver.execute_query(
        ASSIGNMENT_UPSERT_CYPHER,
        records=chunk,
        routing_=RoutingControl.WRITE,
        result_transformer_=_rows_as_dicts,
    )
    print(records)

[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
[{'records_upserted': 10}]
