In [1]:
from neo4j import GraphDatabase
import os
import json
from langchain_openai import ChatOpenAI

In [None]:
uri = os.environ["NEO4J_URI"]
user=os.environ["NEO4J_USERNAME"]
password = os.environ["NEO4J_PASSWORD"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] 

driver = GraphDatabase.driver(uri, auth=(user, password))
llm = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=OPENAI_API_KEY)

In [4]:
from langchain.embeddings import OpenAIEmbeddings
embedder = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)  # or your desired model



  embedder = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)  # or your desired model


In [5]:
# Function to load JSON data
def load_knowledge_graph(json_file_path):
    with open(json_file_path, 'r') as file:
        return json.load(file)

In [6]:
# def create_node(tx, node):
#     attributes = node.get("attributes", {})
#     attributes_str = ", ".join([f"{key}: ${key}" for key in attributes.keys()])
#     label = f"`{node['label'].replace(' ', '_').replace('-', '_')}`"  # Replace spaces and dashes
#     query = f"""
#     MERGE (n:{label} {{id: $id}})
#     SET n += {{{attributes_str}}}
#     """
#     tx.run(query, id=node["id"], **attributes)

# def create_node(tx, node):
#     attributes = node.get("attributes", {})
#     attributes["documentId"] = node.get("documentId")  # Include top-level documentId into attributes

#     # Safely escape property keys using backticks
#     attributes_str = ", ".join([f"`{key}`: ${key}" for key in attributes.keys()])
#     label = f"`{node['label'].replace(' ', '_').replace('-', '_')}`"

#     query = f"""
#     MERGE (n:{label} {{id: $id}})
#     SET n += {{{attributes_str}}}
#     """
#     tx.run(query, id=node["id"], **attributes)
    
def create_node(tx, node):
    attributes = node.get("attributes", {})
    attributes["documentId"] = node.get("documentId")  # Ensure documentId is present

    # ✅ Choose meaningful text for embedding — fallback to ID if name/description not present
    text_for_embedding = attributes.get("name") or attributes.get("description") or node.get("id")
    if text_for_embedding:
        embedding_vector = embedder.embed_query(text_for_embedding)
        attributes["embedding"] = embedding_vector  # Add embedding to attributes

    # ✅ Prepare Cypher-safe property keys
    attributes_str = ", ".join([f"`{key}`: ${key}" for key in attributes.keys()])
    label = f"`{node['label'].replace(' ', '_').replace('-', '_')}`"

    query = f"""
    MERGE (n:{label} {{id: $id}})
    SET n += {{{attributes_str}}}
    """
    tx.run(query, id=node["id"], **attributes)





In [7]:
# def create_relationship(tx, relationship):
#     # Ensure 'type' key exists in the relationship
#     if "type" not in relationship:
#         print(f"Skipping relationship due to missing 'type': {relationship}")
#         return  # Skip this relationship

#     attributes = relationship.get("attributes", {})
#     attributes_str = ", ".join([f"{key}: ${key}" for key in attributes.keys()])
#     rel_type = f"`{relationship['type'].replace(' ', '_').replace('-', '_')}`"  # Replace spaces and dashes
#     query = f"""
#     MATCH (a {{id: $source}}), (b {{id: $target}})
#     MERGE (a)-[r:{rel_type}]->(b)
#     {"SET r += {" + attributes_str + "}" if attributes_str else ""}
#     """
#     # Ensure 'source' and 'target' exist before running the query
#     if "source" not in relationship or "target" not in relationship:
#         print(f"Skipping relationship due to missing 'source' or 'target': {relationship}")
#         return  # Skip this relationship

#     tx.run(query, source=relationship["source"], target=relationship["target"], **attributes)

def create_relationship(tx, relationship):
    if "type" not in relationship or "source" not in relationship or "target" not in relationship:
        print(f"Skipping relationship due to missing fields: {relationship}")
        return

    attributes = relationship.get("attributes", {})
    attributes["documentId"] = relationship.get("documentId")  # Include top-level documentId into attributes

    attributes_str = ", ".join([f"{key}: ${key}" for key in attributes.keys()])
    rel_type = f"`{relationship['type'].replace(' ', '_').replace('-', '_')}`"

    query = f"""
    MATCH (a {{id: $source}}), (b {{id: $target}})
    MERGE (a)-[r:{rel_type}]->(b)
    {"SET r += {" + attributes_str + "}" if attributes_str else ""}
    """
    tx.run(query, source=relationship["source"], target=relationship["target"], **attributes)


In [8]:
def store_knowledge_graph(driver, graph):
    with driver.session() as session:
        for node in graph["nodes"]:
            if "id" not in node or "label" not in node:
                print(f"[WARNING] Skipping node with missing 'id' or 'label': {node}")
                continue
            session.write_transaction(create_node, node)

        for relationship in graph["relationships"]:
            session.write_transaction(create_relationship, relationship)


In [9]:
# Load the knowledge graph data from a JSON file
json_file_path = "/home/sbhavsar/PoisonedRAG/after_seminar_small_kg/jsons/updated_output_file.json"  # Path to your JSON file
with open(json_file_path, "r") as file:
    knowledge_graph = json.load(file)

# Store the knowledge graph in Neo4j
try:
    store_knowledge_graph(driver, knowledge_graph)
    print("Knowledge graph stored in Neo4j successfully!")
finally:
    driver.close()

  session.write_transaction(create_node, node)


  session.write_transaction(create_relationship, relationship)


Knowledge graph stored in Neo4j successfully!


In [10]:
def get_all_node_labels():
    with driver.session() as session:
        result = session.run("CALL db.labels()")
        return [record["label"] for record in result]

In [11]:
# Example usage
labels = get_all_node_labels()
print("Node Labels:", labels)

  with driver.session() as session:


Node Labels: ['song', 'person', 'film', 'group', 'television_series', 'event', 'organization', 'agreement', 'project', 'country', 'place', 'financial_concept', 'standard', 'regulation', 'character', 'work', 'album', 'religion', 'artifact', 'concept', 'economic_system', 'brand', 'social_campaign', 'treaty', 'disease', 'organizational_group', 'legal_system', 'economic_policy', 'mythical_creature', 'award', 'television_program', 'game', 'chart', 'political_position', 'molecule', 'enzyme', 'cellular_structure', 'protein_complex', 'advertisement', 'anatomical_structure', 'medical_procedure', 'social_trend', 'cultural_belief', 'restaurant', 'artist', 'ship', 'plant', 'attraction', 'ritual', 'element', 'organism']
