In [1]:
from dotenv import load_dotenv
import os
# Load settings from a .env file (if present) into the process environment,
# then look up the OpenAI key; the lookup yields None when the variable is unset.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

In [2]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI

# Chat model shared by every graph transformer in this notebook.
# temperature=0 keeps sampling randomness as low as the API allows.
llm = ChatOpenAI(
    model_name="gpt-4-turbo",
    temperature=0,
)

# Baseline transformer: no allowed_nodes / allowed_relationships are passed here.
llm_transformer = LLMGraphTransformer(llm=llm)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from langchain_core.documents import Document

# Sample biography that is wrapped in a Document and fed to the graph transformer below.
text = """
Mengqing Hu, born in 2001, is a Chinese software engineer and researcher currently pursuing a Master’s degree in Computational Modeling and Simulation (Visual Computing) at TU Dresden.
She has worked as a student assistant at the Fraunhofer Institute for Machine Tools and Forming Technology (IWU) and the Institute of Mechatronic Engineering at TU Dresden, contributing to projects on CNC data visualization and deep learning for additive manufacturing (OCT image analysis).
Before that, she was a team member at FSD Fahrzeugsystemdaten GmbH, developing semantic search pipelines and integrating large language models (LLMs) for knowledge retrieval and web applications using Django and LangChain.
Her earlier internships in China focused on web development and big data processing using SpringBoot, Vue, and Hadoop.
She is known for her adaptability, analytical thinking, and enthusiasm for machine learning, computer vision, and AI-driven scientific research.
"""
documents = [Document(page_content=text)]
graph_documents = await llm_transformer.aconvert_to_graph_documents(documents)
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")

Nodes:[Node(id='Mengqing Hu', type='Person', properties={}), Node(id='2001', type='Year', properties={}), Node(id='Chinese', type='Nationality', properties={}), Node(id='Software Engineer', type='Profession', properties={}), Node(id='Researcher', type='Profession', properties={}), Node(id='Master’S Degree In Computational Modeling And Simulation (Visual Computing)', type='Degree', properties={}), Node(id='Tu Dresden', type='Institution', properties={}), Node(id='Fraunhofer Institute For Machine Tools And Forming Technology (Iwu)', type='Institution', properties={}), Node(id='Institute Of Mechatronic Engineering At Tu Dresden', type='Institution', properties={}), Node(id='Fsd Fahrzeugsystemdaten Gmbh', type='Company', properties={}), Node(id='China', type='Country', properties={}), Node(id='Web Development', type='Field', properties={}), Node(id='Big Data Processing', type='Field', properties={}), Node(id='Machine Learning', type='Field', properties={}), Node(id='Computer Vision', type=

In [4]:
from pyvis.network import Network
import os

def visualize_graph(graph_documents, output_file="knowledge_graph.html", open_browser=True):
    """Render the first graph document as an interactive pyvis HTML page.

    Only relationships whose source and target ids both appear in the
    document's node list are drawn, and only nodes that take part in at
    least one such relationship are added; isolated nodes are left out.

    Args:
        graph_documents: Sequence of graph documents. Each one exposes
            ``nodes`` (objects with ``id`` and ``type``) and
            ``relationships`` (objects with ``source``, ``target`` and
            ``type``). Only the first element is visualized.
        output_file: Path of the HTML file to write. Defaults to
            ``"knowledge_graph.html"`` in the current working directory.
        open_browser: When true, try to open the saved file in the default
            web browser after writing it.

    Returns:
        None. The function prints the absolute path of the saved file.
    """
    from pathlib import Path

    # Nothing to draw: report it instead of failing on graph_documents[0].
    if not graph_documents:
        print("No graph documents to visualize")
        return

    output_file = os.fspath(output_file)

    # Create Network
    net = Network(height="1200px", width="100%", directed=True,
                  notebook=False, bgcolor="#222222", font_color="white")

    # Extract nodes and relationships
    first_document = graph_documents[0]
    node_dict = {node.id: node for node in first_document.nodes}

    # Keep only edges whose two endpoints are known nodes
    valid_edges = [
        rel for rel in first_document.relationships
        if rel.source.id in node_dict and rel.target.id in node_dict
    ]
    valid_node_ids = {
        node_id
        for rel in valid_edges
        for node_id in (rel.source.id, rel.target.id)
    }

    # Add valid nodes. Iterating the dict (not the set) keeps the insertion
    # order stable between runs.
    for node_id, node in node_dict.items():
        if node_id not in valid_node_ids:
            continue
        try:
            net.add_node(node.id, label=node.id, title=node.type, group=node.type)
        except Exception as exc:
            # Best effort: one bad node must not abort the whole rendering.
            print(f"Skipping node {node_id!r}: {exc}")

    # Add valid edges
    for rel in valid_edges:
        try:
            net.add_edge(rel.source.id, rel.target.id, label=rel.type.lower())
        except Exception as exc:
            # Best effort: one bad edge must not abort the whole rendering.
            print(f"Skipping edge {rel.source.id!r} -> {rel.target.id!r}: {exc}")

    # Configure physics and layout
    net.set_options("""
    const options = {
      "physics": {
        "forceAtlas2Based": {
          "gravitationalConstant": -100,
          "centralGravity": 0.01,
          "springLength": 200,
          "springConstant": 0.08
        },
        "minVelocity": 0.75,
        "solver": "forceAtlas2Based"
      }
    }
    """)

    # Save visualization
    net.save_graph(output_file)
    absolute_path = os.path.abspath(output_file)
    print(f"Graph saved to {absolute_path}")

    # Try to open in browser
    if open_browser:
        import webbrowser

        try:
            # as_uri() builds a well-formed, percent-encoded file:// URL on
            # every platform (no doubled slash on POSIX paths).
            opened = webbrowser.open(Path(absolute_path).as_uri())
        except Exception:
            opened = False
        if not opened:
            print("Could not open browser automatically")


# Render the graph extracted by the unconstrained transformer.
visualize_graph(graph_documents)


Graph saved to /Users/humengqing/Documents/Code/VSCode/NLP/KnowledgeGraphs/knowledge_graph.html


In [5]:
llm_transformer_filtered = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=["Person","Organization"],
    allowed_relationships=["WORKED_AT"],
)
graph_documents_filtered = await llm_transformer_filtered.aconvert_to_graph_documents(
    documents
)
print(f"Nodes:{graph_documents_filtered[0].nodes}")
print(f"Relationships:{graph_documents_filtered[0].relationships}")



Nodes:[Node(id='Mengqing Hu', type='Person', properties={}), Node(id='Tu Dresden', type='Organization', properties={}), Node(id='Fraunhofer Institute For Machine Tools And Forming Technology (Iwu)', type='Organization', properties={}), Node(id='Institute Of Mechatronic Engineering At Tu Dresden', type='Organization', properties={}), Node(id='Fsd Fahrzeugsystemdaten Gmbh', type='Organization', properties={})]
Relationships:[Relationship(source=Node(id='Mengqing Hu', type='Person', properties={}), target=Node(id='Tu Dresden', type='Organization', properties={}), type='WORKED_AT', properties={}), Relationship(source=Node(id='Mengqing Hu', type='Person', properties={}), target=Node(id='Fraunhofer Institute For Machine Tools And Forming Technology (Iwu)', type='Organization', properties={}), type='WORKED_AT', properties={}), Relationship(source=Node(id='Mengqing Hu', type='Person', properties={}), target=Node(id='Institute Of Mechatronic Engineering At Tu Dresden', type='Organization', prop

In [6]:
# Render the schema-filtered graph.
# NOTE(review): the saved path printed below is the same knowledge_graph.html
# as for the earlier call, so that earlier visualization is overwritten.
visualize_graph(graph_documents_filtered)

Graph saved to /Users/humengqing/Documents/Code/VSCode/NLP/KnowledgeGraphs/knowledge_graph.html


#### Neo4j

In [7]:
from langchain_neo4j import Neo4jGraph
# Connection details are read from the environment so no credentials live in
# the notebook; each lookup yields None when its variable is unset.
graph = Neo4jGraph(
    url=os.environ.get("NEO4J_URI"),
    username=os.environ.get("NEO4J_USERNAME"),
    password=os.environ.get("NEO4J_PASSWORD"),
)

# Store the unfiltered extraction result in the database.
graph.add_graph_documents(graph_documents)

In [8]:
# Fetch up to 25 nodes carrying the `Field of study` label
# (backticks are needed because the label contains spaces).
customer_query = "MATCH (n:`Field of study`) RETURN n LIMIT 25;"
results = graph.query(customer_query)
print(results)

[{'n': {'id': 'Computational Modeling And Simulation (Visual Computing)'}}]


In [9]:
# Fetch up to 25 paths made of a single WORKED_AT relationship.
customer_query = "MATCH p=()-[:WORKED_AT]->() RETURN p LIMIT 25;"
results = graph.query(customer_query)
print(results)

[{'p': [{'id': 'Mengqing Hu'}, 'WORKED_AT', {'id': 'Fraunhofer Institute For Machine Tools And Forming Technology (Iwu)'}]}, {'p': [{'id': 'Mengqing Hu'}, 'WORKED_AT', {'id': 'Institute Of Mechatronic Engineering At Tu Dresden'}]}, {'p': [{'id': 'Mengqing Hu'}, 'WORKED_AT', {'id': 'Fsd Fahrzeugsystemdaten Gmbh'}]}, {'p': [{'id': 'Mengqing Hu'}, 'WORKED_AT', {'id': 'Fraunhofer Institute For Machine Tools And Forming Technology (Iwu)'}]}, {'p': [{'id': 'Mengqing Hu'}, 'WORKED_AT', {'id': 'Institute Of Mechatronic Engineering At Tu Dresden'}]}, {'p': [{'id': 'Mengqing Hu'}, 'WORKED_AT', {'id': 'Fsd Fahrzeugsystemdaten Gmbh'}]}]
