# Lib imports

In [38]:
import os

import autoroot #noqa
from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship
from langchain_core.documents import Document
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI

from hextractor.integrations.langchain_graphdoc import convert_graph_document_to_hetero_data

# LLM config

In [None]:
# Endpoint and credentials are taken from the environment when available, so
# real secrets never have to be pasted into (and committed with) the notebook.
# The placeholder strings remain the fallback values when the variables are unset.
base_url = os.environ.get("OPENAI_BASE_URL", "YOUR_BASE_URL")
api_key = os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY")

# Chat model handed to the graph transformer in the cells below.
llm = ChatOpenAI(base_url=base_url, api_key=api_key)

# LLM Graph Transformer

In [7]:
# Graph transformer built with only the LLM; no other constructor options are passed here.
no_schema = LLMGraphTransformer(llm=llm)



In [9]:
# Sample passage fed to the graph transformer; the line breaks are part of the text.
text = (
    "Filip Wójcik and Marcin Malczewski are data scientists, who developed HeXtractor. It is a library\n"
    "that helps in extracting heterogeneous knowledge graphs from various data source.\n"
    "Heterogeneous knowledge graphs are graphs that contain different types of nodes and edges."
)

# The transformer is given a list of documents; here it holds just this one passage.
docs = [Document(page_content=text)]

In [10]:
# Await the transformer's async conversion of `docs`; the result is kept in `data`.
data = await no_schema.aconvert_to_graph_documents(docs)

In [11]:
# Show the conversion result as the cell's output.
data

[GraphDocument(nodes=[Node(id='Filip Wójcik', type='Person', properties={}), Node(id='Marcin Malczewski', type='Person', properties={}), Node(id='Hextractor', type='Library', properties={}), Node(id='Heterogeneous Knowledge Graphs', type='Graph', properties={})], relationships=[Relationship(source=Node(id='Filip Wójcik', type='Person', properties={}), target=Node(id='Hextractor', type='Library', properties={}), type='DEVELOPER', properties={}), Relationship(source=Node(id='Marcin Malczewski', type='Person', properties={}), target=Node(id='Hextractor', type='Library', properties={}), type='DEVELOPER', properties={}), Relationship(source=Node(id='Hextractor', type='Library', properties={}), target=Node(id='Heterogeneous Knowledge Graphs', type='Graph', properties={}), type='HELPS_EXTRACT', properties={})], source=Document(metadata={}, page_content='Filip Wójcik and Marcin Malczewski are data scientists, who developed HeXtractor. It is a library\nthat helps in extracting heterogeneous kno

In [16]:
# Take the first entry of the conversion result and list its nodes, one per line.
graph_doc = data[0]
for node_repr in map(str, graph_doc.nodes):
    print(node_repr)

id='Filip Wójcik' type='Person' properties={}
id='Marcin Malczewski' type='Person' properties={}
id='Hextractor' type='Library' properties={}
id='Heterogeneous Knowledge Graphs' type='Graph' properties={}


In [17]:
# List every relationship of the graph document, one per line.
for rel_repr in map(str, graph_doc.relationships):
    print(rel_repr)

source=Node(id='Filip Wójcik', type='Person', properties={}) target=Node(id='Hextractor', type='Library', properties={}) type='DEVELOPER' properties={}
source=Node(id='Marcin Malczewski', type='Person', properties={}) target=Node(id='Hextractor', type='Library', properties={}) type='DEVELOPER' properties={}
source=Node(id='Hextractor', type='Library', properties={}) target=Node(id='Heterogeneous Knowledge Graphs', type='Graph', properties={}) type='HELPS_EXTRACT' properties={}


In [52]:
# Convert the LangChain graph document with HeXtractor's integration helper
# (imported in the notebook's imports cell); the result is displayed in the next cell.
hetero_data = convert_graph_document_to_hetero_data(graph_doc)


In [53]:
# Show the converted object as the cell's output.
hetero_data

HeteroData(
  Person={ x=[2, 1] },
  Library={ x=[1, 1] },
  Graph={ x=[1, 1] },
  (Library, HELPS_EXTRACT, Graph)={ edge_index=[2, 1] },
  (Person, DEVELOPER, Library)={ edge_index=[2, 2] }
)

In [54]:
# Hand-built graph document that is deliberately inconsistent: "Elon Musk" is
# used as a relationship target below but is left out of `nodes`.
node_michael, node_pam, node_elon_musk = (
    Node(id=person_name, type="Person")
    for person_name in ("Michael Scott", "Pam", "Elon Musk")
)
doc = Document(page_content="Michael Scott knows Pam and Elon Musk.")

invalid_gd = GraphDocument(
    nodes=[node_michael, node_pam],
    relationships=[
        Relationship(source=node_michael, target=acquaintance, type="knows")
        for acquaintance in (node_pam, node_elon_musk)
    ],
    source=doc,
)

In [55]:
# The conversion is expected to reject `invalid_gd`, because one relationship
# targets a node that is missing from its `nodes` list. Catch the error and
# print it so the failure is shown without aborting a "Run All" of the notebook.
try:
    invlid_hetero_g = convert_graph_document_to_hetero_data(invalid_gd)
except ValueError as conversion_error:
    print(f"{type(conversion_error).__name__}: {conversion_error}")

ValueError: Unknown target node: Elon Musk of type Person