# Import Libraries

In [4]:
import pickle

from groq import BadRequestError
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_community.chat_models import ChatOllama
from langchain_groq import ChatGroq
from dotenv import load_dotenv

load_dotenv()

True

## Load Text 

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter  # Split Document into Chunks
from langchain_community.document_loaders import DirectoryLoader  # Load md files

markdown_path = "data/markdowns"
loader = DirectoryLoader(markdown_path, glob="*.md")
documents = loader.load()
print(f"Loaded {len(documents)} documents from {markdown_path}")

chunk_size = 1250
chunk_overlap = 100

text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap,
                                               length_function=len, add_start_index=True, keep_separator=False,
                                               strip_whitespace=True)
chunks = text_splitter.split_documents(documents)
print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

document = chunks[10]
print(document.page_content)
print(document.metadata)

Loaded 1 documents from data/markdowns
Split 1 documents into 92 chunks.
The vast majority of the studied to date hydrogen storage HEAs belong to the solid solution type alloys containing the components with a high affinity to hydrogen (Ti, Zr, Nb, Hf, Ta, V). These alloys form very stable hydrides and require inconveniently high temperatures, above 400°C, to release the absorbed H2 thus limiting application potential of their hydrides.

Hydrogen storage HEAs also include totally different materials, namely AB5- [31] and AB2-type [28,34] intermetallics characterized by easily achievable and convenient operational P-T conditions allowing to reach a reversibility of the hydride formation and decomposition. In some specific cases hydrogen absorption, even being limited in the H storage capacity, proceeds also for the alloys which do not contain hydride-forming components [36]. In general, the reaction mechanism during the hydride formation by HEAs is not sufficiently well studied yet and 

## Create Graph Documents

In [17]:
llm = ChatGroq(model_name="llama-3.1-70b-versatile")
# llm = ChatOllama(model_name="llama3.1:70b")

with open('extract_instruction.txt') as f:
    extraction_instruction = ChatPromptTemplate(f.read())

llm_transformer = LLMGraphTransformer(llm=llm, allowed_nodes=["ALLOY", "ELEMENT", "PROPERTY_NAME", "PROPERTY_VALUE"],
                                      strict_mode=True,
                                      allowed_relationships=["HAS", "CONTAINS", "SIMILAR", "IS", "IS_A", "IS_PART_OF",
                                                             "IS_USED_FOR", "IS_USED_IN", "IS_USED_WITH", "IS_USED_AS",
                                                             "IS_USED_BY", "IS_USED_ON", "IS_USED_TO", "IS_USED_WITH",
                                                             "IS_USED_WITHIN", "IS_USED_WITHOUT", "MEASURED_AT",
                                                             "SYNTHESIZED_BY", "AFFECTS", "COMPARED_TO"],
                                      prompt=extraction_instruction)  # Relationships that elements can have

In [19]:
for i in range(5):
    try:
        graph_documents = llm_transformer.convert_to_graph_documents(chunks)
    except KeyboardInterrupt:
        print("Interrupted")  # If interrupted, break loop
        break
    except:
        print(f"Failed to convert to graph documents")
        continue
    else:
        print(graph_documents[0])
        break


Failed to convert to graph documents
Failed to convert to graph documents
Failed to convert to graph documents
Failed to convert to graph documents
Failed to convert to graph documents


## Save Graph Documents

In [16]:
with open("Nodes.bin", "wb") as f:  # Save graph_docs obj to file
    pickle.dump(graph_documents, f)

NameError: name 'graph_documents' is not defined

## Graph to Neo4j

In [6]:
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.graphs import Neo4jGraph
import os
from dotenv import load_dotenv

load_dotenv()

True

In [20]:
with open("Nodes.bin", "rb") as f:  # Load graph_docs from file
    retrieved_graph_documents = pickle.load(f)

In [26]:
graph = Neo4jGraph()
graph.add_graph_documents(
    retrieved_graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [9]:
def showGraph():
    driver = GraphDatabase.driver(
        uri=os.environ["NEO4J_URI"],
        auth=(os.environ["NEO4J_USERNAME"],
              os.environ["NEO4J_PASSWORD"]))
    session = driver.session()
    widget = GraphWidget(graph=session.run("MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t").graph())
    widget.node_label_mapping = 'id'
    return widget


showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))