In [None]:
%pip install --upgrade --quiet  langchain langchain-community langchain-experimental langchain-ollama

Load the papers classified as modeling papers into LangChain Document objects, for later use in the pipeline.

In [None]:
import pandas as pd
from genscai import paths
from langchain_core.documents import Document

# Read the classified modeling papers; the file is parsed as line-delimited
# JSON records (lines=True).
df_modeling_papers = pd.read_json(
    paths.data / "modeling_papers_0.json",
    orient="records",
    lines=True,
)

# Wrap each row's abstract in a Document, carrying the row's id along with it.
documents = [
    Document(id=paper.id, page_content=paper.abstract)
    for paper in df_modeling_papers.itertuples()
]

f"Papers loaded: {len(documents)}"

In [None]:
import json
from genscai import paths
from langchain_core.documents import Document

# Read the training set of modeling papers. The encoding is given explicitly:
# without it, open() decodes with the platform's locale encoding, which can
# mis-decode non-ASCII characters in the abstracts on non-UTF-8 platforms.
with open(paths.data / "training_modeling_papers.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# NOTE(review): this rebinds `documents`, replacing the list built from
# modeling_papers_0.json earlier in the notebook. No `id` is passed here, so
# anything downstream that reads the Document id gets the default value —
# confirm whether the training file carries an id field that should be used.
documents = [Document(page_content=row["abstract"]) for row in data]

f"Papers loaded: {len(documents)}"

In [None]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_ollama.llms import OllamaLLM

# Ollama-served model used for extraction; temperature is pinned to 0.0.
llm = OllamaLLM(model="gemma3:12b", temperature=0.0)

# Build one transformer, restricted to the node types of interest. These
# labels are handed to the transformer as the permitted node types, so their
# spelling matters: they are what shows up as `node.type` in the results.
transformer = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=[
        "Disease Modeling Goal",
        "Disease Modeling Technique",
        "Disease Model Data Requirement",
        "Disease Modeled",
        "Geographic Location",
    ],
    # NOTE(review): an empty list presumably leaves relationship types
    # unconstrained — confirm against the LLMGraphTransformer documentation.
    allowed_relationships=[],
)

# Process a subset of the documents as a test; raise or remove the limit for
# a full run.
TEST_SUBSET_SIZE = 10
graph_documents = transformer.convert_to_graph_documents(documents[:TEST_SUBSET_SIZE])

f"Documents processed: {len(graph_documents)}"

In [None]:
# Dump the extraction results, skipping documents that produced no nodes.
for doc in graph_documents:
    if len(doc.nodes) == 0:
        continue

    # Source paper first, so the extracted items can be read against it.
    print(f"Paper ID: {doc.source.id}")
    print(f"Paper Abstract: {doc.source.page_content}")

    # Every extracted node with its type.
    for node in doc.nodes:
        print(f"Node: {node.id}, Type: {node.type}")

    # Every extracted relationship with both endpoints.
    for rel in doc.relationships:
        print(f"Relationship: {rel.type}")
        print(f"   Source: {rel.source.id}, Type: {rel.source.type}")
        print(f"   Target: {rel.target.id}, Type: {rel.target.type}")

    # Blank line between papers.
    print()