In [1]:
%pip install --upgrade --quiet  langchain langchain-community langchain-experimental langchain-ollama

Note: you may need to restart the kernel to use updated packages.


Load the papers classified as modeling papers into LangChain Document objects, for later use in the pipeline.

In [2]:
import pandas as pd
from genscai import paths
from langchain_core.documents import Document

# Read the line-delimited JSON records of papers classified as modeling papers.
df_modeling_papers = pd.read_json(
    paths.data / "modeling_papers_0.json",
    orient="records",
    lines=True,
)

# Wrap every paper's abstract in a LangChain Document, carrying the paper id along.
documents = [
    Document(id=paper.id, page_content=paper.abstract)
    for paper in df_modeling_papers.itertuples()
]

f"Papers loaded: {len(documents)}"

'Papers loaded: 5737'

In [3]:
import json
from genscai import paths
from langchain_core.documents import Document

# Load the training set of modeling papers. JSON text is UTF-8 by specification, so the
# encoding is pinned rather than left to the platform default.
with open(paths.data / "training_modeling_papers.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Wrap each abstract in a LangChain Document. The record's identifier is carried over
# when one is present (None otherwise) so that later cells can report which paper a
# graph was extracted from instead of printing "Paper ID: None".
# NOTE(review): assumes the records use the key "id", like the rows of
# modeling_papers_0.json loaded in the previous cell -- confirm against the file.
documents = [
    Document(
        # Document ids are strings; stringify so numeric ids validate as well.
        id=None if row.get("id") is None else str(row["id"]),
        page_content=row["abstract"],
    )
    for row in data
]

f"Papers loaded: {len(documents)}"

'Papers loaded: 46'

In [4]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_ollama.llms import OllamaLLM

# Local Ollama model used for graph extraction; temperature 0.0 is requested to keep
# sampling randomness out of the extraction.
llm = OllamaLLM(model="gemma3:12b", temperature=0.0)

# Restrict extraction to the node types of interest. The transformer is built once
# (an earlier unconstrained instance was immediately overwritten and has been dropped),
# and the "Disease Modeling Technique" label is spelled correctly because the labels
# are handed to the LLM as the allowed node types.
transformer = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=[
        "Disease Modeling Goal",
        "Disease Modeling Technique",
        "Disease Model Data Requirement",
        "Disease Modeled",
        "Geographic Location",
    ],
    # Empty list: no relationship types are whitelisted here.
    allowed_relationships=[],
)

# Process a subset of the documents as a test
graph_documents = transformer.convert_to_graph_documents(documents[:10])

f"Documents processed: {len(graph_documents)}"

'Documents processed: 10'

In [5]:
# Report every graph document that yielded at least one node: the source paper first,
# then its extracted nodes, then its relationships with both endpoints.
for graph_doc in graph_documents:
    if len(graph_doc.nodes) == 0:
        # Nothing was extracted for this paper; skip it entirely.
        continue

    paper = graph_doc.source
    print(f"Paper ID: {paper.id}")
    print(f"Paper Abstract: {paper.page_content}")

    for extracted_node in graph_doc.nodes:
        print(f"Node: {extracted_node.id}, Type: {extracted_node.type}")

    for relation in graph_doc.relationships:
        head = relation.source
        tail = relation.target
        print(f"Relationship: {relation.type}")
        print(f"   Source: {head.id}, Type: {head.type}")
        print(f"   Target: {tail.id}, Type: {tail.type}")

    # Blank line between papers.
    print()

Paper ID: None
Paper Abstract: Background: Since the appearance of the first case of COVID-19 in Morocco, the cumulative number of reported infectious cases continues to increase and, consequently, the government imposed the containment measure within the country. Our aim is to predict the impact of the compulsory containment on COVID-19 spread. Earlier knowledge of the epidemic characteristics of COVID-19 transmission related to Morocco will be of great interest to establish an optimal plan-of-action to control the epidemic.

Method: Using a Susceptible-Asymptomatic-Infectious model and the data of reported cumulative confirmed cases in Morocco from March 2nd to April 9, 2020, we determined the basic and control reproduction numbers and we estimated the model parameter values. Furthermore, simulations of different scenarios of containment are performed.

Results: Epidemic characteristics are predicted according to different rates of containment. The basic reproduction number is estima

In [None]:
# Repeat the extraction with a different local Ollama model and a slightly higher
# temperature for comparison.
llm = OllamaLLM(model="mistral-small3.1:24b", temperature=0.15)

# Restrict extraction to the node types of interest. The transformer is built once
# (an earlier unconstrained instance was immediately overwritten and has been dropped),
# and the "Disease Modeling Technique" label is spelled correctly because the labels
# are handed to the LLM as the allowed node types.
transformer = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=[
        "Disease Modeling Goal",
        "Disease Modeling Technique",
        "Disease Model Data Requirement",
        "Disease Modeled",
        "Geographic Location",
    ],
    # Empty list: no relationship types are whitelisted here.
    allowed_relationships=[],
)

# Process a subset of the documents as a test
graph_documents = transformer.convert_to_graph_documents(documents[:10])

f"Documents processed: {len(graph_documents)}"