In [None]:
# !pip install ontogpt

In [None]:
# !runoak set-apikey -e openai <your openai api key>

In [None]:
import csv
import os
from io import BytesIO
from ontogpt.engines.spires_engine import SPIRESEngine
from ontogpt.io.template_loader import get_template_details
from ontogpt.io.yaml_wrapper import dump_minimal_yaml
from ontogpt.cli import write_extraction

### Reading a sample text

In [None]:
def process_sample_text(text, ke, output_filename):
    """Run extraction on one text and write the YAML-formatted result to a file.

    Args:
        text: Input text handed to ``ke.extract_from_text``.
        ke: Knowledge engine used both for extraction and by ``write_extraction``.
        output_filename: Path of the file the serialized result is written to.
    """
    # Ask the engine for the extraction first
    extraction = ke.extract_from_text(text=text)

    # Serialize into an in-memory byte buffer
    buffer = BytesIO()
    write_extraction(
        results=extraction,
        output=buffer,
        output_format="yaml",
        knowledge_engine=ke,
    )

    # Dump the whole buffer to disk; getvalue() returns the full contents
    # regardless of the current stream position
    with open(output_filename, "wb") as handle:
        handle.write(buffer.getvalue())

# Create the SPIRES engine: "maxo" template, model "gpt-4-0125-preview" from source "openai"
ke = SPIRESEngine(
    model_source="openai",
    model="gpt-4-0125-preview",
    template_details=get_template_details(template="maxo"),
)


In [None]:
# Sample input for the extraction demo: the title followed by the structured
# abstract (BACKGROUND / STUDY DESIGN AND METHODS / RESULTS / CONCLUSION) of one article.
sample_pubmed_paragraph = """
Red blood cell mechanical sensitivity improves in patients with sickle cell disease undergoing chronic transfusion after prolonged, 
subhemolytic shear exposure.BACKGROUND: Sickle cell disease (SCD) is a genetically inherited hemoglobinopathy in which deoxygenated hemoglobin S polymerizes, 
leading to stiff red blood cells (RBCs) and inefficient microcirculatory blood flow. Transfusion therapy acts as primary and secondary prevention of 
ischemic stroke in SCD. Whether blood transfusion alters the mechanical sensitivity (MS) of RBCs to prolonged subhemolytic shear stress (shear) is unknown. 
We hypothesized that individuals with SCD undergoing chronic blood transfusion would have improved sensitivity to shear, compared with patients not 
undergoing transfusion therapy. STUDY DESIGN AND METHODS: Blood suspensions from individuals with SCD not receiving (n = 15) and receiving (n = 15) 
chronic simple transfusion were conditioned to shear (1, 4, 16, 32, and 64 Pa) for various durations (1, 4, 16, 32, and 64 sec), and then deformability of
RBCs was immediately measured. Healthy young controls (n = 15) were included for reference. A surface mesh was interpolated using the data to determine the
effect of blood transfusion on MS of RBCs. RESULTS: There was impaired RBC deformability to prolonged supraphysiologic shear in both SCD groups; however, MS 
improved in transfused patients when exposed to prolonged physiologic shear. Furthermore, in the transfused patients with SCD, the threshold above which
subhemolytic damage occurs was similar to controls. CONCLUSION: We found that chronic transfusion therapy normalizes the MS threshold above which RBC 
subhemolytic damage occurs after prolonged shear exposure in SCD. An important and novel finding in transfused patients with SCD was the improvement in RBC
deformability in response to prolonged shear exposure over the physiologic range.
"""

In [None]:

# Run extraction on the sample paragraph; the result is written to test_text.yaml
process_sample_text(
    text=sample_pubmed_paragraph,
    ke=ke,
    output_filename="test_text.yaml",
)


### Reading a TSV file 

In [None]:
def process_article(pubmed_id, text, ke, output_dir):
    """Extract from one article and store the YAML result as ``<pubmed_id>.yaml``.

    Args:
        pubmed_id: Identifier used as the base name of the output file.
        text: Article text handed to ``ke.extract_from_text``.
        ke: Knowledge engine used both for extraction and by ``write_extraction``.
        output_dir: Directory in which the output file is created.
    """
    extraction = ke.extract_from_text(text=text)

    # Serialize the extraction into an in-memory byte buffer
    yaml_buffer = BytesIO()
    write_extraction(
        results=extraction,
        output=yaml_buffer,
        output_format="yaml",
        knowledge_engine=ke,
    )

    # One file per article, named after its PubMed ID
    target_path = os.path.join(output_dir, f"{pubmed_id}.yaml")
    with open(target_path, "wb") as target:
        # getvalue() returns the full buffer regardless of stream position
        target.write(yaml_buffer.getvalue())

def process_tsv_file(file_path, ke, output_dir):
    """Read a tab-separated file and run extraction on every article row.

    Each non-blank row must have exactly three tab-separated fields, in
    order: PubMed ID, relationship, and article text. The relationship
    field is read but not used. Every row is handed to ``process_article``
    together with ``ke`` and ``output_dir``.

    Args:
        file_path: Path of the .tsv file to read.
        ke: Knowledge engine passed through to ``process_article``.
        output_dir: Directory passed through to ``process_article``.

    Raises:
        ValueError: If a non-blank row does not have exactly three fields.
    """
    # newline="" lets the csv module handle line endings itself, as its
    # documentation recommends; the explicit encoding avoids depending on
    # the platform's locale default.
    with open(file_path, "r", newline="", encoding="utf-8") as file:
        reader = csv.reader(file, delimiter="\t")
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing
                # newline at the end of the file); nothing to process.
                continue
            if len(row) != 3:
                raise ValueError(
                    f"{file_path}, line {reader.line_num}: expected 3 "
                    f"tab-separated fields, got {len(row)}"
                )
            pubmed_id, _relationship, text = row
            process_article(pubmed_id, text, ke, output_dir)

# Create the SPIRES engine again with the same settings as in the sample-text
# section ("maxo" template, model "gpt-4-0125-preview", source "openai"); this rebinds `ke`
ke = SPIRESEngine(
    model_source="openai",
    model="gpt-4-0125-preview",
    template_details=get_template_details(template="maxo"),
)


In [None]:

# Directory that receives the per-article YAML files.
# Alternative dataset: output_dir = "../data/sickle_cell_ontogpt_result"
output_dir = "../test_case/test_ontogpt_result_poet_replaced"

# Make sure the output directory exists before anything is written to it
os.makedirs(output_dir, exist_ok=True)

# NOTE: despite its name, input_dir holds the path of the input .tsv file.
# Alternative dataset: input_dir = "../data/sickle_cell_no_replaced.tsv"
input_dir = "../test_case/test_poet_replaced.tsv"

# Run extraction over every row of the .tsv file
process_tsv_file(file_path=input_dir, ke=ke, output_dir=output_dir)

