In [6]:
import sys
import json
from openai import OpenAI
from dotenv import load_dotenv
import Graph 
import re
import os


def get_graph_from_abstract(abstract):
    load_dotenv()
    client = OpenAI(
      api_key=os.environ.get('mykey')
    )
    attempt = 0
    max_attempts = 5
    success = False

    while not success and attempt < max_attempts:
        try:
            completion = client.chat.completions.create(
                model="gpt-4-0125-preview",
                messages=[
                  {"role": "user", "content": "identify a list of entities in this science abstract and their relations, storing the list of entities as an array of three element tuple of strings and the relations as an array of tuples in the format of (Entity A, Entity B, Relationship):" + abstract},
                ],
            )

            message = completion.choices[0].message.content
            print(message) 

            entities_output = re.findall(r'\d+\.\s+(.*)', message)

            relations = [tuple(item[1:-1].split(', ')) for item in entities_output if item.startswith('(') and item.endswith(')')]

            graph = Graph.Graph()

            for relation in relations:
                if len(relation) == 3:
                    entity_a, entity_b, annotation = relation
                    graph.add_vertex(entity_a)
                    graph.add_vertex(entity_b)
                    graph.add_edge(entity_a, entity_b, annotation)

            graph.display()  # This will print the graph representation

            success = True 
            return json.dumps(graph.adjacency_list)

        except Exception as e:
            print(f"Attempt {attempt + 1} failed with error: {e}")
            attempt += 1  # Increment the attempt counter and try again


In [7]:
abstract = "Neurodegenerative diseases are characterized by the formation and propagation of protein aggregates, especially amyloid fibrils. However, what normally suppresses protein misfolding and aggregation in metazoan cells remains incompletely understood. Here, we show that TRIM11, a member of the metazoan tripartite motif (TRIM) family, both prevents the formation of protein aggregates and dissolves pre-existing protein deposits, including amyloid fibrils. These molecular chaperone and disaggregase activities are ATP independent. They enhance folding and solubility of normal proteins and cooperate with TRIM11 SUMO ligase activity to degrade aberrant proteins. TRIM11 abrogates α-synuclein fibrillization and restores viability in cell models of Parkinson's disease (PD). Intracranial adeno-associated viral delivery of TRIM11 mitigates α-synuclein-mediated pathology, neurodegeneration, and motor impairments in a PD mouse model. Other TRIMs can also function as ATP-independent molecular chaperones and disaggregases. Thus, we define TRIMs as a potent and multifunctional protein quality-control system in metazoa, which might be applied to treat neurodegenerative diseases."



In [15]:
al = get_graph_from_abstract(abstract)

Attempt 1 failed with error: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4-turbo-preview in organization org-2TYj2P39uJIzo5VWk94x0BAQ on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
Attempt 2 failed with error: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4-turbo-preview in organization org-2TYj2P39uJIzo5VWk94x0BAQ on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded

KeyboardInterrupt: 

In [4]:
al

'{"Neurodegenerative diseases": [["Protein aggregates", "characterized by"]], "Protein aggregates": [["Neurodegenerative diseases", "characterized by"], ["Amyloid fibrils", "especially includes"], ["TRIM11", "prevents formation of"]], "Amyloid fibrils": [["Protein aggregates", "especially includes"], ["TRIM11", "dissolves pre-existing"]], "Protein misfolding and aggregation": [["Metazoan cells", "suppressed by"]], "Metazoan cells": [["Protein misfolding and aggregation", "suppressed by"]], "TRIM11": [["TRIM family", "member of"], ["Protein aggregates", "prevents formation of"], ["Protein deposits", "dissolves"], ["Amyloid fibrils", "dissolves pre-existing"], ["Molecular chaperone activities", "exhibited by"], ["\\u03b1-synuclein fibrillization", "abrogates"], ["Cell models of PD", "restores viability in"], ["Intracranial adeno-associated viral delivery", "method of administering"]], "TRIM family": [["TRIM11", "member of"]], "Protein deposits": [["TRIM11", "dissolves"]], "Molecular chap

In [30]:
adjacency_list = {"\"Neurodegenerative diseases\"":[["\"diseases\"","\"type\""],["\"protein aggregates\"","\"characterized_by\""]],"\"diseases\"":[["\"Neurodegenerative diseases\"","\"type\""]],"\"protein aggregates\"":[["\"aggregates\"","\"type\""],["\"amyloid fibrils\"","\"example\""],["\"Neurodegenerative diseases\"","\"characterized_by\""],["\"amyloid fibrils\"","\"especially\""]],"\"aggregates\"":[["\"protein aggregates\"","\"type\""]],"\"amyloid fibrils\"":[["\"protein aggregates\"","\"example\""],["\"protein aggregates\"","\"especially\""]],"\"protein misfolding\"":[["\"process\"","\"type\""],["\"metazoan cells\"","\"suppressed_in\""]],"\"process\"":[["\"protein misfolding\"","\"type\""],["\"aggregation\"","\"type\""]],"\"aggregation\"":[["\"process\"","\"type\""],["\"metazoan cells\"","\"suppressed_in\""]],"\"metazoan cells\"":[["\"cells\"","\"type\""],["\"protein misfolding\"","\"suppressed_in\""],["\"aggregation\"","\"suppressed_in\""]],"\"cells\"":[["\"metazoan cells\"","\"type\""]]}

image_paths = {
    "Neurodegenerative diseases": "./assets/Neurodegenerative diseases.jpeg",

}


In [31]:
from graphviz import Digraph
import os

def generate_graph_with_images(adjacency_list, entity_images):
    dot = Digraph(comment='Graph Visualization', format='png')
    
    # Iterate over entities to add nodes
    for entity in adjacency_list.keys():
        if entity in entity_images:
            # Entity has an associated image
            dot.node(entity, label='', image=entity_images[entity], shape='none')  # Use image for node
        else:
            # Fallback for entities without images
            dot.node(entity, entity)  # Use default shape and label the node with the entity name

    # Add edges with annotations
    for source, targets in adjacency_list.items():
        for target, annotation in targets:
            dot.edge(source, target, label=annotation)

    # Render the graph to a file (e.g., PNG)
    dot.render('graph-output/graph', cleanup=True)
    print("Graph image generated at 'graph-output/graph.png'")

generate_graph_with_images(adjacency_list, image_paths)


Graph image generated at 'graph-output/graph.png'


In [13]:
import os
import re
import json
import ast
from openai import OpenAI
from dotenv import load_dotenv

def process_sequential_protocol(protocol):
    load_dotenv()

    client = OpenAI(
      api_key=os.environ.get('mykey')
    )

    completion = client.chat.completions.create(
      model="gpt-4-0125-preview",
      messages=[
        {"role": "user", "content": "Identify the key steps and reagents/objects used in this biological experiment procedure, and generate two python arrays that store respectively strings describing the key steps and another python array that stores the reagents/objects " + protocol},
      ],
    )

    message = completion.choices[0].message.content
    print(message)

    steps_match = re.search(r'steps = (\[.*?\])', message, re.DOTALL)
    reagents_objects_match = re.search(r'reagents_objects = (\[.*?\])', message, re.DOTALL)

    if steps_match and reagents_objects_match:
        try:
            steps_array = ast.literal_eval(steps_match.group(1))
            reagents_objects_array = ast.literal_eval(reagents_objects_match.group(1))
            return json.dumps({"steps": steps_array, "reagents/objects": reagents_objects_array})
        except ValueError as e:
            return json.dumps({"error": f"Error processing the extracted data: {str(e)}"})
    else:
        return json.dumps({"error": "No steps or reagents/objects found in the response"})


    


In [12]:

protocol = "We will expose wild-type astrocytes and ASH1L-depleted astrocytes to PBS (control), LPS, and Poly(I:C) in vitro. We will then use RT-qPCR to quantify the expression of IL6 and TNF, two pro-inflammatory cytokine encoding genes upregulated by astrocytes upon activation, in all samples [9]."

print(process_sequential_protocol(protocol))

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4-turbo-preview in organization org-2TYj2P39uJIzo5VWk94x0BAQ on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}

In [35]:
import sys
import json
import os
from graphviz import Digraph
from openai import OpenAI
from dotenv import load_dotenv
import Graph 
import re

def get_graph_from_abstract(abstract):
    load_dotenv()
    client = OpenAI(
      api_key=os.environ.get('mykey')
    )
    attempt = 0
    max_attempts = 5
    success = False

    while not success and attempt < max_attempts:
        try:
            completion = client.chat.completions.create(
                model="gpt-4-0125-preview",
                messages=[
                  {"role": "user", "content": "identify a list of entities in this science abstract and their relations, storing the list of entities as an array of three element tuple of strings and the relations as an array of tuples in the format of (Entity A, Entity B, Relationship), do not add quotation marks around entity and relationship:" + abstract},
                ],
            )

            message = completion.choices[0].message.content
            entities_output = re.findall(r'\d+\.\s+(.*)', message)
            relations = [tuple(item[1:-1].split(', ')) for item in entities_output if item.startswith('(') and item.endswith(')')]

            graph = Graph.Graph()
            for relation in relations:
                if len(relation) == 3:
                    entity_a, entity_b, annotation = relation
                    graph.add_vertex(entity_a)
                    graph.add_vertex(entity_b)
                    graph.add_edge(entity_a, entity_b, annotation)

            success = True 
            return graph.adjacency_list

        except Exception as e:
            attempt += 1

    return None

def generate_graph_with_images(adjacency_list, entity_images):
    dot = Digraph(comment='Graph Visualization', format='png')
    
    for entity in adjacency_list.keys():
        if entity in entity_images:
            dot.node(entity, label='', image=entity_images[entity], shape='none')
        else:
            dot.node(entity, entity)

    for source, targets in adjacency_list.items():
        for target, annotation in targets:
            dot.edge(source, target, label=annotation)

    output_path = 'graph-output/graph'
    dot.render(output_path, cleanup=True)
    return output_path + '.png'


In [36]:
abstract = "Neurodegenerative diseases are characterized by the formation and propagation of protein aggregates, especially amyloid fibrils. However, what normally suppresses protein misfolding and aggregation in metazoan cells remains incompletely understood."
adjacency_list = get_graph_from_abstract(abstract)
print(adjacency_list)
if adjacency_list:
    # Assume entity_images is defined somewhere, or pass an empty dict if not available
    entity_images = {"Neurodegenerative diseases": "./assets/Neurodegenerative diseases.jpeg",}  # Example: {"EntityName": "path/to/image.png"}
    graph_image_path = generate_graph_with_images(adjacency_list, entity_images)

None
