In [1]:
import os
import re
import logging
import datetime
import json
import csv
from statistics import fmean
from typing import Dict, List, Callable, Union
from graph_of_thoughts import controller, language_models, operations, prompter, parser
from graph_of_thoughts.vector_db import azure_embedding



# This is a hack to also allow execution of this file from the examples directory
try:
    from . import utils
except ImportError:
    import utils

class ALZKBPrompter(prompter.Prompter):
    """
    ALZKBPrompter provides the generation of prompts specific to the
    ALZKB example for the language models.

    Inherits from the Prompter class and implements its abstract methods.
    """
    
    alzkb_prompt_start = """You have access to a knowledge graph of Alzheimer's disease. The knowledge graph contains node types: Gene, DrugClass, Drug, Disease, Pathway, BiologicalProcess, MolecularFunction, CellularComponent, Symptom, BodyPart.
    The knowledge graph contains relationships: "CHEMICAL BINDS GENE","CHEMICAL INCREASES EXPRESSION", "CHEMICAL DECREASES EXPRESSION", "DRUG IN CLASS", "DRUG TREATS DISEASE", "DRUG CAUSES EFFECT", "GENE PARTICIPATES IN BIOLOGICAL PROCESS", "GENE IN PATHWAY", "GENE INTERACTS WITH GENE", "GENE HAS MOLECULAR FUNCTION", "GENE ASSOCIATED WITH CELLULAR COMPONENT", "GENE ASSOCIATES WITH DISEASE", "BODYPART OVER EXPRESSES GENE", "BODYPART UNDEREXPRESSES GENE", "SYMPTOM MANIFESTATION OF DISEASE", "DISEASE LOCALIZES TO ANATOMY", "DISEASE ASSOCIATES WITH DISEASE".
    """
    
    io_prompt = """<Instruction> You are an Alzheimer's data specialist AI assistant dedicated to providing information and support related to Alzheimer's disease.
Your primary goal is to assist users by offering factual and relevant information based on your access to a comprehensive knowledge graph associated with Alzheimer's. 
Your responses are focused on addressing queries related to Alzheimer's, and you do not provide information unrelated to the topic. 
You will also only answer based on the knowledge within the knowledge graph within the <Knowledge> tags. 
You will notice there will be gene symbols in the knowledge, and there are subtle differences between the gene names.
You will need to be careful that the names are exact with you use them in context. There may be single differences in numbers and letters.
For example, the gene "APOE" is not the same as gene "APOE1". Another example is the gene "IQCK" is not the same as gene "IQCG".
You will need to be careful of specific biological terms. For example, the term "amino" is different from the term "amine".
If you are providing a list, be sure not to list duplicates. 
Your demeanor is empathetic and concise as you aim to help users understand and navigate Alzheimer's-related concerns.
You will be provided knowledge within the <Knowledge> tags and must answer the question in the <Question> tags.
{additional_instruction}
</Instruction> 
<Knowledge>
{knowledge}
</Knowledge>
<Question>{question}</Question>"""
        
    cot_prompt = """<Instruction> You are an Alzheimer's data specialist AI assistant dedicated to providing information and support related to Alzheimer's disease.
Your primary goal is to assist users by offering factual and relevant information based on your access to a comprehensive knowledge graph associated with Alzheimer's. 
Your responses are focused on addressing queries related to Alzheimer's, and you do not provide information unrelated to the topic. 
You will also only answer based on the knowledge within the knowledge graph. 
You will notice there will be gene symbols in the knowledge, and there are subtle differences between the gene names.
You will need to be careful that the names are exact with you use them in context. There may be single differences in numbers and letters.
For example, the gene "APOE" is not the same as gene "APOE1". Another example is the gene "IQCK" is not the same as gene "IQCG".
You will need to be careful of specific biological terms. For example, the term "amino" is different from the term "amine".
If you are providing a list, be sure not to list duplicates. 
Your demeanor is empathetic and concise as you aim to help users understand and navigate Alzheimer's-related concerns.
You will be provided knowledge in the Knowledge section and must answer the question in the Question section. </Instruction> 
<Knowledge>
{knowledge}
</Knowledge>
<Question>{question}</Question>
<Approach>
To answer the question, follow these steps:
1. Identify the relevant nodes in the knowledge graph.
2. Identify the relevant relationships between the nodes.
3. Generate a response based on the identified nodes and relationships.
</Approach>

<Examples>
Input:

Output: 
</Examples>
"""

    
    score_prompt_base = """We are comparing if a response is equivalent to the ground truth.
<GroundTruth>{ground_truth}</GroundTruth>
<Answer>{answer}</Answer>
Please score the response on a scale of 0 to 10, where 0 is not equivalent and 10 is equivalent.
You may have to compare two unordered lists, so please score the response based on the following criteria:
1. The lists contain the same number of elements.
2. The lists contain the same elements, but may be in a different order.
If you have a True or False question, please score the response as 10 if the response is equal to the ground truth and 0 if the response is inequal.
You may provide reasoning for your scoring, but the final score should be between the tags <Score> and </Score>, without any additional text within those tags.
"""


    def __init__(self,vector_db, lm) -> None:
        """
        Inits the prompter.
        """
        self.vector_db = vector_db
        self.lm = lm
        pass
    
    def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str:
        """
        Generate an aggregation prompt for the language model.

        :param state_dicts: The thought states that should be aggregated.
        :type state_dicts: List[Dict]
        :param kwargs: Additional keyword arguments.
        :return: The aggregation prompt.
        :rtype: str
        :raise AssertionError: If not exactly two thought states are provided.
        """
        assert len(state_dicts) == 2, "Expected two states for aggregation prompt."

        return self.io_prompt.format(
            input1=state_dicts[0]["current"],
            input2=state_dicts[1]["current"],
        )

        if len(state_dicts[0]["parts"]) > 0 and len(state_dicts[0]["parts"]) < len(
            state_dicts[0]["documents"]
        ):
            prompt = self.aggregate_sub_prompt_base.format(
                num_ndas=len(state_dicts),
            )
            for i, state_dict in enumerate(state_dicts):
                prompt += self.aggregate_sub_prompt_generate.format(
                    nda=state_dict["current"], num=i + 1
                )
            return prompt
        else:
            prompt = self.aggregate_full_prompt_base.format(
                num_ndas=len(state_dicts[0]["documents"]),
                num_ndas_summary=len(state_dicts),
            )
            for i, document in enumerate(state_dicts[0]["documents"]):
                prompt += self.aggregate_full_prompt_block1.format(
                    document=document, num=i + 1
                )
            prompt += self.aggregate_full_prompt_mid.format(
                num_ndas_summary=len(state_dicts),
            )
            for i, state_dict in enumerate(state_dicts):
                prompt += self.aggregate_full_prompt_block2.format(
                    summary=state_dict["current"], num=i + 1
                )
            return prompt

    def generate_prompt(
        self,
        num_branches: int,
        question: str,
        question_type: str,
        method: str,
        current: str,
        **kwargs,
    ) -> str:
        """
        Generate a generate prompt for the language model.

        :param num_branches: The number of responses the prompt should ask the LM to generate.
        :type num_branches: int
        :param documents: The list of documents to be merged.
        :type documents: List[str]
        :param method: Method for which the generate prompt is generated.
        :type method: str
        :param parts: Indices of the already processed document parts.
        :type parts: Set[str]
        :param current: The intermediate solution.
        :type current: str
        :param kwargs: Additional keyword arguments.
        :return: The generate prompt.
        :rtype: str
        :raise AssertionError: If method is not implemented yet.
        """
        

        if question_type == "true/false":
            additional_instruction = "You will be asked to answer the question with only a TRUE or FALSE response."
            # check if "True or False Question: " is in the question
            if "True or False Question: " in question:
                statement_to_embed = question[question.index("True or False Question: ") + len("True or False Question: "):]
            else:
                statement_to_embed = question
                question = "True or False Question: " + question
        elif question_type == "multiple choice":
            additional_instruction = "You will be asked to answer the question with only the multiple choice number response. For instance, if the correct answer is '2', you will need to answer '2'."
            # check if "? 1." is in the question and separate the question from the choice
            if "? 1." in question:
                statement_to_embed = question[:question.index("? 1.")]
            else:
                statement_to_embed = question
        elif question_type == "list":
            additional_instruction = "You will be asked to answer the question with only the list with each element separated by a newline."
            statement_to_embed = question
        else:
            additional_instruction = ""
            statement_to_embed = question
            
        # print("statement_to_embed:", statement_to_embed)
        # print("question:", question)
        embedded_question = self.lm.get_embedding(statement_to_embed)
        # print(embedded_question)
        knowledge_array,distances = self.vector_db.get_knowledge(embedded_question)
        # print(knowledge_array)
        knowledge = "\n".join(knowledge_array)
        
        prompt = ""
        assert num_branches == 1, "Branching should be done via multiple requests."
        if method.startswith("io"):
            # print(self.io_prompt.format(knowledge=knowledge, question=question, additional_instruction=additional_instruction))
            return self.io_prompt.format(knowledge=knowledge, question=question, additional_instruction=additional_instruction)
        elif method.startswith("cot"):
            prompt += self.alzkb_prompt_start + self.cot_prompt.format(knowledge=knowledge, question=question, additional_instruction=additional_instruction)
            return prompt
        # elif method.startswith("cot"):
        #     return self.intersection_prompt_cot.format(set1=set1, set2=set2)
        # elif method.startswith("tot"):
        #     if current is None or current == "":
        #         return self.intersection_prompt.format(set1=set1, set2=set2)
        #     return self.tot_improve_prompt.format(
        #         set1=set1, set2=set2, incorrect_intersection=current
        #     )
        # elif method.startswith("got"):
        #     if kwargs["phase"] == 0:
        #         return self.got_split_prompt.format(input=set2)

        #     input_set = set2
        #     if "subset" in kwargs and kwargs["subset"] != "":
        #         input_set = kwargs["subset"]

        #     return self.intersection_prompt.format(set1=set1, set2=input_set)
        
        
        # if method.startswith("io") or method.startswith("cot"):
        #     if method.startswith("io"):
        #         prompt += self.merge_doc_prompt_start.format(num=len(documents))
        #     else:
        #         prompt += self.merge_doc_prompt_cot_start.format(num=len(documents))
        #     for i, document in enumerate(documents):
        #         prompt += self.merge_doc_prompt_block.format(
        #             document=document, num=i + 1
        #         )
        #     return prompt
        # elif method.startswith("tot"):
        #     if current is None or current == "":
        #         prompt += self.merge_doc_prompt_start.format(num=len(documents))
        #         for i, document in enumerate(documents):
        #             prompt += self.merge_doc_prompt_block.format(
        #                 document=document, num=i + 1
        #             )
        #         return prompt
        #     else:
        #         prompt += self.improve_summary_prompt_start.format(
        #             num=len(documents),
        #         )
        #         for i, document in enumerate(documents):
        #             prompt += self.improve_summary_prompt_block.format(
        #                 document=document, num=i + 1
        #             )
        #         prompt += self.improve_summary_prompt_end.format(summary=current)
        #         return prompt
        # elif method.startswith("got"):
        #     parts = (
        #         sorted(list(parts)) if len(parts) > 0 else list(range(len(documents)))
        #     )
        #     if current is None or current == "":
        #         prompt += self.merge_doc_prompt_start.format(num=len(parts))
        #         for i, part in enumerate(sorted(list(parts))):
        #             prompt += self.merge_doc_prompt_block.format(
        #                 document=documents[part], num=i + 1
        #             )
        #         return prompt
        #     else:
        #         prompt += self.improve_summary_prompt_start.format(
        #             num=len(parts),
        #         )
        #         for i, part in enumerate(sorted(list(parts))):
        #             prompt += self.improve_summary_prompt_block.format(
        #                 document=documents[part], num=i + 1
        #             )
        #         prompt += self.improve_summary_prompt_end.format(summary=current)
        #         return prompt
        else:
            assert False, "Not implemented yet."

    def improve_prompt(self, **kwargs) -> str:
        """
        Generate an improve prompt for the language model.

        :param kwargs: Additional keyword arguments.
        :return: The improve prompt.
        :rtype: str
        """
        pass

    def validation_prompt(self, **kwargs) -> str:
        """
        Generate a validation prompt for the language model.

        :param kwargs: Additional keyword arguments.
        :return: The validation prompt.
        :rtype: str
        """
        pass

    def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str:
        """
        Generate a score prompt for the language model.

        :param state_dicts: The thought states that should be scored,
                            if more than one, they should be scored together.
        :type state_dicts: List[Dict]
        :param kwargs: Additional keyword arguments.
        :return: The score prompt.
        :rtype: str
        :raise AssertionError: If more than one thought state is supplied.
        """

        # perform individual scoring
        prompt = self.score_prompt_base.format(
            answer=state_dicts[0]["current"],
            ground_truth=state_dicts[0]["ground_truth"],
        )
        return prompt
    
class ALZKBParser(parser.Parser):
    """
    ALZKBParser provides the parsing of language model reponses specific to the
    ALZKB example.

    Inherits from the Parser class and implements its abstract methods.
    """

    def __init__(self) -> None:
        """
        Inits the response cache.
        """
        self.cache = {}
        
    def strip_answer_helper(self, text: str, tag: str = "") -> str:
        """
        Helper function to remove tags from a text.

        :param text: The input text.
        :type text: str
        :param tag: The tag to be stripped. Defaults to "".
        :type tag: str
        :return: The stripped text.
        :rtype: str
        """

        text = text.strip()
        if "Output:" in text:
            text = text[text.index("Output:") + len("Output:") :].strip()
        if tag != "":
            start = text.rfind(f"<{tag}>")
            end = text.rfind(f"</{tag}>")
            if start != -1 and end != -1:
                text = text[start + len(f"<{tag}>") : end].strip()
            elif start != -1:
                logging.warning(
                    f"Only found the start tag <{tag}> in answer: {text}. Returning everything after the tag."
                )
                text = text[start + len(f"<{tag}>") :].strip()
            elif end != -1:
                logging.warning(
                    f"Only found the end tag </{tag}> in answer: {text}. Returning everything before the tag."
                )
                text = text[:end].strip()
            else:
                logging.warning(
                    f"Could not find any tag {tag} in answer: {text}. Returning the full answer."
                )
        return text

    def parse_aggregation_answer(
        self, states: List[Dict], texts: List[str]
    ) -> Union[Dict, List[Dict]]:
        """
        Parse the response from the language model for an aggregation prompt.

        :param states: The thought states used to generate the prompt.
        :type states: List[Dict]
        :param texts: The responses to the prompt from the language model.
        :type texts: List[str]
        :return: The new thought states after parsing the respones from the language model.
        :rtype: Union[Dict, List[Dict]]
        :raise AssertionError: If not exactly two thought states are provided.
        """

        assert len(states) == 2, "Expected two states for aggregation answer."
        new_states = []
        for text in texts:
            answers = text.strip().split("\n")
            if any(["Output" in answer for answer in answers]):
                # cut elements until last output is found
                for answer in reversed(answers):
                    if "Output" in answer:
                        answers = answers[answers.index(answer) :]
                        break

            answers_stripped = [
                answer for answer in answers if "[" in answer and "]" in answer
            ]
            if len(answers_stripped) == 0:
                for answer in answers:
                    answer = "[" + answer + "]"
                    try:
                        answer_converted = utils.string_to_list(answer)
                        if len(answer_converted) > 0:
                            answers_stripped.append(answer)
                    except:
                        pass
            if len(answers_stripped) == 0:
                logging.warning(
                    f"Could not parse aggregation answer: {text}. Returning empty list."
                )
                answer = "[]"
            else:
                answer = [
                    answer[answer.index("[") : answer.index("]") + 1]
                    for answer in answers_stripped
                ][0]
            states = sorted(states, key=lambda x: x["part"])
            merged_subsets = states[0]["subset"][:-1] + ", " + states[1]["subset"][1:]
            new_state = states[0].copy()
            new_state["current"] = answer
            new_state["subset"] = merged_subsets
            new_states.append(new_state)
        return new_states

    def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict:
        """
        Parse the response from the language model for an improve prompt.

        :param state: The thought state used to generate the prompt.
        :type state: Dict
        :param texts: The responses to the prompt from the language model.
        :type texts: List[str]
        :return: The new thought state after parsing the responses from the language model.
        :rtype: Dict
        """
        pass

    def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]:
        """
        Parse the response from the language model for a generate prompt.

        :param state: The thought state used to generate the prompt.
        :type state: Dict
        :param texts: The responses to the prompt from the language model.
        :type texts: List[str]
        :return: The new thought states after parsing the respones from the language model.
        :rtype: List[Dict]
        """

        new_states = []
        print("parse_generate_answer text:", texts)
        
        for text in texts:
            if state["method"].startswith("io") or state["method"].startswith("cot"):
                try:
                    if state["question_type"] == "true/false":
                        if "TRUE" in text.upper():
                            answer = "TRUE"
                        elif "FALSE" in text.upper():
                            answer = "FALSE"
                        else:
                            answer = "FALSE"
                    elif state["question_type"] == "multiple choice":
                        #white strip to remove any spaces and check for numbers in the first couple of characters
                        if any(char.isdigit() for char in text[:3]):
                            answer = text[0]
                        else:
                            logging.warning(
                                f"Could not parse step answer: {text}. Returning empty."
                            )
                            continue
                    elif state["question_type"] == "list":
                        temp_answer = text.strip().split("\n")
                        answer = [x for x in temp_answer if x != '']
                        #check for empty answers
                        if len(answer) == 0:
                            logging.warning(
                                f"Could not parse step answer: {text}. Returning empty."
                            )
                            continue
                        
                    else:
                        answer = text.strip()
                        
                        
                    new_state = state.copy()
                    new_state["current"] = answer
                    new_state["phase"] = new_state["phase"] + 1
                    new_states.append(new_state)
                except Exception as e:
                    logging.error(
                        f"Could not parse step answer: {text}. Encountered exception: {e}"
                    )
                        
                        
            elif state["method"].startswith("got") and state["phase"] == 0:
                # We expect a json which contains the two lists named "List 1" and "List 2"
                # cut everything until the opening bracket and everything after the closing bracket

                try:
                    text = text[text.index("{") : text.index("}") + 1]
                    json_dict = json.loads(text)
                    if len(json_dict.keys()) != 2:
                        logging.warning(
                            f"Expected 2 lists in json, but found {len(json_dict.keys())}."
                        )
                    for key, value in json_dict.items():
                        if "List" not in key:
                            logging.warning(
                                f"Expected key to contain 'List', but found {key}."
                            )
                            continue
                        if not isinstance(value, list):
                            value = utils.string_to_list(value)
                        new_state = state.copy()
                        new_state["current"] = ""
                        new_state["subset"] = str(value)
                        new_state["phase"] = 1
                        new_state["part"] = key
                        new_states.append(new_state)
                except Exception as e:
                    logging.error(
                        f"Could not parse step answer: {text}. Encountered exception: {e}"
                    )
            else:
                answers = text.strip().split("\n")
                answers = [
                    answer for answer in answers if "[" in answer and "]" in answer
                ]
                if any(["Output" in answer for answer in answers]):
                    # cut elements until last output is found
                    for answer in reversed(answers):
                        if "Output" in answer:
                            answers = answers[answers.index(answer) :]
                            break

                answers = [
                    answer[answer.index("[") : answer.index("]") + 1]
                    for answer in answers
                ]
                if len(answers) == 0:
                    logging.warning(
                        f"Could not parse step answer: {text}. Returning empty list."
                    )
                    answer = "[]"
                else:
                    if len(answers) > 1:
                        logging.warning(
                            f"Multiple answers found for step answer: {text}. Using the first one."
                        )
                    answer = answers[0]

                new_state = state.copy()
                new_state["current"] = answer
                new_state["phase"] = 2
                new_states.append(new_state)
        return new_states

    def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool:
        """
        Parse the response from the language model for a validation prompt.

        :param state: The thought state used to generate the prompt.
        :type state: Dict
        :param texts: The responses to the prompt from the language model.
        :type texts: List[str]
        :return: Whether the thought state is valid or not.
        :rtype: bool
        """
        pass

    def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]:
        """
        Parse the response from the language model for a score prompt.

        :param states: The thought states used to generate the prompt.
        :type states: List[Dict]
        :param texts: The responses to the prompt from the language model.
        :type texts: List[str]
        :return: The scores for the thought states.
        :rtype: List[float]
        """
        assert len(states) == 1, "Only one state is allowed for scoring."
        if len(states) == 1:
            # individual scoring
            redundancy_scores = []
            retain_scores = []
            for text in texts:
                answer = self.strip_answer_helper(text, "Score")
                res = re.findall(r"\d+\.?\d*", answer)
                if len(res) == 1:
                    redundancy_scores.append(float(res[0]))
                elif len(res) > 1:
                    logging.warning(
                        f"Found multiple scores in answer: {text}. Returning the last one."
                    )
                    redundancy_scores.append(float(res[-1]))
                else:
                    logging.warning(
                        f"Could not find any score in answer: {text}. Ignoring this answer."
                    )
            if len(redundancy_scores) == 0:
                logging.warning(
                    f"Could not find any valid score in any answer. Returning 0.0."
                )
                return [0.0]
            mean_redundancy = fmean(redundancy_scores)
            print("mean_score:", mean_redundancy)
            return [mean_redundancy]
            # mean_retain = fmean(retain_scores)
            # f1 = 2 * mean_redundancy * mean_retain / (mean_redundancy + mean_retain)
            # return [f1]


def io() -> operations.GraphOfOperations:
    """
    Generates the Graph of Operations for the IO method.

    :return: Graph of Operations
    :rtype: GraphOfOperations
    """
    operations_graph = operations.GraphOfOperations()

    operations_graph.append_operation(operations.Generate(1, 1))
    operations_graph.append_operation(operations.Score(1, False))
    # operations_graph.append_operation(operations.GroundTruth())

    return operations_graph


def cot() -> operations.GraphOfOperations:
    """
    Generates the Graph of Operations for the CoT method.

    :return: Graph of Operations
    :rtype: GraphOfOperations
    """
    operations_graph = operations.GraphOfOperations()

    operations_graph.append_operation(operations.Generate(1, 1))
    operations_graph.append_operation(operations.Score(1, False))

    return operations_graph


def tot() -> operations.GraphOfOperations:
    """
    Generates the Graph of Operations for the ToT method.

    :return: Graph of Operations
    :rtype: GraphOfOperations
    """
    operations_graph = operations.GraphOfOperations()

    branch_factor = 10

    operations_graph.append_operation(operations.Generate(1, branch_factor))
    operations_graph.append_operation(operations.Score(3, False))
    keep_best_1 = operations.KeepBestN(1, True)
    operations_graph.append_operation(keep_best_1)

    for _ in range(2):
        operations_graph.append_operation(operations.Generate(1, branch_factor))
        operations_graph.append_operation(operations.Score(3, False))
        keep_best_2 = operations.KeepBestN(1, True)
        keep_best_2.add_predecessor(keep_best_1)
        operations_graph.append_operation(keep_best_2)
        keep_best_1 = keep_best_2

    return operations_graph


def got() -> operations.GraphOfOperations:
    """
    Generates the Graph of Operations for the GoT method, where full documents
    are merged.

    :return: Graph of Operations
    :rtype: GraphOfOperations
    """
    operations_graph = operations.GraphOfOperations()

    operations_graph.append_operation(operations.Generate(1, 5))
    operations_graph.append_operation(operations.Score(3, False))
    keep_best = operations.KeepBestN(3, True)
    operations_graph.append_operation(keep_best)
    operations_graph.append_operation(operations.Aggregate(5))
    operations_graph.append_operation(operations.Score(3, False))
    keep_best2 = operations.KeepBestN(1, True)
    keep_best2.add_predecessor(keep_best)
    operations_graph.append_operation(keep_best2)
    operations_graph.append_operation(operations.Generate(1, 10))
    operations_graph.append_operation(operations.Score(3, False))
    keep_best3 = operations.KeepBestN(1, True)
    keep_best3.add_predecessor(keep_best2)
    operations_graph.append_operation(keep_best3)

    return operations_graph


def got2() -> operations.GraphOfOperations:
    """
    Generates the Graph of Operations for the GoT2 method, where partial
    documents are merged.

    :return: Graph of Operations
    :rtype: GraphOfOperations
    """
    operations_graph = operations.GraphOfOperations()

    sub_parts = []
    for i in range(0, 4, 2):  # should be at most 16 parts
        sub_text = operations.Selector(
            lambda thoughts, list_id=i: [
                operations.Thought(
                    state={**thoughts[0].state, "parts": {list_id, list_id + 1}}
                )
            ]
        )
        operations_graph.add_operation(sub_text)
        gen_nda = operations.Generate(1, 5)
        gen_nda.add_predecessor(sub_text)
        operations_graph.add_operation(gen_nda)
        score_nda = operations.Score(3, False)
        score_nda.add_predecessor(gen_nda)
        operations_graph.add_operation(score_nda)
        keep_best_nda = operations.KeepBestN(1, True)
        keep_best_nda.add_predecessor(score_nda)
        operations_graph.add_operation(keep_best_nda)

        sub_parts.append(keep_best_nda)

    while len(sub_parts) > 1:
        new_sub_parts = []
        for i in range(0, len(sub_parts), 2):
            if i + 1 == len(sub_parts):
                new_sub_parts.append(sub_parts[i])
                continue
            aggregate = operations.Aggregate(5)
            aggregate.add_predecessor(sub_parts[i])
            aggregate.add_predecessor(sub_parts[i + 1])
            operations_graph.add_operation(aggregate)
            score = operations.Score(3, False)
            score.add_predecessor(aggregate)
            operations_graph.add_operation(score)
            keep_best = operations.KeepBestN(1, True)
            keep_best.add_predecessor(score)
            operations_graph.add_operation(keep_best)

            gen_nda = operations.Generate(1, 5)
            gen_nda.add_predecessor(keep_best)
            operations_graph.add_operation(gen_nda)
            score_nda = operations.Score(3, False)
            score_nda.add_predecessor(gen_nda)
            operations_graph.add_operation(score_nda)
            keep_best_nda = operations.KeepBestN(1, True)
            keep_best_nda.add_predecessor(score_nda)
            keep_best_nda.add_predecessor(keep_best)
            operations_graph.add_operation(keep_best_nda)

            new_sub_parts.append(keep_best_nda)
        sub_parts = new_sub_parts

    return operations_graph




  from . import utils


In [8]:
#get json files in directory
import json
import os
directory = r'C:/Users/matsumoton/Box/CAIRE Work Folder/AlzKB – Neo4J and LLMs Project/AlzKB_LLM_Evaluation'
questions = {}
for filename in os.listdir(directory):
    if filename.endswith(".json"):
        with open(os.path.join(directory, filename)) as f:
            questions[filename] = json.load(f)

In [10]:
import dill
dill.dump(questions, open("questions.pkl", "wb"))


  dill.dump(questions, open("questions.pkl", "wb"))


In [4]:
questions

{'MCQ_1hop.json': [{'question': 'Which of the following binds to the drug Leucovorin? 1. CAD 2. PDS5B 3. SEL1L 4. ABCC2 5. RMI1',
   'answer': '4'},
  {'question': 'Which of the following binds to the drug Chlormerodrin? 1. PDS5A 2. RMI1 3. CAD 4. PDS5B 5. SLC12A1',
   'answer': '5'},
  {'question': 'Which of the following binds to the drug Papaverine? 1. SEL1L 2. CAD 3. PDS5B 4. PDS5A 5. PDE4B',
   'answer': '5'},
  {'question': 'Which of the following binds to the drug Ethchlorvynol? 1. RMI1 2. GABRB3 3. SEL1L 4. PDS5B 5. CAD',
   'answer': '2'},
  {'question': 'Which of the following binds to the drug Methimazole? 1. PDS5B 2. PDS5A 3. CYP3A4 4. RMI1 5. SEL1L',
   'answer': '3'},
  {'question': 'Which of the following binds to the drug Amoxapine? 1. RMI1 2. SEL1L 3. CAD 4. PDS5A 5. HTR1A',
   'answer': '5'},
  {'question': 'Which of the following binds to the drug Amobarbital? 1. PDS5B 2. RMI1 3. GABRA5 4. PDS5A 5. CAD',
   'answer': '3'},
  {'question': 'Which of the following binds

In [8]:
import dill

dill.dump(operations_log, open("operations_log_io.pkl", "wb"))

  dill.dump(operations_log, open("operations_log_io.pkl", "wb"))


In [11]:

output = []
for operation in operations_log['MCQ_1hop.json']['Which of the following binds to the drug Leucovorin? 1. CAD 2. PDS5B 3. SEL1L 4. ABCC2 5. RMI1']:
    operation_serialized = {
        "operation": operation.operation_type.name,
        "thoughts": [thought.state for thought in operation.get_thoughts()],
    }
    if any([thought.scored for thought in operation.get_thoughts()]):
        operation_serialized["scored"] = [
            thought.scored for thought in operation.get_thoughts()
        ]
        operation_serialized["scores"] = [
            thought.score for thought in operation.get_thoughts()
        ]
    if any([thought.validated for thought in operation.get_thoughts()]):
        operation_serialized["validated"] = [
            thought.validated for thought in operation.get_thoughts()
        ]
        operation_serialized["validity"] = [
            thought.valid for thought in operation.get_thoughts()
        ]
    if any(
        [
            thought.compared_to_ground_truth
            for thought in operation.get_thoughts()
        ]
    ):
        operation_serialized["compared_to_ground_truth"] = [
            thought.compared_to_ground_truth
            for thought in operation.get_thoughts()
        ]
        operation_serialized["problem_solved"] = [
            thought.solved for thought in operation.get_thoughts()
        ]
    output.append(operation_serialized)

In [13]:
question["question"]

'List the genes which are over-expressed in testis'

In [14]:
from graph_of_thoughts import controller, language_models, operations

from graph_of_thoughts.vector_db.weaviate import WeaviateClient
import logging
import dill
import time
with open("questions.pkl", "rb") as f:
    questions = dill.load(f)

# Problem input

# operations_log = {}
with open("operations_log_io.pkl", "rb") as f:
    operations_log = dill.load(f)
    


for json_file,values in questions.items():
    print(json_file)
    if json_file.startswith("MCQ"):
      question_type = "multiple choice"
    elif json_file.startswith("True_or_False"):
      question_type = "true/false"
    elif json_file.startswith("OpenEnded_1hop"):
      question_type = "list"
    elif json_file.startswith("OpenEnded_2hop"):
      question_type = "list"
    elif json_file.startswith("OpenEnded_genes"):
      question_type = "open ended"
    
    if json_file not in operations_log:
      operations_log[json_file] = {}
    for question in values:
        print('question:', question["question"])
        print('answer:', question["answer"])
        
        if question["question"] in operations_log[json_file]:
          # print("already done")
          continue

        def io() -> operations.GraphOfOperations:
          """
          Generates the Graph of Operations for the IO method.

          :return: Graph of Operations
          :rtype: GraphOfOperations
          """
          operations_graph = operations.GraphOfOperations()

          operations_graph.append_operation(operations.Generate(1, 1))
          operations_graph.append_operation(operations.Score(1, False))
          # operations_graph.append_operation(operations.GroundTruth())

          return operations_graph
        # Retrieve the Graph of Operations
        io = io()

        config_file = "config.json"
        config_file = "config_v1.json"

        # Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key)
        lm = language_models.AzureGPT(config_file, model_name="azuregpt")
        vdb = WeaviateClient(config_file)
        
        # Create the Controller
        try:
          ctrl = controller.Controller(
            lm, 
            io, 
            ALZKBPrompter(vdb,lm), 
            ALZKBParser(),
            # The following dictionary is used to configure the initial thought state
            {
              "question": question["question"],
              "ground_truth" : question["answer"],
              "question_type": question_type,
              "current": "",
              "phase": 0,
              "method": "io"
              # "method" : "got"
            }
          )
          
          # ctrl.logger.setLevel(logging.DEBUG)

          # Run the Controller and generate the output graph
          ctrl.run()
        except Exception as e:
          print(e)
        # ctrl.output_graph("output_got.json")
        operations_log[json_file][question["question"]] = ctrl.graph.operations
        
        # delete the controller to free up memory
        del ctrl
        # time.sleep(1)
        #break
  #break
    



MCQ_1hop.json
question: Which of the following binds to the drug Leucovorin? 1. CAD 2. PDS5B 3. SEL1L 4. ABCC2 5. RMI1
answer: 4
question: Which of the following binds to the drug Chlormerodrin? 1. PDS5A 2. RMI1 3. CAD 4. PDS5B 5. SLC12A1
answer: 5
question: Which of the following binds to the drug Papaverine? 1. SEL1L 2. CAD 3. PDS5B 4. PDS5A 5. PDE4B
answer: 5
question: Which of the following binds to the drug Ethchlorvynol? 1. RMI1 2. GABRB3 3. SEL1L 4. PDS5B 5. CAD
answer: 2
question: Which of the following binds to the drug Methimazole? 1. PDS5B 2. PDS5A 3. CYP3A4 4. RMI1 5. SEL1L
answer: 3
question: Which of the following binds to the drug Amoxapine? 1. RMI1 2. SEL1L 3. CAD 4. PDS5A 5. HTR1A
answer: 5
question: Which of the following binds to the drug Amobarbital? 1. PDS5B 2. RMI1 3. GABRA5 4. PDS5A 5. CAD
answer: 3
question: Which of the following binds to the drug Doxazosin? 1. PDS5B 2. SEL1L 3. CAD 4. PDS5A 5. KCNH7
answer: 5
question: Which of the following binds to the drug 

INFO:backoff:Backing off chat(...) for 0.9s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 22927 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 0.0s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 22927 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 0.2s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 22927 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'message

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 22927 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are over-expressed in nipple
answer: NUSAP1, KBTBD4, RBMS1, ADORA2B, KLHL28, GGA3, HLA-C, DCD, ARPC2, EPHX3, SPSB3, TSPAN14, ITGA11, ACAD9, USP39, TMTC3, FOXK2, CDON, ASCL2, PPIF, GNA11, MYLK, RBMX2, WFDC5, PTER, RAB11FIP1, CBX4, CISH, SLC25A5, ANKRD35, GNB4, POU2F2, FKBP14, DYNC2I2, SERPINH1, GPATCH2L, C5orf46, TSHZ3, THAP4, APOE, OGDH, LOXL1, MYEOV, SLC35E1, MICALL1, NFX1, TSPAN5, MAF1, PRELP, CPED1, ZBTB7B, NLRP1, TPM2, NSFL1C, SFXN3, ANO1, FBXO17, LRRC1, PNPLA3, MAP3K1, PEAK1, COL1A2, NIPAL4, EFEMP2, TRIB3, PLEKHA2, CSNK2A1, PWP1, PUF60, NABP2, KATNB1, COL6A2, BAZ2A, PM20D1, EIF4A3, CCL28, SLCO3A1, SKI, SINHCAF, C1QTNF12, TSPO, COL18A1, METTL16, TCF19, DHRS1, ATPAF2, NFAT5, CCDC5

INFO:backoff:Backing off chat(...) for 0.7s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, you requested 16468 tokens (15268 in the messages, 1200 in the completion). Please reduce the length of the messages or completion.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})


parse_generate_answer text: ['GCN1\nGTF3C1\nIFT122\nFIG4\nGTF2F1\nPOT1\nGET1\nGTF3C5\nPTPRA\nVIPAS39\nPROX1\nIFT140\nGTF2F2\nIFT56\nIFT27\nC1orf56\nMAT2A\nGTF2A2\nPAIP1\nTPD52\nPPP2R3C\nC1orf115\nGPR158\nDNAH5\nIFT172\nHID1\nIFT22\nIFT80\nPABPC1L\nPIP5K1A\nPANK4\nC22orf15\nPPP1R16A\nHTRA1\nGLG1\nIFT88\nPRELID3B\nPER1\nPTPRN\nCEP104\nH1-10\nTCTN1\nIFT81\nC1orf53\nHAPLN1\nPPIP5K1\nPTP4A3\nPRR14\nSUPT16H\nPPP2R2D\nHARS1\nGPR50\nMR1\nGPR19\nDDX42\nSLC25A22\nGHRHR\nIFT52\nIFT57\nPTPRG\nIFT46\nPPIP5K2\nC1orf122\nCIB1\nSLC35G1\nVPS9D1\nTPRG1L\nSLC25A14\nGFPT1\nFGFR1\nSLC25A45\nUSP22\nNAT14\nCOPG1\nMRPL49\nPTN\nPTBP1\nPPP2R2C\nHMGN5\nNAP1L5\nFKBP14\nHM13\nGATAD2A\nSLC22A14\nTEX261\nSLC46A1\nP4HA2\nTRAPPC14\nGPR173\nH2BC21\nMRPL23\nPLAG1\nHIC2\nMAU2\nPLEKHG4B\nCITED1\nTPD52L2\nC1orf50\nSLC6A15\nPREPL\nN4BP2L1\nPTPRS\nPPP1R21\nPITX1\nDNAH7\nPOU2F1\nPTPN13\nSUPT5H\nACTR3B\nVARS1\nC14orf93\nTSC22D1\nSUPT6H\nCEP290\nSLC25A39\nP4HB\nLDOC1\nHINT1\nP3H4\nVGF\nACTL6B\nP3H1\nPPP2R2B\nCTTN\nCEP44\nACTR6\

INFO:backoff:Backing off chat(...) for 1.7s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, you requested 16468 tokens (15268 in the messages, 1200 in the completion). Please reduce the length of the messages or completion.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 3.5s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, you requested 16468 tokens (15268 in the messages, 1200 in the completion). Please reduce the length of the messages or completion.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 1.7s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, you requested 16468 tokens (15268 

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, you requested 16468 tokens (15268 in the messages, 1200 in the completion). Please reduce the length of the messages or completion.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are over-expressed in mammalian vulva
answer: SBSN, SS18, PPP4C, ERAP2, ITPR2, GNAI1, PCOLCE, DDR1, RBBP9, GSTK1, BCL3, LMNB1, CRYBG3, S100A14, IGFBP5, SCRIB, GORASP2, HOXC10, PPP2R2C, BAG3, TMPRSS13, TFAP2A, TALDO1, ID1, FAM210B, CITED4, ALKBH7, ISG20L2, HDGF, EIF3I, RECK, PRPF4B, SAP30L, OPN3, L3MBTL2, PTBP1, KATNA1, DNAJB6, OGT, GPR87, MTERF4, CENPT, TES, PDIK1L, AAR2, MED14, SERPINB7, SDCBP2, SERTAD2, TXNL4B, PPP2R3A, HLA-G, SCEL, SURF4, TTC39B, NXN, ANTXR1, SPSB2, SHISA5, TM9SF4, BCAP31, PPP2R5C, WDR26, KRT16, COMMD5, GPR107, IL17RC, SF3B5, ADM, MAN1A1, HOXC11, TACSTD2, C1QBP, TAGAP, EPHX3, CNOT2, NCOA3, SMAD2, RABGGTA, PEX13, IN

INFO:backoff:Backing off chat(...) for 0.6s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 21751 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})


parse_generate_answer text: ['LINC02693\nCH25H\nLOC101929552\nCEP290\nCXXC5\nC16orf89\nC22orf39\nC1orf94\nC16orf92\nC1orf146\nC22orf23\nC1orf185\nLOC100287290\nFAM149A\nFAM98B\nLINC03040\nC1orf198\nBEX5\nLOC100505555\nFAM98A\nC1orf115\nC1orf159\nFAM89B\nCXXC4\nC16orf96\nCEP19\nFAM104B\nDDX51\nH2BC18\nING4\nDNAH11\nC1orf216\nC20orf96\nGET1\nC16orf74\nC15orf62\nFAM98C\nC22orf46P\nC1orf87\nTEX44\nFAM107B\nDNAH5\nBRF1\nFAM219A\nTEX29\nFAM153A\nUSP54\nCT55\nSERINC5\nCEP78\nMED29\nC2orf73\nC1orf141\nFAM107A\nDEUP1\nNAT14\nFAM181A\nUSP46\nENC1\nVSX1\nUSP43\nTEX35\nBEX4\nING1\nCEP97\nC17orf107\nTSEN15\nCEP15\nC16orf46\nFAM47E\nUSP49\nPROCA1\nCEP85\nC2orf49\nFAM199X\nGTF3C6\nFAM217B\nMARK4\nC19orf25\nC1orf122\nOR2L13\nFAM218A\nCAND2\nBEX1\nFAM43B\nIFT27\nCEP41\nFAM221A\nFAM78B\nTPD52\nCOL25A1\nUSP11\nFAM229B\nLIN28B\nCREB5\nPRELID3A\nUSP35\nBEX3\nSERINC3\nC2orf15\nTEX26\nZNF584\nC2orf76\nDDX47\nRAD54B\nTPD52L1\nUPF3A\nFAM229A\nFBXL21P\nFAM86B1\nTRIM46\nPREX1\nTRIM59\nTEX15\nTRIM45\nFAM13C\nFAM1

INFO:backoff:Backing off chat(...) for 1.5s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 21751 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 1.5s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 21751 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 5.8s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 21751 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'message

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 21751 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are over-expressed in blood
answer: RASSF7, NFE2, PPIB, SF3B1, RIPK1, CHD1, SEMA4D, TRIOBP, CAPG, PAK1, SOCS3, GBP4, GZMB, WDR48, HNRNPH2, RAB2B, CDKN2D, CYRIA, PRKAR1A, KDM6B, LRRC25, ZNF200, SLC22A1, MADD, PTPRN2, TCF7, SETDB2, TIGD3, ZNF74, ST3GAL2, LETM2, S100A4, ATG7, DHRS7, TGOLN2, ZFP82, ZDHHC17, RAB35, CCNI, JAKMIP1, SPCS3, KCNN4, TMCC1, MX2, SPOPL, LSMEM1, PRELID1, C11orf68, NCSTN, ELK3, ALOX12, TECPR1, FAM8A1, GPR65, SMPD2, CMTM3, PCNA, DNAJC4, GIMAP2, TDP2, ASXL2, ZNF777, NPL, PRKCZ, CCDC167, KLF7, GON4L, BICDL1, PCSK7, SIAH2, TMEM156, OSBPL3, TSPAN33, WDR4, ETV3, FBXO42, IDS, N4BP2, BST1, MAP3K8, PTPN7, ANP32E, CHRNG, TMEM40, TRIM10, HAP1, OGA, SORL1, SLC25A39, MAP3K14, P

INFO:backoff:Backing off chat(...) for 0.8s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 20504 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})


parse_generate_answer text: ['C14orf119\nLINC02693\nC14orf93\nLINC03040\nGEN1\nCXXC1\nING5\nUNC119\nDDX21\nSUPT4H1\nCCL28\nC2orf15\nSET\nMED25\nCEP295\nLY6G5C\nCIB1\nDDX59\nCDC14A\nDDX23\nH2BC5\nDDX17\nMED14\nBIRC2\nUNC93B1\nDDX49\nCEP104\nING1\nSUPT20H\nPRELID1\nDDX46\nNUP58\nNUP153\nH2BC21\nCDC25B\nNUP93\nC21orf58\nBECN1\nDDX20\nC15orf39\nLY96\nMED11\nC1orf56\nCEP95\nBIRC3\nPROK2\nDDX41\nN4BP1\nUNC13D\nDDX28\nDDX39A\nCA13\nGNA15\nNUP98\nMED23\nTSEN54\nC17orf99\nDDX39B\nMED18\nH1-4\nMIS18BP1\nRAD21\nF13A1\nCYTH4\nMED26\nBICD1\nMED29\nACTR5\nBICDL1\nFAM118A\nLY75\nSTAT5B\nDDX27\nH4C8\nCEP152\nCD244\nPREX1\nSETD1B\nDDX3X\nDDX54\nLOC400499\nCCT6A\nFAM104A\nDDX56\nRAD9B\nZBED5\nDEGS1\nCTC1\nREC8\nMED22\nC2orf88\nCARS1\nGTF3C2\nDDX18\nHM13\nCD274\nN4BP2\nCEP85\nCYTH1\nGATC\nHS1BP3\nRELA\nDDX3Y\nTBL1X\nMAD1L1\nCASP5\nN4BP2L1\nCD226\nCIR1\nMYBL1\nCEP19\nC1orf43\nCBX5\nC21orf91\nCELF1\nVPREB3\nVPS26C\nEXOSC5\nNUP214\nCELF2\nCDC123\nCEP41\nC1orf162\nCDC34\nHENMT1\nVPS13C\nCCDC152\nCD74\nCEP164

INFO:backoff:Backing off chat(...) for 0.8s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 20504 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 0.5s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 20504 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 3.4s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 20504 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'message

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 20504 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are over-expressed in adipose tissue
answer: GALNT2, COPA, TTLL12, ABCA12, TTC3, CCDC149, HMG20B, CYP26B1, VSIG4, ALG9, RELA, FGF7, MYH9, FAM171B, EEF1AKMT3, COL6A3, WFDC5, EPRS1, CD81, TGFB1I1, PTGIS, BLTP1, NRIP1, YAP1, TPSD1, SNX12, SCARA5, EPAS1, CTHRC1, ZSCAN2, ANXA2, DNMBP, PNMA8B, SOD3, PPP1R1A, DTX1, ETV5, REEP2, PRKAB1, ENTPD3, ICMT, CYB5A, DCBLD1, RHOJ, ACER1, SH2D4A, ARHGEF11, MMP11, TBCD, GNS, WIZ, DLD, PGR, DVL3, TMEM140, CSF1, TMTC1, NUCKS1, CFAP97, ARHGAP24, SRPX, HOXD8, TEAD1, CDH5, FOLR2, KCTD12, EGFLAM, FGFR1, CTSL, RO60, ARL6IP1, LIMCH1, STBD1, SLC7A2, MMP28, DPM3, LORICRIN, ACSL5, OLFM1, WWC2, HARS1, USP13, YWHAE, CTSC, ZFHX4, OR51E1, HEG1, PRICKLE2, FAM180A, MARK

INFO:backoff:Backing off chat(...) for 0.7s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 23244 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})


parse_generate_answer text: ['GEN1\nC14orf93\nC1orf159\nC1orf54\nLINC01619\nCH25H\nC1orf162\nC1orf115\nMAK16\nABCC1\nGPR152\nH2AC18\nH2AC19\nHEG1\nIFT57\nFAM151A\nC2orf15\nC14orf180\nDDX21\nLOC400499\nGPR146\nHRCT1\nSUPT6H\nC16orf78\nHID1\nC5orf52\nC15orf62\nH2BC5\nC16orf74\nC15orf39\nC5orf58\nC16orf89\nH1-4\nCOL26A1\nCEP295\nGUK1\nC2orf16\nMRE11\nDDX42\nHACL1\nIFT52\nHIC1\nCEP152\nC1orf131\nADGRE1\nC16orf54\nC2orf81\nCOL14A1\nCASP5\nPRELID1\nS100A16\nGPR176\nMRPL58\nTRAPPC14\nBICC1\nGPR183\nCXCL14\nENTR1\nEN1\nGPR156\nDOK1\nDNAH14\nCOL27A1\nCTHRC1\nREC114\nGINS1\nHMGN1\nHARS1\nCASP1\nVSIG1\nE2F5\nE4F1\nADGRA1\nC11orf96\nLDOC1\nGLDN\nGRHL1\nATAD5\nSTAT5A\nADGRE5\nMED15\nC6orf141\nHIC2\nACTG2\nMRPL28\nCD151\nIFT43\nGPR132\nE2F4\nZNF296\nGPR174\nCOL23A1\nGNA14\nTANC1\nBECN1\nH1-7\nZBED1\nHAGHL\nADGRG5\nCCL28\nHAPLN3\nMRPL54\nNUP107\nGAR1\nCA11\nHSF5\nCOLEC11\nSTEAP1B\nH2AC20\nLOC100505555\nFBXL22\nADPRH\nMED22\nFAM107A\nMRPL16\nC1orf174\nFAM117A\nDNAH1\nSUPV3L1\nMRPL52\nTAL1\nOR51E1\nZNF

INFO:backoff:Backing off chat(...) for 1.3s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 23244 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 1.2s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 23244 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 6.2s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 23244 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'message

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 23244 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are over-expressed in smooth muscle tissue
answer: JPT1, ZFPM2, ABCA1, RBFOX2, PAMR1, GBE1, IQGAP1, ARMCX2, PSMA3, FBL, PPIB, HMGA1, GALNT2, PFKP, TMEM248, CD47, PKD2, ABCF2, ASPH, SLC1A5, CALR, PSMB1, LRRC17, IGF2BP2, SF3B3, TCEAL9, LMNA, NAP1L1, NPM1, ITGB1, COL6A3, MARS1, PPP2R5C, NME7, TP53I3, PRPF31, SNX3, SMPD1, DPYSL3, NF2, ADM, IGF2BP3, GLT8D1, C3, RPS24, RRBP1, ANXA11, TIMM17A, NID1, HSD11B1, PSMC4, MYH10, CAPZA1, CPD, LRP12, TGFB1I1, NRP1, CHP1, LDHB, HSPH1, CCT5, MPST, RPS6KA2, CAPRIN1, PRSS3, DNAJC10, BCAT1, PXN, PLA2G4A, GYS1, LTBP1, DRAM1, NUCB2, CCL2, NFKBIA, SND1, RRAS2, SLC16A3, NACA, CD200, ANXA5, RARS1, AMIGO2, GMPPA, ANXA2, POLR2B, EMC7, SRSF9, BTF3, RPL35A, PTTG1

INFO:backoff:Backing off chat(...) for 0.1s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 18189 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})


parse_generate_answer text: ['DDX47\nUSP47\nMEF2D\nZBED5\nZNF292\nHEG1\nZNF146\nMEF2C\nCXXC5\nING5\nINPP5D\nEEF1D\nZXDB\nFXYD5\nLOC102723728\nREEP5\nZNF621\nSUPT7L\nH2BC5\nUSP22\nDDX17\nFXYD7\nEEF2K\nSERINC1\nZNF785\nUSP45\nLY6E\nZNF316\nZNF786\nPEX26\nEIF2B5\nZNF121\nZBED1\nBECN1\nDDX19A\nEIF5A\nCCL25\nMEF2A\nZNF619\nCYTH4\nZNF408\nDDX54\nZNF106\nCYTH1\nCTCF\nMXI1\nCCL19\nSETD5\nGIMAP1\nHIGD2A\nSP1\nZNF799\nFAM89A\nTANC1\nZNF25\nZNF491\nZEB2\nMAT2B\nH1-10\nZNF772\nHTR3C\nEPHA2\nFAM98C\nZNF484\nZNF283\nZNF664\nUSP28\nREEP3\nTEP1\nCCL28\nPRELID1\nCASP8\nPPIF\nCITED2\nPEX11A\nCTIF\nMED14\nZFYVE26\nEPN1\nC1orf115\nSUMF2\nCTSA\nLIN7C\nFAM151A\nLY75\nZNF841\nSTAT1\nXBP1\nZEB1\nSIRT5\nEEF1B2\nACTR1B\nINSIG2\nZNF169\nSFXN5\nFXYD1\nZNF317\nZFP36\nCCL11\nCCL5\nFAM107A\nZNF154\nDEGS1\nGATC\nZNF620\nZNF480\nZNF862\nZNF445\nDUSP26\nGIMAP1-GIMAP5\nEIF1AX\nC22orf39\nDIP2C\nZBED10P\nSERPINA11\nCEBPA\nHSF4\nMED11\nMEGF9\nPEX19\nSET\nCTNNA1\nS100A14\nUSP13\nGPR146\nVSIG4\nCTBP1\nZNF699\nSUMF1\nZNF629\n

INFO:backoff:Backing off chat(...) for 1.7s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 18189 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 1.5s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 18189 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 0.0s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 18189 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'message

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 18189 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are under-expressed in nipple
answer: SLC25A31, AKR1B1, KIF2C, WT1, PPP1R21, RNF148, LNP1, ELMO1, FAM228B, SLCO6A1, HDAC2, PRTFDC1, PSD2, RPL7, RASGEF1A, CCP110, PCDH10, SETX, CDC5L, ZYG11B, SEC31B, TRMT13, ASPHD1, POLB, CRELD1, IFIT3, AHSA1, ARID4B, GCA, ITFG1, RDH16, EBF2, IQCF2, LRRC8D, ELMO2, TCP1, AP3D1, CNTN2, CALM1, MYOM1, CLDN18, DPCD, KANK3, LARS2, LRRC40, CIMIP4, TMEM130, TEX38, PFDN6, HMBOX1, KCMF1, FTH1, IFI44, UBLCP1, CARD16, MYL10, TFR2, UHRF2, TAGLN3, GRAMD1B, TMED5, FAM24A, NRXN2, PLA2G4C, RTCA, MFSD4A, CDO1, GPD2, PIGH, KCTD7, TSSK2, OLIG2, SFPQ, FAAH, DHX29, ADCY1, BLTP3B, RNF112, ZBTB18, SSH2, ATP2B1, IL17C, NIPAL3, HGD, ATCAY, CLN8, SERP2, MROH2B, ITM2C, SCN2B, KI

INFO:backoff:Backing off chat(...) for 0.5s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19400 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})


parse_generate_answer text: ['MED26\nMED15\nMED31\nMED1\nMED23\nLY75\nMED8\nMIB1\nMAB21L4\nMED13L\nREV1\nCT62\nCXXC1\nLY96\nMED24\nINPP5A\nMYO5B\nCT83\nMRPL49\nCOL28A1\nMRPL58\nMRPL39\nREG1A\nMKNK2\nMYO5C\nCTHRC1\nH1-7\nNMRK1\nMED12\nMRPL57\nMKNK1\nMRPL37\nMYO1C\nN4BP2L2\nCIB1\nMYO1B\nMRPL35\nMYO18B\nLOC100505555\nMRPL22\nN4BP1\nH2BC5\nMYL12B\nMRPL14\nMRPL44\nMYO15B\nMRPL32\nCIB4\nMRPL54\nIFT122\nMRE11\nMS4A5\nCTNNB1\nMR1\nF13A1\nIFT56\nMRPL16\nINPP5K\nMYO1G\nC1orf115\nPTPN21\nMRPL36\nCOL27A1\nMS4A14\nMRPL13\nCOL21A1\nMATN2\nMRPL23\nTRAPPC11\nCTNNAL1\nDEFB119\nGNA15\nMRPL17\nMKRN1\nDDX55\nGPR146\nG3BP1\nMRC2\nMYO9A\nUSP44\nMRPL33\nHEMK1\nNUP58\nC1orf56\nGNA11\nS100A14\nMID1\nMS4A7\nACTC1\nMRPS18B\nGCN1\nPTPN14\nFAM209B\nREC114\nDEF6\nUBP1\nEN2\nMOV10L1\nCOL14A1\nINO80C\nMRPL11\nOBSL1\nHNRNPAB\nMYL9\nMYL2\nMXRA5\nING1\nPHLDB2\nUNC45A\nCTNND1\nACTL9\nMBD3L1\nC1orf116\nTENT5B\nRAP2C\nPABPC1L\nLDB1\nH1-10\nMRPS18A\nMS4A6A\nP3H1\nINPP4B\nUNC45B\nS100A16\nACTL6A\nMLEC\nCA13\nACACB\nP4HA2\nPP

INFO:backoff:Backing off chat(...) for 1.5s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19400 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 2.0s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19400 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 5.6s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19400 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'message

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19400 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are under-expressed in midbrain
answer: IL20RB, CENPS-CORT, ZNF551, FSD2, MCM6, RNF216, MELK, PPP6R3, ELP5, AP1M2, PDCD11, THRB, UBTF, SGSM3, N4BP2L1, PPL, GALNT3, ABHD2, PLEKHN1, SNAI2, TARBP2, TMEM54, SELE, LYZL4, PKP1, RPL31, SMLR1, GID4, SLC35E4, DSPP, GRHL3, FBXL12, AKR1B10, PGM2, PLN, IL18, VEGFC, MARCHF7, TMOD1, TBC1D22B, PPP2R5E, TMEM68, EIF3K, LOX, ZC3H7B, RCAN3, CCDC168, USP2, SOCS2, ZBTB43, RNF32, TNFSF13B, NFKBIZ, RPL3, NME4, SSMEM1, DAB2, VEZT, PDIA4, ZNF581, DNAJB12, RERE, HECTD1, VWF, ADAMTS5, COBLL1, G2E3, SBSN, SLK, EGR1, PITPNM3, COL15A1, HSPA5, ZNF438, PPP1R13L, ATP8B1, TSPAN1, ZBTB48, RPS3, IYD, PLA2G4A, OBSCN, SHE, ADAM33, MMP8, SSR1, TREM1, SMPDL3A, UBAP2, PPCDC

INFO:backoff:Backing off chat(...) for 0.2s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19114 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 0.8s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19114 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 2.1s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19114 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'message

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19114 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are under-expressed in bronchus
answer: SLITRK1, KIF3A, ADAM22, ANGPTL3, SF3B1, BRINP1, TEX46, SNAP91, CAMSAP2, AK5, GLRX2, TSC1, LSAMP, RTN4, GAS7, TRIM23, MIR9-1HG, INSYN1, ZNF85, SLC35F1, THRA, OGDHL, AZU1, DYNC1I1, PSRC1, ANGPTL1, TRUB1, SEC61A2, PIH1D2, PAQR8, CAP2, KLHL10, PTN, PCNT, GSG1, PDHA2, PGLYRP2, NES, CCDC88A, MAPK1, CREBBP, SLC22A16, NFE2, CCDC96, GPR158, FBXW11, TSPAN16, UBE3A, HEPACAM, CNTNAP4, SCOC, GABRB2, CCDC54, KCNJ10, PLPPR1, PCDH9, OSBPL11, FAM168B, GDAP1, POMGNT1, PEG10, SNCA, DEFB119, HIPK2, RNF133, MBD3L1, GUCY1A2, USP13, MCMDC2, SLCO1B1, MTPAP, MAGEH1, CNTN1, ACRV1, GPATCH2L, B3GAT1, CPM, KIF5C, AGPAT4, CGRRF1, KCTD2, POLR3F, CYFIP2, MTFR1L, MAOB, COX20, 

INFO:backoff:Backing off chat(...) for 0.9s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 16732 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})


parse_generate_answer text: ['UNC5C\nPRC1\nHEG1\nPAG1\nGPR146\nPPP2R5A\nPROCR\nUNC5B\nPPP2R5C\nLINC02693\nPTPN21\nDEFB119\nCT62\nACTG2\nUSP39\nPROK1\nPCTP\nINPP5D\nGPR161\nPTPRB\nACTC1\nPTP4A1\nPRELID1\nPFN1\nPANK1\nHAPLN2\nACTR1B\nCAB39L\nMRE11\nPPP2R5B\nUSP46\nSUPT4H1\nPPIF\nMRPL16\nMRPL58\nUSP54\nPPP1R14C\nPTPN5\nC1orf198\nP4HA1\nMRPL52\nACTR2\nATG16L1\nCT83\nC1orf216\nCAB39\nHPCA\nGPR87\nH1-2\nPTP4A2\nPTPN14\nPPP1R15A\nHGFAC\nGPRC5B\nPPP1R1A\nACTL9\nACTRT3\nPPP2R2A\nM6PR\nACTR3\nPAIP2B\nPTPRD\nINSIG1\nGLDN\nPROM1\nACTN1\nHGD\nPPP1R14A\nDNAH8\nPRR22\nTRARG1\nUNC45B\nCTNNA1\nGUK1\nMRPL37\nPI15\nATP6V1G3\nPLN\nPDPN\nPIGH\nMRPL35\nPPIA\nCTIF\nPUM2\nPARD6G\nGIMAP1-GIMAP5\nPRTN3\nC5orf47\nPPP1R15B\nDEF6\nIK\nGRHPR\nNAP1L1\nC1orf54\nC8G\nATG16L2\nATP6V1G2\nFAR1\nACTN2\nC16orf89\nSLC25A5\nDEFA4\nTRIAP1\nPATL1\nMATN2\nPABPC1\nPTPN11\nGPR155\nPIP4K2A\nPOU3F2\nUSP25\nPACC1\nPARD3\nMAB21L4\nMPRIP\nPAFAH1B1\nC1orf162\nPITPNC1\nPPIH\nSLC35A4\nGINS1\nSLC16A5\nINO80\nPTPRZ1\nPPP1R16B\nRAPGEF5\nGNA

INFO:backoff:Backing off chat(...) for 1.4s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 16732 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 3.3s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 16732 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 3.0s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 16732 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'message

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 16732 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are under-expressed in mammalian vulva
answer: NOSTRIN, HECTD4, ANK3, PPFIA1, MAN1C1, VGLL4, EPB41L2, RBBP6, C6orf136, H2AZ2, ZDHHC21, PAAF1, SERINC3, THSD7A, CEP15, ANKRD23, TMEM239, GNG3, VPS11, SLC4A8, PPM1H, ALDH3B1, ATP6V0E2, PADI1, KATNAL1, WDR19, PSME3IP1, PPIG, KCNMB4, DOCK5, PP2D1, CRISP2, HMGB4, UQCR11, NSD3, CARNS1, ZNF565, LYAR, NCOA2, RSF1, NDRG3, MYRIP, SRGN, ZYG11B, NALCN, LYRM2, AKAP3, CADPS, ZKSCAN1, ALKBH5, ATP8A1, SAMD4B, NTRK3, FAM149A, PLN, KAT6B, SLC12A2, UBE2H, SORL1, PER1, SEC22C, TAOK1, CDC34, HTATSF1, RNF150, PIKFYVE, PRRG1, TNK2, RAB31, CIR1, QNG1, GYPB, PATZ1, LIMS1, KIF3B, PRKCE, NXPE3, RIMKLB, CCDC93, PSMG4, HSD17B6, SNX22, ZFTA, CCDC83, ISG15, SART3, GP

INFO:backoff:Backing off chat(...) for 0.6s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19040 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})


parse_generate_answer text: ['GEN1\nMAK16\nUNC119B\nUNC93B1\nH2AC19\nMED26\nC14orf119\nUSP39\nREG4\nOR51E1\nLY96\nCEP55\nLY75\nUNC45B\nC14orf93\nMRE11\nUSP44\nMED22\nOR51E2\nINO80\nC14orf39\nDEFB119\nEP400\nCEP44\nBIRC5\nVPREB3\nMED11\nMED13\nUSP53\nEN1\nPRELID2\nREG1A\nC14orf28\nCDC25B\nDDX21\nREG1B\nMED18\nCITED4\nEBI3\nTRIM29\nPRR29\nUSP28\nC1orf56\nDEFB129\nCDC26\nDDX50\nGPR89B\nCEP164\nCDC25A\nZBED4\nTEX47\nACTR5\nPRELID1\nH4C8\nUBR5\nCCL19\nCWC15\nUNC13D\nCDC25C\nEEF1B2\nNUP35\nMRC1\nC16orf54\nH1-4\nUPF1\nIFT56\nINO80C\nTRARG1\nPRODH2\nHIGD2A\nCTR9\nDDX59\nCCL26\nCEP83\nMIX23\nPROK2\nNUP85\nUSP19\nNAT9\nN4BP1\nC1orf226\nINO80D\nMAT1A\nCDC45\nDDX19B\nCREB3L2\nCREBRF\nCASP14\nMED20\nMAB21L4\nCCL25\nNR5A1\nCCL28\nNUP37\nE2F8\nGNG5\nMRPL46\nN4BP2\nCREB3L4\nCWC22\nEFL1\nPROM2\nFAM104A\nUSP38\nCTHRC1\nGTF3C5\nC1orf116\nNUP43\nCEP135\nUSP36\nC15orf48\nC1orf162\nEXD3\nC14orf180\nDET1\nZNF784\nH1-3\nMRC2\nUSP15\nBRK1\nPLEKHG1\nNUCB2\nBIRC3\nEEF1D\nTSEN54\nLRRC28\nMRPL58\nC17orf58\nPROK1\n

INFO:backoff:Backing off chat(...) for 0.6s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19040 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 0.3s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19040 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 4.5s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19040 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'message

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 19040 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are under-expressed in blood
answer: MAMLD1, BMI1, MAPK11, FBXO31, MSH3, EBF1, IFT43, AIFM1, CFAP69, SLC25A11, ZNF512B, CCK, TMEM187, SLC24A3, TMEM97, SAT2, TSR2, VPS50, RABL6, KIF9, HDHD2, SLC25A13, LAMTOR3, SLC30A6, RAPGEF4, FADS3, MSH2, PTPN9, PPIC, PI4K2A, LZTS2, COX6B1, RBSN, XG, MRPL47, HEY1, RPS6, APOL4, DOCK7, DNAJA3, IDE, GMFB, FGFR2, FBXL17, ABHD15, SMURF1, SOCS4, OLFML2B, AP4M1, SLCO2B1, ADAMTS15, TSPAN11, NAB1, PCDH1, GPR176, MRPS35, RIN1, EPHX2, HSPB8, CRYBG2, OARD1, POLR2M, NRAS, SLC16A14, ANKRD46, STX1A, MFGE8, DNAJC14, IRGQ, ERGIC3, PPP2R3A, CPNE3, PTPRZ1, CDC16, CLU, IFRD2, ABLIM1, PPARG, LPAR1, KARS1, SLIT2, PDE9A, ZNF358, SEMA3F, GNAL, SCN4B, USP18, ANAPC13, CCDC59

INFO:backoff:Backing off chat(...) for 0.9s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 27822 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 1.4s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 27822 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 0.1s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 27822 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'message

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 27822 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are under-expressed in adipose tissue
answer: CIBAR2, PDX1, SIRT2, SPINT2, KIAA0753, BRINP1, TIAM2, SLC4A8, TP53TG3, DLG3, ZNF292, SRRT, TRIM17, POP1, UBASH3A, UBA7, ATG4D, LEFTY1, SLC4A1AP, MICAL2, ST6GAL1, LHFPL1, CD247, NKRF, NPY2R, PRUNE1, HORMAD1, NEUROG2, DPF3, CBX3, CAMK2G, SERP2, RHOF, ADAM30, LIN28B, BCL11B, TMEM82, BRD8, STK17A, H2BC5, TM4SF19, ELAPOR1, OCM, SPACDR, PCDH17, MBP, YY1AP1, CAPN13, FKBP11, PLEKHA6, DCC, ATXN1, PTPRZ1, BTN3A3, P2RY10, FAM24B, DNER, SERPINA11, PAX5, STARD4, CXCR6, DHRS7, ZNF771, PCDH10, DOCK3, SMG8, MGME1, CD47, MTERF1, AMMECR1, SCAMP5, SACM1L, MFSD2A, KIAA2012, EDAR, PRPF19, SEMA4D, SSPOP, ZNF674, ST8SIA1, DEFB118, ZFYVE27, IL1B, HS3ST3B1, ARAP2

INFO:backoff:Backing off chat(...) for 0.3s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 26730 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})


parse_generate_answer text: ['ADI1\nIFT122\nUNC119B\nPRADC1\nADAL\nHAP1\nUSP47\nIFT81\nIFT88\nUNC5D\nGPR153\nDEFB119\nREG1A\nIFT172\nTRIM29\nADPRHL1\nACTR1A\nAADAC\nGPR158\nADO\nGPR39\nGPR107\nLOC101929552\nADGRV1\nADPRM\nIFT22\nMRPL55\nMED25\nIFT140\nCASP9\nREG4\nGID8\nDNAH11\nICA1L\nIFT74\nING4\nCAB39\nFIG4\nUNC79\nATAD1\nEFL1\nADK\nMRPL35\nCREG1\nPROM1\nDNAH5\nUSP54\nGET1\nC1orf56\nLINC03040\nGPR87\nMRPL14\nUSP46\nC22orf39\nINPP5A\nLAD1\nACTL9\nERP44\nUSP37\nACTRT1\nMAT1A\nGADD45A\nACTA1\nH1-8\nUNC45B\nTIPRL\nC1orf216\nC1orf94\nTRAPPC11\nHABP4\nHMX2\nUSP53\nACTR3B\nGDNF\nDEFB129\nPRODH2\nGATC\nPROM2\nC1orf105\nMRPL36\nZNF415\nADCK2\nADIG\nCWC27\nACAD8\nPRODH\nHACD3\nHIGD1B\nMRPL44\nACTR8\nGPR108\nMRPL37\nACTC1\nDEFB105A\nDEFA5\nALAD\nTRIM55\nACADL\nCOL28A1\nHABP2\nHAT1\nUSP43\nAAGAB\nINPP5F\nZXDB\nTHEGL\nCA8\nATAD2\nH1-6\nABCC5\nHBP1\nINSIG1\nDEFB118\nCT47A8\nC1orf50\nHLF\nMRPL43\nGLG1\nDEFA4\nREG3A\nREG3G\nGCN1\nC1orf116\nPABPC1L2A\nGPRASP3\nCT47A7\nC1orf226\nC22orf31\nGID4\nUSP44\

INFO:backoff:Backing off chat(...) for 0.4s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 26730 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 2.0s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 26730 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}})
INFO:backoff:Backing off chat(...) for 3.0s (openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 26730 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'message

Error code: 400 - {'error': {'message': "This model's maximum context length is 16384 tokens. However, your messages resulted in 26730 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
question: List the genes which are under-expressed in smooth muscle tissue
answer: CASC3, MAN1C1, RASSF2, RIDA, BEX1, GABRB1, B4GAT1, R3HDM2, CAMKK2, FCN3, SLC48A1, NIBAN1, ADA2, ACSL1, PEG10, FKBP4, NXF1, CDH1, CRH, VSIG4, PSG11, EPB41L1, CIT, NDRG1, STXBP1, SCAP, DNM3, WDR82, TMEM176B, SH3TC1, OGT, C7, S100A9, P2RY13, C5AR1, DSP, MAST4, AMD1, NPTXR, GLUL, CA4, PSG5, TBXAS1, PAK3, GABBR1, PAQR6, SCG3, HOXC6, CYP19A1, CD48, PSD3, APOA1, CYRIA, TLN2, CHST15, TASOR, PRMT2, KRT14, CALM1, AQP1, THRA, OLFM1, FAM111A, NDFIP1, TSPOAP1, EPS15, PDE2A, RPS6KA1, MTMR9, DHCR24, BCL11B, LCP1, HLA-F, PPP2R5A, TERF2IP, LYST, SRRM2, CLASP2, PTPRD, OGDHL, ASXL2, SEZ6L, SLCO2B1, FAM13B, TM7SF2, PCSK6, FOXJ3, CFD, VCAM1, LEP, KAT6A,

In [58]:
from graph_of_thoughts import controller, language_models, operations

from graph_of_thoughts.vector_db.weaviate import WeaviateClient
import logging
import dill
import time
with open("questions.pkl", "rb") as f:
    questions = dill.load(f)

# Problem input

operations_log = {}
# with open("operations_log_cot.pkl", "rb") as f:
#     operations_log = dill.load(f)
    


for json_file,values in questions.items():
    print(json_file)
    if json_file.startswith("MCQ"):
      question_type = "multiple choice"
    elif json_file.startswith("True_or_False"):
      question_type = "true/false"
    elif json_file.startswith("OpenEnded_1hop"):
      question_type = "list"
    elif json_file.startswith("OpenEnded_2hop"):
      question_type = "list"
    elif json_file.startswith("OpenEnded_genes"):
      question_type = "open ended"
    
    if json_file not in operations_log:
      operations_log[json_file] = {}
    for question in values:
        print('question:', question["question"])
        print('answer:', question["answer"])
        
        if question["question"] in operations_log[json_file]:
          # print("already done")
          continue

        def cot() -> operations.GraphOfOperations:
          """
          Generates the Graph of Operations for the IO method.

          :return: Graph of Operations
          :rtype: GraphOfOperations
          """
          operations_graph = operations.GraphOfOperations()

          operations_graph.append_operation(operations.Generate(1, 1))
          operations_graph.append_operation(operations.Score(1, False))
          # operations_graph.append_operation(operations.GroundTruth())

          return operations_graph
        # Retrieve the Graph of Operations
        io = cot()

        config_file = "config.json"
        config_file = "config_v1.json"

        # Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key)
        lm = language_models.AzureGPT(config_file, model_name="azuregpt")
        vdb = WeaviateClient(config_file)
        
        # Create the Controller
        try:
          ctrl = controller.Controller(
            lm, 
            cot, 
            ALZKBPrompter(vdb,lm), 
            ALZKBParser(),
            # The following dictionary is used to configure the initial thought state
            {
              "question": question["question"],
              "ground_truth" : question["answer"],
              "question_type": question_type,
              "current": "",
              "phase": 0,
              "method": "cot"
              # "method" : "got"
            }
          )
          
          # ctrl.logger.setLevel(logging.DEBUG)

          # Run the Controller and generate the output graph
          ctrl.run()
        except Exception as e:
          print(e)
        # ctrl.output_graph("output_got.json")
        operations_log[json_file][question["question"]] = ctrl.graph.operations
        
        # delete the controller to free up memory
        del ctrl
        # time.sleep(1)
        #break
  #break
    



TypeError: 'dict_keys' object is not subscriptable

In [37]:
#show all the logs from ctrl logger which uses the logging module
import logging
logger = logging.getLogger("ctrl")
logger.setLevel(logging.DEBUG)
logger.debug("test")

# logger.handlers[0].stream.getvalue()


DEBUG:ctrl:test




In [11]:
vdb.client.query.get(vdb.db, ["knowledge"]).with_near_vector({'vector': embedded_question}).with_additional(["distance"]).with_limit(2000).do()


# (get_client().query
#         .get(class_name, properties))
#         .with_near_vector(near_vector)
#         .with_limit(limit)
#         .with_additional(additional)
#         .do()


{'errors': [{'locations': [{'column': 36, 'line': 1}],
   'message': 'Argument "nearVector" has invalid value {vector: [[-0.007626513484865427, -0.0017940843245014548, 0.04350820183753967, -0.03055901639163494, -0.02252204716205597, 0.03702036663889885, -0.016868364065885544, -0.0012710855808109045, -0.029685145244002342, -0.0009293158072978258, 0.0003525276551954448, 0.030453091487288475, -0.0022790166549384594, 0.03463708236813545, -0.005726505536586046, 0.005984694696962833, 0.04072770103812218, 0.023859335109591484, -0.004756640642881393, 0.003248881548643112, -0.0012719130609184504, 0.031009191647171974, 0.031009191647171974, -0.01877499371767044, 0.017119934782385826, 0.009625825099647045, 0.01096311304718256, -0.04276673495769501, -0.007255780044943094, -0.012042212300002575, 0.029049601405858994, -0.013220613822340965, 4.688472836278379e-05, -0.017464186996221542, 0.016431428492069244, -0.003429282922297716, 0.0018288405844941735, -0.002863252768293023, -0.010665202513337135, 0