## Bioinformatics & Omics | Answers

In [1]:
import os
import autogen
import argparse
import sys
import logging
import json
import time

### Configure the OpenAI Model and API Key

In [3]:
# Model selection. Swap in the commented pair below to run with GPT-4 Turbo.
gpt_model = "gpt-3.5-turbo"
model_tag = "3"

# gpt_model = "gpt-4-turbo"
# model_tag = "4"


# Shared LLM settings passed to the autogen agents created in this notebook.
# The API key is read from the environment; a missing OPENAI_API_KEY raises
# KeyError here rather than later inside an agent call.
llm_config = dict(
    model=gpt_model,
    api_key=os.environ["OPENAI_API_KEY"],
    max_tokens=2000,
)

### Answer Generator Class

In [4]:
class Generator(object):
    """Writer/reviewer agent pair that produces markdown answers for one topic.

    A "Writer" agent drafts an answer for every question of a topic and a
    "Reviewer" agent cleans each draft up; the reviewed answers are written to
    ``docs/<title>.md``. Both agents are ``autogen.AssistantAgent`` instances
    configured with the module-level ``llm_config``.

    Args:
        domain: Sector/domain name interpolated into the prompts.
        title: Topic title; also used to derive the output file name.
        detail: Context / key ideas for the topic, interpolated into the prompts.
        samples: Reference sample answers; stored as ``str(samples)``.
    """

    def __init__(self, domain, title, detail, samples):
        self.domain = domain
        self.title = title
        self.detail = detail
        self.samples = str(samples)
        # Not read by any method of this class; kept for external readers.
        self.nitems = 10

    @staticmethod
    def _reply_text(reply):
        """Normalize an agent reply to plain text.

        The code in this class concatenates and writes replies as strings, but
        an agent reply may also arrive as a dict carrying a ``"content"`` entry
        or as ``None`` (see autogen's ``generate_reply`` documentation).

        Returns:
            The reply text, or ``""`` when there is no usable content.
        """
        if reply is None:
            return ""
        if isinstance(reply, dict):
            content = reply.get("content")
            return "" if content is None else str(content)
        return str(reply)

    def create_writer_agent(self):
        """Create the "Writer" agent and store it as ``self.answer_writer``."""
        writer_intro = f"""
            You are a writer acting as the interviewee. 
            Write a detailed answer to the provided question related to {self.title} in the {self.domain} sector.
            Use the following context and key ideas to be included to get familiar with the topic: {self.detail}.
            The answer should be in rich and informative markdown format.

            Guidelines:
            1. Answer the main question comprehensively, including math equations and code snippets if necessary.
            2. Address each follow-up question with bullet points.
            3. Use proper markdown format for code snippets. 
            4 In markdown subtitles use:  ###, ####, **, ***, - can be used to organize the answer.
            5. Use $$ ... $$ for block math equations in markdown.
            6. Use $ ... $ for inline math equations within text.
            7. Include code snippets and math equations as needed.
            8. Ensure that the math equations are correctly formatted in markdown.
            9. Use bullet points or numbered lists to structure the answer.
            10. Use emojis, bold, italic, and colored text to highlight important points.

            Here is sample answer for reference: {self.samples}
            """

        self.answer_writer = autogen.AssistantAgent(
            name="Writer",
            description="Aanswer writer for provided question.",
            system_message=writer_intro,
            llm_config=llm_config,
        )

    def create_reviewer_agent(self):
        """Create the "Reviewer" agent and store it as ``self.answer_reviewer``."""
        review_intro = f"""
            You are a reviewer for the answer created by the answer writer. \
            You dont rewrite the answer but you will revise provided answer, make it well formatted and cleaned. \
            Do not add any text like reply is great, etc. as responce\
            You will be provided with a answer in markdown format created by the answer writer. \
            Your task is to review the answers and fix it if there is error and return revised one. \
            Make sure the answers are clear, concise and engaging. \
            
            Guidelines:
            1. Make sure the answer exists in the markdown format.
            2. Make sure the answer is well structured with proper headings and subheadings.
            3. Make sure code snippets and math equations are formatted properly in markdown.
            4. Make sure Main question is highlighted with ### and Follow-up questions are highlighted with ####.
            5. Make sure each of follow up questions highlighted with ####.
            6. Make sure each of the follow-up questions are answered with bullet points if required.
            7. Make sure $$ ... $$ is used for block math equations in markdown.
            8. Make sure  $ ... $ is used for inline math equations within text.
            9. Make sure code snippets is well formated.
            10. Ensure that the math equations are correctly formatted in markdown.
            11. Make sure bullet points or numbered lists is properly used to structure the answer.
            12. Make sure emojis, bold, italic, and colored text to highlight is well formated for important points.

            Here is sample answer for reference: {self.samples}
            """

        self.answer_reviewer = autogen.AssistantAgent(
            name="Reviewer",
            description="Review the answer created by the answer writer.",
            system_message=review_intro,
            llm_config=llm_config)

        return

    def answer_task(self):
        """Return the instruction text that is prepended to every question.

        The text interpolates ``self.title``, ``self.domain`` and
        ``self.detail``.
        """
        # `\\'` keeps a literal backslash in the prompt so the escape example
        # really shows an escaped apostrophe; a bare `\'` collapses to `'`.
        task = f"""
            Write a detailed answer to questions related to {self.title} in the {self.domain} sector.
            Familiarize yourself with the context provided here: {self.detail}. 
            Your answer should be rich in mathematical and programmatic content, 
            presented in an informative markdown format.

            Guidelines:
            1. Main Question: Provide a comprehensive answer with adequate math equations and code snippets if necessary.
            2. Follow-up Questions: Answer each follow-up question using bullet points.
            3. Code Snippets: Include code snippets as needed, formatted properly in markdown.
            4. Math Equations: Include math equations using the proper markdown format.
            - Use $$ ... $$ for block math equations.
            - Use $ ... $ for inline math equations within text.
            5. String Handling: Avoid words like "candidate's," "interviewer's," etc., as they can disrupt the string. Use escape characters if necessary (e.g., candidate\\'s).

            Formatting Tips:
            1. Structure: Use bullet points or numbered lists to structure your answer.
            2. Organization: Utilize subheadings to organize your content.
            3. Clarity: Ensure sections are clear and concise.
            4. Math in LaTeX: Format math equations in LaTeX, ensuring they render correctly in markdown.
            5. Code Blocks: Use code blocks for any code included in your answer.
            6. Emojis: Incorporate emojis to express ideas where appropriate.
            7. Tables: Create tables in markdown for comparisons or structured data.
            8. Visuals: Use images or workflow diagrams to enhance explanations.
            9. External Links: Include links to refer to external resources for additional context.
            10. Highlighting: Use bold and italic text to emphasize key points.
            11. Text Color: Apply colored text to highlight important information.
            12. Equation Appearance: Ensure math equations look good in markdown.
            13. LaTeX Accuracy: Verify that all LaTeX parts in the markdown are correct.
            14. Consistency: Check that subtitles and bullet points are formatted correctly.
            Word Limit: Maximum 1000 words..
            """

        return task

    def review_task(self):
        """Return the instruction text that is prepended to every draft answer."""
        task = f"""
            Review the answer provided to you. It may not be in the required format. \
            You dont rewrite the answer but you will revise provided answer, make it well formatted and cleaned. \
            Do not add any text like reply is great, etc. as responce\
            You will be provided with a answer in markdown format created by the answer writer. \
            Your task is to review the answers and fix it if there is error and return revised one. \
            Make sure the answers are clear, concise and engaging. \
            
            Guidelines:
            1. Make sure the answer exists in the markdown format.
            2. Make sure the answer is well structured with proper headings and subheadings.
            3. Make sure code snippets and math equations are formatted properly in markdown.
            4. Make sure Main question is highlighted with ### and Follow-up questions are highlighted with ####.
            5. Make sure each of follow up questions highlighted with ####.
            6. Make sure each of the follow-up questions are answered with bullet points if required.
            7. Make sure $$ ... $$ is used for block math equations in markdown.
            8. Make sure  $ ... $ is used for inline math equations within text.
            9. Make sure code snippets is well formated.
            10. Ensure that the math equations are correctly formatted in markdown.
            11. Make sure bullet points or numbered lists is properly used to structure the answer.
            12. Make sure emojis, bold, italic, and colored text to highlight is well formated for important points.
            """

        return task

    def format_question(self, question):
        """Render one question dict in two textual forms.

        Args:
            question: Mapping of section name to value. The keys
                ``"Main question"``, ``"Explanation"`` and
                ``"Follow-up questions"`` (an iterable of strings) are
                rendered into the markdown form; other keys only appear in
                the plain form.

        Returns:
            ``(quest, qtext)``: ``quest`` is a plain ``key: value`` listing of
            every entry (used in the writer prompt); ``qtext`` is the markdown
            rendering written to the output file.
        """
        quest = ""
        qtext = ""
        for key, value in question.items():
            quest += f"{key}: {value}\n"
            if key in ("Main question", "Explanation"):
                qtext += f"**{key}**: {value}\n\n"
            elif key == "Follow-up questions":
                qtext += f"**{key}**:\n\n"
                for number, follow_up in enumerate(value, start=1):
                    qtext += f"{number}. {follow_up}\n\n"
                qtext += "\n\n"
        return quest, qtext

    def generate_answer(self, qlist):
        """Ask the writer agent to answer every question in ``qlist``.

        Requires ``create_writer_agent()`` to have been called first.

        Args:
            qlist: Iterable of question dicts accepted by ``format_question``.

        Returns:
            A list of ``{"question": <markdown question>, "answer": <reply>}``
            dicts, one per question, in input order. The reply is stored
            exactly as returned by the agent.
        """
        pre_task = self.answer_task()

        REPLY = []

        for question in qlist:
            quest, qtext = self.format_question(question)

            task = str(pre_task) + "Here is the question you need to answer:\n"
            task += str(quest)

            feedback = self.answer_writer.generate_reply(messages=[{"content": task, "role": "user"}])

            REPLY.append({"question": qtext, "answer": feedback})

        return REPLY

    def review_answer(self, REPLY):
        """Have the reviewer agent revise each draft and write the result to disk.

        Requires ``create_reviewer_agent()`` to have been called first. The
        output file is ``docs/<title>.md`` where the title is lower-cased with
        spaces and hyphens replaced by underscores; an existing file is
        overwritten.

        Args:
            REPLY: List of ``{"question": ..., "answer": ...}`` dicts as
                returned by ``generate_answer``.
        """
        file_name = self.title.replace(" ", "_").replace("-", "_").lower()

        pre_task = self.review_task()

        # A fresh checkout may not have the output folder yet.
        os.makedirs("docs", exist_ok=True)

        # Explicit UTF-8: the prompts request emojis, which locale default
        # encodings (e.g. cp1252) cannot represent.
        with open("docs/" + file_name + ".md", "w", encoding="utf-8") as f:
            for item in REPLY:
                question = item["question"]
                # Replies are not guaranteed to be str; normalize before concatenating.
                answer = self._reply_text(item["answer"])

                task = str(pre_task) + "Here is the answer you need to revise: \n"
                task = task + answer

                feedback = self.answer_reviewer.generate_reply(messages=[{"content": task, "role": "user"}])
                reviewed = self._reply_text(feedback)

                f.write(f"## Question\n{question}\n\n## Answer\n{reviewed}\n\n")

        print(f"Generated {self.title} answers")
        print("-------------------------")
        return

## Generate Answers

In [5]:
# Load the sector catalogue. Later cells read the "titles", "domain" and
# "detail" entries of each item.
# JSON is UTF-8 by convention, so decode explicitly instead of relying on the
# platform's default encoding.
with open("sectors/sectors.json", "r", encoding="utf-8") as f:
    SECTORS = json.load(f)

In [6]:
# Show every topic title available in the sector catalogue.
[sector["titles"] for sector in SECTORS]

['Introduction to Bioinformatics',
 'Sequence Alignment',
 'Genome Assembly',
 'Gene Prediction',
 'Protein Structure Prediction',
 'Molecular Docking',
 'Phylogenetic Analysis',
 'Functional Genomics',
 'Transcriptomics',
 'Metagenomics',
 'Introduction to Genomics',
 'Whole Genome Sequencing',
 'Exome Sequencing',
 'Genome-Wide Association Studies',
 'Comparative Genomics',
 'Epigenomics',
 'Structural Genomics',
 'Functional Annotation',
 'Introduction to Omics',
 'Proteomics',
 'Metabolomics',
 'Lipidomics',
 'Glycomics',
 'Pharmacogenomics',
 'Nutrigenomics',
 'Epitranscriptomics',
 'Single-cell Omics',
 'Microbiomics',
 'Interactomics',
 'Data Integration',
 'Data Visualization',
 'Machine Learning in Bioinformatics',
 'Population Genomics',
 'Clinical Genomics',
 'Toxicogenomics',
 'Immunogenomics',
 'Systems Biology',
 'Cancer Genomics']

In [7]:
from examples.example_answers import examples as example_answers

In [8]:
from questions.introduction_to_bioinformatics import questions as introduction_to_bioinformatics
from questions.sequence_alignment import questions as sequence_alignment
from questions.genome_assembly import questions as genome_assembly
from questions.gene_prediction import questions as gene_prediction
from questions.protein_structure_prediction import questions as protein_structure_prediction
from questions.molecular_docking import questions as molecular_docking
from questions.phylogenetic_analysis import questions as phylogenetic_analysis
from questions.functional_genomics import questions as functional_genomics
from questions.transcriptomics import questions as transcriptomics
from questions.metagenomics import questions as metagenomics
from questions.introduction_to_genomics import questions as introduction_to_genomics
from questions.whole_genome_sequencing import questions as whole_genome_sequencing
from questions.exome_sequencing import questions as exome_sequencing
from questions.genome_wide_association_studies import questions as genome_wide_association_studies
from questions.comparative_genomics import questions as comparative_genomics
from questions.epigenomics import questions as epigenomics
from questions.structural_genomics import questions as structural_genomics
from questions.functional_annotation import questions as functional_annotation
from questions.introduction_to_omics import questions as introduction_to_omics
from questions.proteomics import questions as proteomics
from questions.metabolomics import questions as metabolomics
from questions.lipidomics import questions as lipidomics
from questions.glycomics import questions as glycomics
from questions.pharmacogenomics import questions as pharmacogenomics
from questions.nutrigenomics import questions as nutrigenomics
from questions.epitranscriptomics import questions as epitranscriptomics
from questions.single_cell_omics import questions as single_cell_omics
from questions.microbiomics import questions as microbiomics
from questions.interactomics import questions as interactomics
from questions.data_integration import questions as data_integration
from questions.data_visualization import questions as data_visualization
from questions.machine_learning_in_bioinformatics import questions as machine_learning_in_bioinformatics
from questions.population_genomics import questions as population_genomics
from questions.clinical_genomics import questions as clinical_genomics
from questions.toxicogenomics import questions as toxicogenomics
from questions.immunogenomics import questions as immunogenomics
from questions.systems_biology import questions as systems_biology
from questions.cancer_genomics import questions as cancer_genomics

In [10]:
# Topic title (as spelled in SECTORS[i]["titles"]) -> imported question list.
QLIST = {
    "Introduction to Bioinformatics": introduction_to_bioinformatics,
    "Sequence Alignment": sequence_alignment,
    "Genome Assembly": genome_assembly,
    "Gene Prediction": gene_prediction,
    "Protein Structure Prediction": protein_structure_prediction,
    "Molecular Docking": molecular_docking,
    "Phylogenetic Analysis": phylogenetic_analysis,
    "Functional Genomics": functional_genomics,
    "Transcriptomics": transcriptomics,
    "Metagenomics": metagenomics,
    "Introduction to Genomics": introduction_to_genomics,
    "Whole Genome Sequencing": whole_genome_sequencing,
    "Exome Sequencing": exome_sequencing,
    "Genome-Wide Association Studies": genome_wide_association_studies,
    "Comparative Genomics": comparative_genomics,
    "Epigenomics": epigenomics,
    "Structural Genomics": structural_genomics,
    "Functional Annotation": functional_annotation,
    "Introduction to Omics": introduction_to_omics,
    "Proteomics": proteomics,
    "Metabolomics": metabolomics,
    "Lipidomics": lipidomics,
    "Glycomics": glycomics,
    "Pharmacogenomics": pharmacogenomics,
    "Nutrigenomics": nutrigenomics,
    "Epitranscriptomics": epitranscriptomics,
    "Single-cell Omics": single_cell_omics,
    "Microbiomics": microbiomics,
    "Interactomics": interactomics,
    "Data Integration": data_integration,
    "Data Visualization": data_visualization,
    "Machine Learning in Bioinformatics": machine_learning_in_bioinformatics,
    "Population Genomics": population_genomics,
    "Clinical Genomics": clinical_genomics,
    "Toxicogenomics": toxicogenomics,
    "Immunogenomics": immunogenomics,
    "Systems Biology": systems_biology,
    "Cancer Genomics": cancer_genomics,
}

In [10]:
# Reference answers handed to both agents as formatting examples.
samples = str(example_answers)

# Restrict this run to the listed topic(s). NOTE: this rebinds QLIST, so the
# full mapping from the previous cell is no longer available afterwards.
QLIST = {"Single-cell Omics": single_cell_omics}

for item in SECTORS:
    title = item["titles"]
    domain = item["domain"]
    detail = item["detail"]

    # Skip every sector that was not selected above.
    if title not in QLIST:
        continue

    print(f"Generating answers for {title}")

    # Questions for this topic.
    qlist = QLIST[title]

    # Build the generator and its writer/reviewer agents.
    gen = Generator(domain, title, detail, samples)
    gen.create_writer_agent()
    gen.create_reviewer_agent()

    # Draft the answers, then review them and write the markdown file.
    REPLY = gen.generate_answer(qlist)
    gen.review_answer(REPLY)

    # Pause between topics.
    time.sleep(5)

Generating answers for Single-cell Omics
Generated Single-cell Omics answers
-------------------------
