In [1]:
import json
import os

from sensitivity_data_generation import LLMClient, PubMedRetriever, SensitivityDatasetGenerator

[nltk_data] Downloading package punkt to /home/ubuntu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/ubuntu/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


LLM CLIENT

In [2]:
# LLM clients. `llm_client_meta` is the one handed to the dataset generator below;
# `llm_client_openai` is created here but not used in the cells shown in this notebook.
llm_client_meta = LLMClient(developer="meta", model_name="llama3-70b-8192")
llm_client_openai = LLMClient(developer="openai", model_name="gpt-3.5-turbo-instruct")

# The API key is a secret and must not be stored in the notebook: read it from the
# environment instead. Any key that was previously committed in this cell should be
# treated as leaked and rotated.
pubmed_apikey = os.environ.get("PUBMED_API_KEY")
if not pubmed_apikey:
    # Fail early with an actionable message rather than deep inside the retriever.
    raise RuntimeError(
        "PUBMED_API_KEY is not set; export it in the environment before running this notebook."
    )

In [3]:
# Prompt templates passed to SensitivityDatasetGenerator, keyed by prompt name.
# Each template is a plain string with named `{...}` placeholders; the text of every
# template (including its internal whitespace) is intentionally left exactly as authored.
prompts = dict(
    # Question generation (fact-checking flavour). Placeholder: {abstract}.
    prompt_factchecking="Given the following abstract from a scientific paper, generate a fact-checking questions that can be answered with a specific piece of text from this abstract. The question should be directly answerable with a specific line or statement from the abstract. Abstract: {abstract}. Please generate a fact-checking question and return it as string. Please output only the question, do not include the answer or anything else.",

    # Question generation (reasoning flavour). Placeholder: {abstract}.
    prompt_reasoning="""Given the following abstract from a scientific paper, generate a deep reasoning questions that require synthesis of multiple parts of the abstract to form a comprehensive answer. 
                                These question should necessitate an understanding of the overall content, and the answer should include a paraphrase of more than one substring of the abstract. 
                                Abstract: {abstract}
                                Please generate a deep reasoning question and return it as a string. 
                                Please output only the question, do not include the answer or anything else
                                """,

    # Reference answer for a question. Placeholders: {abstract}, {question}.
    prompt_ground_truth="""You will be given an abstract from a scientific paper and a question related to it.
                                            Considering the abstract, generate a short answer for the question.
                                            If the answer is only a name or a number write a full sentence, do not return only one name or number.
                                            Abstract: {abstract}
                                            Question: {question}
                                            Please output only the accurate short answer.
                                            """,

    # Correct paraphrase sharing vocabulary with the reference. Placeholder: {ground_truth}.
    prompt_correct_similar="""You will be given a sentence.
                                                Your task is to write a new sentence that is a paraphrase similar to the original one. 
                                                It should keep the same meaning but using some different words.
                                                Make sure that the original and the new sentence share at least 3 content words.
                                                Sentence: {ground_truth}
                                                Please output only the new sentence. 
                                            """,

    # Correct paraphrase with no shared content words (few-shot). Placeholder: {ground_truth}.
    prompt_correct_different="""You will be given a sentence.
                                                Your task is to write a new sentence that is a paraphrase completely different to the original one. 
                                                It should keep the same meaning but using completely different words.
                                                Make sure that the original and the new sentence do not have any content word in common, not even 1.
                                                
                                                Example 1:
                                                Original sentence: The car-trained network showed a drop in performance for inverted versus upright cars. 
                                                New sentence: This CNN demonstrated decreased behavior with upside-down items in comparison to those oriented correctly.

                                                Example 2:
                                                Original sentence: This paper tries to demonstrate, first, that the behavioral signatures associated to human face recognition can be explained as a result of the optimization of the task. Also, they show that it is not so “special”, as this behavior can be found in CNNs trained to recognize other objects (like cars). 
                                                New sentence: This article aims to demonstrate that the distinct characteristics linked to recognizing people’s visages may stem from enhancement of the activity. Furthermore, it illustrates that this pattern is not unique, as similar ones occur in neural networks developed to identify different items, such as vehicles.

                                                Your task:
                                                Original sentence: {ground_truth} 
                                                New sentence:

                                                Please output only the new sentence.""",

    # Slightly altered, incorrect variant of the reference (few-shot). Placeholder: {ground_truth}.
    prompt_incorrect_similar="""You will be provided a sentence.
                                    Your objective is to modify this sentence slightly so that it becomes incorrect or nonsensical.

                                    Example 1:
                                    Original sentence: The car-trained network showed a drop in performance for inverted versus upright cars. 
                                    New sentence: The car-trained network showed an increase in performance for inverted versus upright cars. 
                                    Example 2:
                                    Original sentence: The authores utilize deep convolutional neural networks (CNNs) on their experiments.
                                    New sentence: The authores utilize deep recurrent neural networks (CNNs) on their experiments.         

                                    Sentence: {ground_truth}

                                    Please output only the new sentence.
                                    """,

    # On-topic sentence carrying different information (few-shot). Placeholder: {ground_truth}.
    prompt_incorrect_related="""You will be provided a sentence.
                                    Your task is to write a new short sentence on the topic of the original one but with different information.

                                    Example 1:
                                    Original sentence: The car-trained network showed a drop in performance for inverted versus upright cars. 
                                    New sentence: The inverted versus upright effect is a very popular phenomena investigated on the field of Computer Vision. 
                                    Example 2:
                                    Original sentence: The authores utilize deep convolutional neural networks (CNNs) on their experiments.
                                    New sentence: Deep convolutional neural networks (CNNs) are widely use on scientific research.


                                    Sentence: {ground_truth}
                                    Please output only the new sentence.
                                    """,

    # Classifies a question as Fact Checking / Reasoning / Unrelated.
    # Placeholders: {abstract}, {question}.
    prompt_validate_question="""You will be given a context and a question. Your task is to categorize the question according to the types listed below, based solely on the relationship between the question and the provided context.
                Fact Checking: the answer is a composition based on only one substring of the context, that can be used directly as it is written in the context.
                Reasoning: answer is a composition based on a more elaborated understanding made from more than one substring of the context.  substrings can not be used exactly as they are written, it requires a deeper understanding of the content.
                Unrelated: the answer cannot be derived from the context provided.
                Context: {abstract}
                Question: {question}
                Please provide only the category name of the question based on the definitions provided.""",

    # Classifies a candidate answer against the reference answer.
    # Placeholders: {question}, {ground_truth}, {answer}.
    # NOTE(review): the category label is spelled "Rigth" in the template; it is kept
    # verbatim because code consuming the model's reply may match on that spelling.
    prompt_validate_incorrect="""You will be presented with a question, its correct answer, and a candidate sentence. Your task is to categorize the candidate sentence based on how it relates to the correct answer and the content of the question. Choose from the categories below:

                    Incorrect Similar: The sentence resembles the correct answer but contains incorrect information or misinterpretations.
                    Incorrect Related: The sentence is topically related to the question but does not address or correctly answer it.
                    Incorrect Unrelated: The sentence has no relevance to the topic or context of the question.
                    Rigth: The sentence is a possible rigth answer to the question.

                    Question: {question}
                    Correct Answer: {ground_truth}
                    Candidate Sentence: {answer}

                    Please provide only the category name for the candidate sentence based on the definitions provided.""",
)

In [4]:
# Location handed to the generator as its save path. The default is the original
# machine-specific directory; set SENSITIVITY_SAVE_PATH to run the notebook elsewhere
# without editing this cell.
save_path = os.environ.get(
    "SENSITIVITY_SAVE_PATH",
    "/home/ubuntu/iris_repos/llm_evaluation/sensitivity_data_generation/intermediate_generation",
)

# The generator is driven by the Meta-hosted client, the API key, the prompt templates
# and the save path, in that positional order.
sensitivity_data_generator = SensitivityDatasetGenerator(llm_client_meta, pubmed_apikey, prompts, save_path)

In [22]:
# Search terms for this run, processed in the order listed.
queries = ["sports", "plants"]

# Query pairs kept as notes for other runs:
#   "chemistry", "environment"
#   "sports", "plants"
#   "linguistics", "medicine"

In [23]:
# Run the generation for every query. The call returns the dataset table and the
# abstracts it was built from. In the recorded log each abstract yields three
# fact-checking and three reasoning sets, which matches question_per_abs=6.
full_df, abstracts = sensitivity_data_generator.generate_sensitivity_dataset(
    queries,
    abs_per_query=10,
    question_per_abs=6,
)

Working with query: sports
Working with abstract: 0
A fact checking set was created.
A reasoning set was created.
A fact checking set was created.
A reasoning set was created.
A fact checking set was created.
A reasoning set was created.
Working with abstract: 1
A fact checking set was created.
A reasoning set was created.
A fact checking set was created.
A reasoning set was created.
A fact checking set was created.
A reasoning set was created.
Working with abstract: 2
A fact checking set was created.
A reasoning set was created.
A fact checking set was created.
A reasoning set was created.
A fact checking set was created.
A reasoning set was created.
Working with abstract: 3
A fact checking set was created.
A reasoning set was created.
A fact checking set was created.
A reasoning set was created.
A fact checking set was created.
A reasoning set was created.
Working with abstract: 4
A fact checking set was created.
A reasoning set was created.
A fact checking set was created.
A reasoni

In [26]:
# Show the generated dataset (one row per question with its answer variants).
# NOTE(review): the output recorded with this notebook contains generated cells that
# begin with boilerplate such as "Here is the new sentence:" or "Here is a
# fact-checking question based on the ..." — consider filtering or cleaning those
# rows before the dataset is used.
full_df

Unnamed: 0,query,abstract_id,question_type,question,ground_truth,correct_similar,correct_different,incorrect_similar,incorrect_related,incorrect_unrelated
0,sports,0,Fact Checking,What is the main goal of the study described i...,The main goal of the study is to better unders...,The primary objective of the research is to ga...,Here is the new sentence:\n\nThis research foc...,The main goal of the study is to better unders...,The implementation of these emerging roles is ...,The fungus responsible for causing Fusarium he...
1,sports,0,Reasoning,what potential insights might this study provi...,This study may provide insights into the effec...,This research offers valuable perspectives on ...,This research could shed light on how to succe...,This study may provide insights into the effec...,The future of healthcare relies heavily on the...,The findings suggest that Streptomyces pratens...
2,sports,0,Fact Checking,What is the main objective of the study mentio...,The main objective of the study mentioned in t...,The primary goal of the research outlined in t...,This research aims to explore the ways in whic...,The main objective of the study mentioned in t...,The implementation of innovative nursing roles...,The fungus mainly responsible for causing Fusa...
3,sports,0,Reasoning,what implications do these roles have for the ...,These roles have significant implications for ...,These positions have far-reaching consequences...,This designation holds crucial consequences fo...,These roles have significant implications for ...,Hospitals that are proactive in responding to ...,These findings suggest that Streptomyces prate...
4,sports,0,Fact Checking,What is the primary objective of the study des...,The primary objective of the study is to bette...,The main goal of the research is to gain a dee...,This research aims to illuminate the ways in w...,The primary objective of the study is to bette...,General hospitals are increasingly recognizing...,The primary cause of Fusarium head blight (FHB...
...,...,...,...,...,...,...,...,...,...,...
117,plants,17,Reasoning,what implications do these mechanisms have for...,These mechanisms suggest that tannic acid coul...,These findings imply that tannic acid may be a...,This compound displays potential as a medicati...,Here is the modified sentence:\n\nThese mechan...,Research has shown that tannic acid also exhib...,The study's findings have broader implications...
118,plants,18,Fact Checking,Here is a fact-checking question based on the ...,A total of 314 proteins were identified as dif...,A total of 314 proteins exhibited altered beha...,A grand total of 314 molecular building blocks...,A total of 314 planets were identified as diff...,Hemp plants have been found to possess unique ...,"According to the study, tibial fractures occur..."
119,plants,18,Reasoning,what implications do these findings have for r...,These findings have significant implications f...,These discoveries have important consequences ...,These discoveries hold major consequences for ...,These findings have significant implications f...,Industrial hemp's potential as a sustainable c...,The debilitating nature of tibial fractures ca...
120,plants,18,Fact Checking,How many proteins were identified to be differ...,A total of 314 proteins were identified to be ...,"Upon exposure to sodium chloride, a total of 3...",A grand total of 314 molecular components exhi...,A total of 314 proteins were identified to be ...,The salt tolerance of hemp plants is closely l...,The seven aspects of equine veterinary profess...


In [27]:
# Show the abstracts returned alongside the dataset; in the recorded output this is a
# dict mapping integer ids to abstract text.
abstracts

{0: 'This study aims to better understand how new future-oriented nursing roles are enacted in a general hospital.',
 1: 'Herein, we investigate the relationships between the COVID-19 pandemic and overcoming the virus, and its effects on body composition parameters in young adults from Slovakia.',
 2: 'This study investigated whether abnormal peak inversion spontaneous potentials (PISPs) recorded at resting myofascial trigger points (MTrPs) stem from the discharge of muscle spindles. Forty-eight male Sprague-Dawley rats were randomly divided into six groups. Five groups underwent MTrP modeling intervention, whereas one group did not receive intervention and was duly designated as the blank control. After model construction, five rat models were randomly subjected to ramp-and-hold stretch tests, succinylcholine injection, eperisone hydrochloride injection, saline injection, and blank drug intervention. By contrast, the rats in the blank control group were subjected to ramp-and-hold stre

In [28]:
# Persist the generated dataset as CSV.
# index=False: presumably full_df is a pandas DataFrame, in which case the row index
# is not written to the file.
df_path = "/home/ubuntu/iris_repos/llm_evaluation/sensitivity_data_generation/data_sportandplants.csv"

full_df.to_csv(df_path, index=False)

In [29]:
# Persist the abstracts as pretty-printed JSON (`json` is imported in the top import cell).
# JSON object keys are always strings, so the integer ids of `abstracts` are written
# as "0", "1", ... and must be converted back with int() after reloading.
abs_path = "/home/ubuntu/iris_repos/llm_evaluation/sensitivity_data_generation/abstracts_sportsandplants.json"

# Explicit encoding keeps the written file independent of the platform default.
with open(abs_path, "w", encoding="utf-8") as json_file:
    json.dump(abstracts, json_file, indent=4)