In [1]:
import os
import ast
import openai
import time
import json

In [1]:
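# Credentials for the OpenAI client. Fill these in before running, preferably
# from environment variables rather than hardcoding secrets in the notebook.
# openai.organization = ""  # my OpenAI account
# openai.api_key = ""  # my key
# openai.Model.list()  # lists the models available to the account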

In [12]:
# Load the summaries to annotate and the evidence corpus they reference.
# The encoding is pinned to UTF-8 so that non-ASCII characters in the text
# decode the same way on every platform instead of depending on the locale.
with open('summary/human2_summary_by_gpt35.json', 'r', encoding='utf-8') as f:
    summarys = json.load(f)

with open('dataset/evidence_corpus.json', 'r', encoding='utf-8') as f:
    evidence_corpus = json.load(f)

In [13]:
# CORD UIDs in corpus order: uid_list[k] is the 'cord_uid' of evidence_corpus[k].
uid_list = []
for corpus_entry in evidence_corpus:
    uid_list.append(corpus_entry['cord_uid'])

In [14]:
def get_completion_from_messages(messages,
                                 model="gpt-3.5-turbo-16k",
                                 temperature=0,
                                 max_tokens=1024):
    """Send one chat-completion request and return the reply text.

    Args:
        messages: Chat messages passed through unchanged to
            ``openai.ChatCompletion.create``.
        model: Name of the chat model to call.
        temperature: Degree of randomness of the model's output.
        max_tokens: Maximum number of tokens the model may output.

    Returns:
        The ``"content"`` of the first choice's message, or ``None`` when the
        API raises ``openai.error.RateLimitError`` (so the caller can retry).
        Any other exception propagates.
    """
    request_kwargs = dict(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    try:
        completion = openai.ChatCompletion.create(**request_kwargs)
        first_choice = completion.choices[0]
        return first_choice.message["content"]
    except openai.error.RateLimitError:
        # Rate limiting is reported as "no answer yet" rather than an error.
        return None

In [15]:
def retry_with_exponential_backoff(input_text, max_retries=5):
    """Call ``get_completion_from_messages`` until it yields a response.

    Args:
        input_text: Value forwarded as the ``messages`` argument of
            ``get_completion_from_messages`` (despite the name, the caller in
            this notebook passes a list of chat messages).
        max_retries: Maximum number of attempts to make.

    Returns:
        The first response that is not ``None``.

    Raises:
        RuntimeError: If every attempt returned ``None`` (or ``max_retries``
            is 0). The message is "Max retries reached".
    """
    wait_time = 1  # initial wait time in seconds

    for attempt in range(max_retries):
        response = get_completion_from_messages(input_text)
        if response is not None:
            return response

        # Back off only when another attempt follows; sleeping after the last
        # failure would just delay the error.
        if attempt < max_retries - 1:
            time.sleep(wait_time)
            wait_time *= 2  # double the wait time for the next retry

    raise RuntimeError("Max retries reached")

In [16]:
# Markers used in the prompts: delimiter_1 introduces the scientific abstracts
# (and each sentence in the model's answer), delimiter_2 introduces the news
# paragraph.
# NOTE(review): delimiter_1 is a prefix of delimiter_2, so splitting text on
# delimiter_1 also splits on delimiter_2 — confirm this is acceptable.
delimiter_1 = "#"
delimiter_2 = "##"

# System prompt. It is built from adjacent string literals with explicit
# separators so that consecutive sentences are not fused together
# (backslash continuations inside a triple-quoted string join lines with no
# whitespace, which produced e.g. "paragraphI will put").
system_message = (
    "\n"
    "You are an evidence selector that follows the output pattern. "
    "I give you one news paragraph and three relevant scientific abstracts. "
    "You need to select  sentences from relevant scientific abstracts "
    "that are most relevant to the news paragraph. "
    f"I will put the relevant scientific abstracts behind {delimiter_1}. "
    f"I will put the paragraph behind {delimiter_2}.\n"
)


In [17]:
# Progress counter that the sentence-selection loop prints and increments;
# re-run this cell to reset it before re-running that loop.
i = 0

In [18]:
# Map every CORD UID to its first position in the corpus once, instead of
# scanning uid_list with .index() twice for each evidence id.
uid_to_index = {}
for position, uid in enumerate(uid_list):
    uid_to_index.setdefault(uid, position)  # keep the first match, like list.index

# For each summary, ask the model for the evidence sentences most relevant to
# it and store them on the record under 'selected_sentence'.
# enumerate() supplies the progress counter so the loop does not depend on a
# counter initialised in another cell.
for i, item in enumerate(summarys):
    print(f"Selecting the sentence for the {i}")

    summary = item['abstractive']

    # Concatenate title + abstract of every cited evidence document.
    evidence_parts = []
    for evidence_uid in item['evidence']:
        if evidence_uid not in uid_to_index:
            raise ValueError(f"{evidence_uid!r} is not in the evidence corpus")
        evidence_doc = evidence_corpus[uid_to_index[evidence_uid]]
        evidence_parts.append(evidence_doc['title'] + evidence_doc['abstract'])
    evidence = ''.join(evidence_parts)

    user_message = f"""
    Please select three sentences from {delimiter_1}{evidence} that are most relevant to {delimiter_2}{summary}.\
    Please use {delimiter_1} as the starting symbol for the output sentence.
    """

    messages = [
        {'role': 'system', 'content': system_message},
        {'role': 'user', 'content': user_message},
    ]
    raw_response = retry_with_exponential_backoff(messages)

    # The model is asked to start each sentence with delimiter_1; everything
    # before the first delimiter is discarded.
    # NOTE(review): pieces keep any surrounding whitespace/newlines from the
    # response — confirm downstream consumers expect that.
    selected_sentence = raw_response.split(delimiter_1)[1:]

    item['selected_sentence'] = selected_sentence

    time.sleep(2)  # pause between requests

Selecting the sentence for the 0
Selecting the sentence for the 1
Selecting the sentence for the 2
Selecting the sentence for the 3
Selecting the sentence for the 4
Selecting the sentence for the 5
Selecting the sentence for the 6
Selecting the sentence for the 7
Selecting the sentence for the 8
Selecting the sentence for the 9
Selecting the sentence for the 10
Selecting the sentence for the 11
Selecting the sentence for the 12
Selecting the sentence for the 13
Selecting the sentence for the 14
Selecting the sentence for the 15
Selecting the sentence for the 16
Selecting the sentence for the 17
Selecting the sentence for the 18
Selecting the sentence for the 19
Selecting the sentence for the 20
Selecting the sentence for the 21
Selecting the sentence for the 22
Selecting the sentence for the 23
Selecting the sentence for the 24
Selecting the sentence for the 25
Selecting the sentence for the 26
Selecting the sentence for the 27
Selecting the sentence for the 28
Selecting the sentence f

In [20]:
# The selection loop already rebinds `selected_sentence` to the list of
# sentences (the response split on delimiter_1), so print it directly;
# calling .split() on that list would raise AttributeError.
print(selected_sentence)

['Regression analysis demonstrated that weekly hospitalization growth rates declined by 2.9 percentage points (95% confidence interval [CI] = 0.3-5.5) among adults aged 40-64 years during the first 2 weeks after implementing statewide mask mandates. \n', 'After mask mandates had been implemented for ≥3 weeks, hospitalization growth rates declined by 5.5 percentage points among persons aged 18-39 years (95% CI = 0.6-10.4) and those aged 40-64 years (95% CI = 0.8-10.2). \n', 'Statewide mask mandates might be associated with reductions in SARS-CoV-2 transmission and might contribute to reductions in COVID-19 hospitalization growth rates, compared with growth rates during <4 weeks before implementation of the mandate and the implementation week.']


In [18]:
# Third selected sentence of the last processed item. `selected_sentence` is
# already the split list (without the leading empty piece), so the third
# sentence is at index 2 rather than .split(delimiter_1)[3].
selected_sentence[2]

'Statewide mask mandates might be associated with reductions in SARS-CoV-2 transmission and might contribute to reductions in COVID-19 hospitalization growth rates, compared with growth rates during <4 weeks before implementation of the mandate and the implementation week.'

In [19]:
# Display the value the selection loop last bound to `selected_sentence`.
# NOTE(review): the loop rebinds this name to a list of sentences, while the
# saved output shows a raw string — presumably from an earlier session; confirm.
selected_sentence

'#Regression analysis demonstrated that weekly hospitalization growth rates declined by 2.9 percentage points (95% confidence interval [CI] = 0.3-5.5) among adults aged 40-64 years during the first 2 weeks after implementing statewide mask mandates. \n#After mask mandates had been implemented for ≥3 weeks, hospitalization growth rates declined by 5.5 percentage points among persons aged 18-39 years (95% CI = 0.6-10.4) and those aged 40-64 years (95% CI = 0.8-10.2). \n#Statewide mask mandates might be associated with reductions in SARS-CoV-2 transmission and might contribute to reductions in COVID-19 hospitalization growth rates, compared with growth rates during <4 weeks before implementation of the mandate and the implementation week.'

In [10]:
# Spot-check a single record; index 100 requires at least 101 loaded summaries.
summarys[100]

{'text': 'Breakthrough Infections Among Vaccinated Individuals in Eastern India\n\nA recent study conducted in the eastern state of India has shed light on breakthrough cases of SARS-CoV-2 infections among recipients of the BBV-152 (COVAXIN®) and AZD1222 (COVISHIELD(TM)) vaccines. The study aimed to analyze the clinical presentation, hospitalization rates, and antibody titers against the spike antigen in vaccinated individuals who tested positive for the virus.\n\nThe study collected samples from vaccinated individuals who reported breakthrough infections at various healthcare facilities in Odisha. Out of the 274 samples analyzed, more than 83.2% of the individuals showed symptoms, with 9.9% requiring hospitalization. Interestingly, the seropositivity rate was significantly higher in Covishield recipients (96.7%) compared to Covaxin recipients (77.1%). Additionally, hospitalized patients had lower median antibody titers compared to individuals in home isolation.\n\nThe median age for b

In [19]:
# Write the summaries (now carrying 'selected_sentence') to disk as JSON.
FILE_PATH = 'sentence_selection/selected_sentence_human2.json'

# Create the output directory if needed so the write does not fail with
# FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(FILE_PATH), exist_ok=True)

with open(FILE_PATH, 'w', encoding='utf-8') as output_file:
    # Stream straight to the file instead of building the whole string first.
    json.dump(summarys, output_file, indent=2)
    output_file.write('\n')  # keep the trailing newline the print() call wrote