In [1]:
import os
import ast
import openai
import time
import json

In [1]:
# openai.organization = ""  #My OpenAI account
# openai.api_key = "" #My Key
# openai.Model.list()

In [43]:
# Load the records to be fact-checked. JSON text is UTF-8 by specification, so
# the encoding is pinned instead of depending on the platform's locale default.
with open('summary/human1_summary_by_gpt35.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
print(len(data))

600


In [44]:
# Load the evidence corpus. The encoding is pinned to UTF-8 so the read does
# not depend on the platform's locale default.
with open('dataset/evidence_corpus.json', 'r', encoding='utf-8') as f:
    evidence_corpus = json.load(f)

In [45]:
# Positional lookup table: uid_list[k] is the 'cord_uid' of evidence_corpus[k].
uid_list = list(entry['cord_uid'] for entry in evidence_corpus)

In [46]:
# Call GPT3.5 Model 
def get_completion_from_messages(messages,
                                 model="gpt-3.5-turbo-16k",
                                 temperature=0,
                                 max_tokens=1024):
    """Send one chat-completion request and return the text of the first choice.

    Args:
        messages: Chat messages passed through to ``openai.ChatCompletion.create``.
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output.
        max_tokens: Maximum number of tokens the model may output.

    Returns:
        The ``"content"`` of the first choice's message, or ``None`` when the
        request raised ``openai.error.RateLimitError``.
    """
    request_options = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    try:
        completion = openai.ChatCompletion.create(**request_options)
        first_choice = completion.choices[0]
        return first_choice.message["content"]
    except openai.error.RateLimitError:
        # Signal "rate limited" to the caller with None instead of raising.
        return None

In [47]:
def retry_with_exponential_backoff(input_text, max_retries=5):
    """Query the chat model, backing off exponentially while it returns None.

    Args:
        input_text: Chat messages, forwarded unchanged to
            ``get_completion_from_messages``.
        max_retries: Maximum number of attempts to make.

    Returns:
        The first response that is not ``None``.

    Raises:
        Exception: ``"Max retries reached"`` when every attempt returned ``None``.
    """
    wait_time = 1  # initial wait time in seconds
    attempts_made = 0

    while attempts_made < max_retries:
        response = get_completion_from_messages(input_text)
        if response is not None:
            return response

        attempts_made += 1
        # Only wait when another attempt will follow; sleeping after the final
        # failure would just delay the exception.
        if attempts_made < max_retries:
            time.sleep(wait_time)
            wait_time *= 2  # double the wait time for next retry

    raise Exception("Max retries reached")

In [48]:
# Markers interpolated into the prompts: the news paragraph is placed after
# delimiter_1 and the evidence text after delimiter_2.
delimiter_1 = "#"
delimiter_2 = "##"

# Alternative system prompt, currently disabled (not used by any code below).
#system_message = f"""
#You are a Fact Checker.\
#The user will give you a news paragraph and several evidence sentences.\
#The evidence sentence may contain information that supports or refutes the news paragraph’s central claim. \
#Determine whether the news story is factual or not based on the evidence sentences.\ 
#Pick phrases or sentences from both the news story and the evidence to support your conclusion.\
#The user will put the news paragraph behind {delimiter_1} and the relevant sentence behind {delimiter_2}. 
#"""
# Active system prompt sent with every request.
# NOTE: inside this non-raw triple-quoted f-string, a backslash at the very end
# of a line is a line continuation: the newline is dropped and the next line is
# appended directly (no space is inserted unless one precedes the backslash).
# As a result the numbered steps 1-7 are rendered as a single line of text.
# Do not reflow or re-indent this literal; any edit changes the prompt itself.
system_message = f"""
You are a Fact Checker.\
The user will present a news paragraph. This will be marked with the delimiter {delimiter_1}. \
Following this, the user will present evidence paragraphs, marked with the delimiter {delimiter_2}. These sentences may either support or refute the news paragraph's central claim.\
Your task is to determine the factual accuracy of the news story based on these evidence paragraphs. Are they supporting or contradicting the news?
Make a final prediction and provide a comprehensive explanation based on the following:\

1. Initial Analysis: Summarize the news paragraph's central claim, identifying key concepts.\
2. Alignment Check: Examine evidence sentences for alignment with the news paragraph. Flag any discrepancies.\
3. Causation Assessment: Evaluate if the news paragraph confuses correlation with causation. Identify potential logical fallacies.\
4. Accuracy Verification:Verify quantitative and qualitative accuracy in the news paragraph compared to evidence sentences.\
5. Precision Check: Assess if the news paragraph overgeneralizes or oversimplifies findings from evidence sentences.\
6. Contextual Insight: Consider the broader context surrounding the news and evidence.\
7. Phrase Selection: Select precise phrases from both the news story and evidence to support each analysis step.\

The user will input the news paragraph behind {delimiter_1} and the relevant sentence behind {delimiter_2}.
"""

In [85]:
# Progress counter for the prediction loop; 596 matches the index at which that loop starts.
i = 596

In [86]:
# Run the fact-checking prompt over the remaining records and store the model's
# verdict on each record in place.
#
# The slice starts at the counter `i` (set in the preceding cell) instead of a
# second hard-coded offset, so the printed index always matches the record
# being processed. Because `i` is only advanced after a record succeeds,
# re-running this cell after a failure resumes at the record that failed.
for item in data[i:]:
    print(f"Predicting {i}")

    news = item['abstractive']

    # Resolve each cited UID to its corpus entry once (uid_list.index raises
    # ValueError for an unknown UID), then concatenate title + abstract of every
    # entry in citation order. No separator is inserted, so the prompt text is
    # exactly what it was before.
    cited_docs = [evidence_corpus[uid_list.index(uid)] for uid in item['evidence']]
    evidence = ''.join(doc['title'] + doc['abstract'] for doc in cited_docs)

    # The literal below is part of the prompt: its indentation and trailing
    # backslashes (line continuations) must not be changed.
    user_message = f"""
    I will give you one news paragraph {delimiter_1}{news} and several relevant paragraph {delimiter_2}{evidence}.\
    Please help me determine if these paragraph support or refute the news point of view. \
    Finally, please answer using one word'refute' or 'support' and give reasons.\
    Please provides the final output in JSON format containing the following two keys: prediction and reason.
    Example of Expected Output:
    {{
        "prediction": "support",
        "reason": "The relevant sentences provide evidence that corroborates the main point made in the news paragraph by presenting similar findings..."
    }}
    Please provide your analysis in the above format. Don't add ```json at the beginning of output.
    """

    messages = [
        {'role': 'system', 'content': system_message},
        {'role': 'user', 'content': user_message},
    ]
    response = retry_with_exponential_backoff(messages)
    print(response)

    # A reply that is not valid JSON raises json.JSONDecodeError here and stops
    # the loop with `i` still pointing at this record.
    response_json = json.loads(response)
    item['prediction'] = response_json['prediction']
    item['reason'] = response_json['reason']

    time.sleep(2)  # fixed pause between consecutive requests
    i += 1

Predicting 596
{
    "prediction": "support",
    "reason": "The relevant sentences provide evidence that corroborates the main point made in the news paragraph by presenting similar findings. The evidence paragraphs mention President Biden's plans for infrastructure and social welfare, which aligns with the news paragraph's claim of outlining his plans. Additionally, the evidence paragraphs mention the challenges faced by Biden's administration, such as vaccine distribution and addressing the influx of migrants at the border, which supports the news paragraph's statement of challenges remaining. Overall, the evidence paragraphs provide supporting information that aligns with the central claim of the news paragraph."
}
Predicting 597
{
    "prediction": "refute",
    "reason": "The relevant sentences provide evidence that contradicts the main point made in the news paragraph. While the news paragraph claims that fully vaccinated adults aged 65 or older are 94% less likely to be hospita

In [87]:
# Write `data` to FILE_PATH as JSON indented by two spaces, followed by a newline.
FILE_PATH = 'prediction/sif_cot_prediction_human1_gpt35.json'
with open(FILE_PATH, 'w') as output_file:
    output_file.write(json.dumps(data, indent=2))
    output_file.write('\n')  # the line terminator print() appended