In [2]:
import json, os
from openai import OpenAI

# Directory holding the QnA data and the article file to work on.
data_path = "../data/QnA_data/"
# Alternative input file: "QnA_test.json"
filename = "QnA_summary_hin_India_election_2024_100.json"
source_filename = f"{data_path}{filename}"
# Destination is the same path as the source, so saving overwrites the input.
des_filename = source_filename


# Read the articles from the source file.
with open(source_filename, encoding='utf-8') as articles_file:
    articles = json.load(articles_file)

# Read the questions from questions.json in the same directory.
with open(f"{data_path}questions.json", encoding='utf-8') as questions_file:
    questions = json.load(questions_file)

# Create the OpenAI client; the key is taken from the OPENAI_API_KEY
# environment variable (None is passed through if it is not set).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))



In [26]:
def get_answers(article, questions, model):
    """Send one article plus the question set to a chat model and return the reply.

    Args:
        article: Mapping describing the article. Only 'title' and 'body' are
            read, both via .get(), so a missing key is sent/printed as None.
        questions: The questions to ask. Interpolated into the prompt using
            its default string representation.
        model: Model name forwarded to client.chat.completions.create.

    Returns:
        The response object returned by client.chat.completions.create, or
        None if the request raised. The error is printed, not re-raised, so
        callers must check for None before reading `.choices`.
    """
    article_title = article.get('title')
    article_content = article.get('body')
    print(f"Proceeding article {article_title}")

    # The instruction text is kept exactly as it was (spelling and the
    # whitespace produced by the backslash line continuations included):
    # any edit here changes what is sent to the model.
    pre = "Read the following article in Hindi, and answer ALL the 80 questions below. \
        Do not leave any question unanswered, even if the questions are not related to articles. \
            Only CPATILIZED LETTERS ACCETPETED for the correct answer of each question \
                and ONLY return a string of question numbers and answers divided by semi-colon in order. \
                    Such as a string of 1A;2B;3C;4D; .\n\n"

    prompt = pre + f"Article Content:\n{article_content}\n\n Questions: {questions}\n"

    # Bind the result up front: previously `response` only existed if the
    # request succeeded, so a failed call ended in UnboundLocalError at the
    # return statement and hid the real error.
    response = None
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": prompt}
            ],
            temperature=0
        )
    except Exception as e:
        # Best effort: report the failure and let the caller decide what to do.
        print(f"Error: {e}")
    return response

In [27]:
def convert_string_to_dict(answer_string):
    """Parse an answer string such as "1A;2B;3C;44D" into {'1': 'A', ..., '44': 'D'}.

    Each ';'-separated segment is split into its last character (the answer
    letter) and everything before it (the question number).

    Surrounding whitespace/newlines are ignored and empty segments (a trailing
    ';', a doubled ';;', or a trailing newline after the final ';') are
    skipped instead of raising IndexError.

    Args:
        answer_string: The raw answer text. None or "" yields an empty dict.

    Returns:
        dict mapping question-number strings to single-character answers.
    """
    # Covers both "" and None (a reply may carry no text content at all).
    if not answer_string:
        return {}

    parsed = {}
    for segment in answer_string.split(';'):
        segment = segment.strip()
        if not segment:
            # Nothing between two separators, or only whitespace: ignore it.
            continue
        # Last character is the answer; the rest is the question number.
        parsed[segment[:-1].strip()] = segment[-1]
    return parsed



In [28]:
# Pick one article by position and show the answers currently stored on it.
index = 95
article = articles[index]
# 'answers' is treated as optional elsewhere in this notebook, so use .get():
# an article without the key prints None here instead of raising KeyError.
print(article.get('answers'))


{'38': 'A', '39': 'A', '40': 'A', '41': 'A', '42': 'D', '43': 'A', '44': 'B', '45': 'D', '46': 'C', '47': 'A', '48': 'A', '49': 'B', '50': 'D', '51': 'A', '52': 'D', '53': 'D', '54': 'D', '55': 'C', '56': 'D', '57': 'A', '58': 'E', '59': 'D', '60': 'B', '61': 'B', '62': 'B', '63': 'B', '64': 'B', '65': 'B', '66': 'B', '67': 'B', '68': 'B', '69': 'B', '70': 'B', '71': 'B', '72': 'B', '73': 'B', '74': 'B', '75': 'B', '76': 'B', '77': 'B', '78': 'B', '79': 'B', '80': 'E'}


In [29]:
# Remove any stored answers from the article (no-op when the key is absent),
# then print what remains.
article.pop('answers', None)

print(article)

{'uri': '2024-06-378099828', 'lang': 'hin', 'isDuplicate': False, 'date': '2024-06-04', 'time': '08:21:26', 'dateTime': '2024-06-04T08:21:26Z', 'dateTimePub': '2024-06-04T08:21:15Z', 'dataType': 'news', 'sim': 0.47843137383461, 'url': 'https://ndtv.in/bollywood/lok-sabha-election-results-2024-kangana-ranaut-and-arun-govil-are-leading-in-the-election-fray-for-the-first-time-5813700', 'title': 'Lok Sabha Election Results 2024: पहली बार चुनाव मैदान में उतरे कंगना रनौत और अरुण गोविल आगे', 'body': "कंगना रनौत और अरुण गोविल के लोकसभा परिणाम\n\nनई दिल्ली:\n\nLok Sabha Election Results 2024: हिमाचल प्रदेश के मंडी लोकसभा क्षेत्र से अभिनेत्री कंगना रनौत भाजपा की टिकट पर सांसद बनने के करीब हैं. निर्वाचन आयोग के आंकड़ों के अनुसार वह कांग्रेस प्रत्याशी विक्रमादित्य सिंह से 57,000 वोट से आगे हैं. दोपहर 12.30 बजे तक के आयोग के आंकड़ों के अनुसार 'रामायण' के राम यानि के अभिनेता अरुण गोविल भी उत्तर प्रदेश के मेरठ में निकटतम प्रतिद्वंद्वी से 2753 वोट से आगे हैं. प्रसिद्ध अभिनेता हेमा मालिनी मथुरा से तीसर

In [30]:
# Ask the model about this article; `answers` is the full response object,
# `pure_answers` is the text of its first choice.
answers = get_answers(article, questions, model="gpt-4-turbo")
pure_answers = answers.choices[0].message.content
print(pure_answers)

# Turn the reply text, e.g. 1A;2B;3C;44D, into {'1': 'A', '2': 'B', '3': 'C', '44': 'D'}
# and store the parsed mapping on the article.
article['answers'] = processed_answers = convert_string_to_dict(pure_answers)


Proceeding article Lok Sabha Election Results 2024: पहली बार चुनाव मैदान में उतरे कंगना रनौत और अरुण गोविल आगे
1E;2E;3E;4E;5E;6E;7E;8E;9E;10E;11E;12E;13E;14E;15B;16B;17B;18B;19B;20B;21B;22B;23B;24B;25B;26B;27B;28B;29B;30B;31B;32B;33B;34B;35B;36B;37B;38A;39A;40A;41A;42D;43A;44B;45D;46C;47A;48D;49D;50D;51A;52D;53D;54A;55C;56D;57A;58E;59D;60B;61B;62B;63B;64B;65B;66B;67B;68B;69B;70B;71B;72B;73B;74B;75B;76B;77B;78B;79B;80E;


In [31]:
# Look the answers up through the `articles` list (rather than `article`)
# and print them.
stored_answers = articles[index]['answers']
print(stored_answers)

{'1': 'E', '2': 'E', '3': 'E', '4': 'E', '5': 'E', '6': 'E', '7': 'E', '8': 'E', '9': 'E', '10': 'E', '11': 'E', '12': 'E', '13': 'E', '14': 'E', '15': 'B', '16': 'B', '17': 'B', '18': 'B', '19': 'B', '20': 'B', '21': 'B', '22': 'B', '23': 'B', '24': 'B', '25': 'B', '26': 'B', '27': 'B', '28': 'B', '29': 'B', '30': 'B', '31': 'B', '32': 'B', '33': 'B', '34': 'B', '35': 'B', '36': 'B', '37': 'B', '38': 'A', '39': 'A', '40': 'A', '41': 'A', '42': 'D', '43': 'A', '44': 'B', '45': 'D', '46': 'C', '47': 'A', '48': 'D', '49': 'D', '50': 'D', '51': 'A', '52': 'D', '53': 'D', '54': 'A', '55': 'C', '56': 'D', '57': 'A', '58': 'E', '59': 'D', '60': 'B', '61': 'B', '62': 'B', '63': 'B', '64': 'B', '65': 'B', '66': 'B', '67': 'B', '68': 'B', '69': 'B', '70': 'B', '71': 'B', '72': 'B', '73': 'B', '74': 'B', '75': 'B', '76': 'B', '77': 'B', '78': 'B', '79': 'B', '80': 'E'}


In [32]:
# Save results to a JSON file.
# des_filename is the same path the articles were loaded from, and opening it
# with 'w' truncates it before json.dump runs. A failure part-way through the
# dump would therefore destroy the only copy. Write to a sibling temp file
# first and swap it into place only once the dump has completed.
tmp_filename = des_filename + ".tmp"
with open(tmp_filename, 'w', encoding='utf-8') as f:
    json.dump(articles, f, ensure_ascii=False, indent=4)
os.replace(tmp_filename, des_filename)