In [17]:
from concurrent.futures import ThreadPoolExecutor
import json
import os
from openai import OpenAI
import logging
from time import perf_counter, sleep
import re
from threading import RLock
from dotenv import load_dotenv

load_dotenv()

log_format = '[ %(levelname)s] [%(asctime)s] [%(module)s] [%(lineno)s] [%(message)s]'
logging.basicConfig(level=logging.DEBUG, format=log_format)

file_handler = logging.FileHandler('logfile.log')
file_handler.setLevel(logging.DEBUG)

formatter = logging.Formatter(log_format)
file_handler.setFormatter(formatter)

log = logging.getLogger()
log.addHandler(file_handler)

lock = RLock()
converted_qno = set()
failed_qno = []
reponse_data = []

file_no =2
example_output_path = f'converted/{file_no}/example_{file_no}.json'
question_path = f'converted/{file_no}/questions_{file_no}.json'
question_copy_path =f'converted/{file_no}/questions_{file_no}_copy.json'
no_path = f'converted/{file_no}/no_{file_no}.json'
data_path = f'data/data_{file_no}.json'


In [18]:

SYSTEM_PROMPT = """
**You are a UPSC prelims question expert specializing in converting English MCQs to Hindi.**

**Given a question in English (statement) and a hint, convert it to Hindi (statement) suitable for the UPSC prelims exam, maintaining technical accuracy and UPSC context.**

**Input (JSON):**

* `statement`: Question statement with options in English.
* `hint`: Hint for the question in English.
    
**Output (JSON):**

* `statement`: Converted question statement with options in Hindi.
* `hint`: Hint for the question in Hindi.

**Example::1**

* Example Input::1
    "statement": "Consider following statements regarding the representation of States in the Parliament: 1. Delimitation of Constituencies is undertaken on the basis of census exercise to ensure that every State is represented in proportion to its population in both the Houses of Parliament. 2. Delimitation Commission is a constitutional body, the notification of whose orders cannot be challenged in a Court. 3. Territorial constituencies in States, at present, are based on the data of 2001 census, as the Constitution (87thAmendment) Act, 2003 enabled the delimitation exercise on the basis of 2001Census figures. 4. As it stands today, Constitution of India prohibits any delimitation exercise till 2031. Which of the statements given above are not correct ? (A) 1, 2 and 4only (B) 2, 3 and 4only (C) 1, 3 and 4only (D) 1, 2, 3 and 4"
    "hint": "Delimitation constituencies are NOT applicable to representation of states in Council of States. Though it is correct to say the Order of delimitation commission, once notified, cannot be challenged in any Court, Delimitation commission is NOT a constitutional body but a statutory body. The Constitution has prohibited the revision of representation of States in the Lok Sabha till 2026, but not the delimitation of the Lok Sabha and Assembly constituencies..."

* Example Output::1
    "statement": "संसद में राज्यों के प्रतिनिधित्व का प्रस्तावना से संबंधित निम्नलिखित कथनों को विचार करें: 1. निर्वाचनी सीमाओं का निर्धारण जनगणना अभ्यास के आधार पर किया जाता है ताकि सुनिश्चित किया जा सके कि प्रत्येक राज्य को संसद के दोनों सदनों में उसकी जनसंख्या के अनुपात में प्रतिनिधित्व मिले। 2. निर्वाचन सीमा आयोग एक संवैधानिक निकाय है, जिसके आदेशों की अधिसूचना को किसी भी न्यायालय में चुनौती नहीं की जा सकती है। 3. राज्यों में क्षेत्रीय निर्वाचनी सीमाएँ, वर्तमान में, 2001 की जनगणना के आंकड़ों पर आधारित हैं, क्योंकि संविधान (87वां संशोधन) अधिनियम, 2003 ने 2001 की जनगणना के आंकड़ों पर आधारित निर्वाचन का अभ्यास संभव बनाया। 4. जैसा कि आज है, भारतीय संविधान किसी भी निर्वाचन अभ्यास का कोई अभ्यस्त नहीं करता है जब तक 2031 तक। उपर्युक्त कथनों में से कौन सही नहीं है? (A) 1, 2 और 4 (B) केवल 2, 3 और 4 (C) केवल 1, 3 और 4 केवल (D) 1, 2, 3 और 4"
    "hint": "निर्वाचन सीमाएँ संसद के सदन मे प्रतिनिधित्व के लिए लागू नहीं हैं। यह सही है कि एक बार घोषित किए गए निर्वाचन आयोग के आदेश को किसी भी न्यायालय में चुनौती नहीं की जा सकती है, लेकिन निर्वाचन आयोग संवैधानिक निकाय नहीं है बल्कि एक वैधानिक निकाय है। संविधान ने राज्यों के प्रतिनिधित्व की संशोधन की प्रतिनिधित्व को 2026 तक रोका है, लेकिन संविधान में स्पष्ट नहीं किया है कि संविधान और विधानसभा क्षेत्रों की नियमन (delimitation) नहीं कर सकते हैं।.."
    

**IMPORTANT INFORMATION**
* Choices must be (A),(B),(C),(D) in capital letter form
* DONOT change the base statement format. The number of options should remain the same in the converted statement.
* DONOT forget to translate choices from end of each input `statement` to reponse `statement`.
* DONOT translate numbers
"""
# **Example::2**

# * Example Input::2
#     "statement": "Which of the following statements is/are true about the Gram Sabha? 1. All people living in a village or a group of villages are members of the Gram Sabha. 2. All the plans for work of Gram Panchayat have to be approved by Gram Sabha. 3. For better implementation of some specific tasks, Gram Sabha form committees. 4. The elected Secretary of the Gram Sabha calls the meeting and keeps a record of the proceedings. (A) 2 and 3 (B) 1, 3 and 4 (C) 2, 3 and 4 (D) 1,2,3,4"
#     "hint": "Only adult villagers who have the right to vote can be member of Gram Sabha. Persons below 18 years of age can't become members. Gram Sabha plays a supervisory and monitoring role over Gram Panchayat by approving it plan of work. Gram Sabha form committees like construction, animal husbandry, etc to carry out some specific tasks. The Gram Panchayat has a Secretary who is also the Secretary of the Gram Sabha. This person is not an elected person but is appointed by the government. The Secretary is responsible for calling the meeting of the Gram Sabha and Gram Panchayat and keeping a record of the proceedings."

# * Example Output::2
#     "statement": "निम्नलिखित में से कौन से कथन ग्राम सभा के बारे में सही हैं? 1. ग्राम सभा के सभी लोग एक गाँव या एक समूह के गाँवों के सदस्य हैं। 2. ग्राम पंचायत के काम की सभी योजनाएँ ग्राम सभा द्वारा मंजूर करनी हैं। 3. कुछ विशिष्ट कार्यों के बेहतर कार्यान्वयन के लिए, ग्राम सभा समितियाँ बनाती है। 4. ग्राम सभा का चुनावित सचिव सभा को बुलाता है और कार्यवाहियों का रिकॉर्ड रखता है। (A) 2 और 3 (B) 1, 3 और 4 (C) 2, 3 और 4 (D) 1,2,3,4",
#     "hint": "केवल मतदान करने का अधिकार वाले वयस्क ग्रामीण ग्राम सभा के सदस्य बन सकते हैं। 18 वर्ष से कम उम्र के व्यक्ति सदस्य नहीं बन सकते। ग्राम सभा ग्राम पंचायत की काम की योजना को मंजूरी देकर एक पर्यवेक्षक और निगरानी भूमिका निभाती है। ग्राम सभा निर्माण, पशुपालन आदि जैसी समूचे कुछ विशिष्ट कार्यों को संभालने के लिए समितियाँ बनाती है। ग्राम पंचायत के पास एक सचिव होता है जो ग्राम सभा का भी सचिव होता है। यह व्यक्ति चुना नहीं जाता है, बल्कि सरकार द्वारा नियुक्त होता है। सचिव को ग्राम सभा और ग्राम पंचायत की मीटिंग बुलाने और प्रक्रियाओं का रिकॉर्ड रखने का जिम्मेदारी होती है।"


In [19]:
def save_converted_questions():
    with open (no_path, 'w') as fp:
        json.dump(converted_qno, fp)

In [20]:
def clean_text(text, clean=True):
    if not clean:
        return re.sub(r'\s+', ' ', text.strip())
        # return text.strip()
    # return re.sub(r'\s+', ' ', text.replace("\\n", " ").replace("", " ").replace("'", "").replace('"', "'").strip())
    return re.sub(r'\s+', ' ', text.replace("\\n", " ").replace("", " ").replace("'","").replace('"', "'").strip())


In [21]:
def get_data():
    input_data = []
    input_data_dict = {}
    qno = []
    with open(data_path, 'r') as fp:
        json_data = json.load(fp)
    for key, item in json_data.items():
        statement = clean_text(item['statement'])
        hint = clean_text(item['hint'])
        qno = item["Qno"]
        temp = {'statement': statement, 'hint': hint, "Qno": qno}
        input_data.append(temp)
        input_data_dict[key] = temp
    log.info(f"Input data: {len(input_data)}")
    return input_data


In [22]:
def get_file_data():
    if os.path.exists(question_path):
        with open(question_path, 'r') as fp:
            file_data = json.load(fp)
            return file_data
    return []

In [23]:
def save_data():
    try:
        if os.path.exists(question_path):
            with open(question_path, 'r') as fp:
                file_data = json.load(fp)

            with open(question_copy_path, 'w') as fp:
                json.dump(file_data, fp, ensure_ascii=False)

        log.info(f'Saving {len(reponse_data)} data')   
        with open(question_path, 'w') as fp:
            json.dump(reponse_data, fp, ensure_ascii=False)
        return True

    except Exception as e:
        log.error(str(e))
        return None

In [24]:
def get_converted_questions_file():
    if os.path.exists(question_path):
        with open(question_path, 'r') as fp:
            file_data_q = json.load(fp)
        log.info(f'{question_path}: {len(file_data_q)}')
        return [data['Qno'] for data in file_data_q]

    return []

In [25]:
def filter_unique_elements():
    unique_elements = []
    seen_qnos = set()
    if not  os.path.exists(question_path):
        log.error('Path doesnot exists')
        return
    with open(question_path, 'r') as fp:
            file_data = json.load(fp)

    for item in file_data:
        if item['Qno'] not in seen_qnos:
            unique_elements.append(item)
            seen_qnos.add(item['Qno'])
    log.info(f"File data: {len(file_data)}")
    log.info(f"Unique data: {len(unique_elements)}")
    if not len(file_data) > len(unique_elements):
        log.info("No filter required")
        return
    with open(question_path, 'w') as fp:
            json.dump(file_data, fp, ensure_ascii=False)
    with open(question_copy_path, 'w') as fp:
            json.dump(file_data, fp, ensure_ascii=False)
            
    converted_qno = get_converted_questions_file()
    save_converted_questions()

In [26]:
def filter_without_options():
    pattern = r'\(A|a\).*\(B|b\).*\(C|c\).*\(D|d\)'
    elements_without_options = []
    if os.path.exists(question_path):
        with open(question_path, 'r') as fp:
                file_data = json.load(fp)
        
    for item in file_data:
        if not re.search(pattern, item.get('statement')):
            log.info(item["Qno"])
            elements_without_options.append(item)
    if len(elements_without_options)==0:
        log.info("No filter required")
        return
          
    for element in elements_without_options:
        file_data.remove(element)

    with open(question_path, 'w') as fp:
            json.dump(file_data, fp, ensure_ascii=False)
    with open(question_copy_path, 'w') as fp:
            json.dump(file_data, fp, ensure_ascii=False)
            
    converted_qno = get_converted_questions_file()
    save_converted_questions()

In [27]:
def get_converted_questions():
    if os.path.exists(question_path):
        with open(question_path, 'r') as fp:
            file_data_q = json.load(fp)
        log.info(f'From questions.json: {len(file_data_q)}')
        

    if os.path.exists(no_path):
        with open(no_path, 'r') as fp:
            file_data = json.load(fp)
            log.info(f'From no.json: {len(file_data)}')
            return file_data

    return []

In [28]:
def get_rate_limit(message):
    if not message: 
        return 1
    duration_pattern = r'(\d+)m(\d+)s'
    duration_match = re.search(duration_pattern, message)
    if duration_match:
        minutes = int(duration_match.group(1))
        seconds = int(duration_match.group(2))
        total_seconds = minutes * 60 + seconds
        return total_seconds
        
    seconds_pattern = r'(\d+)s'
    seconds_match = re.search(seconds_pattern, message)
    if seconds_match:
        total_seconds = int(seconds_match.group(1))
        return total_seconds
    return 1
    

In [29]:
get_rate_limit('Rate limit reached for gpt-3.5-turbo in organization org-HZolppxVjUPEvBYohG9NyuVw on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.')

20

In [30]:
def convert_question(input_data, OPENAI_KEY):
    qno = input_data.pop("Qno")
    try:
        USER_PROMPT = str(input_data)
        data, error = OpenAI.generate_completion(SYSTEM_PROMPT, USER_PROMPT, OPENAI_KEY)
        if not data:
            if error.get("code", None) == "rate_limit_exceeded":
                wait = get_rate_limit(error.get("message",None))
                log.debug(f"Rate limit exceeded: Sleeping {wait} seconds")
                sleep(wait)
            raise Exception('Opena ai didnot respond')
        data["Qno"] = qno
        converted_qno.append(qno)
        reponse_data.append(data)
        return True
        
    except Exception as e:
        log.error(str(e))
        failed_qno.append(qno)
        return False

In [31]:
reponse_data = get_file_data()
input_data = get_data()
converted_qno = get_converted_questions()
OPENAI_KEY_1 = os.getenv('OPENAI_KEY_1')
OPENAI_KEY_2 = os.getenv('OPENAI_KEY_2')
OPENAI_KEY_3 = os.getenv('OPENAI_KEY_3')
OPENAI_KEY_4 = os.getenv('OPENAI_KEY_4')

log.info(len(converted_qno))
log.info(OPENAI_KEY_4)

[ INFO] [2024-05-06 12:35:53,058] [4169408832] [14] [Input data: 958]
[ INFO] [2024-05-06 12:35:53,065] [1120403275] [5] [From questions.json: 226]
[ INFO] [2024-05-06 12:35:53,066] [1120403275] [11] [From no.json: 226]
[ INFO] [2024-05-06 12:35:53,067] [2371499246] [9] [226]
[ INFO] [2024-05-06 12:35:53,068] [2371499246] [10] [sk-proj-MEEoTcFD87KBPqiC8z6oT3BlbkFJTxLRIC3mu7PnnraM3iC1]


In [32]:
from datetime import datetime

for data in input_data:
    current_time = datetime.now().time()
    if current_time.hour == 22 and current_time.minute > 0:
        log.info("Break")
        break
    qno = data["Qno"]
    if qno in converted_qno:
        continue
    log.info(f'Converting question {qno}')
    try:
        # log.info(data)
        converted = convert_question(data, OPENAI_KEY=OPENAI_KEY_2)
        if not converted:
            log.error(f'Conversion failed {qno}')
            continue
        save_data()
        save_converted_questions()
        log.info(f'Converted question {qno}')
    except Exception as e:
        log.error(str(e))
        


[ INFO] [2024-05-06 12:35:55,725] [11875418] [11] [Converting question 1994]
[ INFO] [2024-05-06 12:35:55,726] [openai] [49] [Sending request to openai api.]
[ DEBUG] [2024-05-06 12:35:55,728] [connectionpool] [1055] [Starting new HTTPS connection (1): api.openai.com:443]
[ DEBUG] [2024-05-06 12:36:12,819] [connectionpool] [549] [https://api.openai.com:443 "POST /v1/chat/completions HTTP/1.1" 500 517]
[ DEBUG] [2024-05-06 12:36:12,820] [openai] [53] [OpenAI api did not send 200 status code: 500]
[ DEBUG] [2024-05-06 12:36:12,821] [openai] [54] [Response: {'message': 'Failed to create completion as the model generated invalid Unicode output. Unfortunately, this can happen in rare situations. Consider reviewing your prompt or reducing the temperature of your request. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID req_9fbe471646f942417c1a02015f0f46ab in your message.)', 'type': 'server_error', 'par

KeyboardInterrupt: 

In [None]:
def create_example():
    example_output =[]
    file_data = get_file_data()
    for data in file_data:
        file_qno = data["Qno"]
        for i_data in input_data:
            input_qno = i_data["Qno"]
            if file_qno == input_qno:
                example_output.append([data, i_data])
                break
    with open (example_output_path, 'w') as fp:
        json.dump(example_output, fp, ensure_ascii=False)

In [49]:
filter_unique_elements()

[ INFO] [2024-05-04 20:25:54,411] [762392232] [14] [File data: 73]
[ INFO] [2024-05-04 20:25:54,411] [762392232] [15] [Unique data: 73]
[ INFO] [2024-05-04 20:25:54,412] [762392232] [17] [No filter required]


In [None]:
create_example()