### general

In [1]:
from datasets import load_dataset

import os

# Read the API key from the environment so the secret is never stored in the
# notebook source. Evaluates to None when the variable is not set.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
# Language codes of the 'juletxara/mgsm' configurations loaded below.
LANGUAGES = ['zh','en', 'es', 'fr', 'de', 'ru',  'ja', 'th', 'sw', 'bn', 'te']
# One loaded dataset per language code, keyed by that code.
mgsm_datasets = {lang: load_dataset('juletxara/mgsm', lang) for lang in LANGUAGES}

import re

def check_answer(model_answer, answer_number):
    """Return True when the last number in ``model_answer`` equals ``answer_number``.

    Commas are stripped from ``model_answer`` first (so ``1,234`` reads as
    ``1234``), then the last integer or decimal literal, with an optional
    leading minus sign, is compared to ``answer_number`` as a float.

    Args:
        model_answer: Text of the model's reply.
        answer_number: Expected numeric answer (anything ``float(str(...))``
            can parse).

    Returns:
        bool: True on a numeric match; False when the values differ, when the
        reply contains no number, or when either input cannot be processed
        (e.g. ``model_answer`` is not a string).
    """
    try:
        # Raw string: '\d' inside a plain literal is an invalid escape sequence
        # and triggers a warning on current Python versions.
        numbers = re.findall(r'-?\d+(?:\.\d+)?', model_answer.replace(',', ''))
        if not numbers:
            return False
        return float(str(answer_number)) == float(numbers[-1])
    except (AttributeError, TypeError, ValueError):
        # Best-effort scoring: malformed inputs count as a wrong answer.
        return False

import backoff
import openai

@backoff.on_exception(backoff.expo, openai.error.RateLimitError)
@backoff.on_exception(backoff.expo, openai.error.APIConnectionError)
def openai_completion_with_backoff(**kwargs):
    """Forward ``kwargs`` to ``openai.ChatCompletion.create`` and return its result.

    The two ``backoff.on_exception`` decorators re-invoke the call, using the
    ``backoff.expo`` wait generator, whenever it raises
    ``openai.error.RateLimitError`` or ``openai.error.APIConnectionError``.
    Any other exception propagates to the caller.
    """
    completion = openai.ChatCompletion.create(**kwargs)
    return completion

  from .autonotebook import tqdm as notebook_tqdm


### translate_with_cot

In [2]:
# Experiment configuration: prompt folder name and the languages to evaluate.
experiment_name = 'translate_with_cot'
desired_langs = ['te','zh', 'es', 'fr', 'de', 'ru',  'ja', 'th', 'sw', 'bn']

# Read each language's prompt text from preprompts/<experiment_name>/<lang>.txt.
preprompts = {}
for lang in desired_langs:
    prompt_path = f'preprompts/{experiment_name}/{lang}.txt'
    with open(prompt_path, mode='r', encoding='utf-8') as prompt_file:
        preprompts[lang] = prompt_file.read()

# Print the first language's prompt as a quick visual check.
print(preprompts[desired_langs[0]])

Follow the given examples and answer the question. In your response, first translate the question from Telugu to English. Then, answer the question in English, using step-by-step reasoning. The last sentence of the response must be in the format of - The answer is {final answer}. As examples:
For user's prompt -
ప్రశ్న: రోజర్ వద్ద 5 టెన్నిస్ బంతులు ఉన్నాయి. అతడు మరో 2 క్యాన్‌ల టెన్నిస్ బంతులు కొనుగోలు చేశాడు. ప్రతి క్యాన్‌లో 3 టెన్నిస్ బంతులున్నాయి. ఇప్పుడు అతడి వద్ద ఎన్ని టెన్నిస్ బంతులు ఉన్నాయి?
assitant's repsonse should be -
Translation to English: Question: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?.
Step-by-Step Answer: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
For user's prompt -
ప్రశ్న: సర్వర్ రూమ్‌లో తొమ్మిది కంప్యూటర్‌లు ఉన్నాయి. సోమవారం నుంచి గురువారం వరకు ప్రతిరోజూ మరో ఐదు కంప్యూటర్‌లు ఇన్‌స్టాల్ చేయబడ్డాయి. సర్వర్ రూమ్‌లో

In [None]:
import os
import openai
import pandas as pd
import time
# Run the experiment: for every language, send each test question to the chat
# model with that language's system prompt, score the reply with check_answer,
# and save one CSV of records per language.
symbol_to_language = {'en': 'English', 'fr': 'French','es':'Spanish','ru':'Russian','de':'German','ja':'Japanese','th':'Thai','sw':'Swahili','te':'Telugu','bn':'Bengali','zh':'Chinese'}

openai.api_key = OPENAI_API_KEY

# Per-language list of result records, keyed by language code.
results = dict()

for lang in desired_langs:
    if lang in ['en']:
        continue
    dataset = mgsm_datasets[lang]
    print(f'\nAnalyzing language "{lang}"')
    results[lang] = list()
    # os.path.join keeps the path portable; literal '\\' separators only work on Windows.
    output_dir = os.path.join('results', f'mgmt_{experiment_name}')
    try:
        # Create the folder up front so a missing directory is reported before
        # any API calls are spent on this language.
        os.makedirs(output_dir, exist_ok=True)
        for idx, query in enumerate(dataset['test']):
            print(f'Analyzing #{idx}')
            system_prompt = preprompts[lang]
            user_prompt = query['question']
            # Built once so the first attempt and the retry send the same request.
            request_kwargs = dict(
                model="gpt-3.5-turbo-0613",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0,
                max_tokens=1024,
            )
            try:
                response = openai_completion_with_backoff(**request_kwargs)
            except Exception as e:
                # One manual retry after a fixed pause; a second failure reaches
                # the outer handler and abandons this language without a CSV.
                print(e)
                time.sleep(20)
                response = openai_completion_with_backoff(**request_kwargs)
            model_answer = response['choices'][0]['message']['content']
            if idx == 0:
                # Show one full exchange per language for manual inspection.
                print(f"Result Example for lang {lang}:")
                print('system:')
                print(system_prompt)
                print('user:')
                print(user_prompt)
                print('chatGPT:')
                print(model_answer)
            results[lang].append({
                'question': query['question'],
                'question_idx': idx,
                'system_prompt': system_prompt,
                'user_prompt': user_prompt,
                'answer_number': query['answer_number'],
                'model_answer': model_answer,
                'model correct': check_answer(model_answer, query['answer_number'])
            })
        df = pd.DataFrame(results[lang])
        df.to_csv(os.path.join(output_dir, f'{lang}.csv'), index=False)
    except Exception as e:
        # Report the failure and move on to the next language.
        print("------------")
        print(f"Exception in lanuage {lang}:")
        print(e)
        print("------------")
        continue


Analyzing language "te"
Analyzing #0
Result Example for lang te:
system:
Follow the given examples and answer the question. In your response, first translate the question from Telugu to English. Then, answer the question in English, using step-by-step reasoning. The last sentence of the response must be in the format of - The answer is {final answer}. As examples:
For user's prompt -
ప్రశ్న: రోజర్ వద్ద 5 టెన్నిస్ బంతులు ఉన్నాయి. అతడు మరో 2 క్యాన్‌ల టెన్నిస్ బంతులు కొనుగోలు చేశాడు. ప్రతి క్యాన్‌లో 3 టెన్నిస్ బంతులున్నాయి. ఇప్పుడు అతడి వద్ద ఎన్ని టెన్నిస్ బంతులు ఉన్నాయి?
assitant's repsonse should be -
Translation to English: Question: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?.
Step-by-Step Answer: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
For user's prompt -
ప్రశ్న: సర్వర్ రూమ్‌లో తొమ్మిది కంప్యూటర్‌లు ఉన్నాయి. సోమవారం నుంచి గురువారం

### translate_without_cot

In [3]:
# Experiment configuration: prompt folder name and the languages to evaluate.
experiment_name = 'translate_without_cot'
desired_langs = ['es', 'fr', 'de', 'ru',  'ja', 'th', 'sw', 'bn', 'te','zh']

# Read each language's prompt text from preprompts/<experiment_name>/<lang>.txt.
preprompts = dict()
for lang in desired_langs:
    with open(f'preprompts/{experiment_name}/{lang}.txt', 'r', encoding = 'utf-8') as f:
        preprompts[lang] = f.read()

# Print the first language's prompt as a quick visual check.
print(preprompts[desired_langs[0]])

Follow the given examples and answer the question. In your response, first translate the question from Spanish to English. Then, answer the question in a single line, without any step-by-step reasoning. As examples:
For user's prompt -
Pregunta: Roger tiene 5 pelotas de tenis. Compra 2 tubos más de pelotas de tenis. Cada tubo contiene 3 pelotas de tenis. ¿Cuántas pelotas de tenis tiene ahora?
assitant's repsonse should be -
Translation to English: Question: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?.
The answer is 11.
For user's prompt -
Pregunta: Había nueve computadoras en la sala de servidores. Se instalaron cinco computadoras cada día, de lunes a jueves. ¿Cuántas computadoras hay ahora en la sala de servidores?
assitant's repsonse should be -
Translation to English: Question: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How 

In [4]:
import os
import openai
import pandas as pd
import time

# Run the experiment: for every language, send each test question to the chat
# model with that language's system prompt, score the reply with check_answer,
# and save one CSV of records per language.
openai.api_key = OPENAI_API_KEY

# Per-language list of result records, keyed by language code.
results = dict()
for lang in desired_langs:
    # Languages listed here are skipped; the list is empty, so every language runs.
    if lang in []:
        continue
    dataset = mgsm_datasets[lang]
    print(f'\nAnalyzing language "{lang}"')
    results[lang] = list()
    # os.path.join keeps the path portable; literal '\\' separators only work on Windows.
    output_dir = os.path.join('results', f'mgmt_{experiment_name}')
    try:
        # Create the folder up front so a missing directory is reported before
        # any API calls are spent on this language.
        os.makedirs(output_dir, exist_ok=True)
        for idx, query in enumerate(dataset['test']):
            print(f'Analyzing #{idx}')
            system_prompt = preprompts[lang]
            user_prompt = query['question']
            # Built once so the first attempt and the retry send the same request.
            request_kwargs = dict(
                model="gpt-3.5-turbo-0613",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0,
                max_tokens=256,
            )
            try:
                response = openai_completion_with_backoff(**request_kwargs)
            except Exception as e:
                # One manual retry after a fixed pause; a second failure reaches
                # the outer handler and abandons this language without a CSV.
                print(e)
                time.sleep(20)
                response = openai_completion_with_backoff(**request_kwargs)
            model_answer = response['choices'][0]['message']['content']
            if idx == 0:
                # Show one full exchange per language for manual inspection.
                print(f"Result Example for lang {lang}:")
                print('system:')
                print(system_prompt)
                print('user:')
                print(user_prompt)
                print('chatGPT:')
                print(model_answer)
            results[lang].append({
                'question': query['question'],
                'question_idx': idx,
                'system_prompt': system_prompt,
                'user_prompt': user_prompt,
                'answer_number': query['answer_number'],
                'model_answer': model_answer,
                'model correct': check_answer(model_answer, query['answer_number'])
            })
        df = pd.DataFrame(results[lang])
        df.to_csv(os.path.join(output_dir, f'{lang}.csv'), index=False)
    except Exception as e:
        # Report the failure and move on to the next language.
        print("------------")
        print(f"Exception in lanuage {lang}:")
        print(e)
        print("------------")
        continue


Analyzing language "es"
Analyzing #0
Result Example for lang es:
system:
Follow the given examples and answer the question. In your response, first translate the question from Spanish to English. Then, answer the question in a single line, without any step-by-step reasoning. As examples:
For user's prompt -
Pregunta: Roger tiene 5 pelotas de tenis. Compra 2 tubos más de pelotas de tenis. Cada tubo contiene 3 pelotas de tenis. ¿Cuántas pelotas de tenis tiene ahora?
assitant's repsonse should be -
Translation to English: Question: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?.
The answer is 11.
For user's prompt -
Pregunta: Había nueve computadoras en la sala de servidores. Se instalaron cinco computadoras cada día, de lunes a jueves. ¿Cuántas computadoras hay ahora en la sala de servidores?
assitant's repsonse should be -
Translation to English: Question: There were nine computers in the server room. 

### CoT in English Results

In [3]:
# Experiment configuration: prompt folder name and the languages to evaluate.
experiment_name = 'cot_in_english'
desired_langs = ['es', 'fr', 'de', 'ru',  'ja', 'th', 'sw', 'bn', 'te']

# Read each language's prompt text from preprompts/<experiment_name>/<lang>.txt.
preprompts = dict()
for lang in desired_langs:
    with open(f'preprompts/{experiment_name}/{lang}.txt', 'r', encoding = 'utf-8') as f:
        preprompts[lang] = f.read()

# Print the first language's prompt as a quick visual check.
print(preprompts[desired_langs[0]])

Follow the given examples and answer the question. In your response, use step-by-step reasoning in en. The last sentence of the response must be in the format of - The answer is {final answer}. As examples:
For user's prompt -
Pregunta: Roger tiene 5 pelotas de tenis. Compra 2 tubos más de pelotas de tenis. Cada tubo contiene 3 pelotas de tenis. ¿Cuántas pelotas de tenis tiene ahora?
assitant's repsonse should be - 
Step-by-Step Answer: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
For user's prompt -
Pregunta: Había nueve computadoras en la sala de servidores. Se instalaron cinco computadoras cada día, de lunes a jueves. ¿Cuántas computadoras hay ahora en la sala de servidores?
assitant's repsonse should be - 
Step-by-Step Answer: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29

In [9]:
import os
import openai
import pandas as pd
import time

# Run the experiment: for every language, send each test question (prefixed
# with 'Question: ') to the chat model with that language's system prompt,
# score the reply with check_answer, and save one CSV per language in the
# current working directory.
openai.api_key = OPENAI_API_KEY

# Per-language list of result records, keyed by language code.
results = dict()

for lang in desired_langs:
    dataset = mgsm_datasets[lang]
    print(f'\nAnalyzing language "{lang}"')
    results[lang] = list()
    try:
        for idx, query in enumerate(dataset['test']):
            print(f'Analyzing #{idx}')
            system_prompt = preprompts[lang]
            user_prompt = 'Question: ' + query['question']
            # Built once so the first attempt and the retry send the same request.
            request_kwargs = dict(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0,
                max_tokens=256,
            )
            try:
                response = openai_completion_with_backoff(**request_kwargs)
            except Exception as e:
                # One manual retry after a fixed pause; a second failure reaches
                # the outer handler and abandons this language without a CSV.
                print(e)
                time.sleep(20)
                response = openai_completion_with_backoff(**request_kwargs)
            model_answer = response['choices'][0]['message']['content']
            record = {
                'question': query['question'],
                'question_idx': idx,
                'system_prompt': system_prompt,
                'user_prompt': user_prompt,
                'answer_number': query['answer_number'],
                'model_answer': model_answer,
                'model correct': check_answer(model_answer, query['answer_number']),
            }
            results[lang].append(record)
        df = pd.DataFrame(results[lang])
        df.to_csv(f'{experiment_name}_results_{lang}.csv', index=False)
    except Exception as e:
        # Report the failure and move on to the next language.
        print(e)
        continue


Analyzing language "es"
Analyzing #0
Analyzing #1
Analyzing #2
Analyzing #3
Analyzing #4
Analyzing #5
Analyzing #6
Analyzing #7
Analyzing #8
Analyzing #9
Analyzing #10
Analyzing #11
Analyzing #12
Analyzing #13
Analyzing #14
Analyzing #15
Analyzing #16
Analyzing #17
Analyzing #18
Analyzing #19
Analyzing #20
Analyzing #21
Analyzing #22
Analyzing #23
Analyzing #24
Analyzing #25
Analyzing #26
Analyzing #27
Analyzing #28
Analyzing #29
Analyzing #30
Analyzing #31
Analyzing #32
Analyzing #33
Analyzing #34
Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Analyzing #35
Analyzing #36
Analyzing #37
Analyzing #38
Analyzing #39
Analyzing #40
Analyzing #41
Analyzing #42
Analyzing #43
Analyzing #44
Analyzing #45
Analyzing #46
Analyzing #47
Analyzing #48
Analyzing #49
Analyzing #50
Analyzing #51
Analyzing #52
Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Analyzing #53
Analyzing #54

In [10]:
# Experiment configuration: prompt folder name and the languages to evaluate.
experiment_name = 'cot_in_english'
desired_langs = ['zh']

# Read each language's prompt text from preprompts/<experiment_name>/<lang>.txt.
preprompts = dict()
for lang in desired_langs:
    with open(f'preprompts/{experiment_name}/{lang}.txt', 'r', encoding = 'utf-8') as f:
        preprompts[lang] = f.read()

# Print the first language's prompt as a quick visual check.
print(preprompts[desired_langs[0]])

Follow the given examples and answer the question. In your response, use step-by-step reasoning in en. The last sentence of the response must be in the format of - The answer is {final answer}. As examples:
For user's prompt -
问题：罗杰有 5 个网球。他又买了 2 罐网球。每罐有 3 个网球。他现在有多少个网球？
assitant's repsonse should be - 
Step-by-Step Answer: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
For user's prompt -
问题：服务器机房里有九台电脑。从周一到周四，每天又安装了五台电脑。服务器机房里现在有多少台电脑？
assitant's repsonse should be - 
Step-by-Step Answer: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.
For user's prompt -
问题：利亚有 32 块巧克力，她妹妹有 42 块。如果她们吃了 35 块，她们一共还剩下多少块？
assitant's repsonse should be - 
Step-by-Step Answer: Leah had 32 chocolates and Leah’s sister had 42. That means there were originally 32 + 42 = 74

In [11]:
import os
import openai
import pandas as pd
import time

# Run the experiment: for every language, send each test question (prefixed
# with 'Question: ') to the chat model with that language's system prompt,
# score the reply with check_answer, and save one CSV of records per language.
openai.api_key = OPENAI_API_KEY

# Per-language list of result records, keyed by language code.
results = dict()

for lang in desired_langs:
    dataset = mgsm_datasets[lang]
    print(f'\nAnalyzing language "{lang}"')
    results[lang] = list()
    # os.path.join keeps the path portable; literal '\\' separators only work on Windows.
    output_dir = os.path.join('results', experiment_name)
    try:
        # Create the folder up front so a missing directory is reported before
        # any API calls are spent on this language.
        os.makedirs(output_dir, exist_ok=True)
        for idx, query in enumerate(dataset['test']):
            print(f'Analyzing #{idx}')
            system_prompt = preprompts[lang]
            user_prompt = 'Question: ' + query['question']
            # Built once so the first attempt and the retry send the same request.
            request_kwargs = dict(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0,
                max_tokens=256,
            )
            try:
                response = openai_completion_with_backoff(**request_kwargs)
            except Exception as e:
                # One manual retry after a fixed pause; a second failure reaches
                # the outer handler and abandons this language without a CSV.
                print(e)
                time.sleep(20)
                response = openai_completion_with_backoff(**request_kwargs)
            model_answer = response['choices'][0]['message']['content']
            results[lang].append({
                'question': query['question'],
                'question_idx': idx,
                'system_prompt': system_prompt,
                'user_prompt': user_prompt,
                'answer_number': query['answer_number'],
                'model_answer': model_answer,
                'model correct': check_answer(model_answer, query['answer_number'])
            })
        df = pd.DataFrame(results[lang])
        df.to_csv(os.path.join(output_dir, f'{lang}.csv'), index=False)
    except Exception as e:
        # Report the failure and move on to the next language.
        print(e)
        continue


Analyzing language "zh"
Analyzing #0
Analyzing #1
Analyzing #2
Analyzing #3
Analyzing #4
Analyzing #5
Analyzing #6
Analyzing #7
Analyzing #8
Analyzing #9
Analyzing #10
Analyzing #11
Analyzing #12
Analyzing #13
Analyzing #14
Analyzing #15
Analyzing #16
Analyzing #17
Analyzing #18
Analyzing #19
Analyzing #20
Analyzing #21
Analyzing #22
Analyzing #23
Analyzing #24
Analyzing #25
Analyzing #26
Analyzing #27
Analyzing #28
Analyzing #29
Analyzing #30
Analyzing #31
Analyzing #32
Analyzing #33
Analyzing #34
Analyzing #35
Analyzing #36
Analyzing #37
Analyzing #38
Analyzing #39
Analyzing #40
Analyzing #41
Analyzing #42
Analyzing #43
Analyzing #44
Analyzing #45
Analyzing #46
Analyzing #47
Analyzing #48
Analyzing #49
Analyzing #50
Analyzing #51
Analyzing #52
Analyzing #53
Analyzing #54
Analyzing #55
Analyzing #56
Analyzing #57
Analyzing #58
Analyzing #59
Analyzing #60
Analyzing #61
Analyzing #62
Analyzing #63
Analyzing #64
Analyzing #65
Analyzing #66
Analyzing #67
Analyzing #68
Analyzing #69
Analy

### Two language input - CoT in Original Language results

In [5]:
# Experiment configuration: prompt folder name and the languages to evaluate.
experiment_name = 'mgmt_two_language_input_cot_in_original_language'
desired_langs = ['es', 'fr', 'de', 'ru',  'ja', 'th', 'sw', 'bn', 'te','en','zh']

# Read each language's prompt text from preprompts/<experiment_name>/<lang>.txt.
preprompts = dict()
for lang in desired_langs:
    with open(f'preprompts/{experiment_name}/{lang}.txt', 'r', encoding = 'utf-8') as f:
        preprompts[lang] = f.read()

# Print the first language's prompt as a quick visual check.
print(preprompts[desired_langs[0]])

Follow the given examples and answer the question. You are given the same question in two languages - English and Spanish. In your response, you need to answer only in Spanish. In your response use step-by-step reasoning, as examples:
For user's prompt -
Spanish - Pregunta: Roger tiene 5 pelotas de tenis. Compra 2 tubos más de pelotas de tenis. Cada tubo contiene 3 pelotas de tenis. ¿Cuántas pelotas de tenis tiene ahora?
English - Question: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?
Assitant's repsonse should be -
Spanish Respuesta paso a paso: Roger comenzó con 5 pelotas. 2 tubos de 3 pelotas de tenis cada uno equivale a 6 pelotas de tenis. 5 + 6 = 11. La respuesta es 11.
For user's prompt -
Spanish - Pregunta: Había nueve computadoras en la sala de servidores. Se instalaron cinco computadoras cada día, de lunes a jueves. ¿Cuántas computadoras hay ahora en la sala de servidores?
English - Question

In [6]:
import os
import openai
import pandas as pd
import time
# Run the experiment: for every language, pair each test question with the
# English test question at the same position, send both to the chat model with
# that language's system prompt, score the reply with check_answer, and save
# one CSV of records per language.
symbol_to_language = {'en': 'English', 'fr': 'French','es':'Spanish','ru':'Russian','de':'German','ja':'Japanese','th':'Thai','sw':'Swahili','te':'Telugu','bn':'Bengali','zh':'Chinese'}

openai.api_key = OPENAI_API_KEY

# Per-language list of result records, keyed by language code.
results = dict()

for lang in desired_langs:
    # Every language in this list is skipped on this run.
    if lang in ['de','es','fr','ja','ru','th','sw','bn','te','en']:
        continue
    dataset_original = mgsm_datasets[lang]
    dataset_english = mgsm_datasets['en']
    print(f'\nAnalyzing language "{lang}"')
    results[lang] = list()
    # os.path.join keeps the path portable; literal '\\' separators only work on Windows.
    output_dir = os.path.join('results', experiment_name)
    try:
        # Create the folder up front so a missing directory is reported before
        # any API calls are spent on this language.
        os.makedirs(output_dir, exist_ok=True)
        for idx, (query, query_english) in enumerate(zip(dataset_original['test'], dataset_english['test'])):
            print(f'Analyzing #{idx}')
            system_prompt = preprompts[lang]
            # Original-language question first, English question second.
            user_prompt = f"{symbol_to_language[lang]} - {query['question']}\nEnglish - {query_english['question']}"
            # Built once so the first attempt and the retry send the same request.
            request_kwargs = dict(
                model="gpt-3.5-turbo-0613",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0,
                max_tokens=256,
            )
            try:
                response = openai_completion_with_backoff(**request_kwargs)
            except Exception as e:
                # One manual retry after a fixed pause; a second failure reaches
                # the outer handler and abandons this language without a CSV.
                print(e)
                time.sleep(20)
                response = openai_completion_with_backoff(**request_kwargs)
            model_answer = response['choices'][0]['message']['content']
            if idx == 0:
                # Show one full exchange per language for manual inspection.
                print(f"Result Example for lang {lang}:")
                print('system:')
                print(system_prompt)
                print('user:')
                print(user_prompt)
                print('chatGPT:')
                print(model_answer)
            results[lang].append({
                'question': query['question'],
                'question_idx': idx,
                'system_prompt': system_prompt,
                'user_prompt': user_prompt,
                'answer_number': query['answer_number'],
                'model_answer': model_answer,
                'model correct': check_answer(model_answer, query['answer_number'])
            })
        df = pd.DataFrame(results[lang])
        df.to_csv(os.path.join(output_dir, f'{lang}.csv'), index=False)
    except Exception as e:
        # Report the failure and move on to the next language.
        print("------------")
        print(f"Exception in lanuage {lang}:")
        print(e)
        print("------------")
        continue


Analyzing language "zh"
Analyzing #0
Result Example for lang zh:
system:
Follow the given examples and answer the question. You are given the same question in two languages - English and Chinese. In your response, you need to answer only in Chinese. In your response use step-by-step reasoning, as examples:
For user's prompt -
Chinese - 问题：罗杰有 5 个网球。他又买了 2 罐网球。每罐有 3 个网球。他现在有多少个网球？
English - Question: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?
Assitant's repsonse should be -
Chinese 逐步解答：罗杰一开始有 5 个球。2 罐各 3 个网球就是 6 个网球。5 + 6 = 11。答案是 11。
For user's prompt -
Chinese - 问题：服务器机房里有九台电脑。从周一到周四，每天又安装了五台电脑。服务器机房里现在有多少台电脑？
English - Question: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?
Assitant's repsonse should be -
Chinese 逐步解答：从周一到周四有 4 天。每天增加 5 台电脑。这意味着一共增加了 4 * 5 = 20 台电脑。一开始有 9 台电脑，所以现

### English input - CoT in Original Language results

In [8]:
# Experiment configuration: prompt folder name and the languages to evaluate.
experiment_name = 'english_input_cot_in_original_language'
desired_langs = ['es', 'fr', 'de', 'ru',  'ja', 'th', 'sw', 'bn', 'te','en','zh']

# Read each language's prompt text from preprompts/<experiment_name>/<lang>.txt.
preprompts = dict()
for lang in desired_langs:
    with open(f'preprompts/{experiment_name}/{lang}.txt', 'r', encoding = 'utf-8') as f:
        preprompts[lang] = f.read()

# Print the first language's prompt as a quick visual check.
print(preprompts[desired_langs[0]])

Follow the given examples and answer the question. You are given a question in English. In your response, you need to answer only in Spanish. In your response use step-by-step reasoning, as examples:
For user's prompt -
Question: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?
Assitant's repsonse should be -
Respuesta paso a paso: Roger comenzó con 5 pelotas. 2 tubos de 3 pelotas de tenis cada uno equivale a 6 pelotas de tenis. 5 + 6 = 11. La respuesta es 11.
For user's prompt -
Question: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?
Assitant's repsonse should be -
Respuesta paso a paso: Hay 4 días de lunes a jueves. Se agregaron 5 computadoras cada día. Eso significa que en total se agregaron 4 * 5 = 20 computadoras. Al principio, había 9 computadoras, por lo que ahora hay 9 + 20 = 29 co

In [9]:
import os
import openai
import pandas as pd
import time
# Run the experiment: for every non-English language, send the English test
# question at each position to the chat model with that language's system
# prompt, score the reply against the answer of the same-position question in
# the target-language dataset, and save one CSV of records per language.
symbol_to_language = {'en': 'English', 'fr': 'French','es':'Spanish','ru':'Russian','de':'German','ja':'Japanese','th':'Thai','sw':'Swahili','te':'Telugu','bn':'Bengali','zh':'Chinese'}

openai.api_key = OPENAI_API_KEY

# Per-language list of result records, keyed by language code.
results = dict()

for lang in desired_langs:
    if lang in ['en']:
        continue
    dataset_original = mgsm_datasets[lang]
    dataset_english = mgsm_datasets['en']
    print(f'\nAnalyzing language "{lang}"')
    results[lang] = list()
    # os.path.join keeps the path portable; literal '\\' separators only work on Windows.
    output_dir = os.path.join('results', f'mgmt_{experiment_name}')
    try:
        # Create the folder up front so a missing directory is reported before
        # any API calls are spent on this language.
        os.makedirs(output_dir, exist_ok=True)
        for idx, (query, query_english) in enumerate(zip(dataset_original['test'], dataset_english['test'])):
            print(f'Analyzing #{idx}')
            system_prompt = preprompts[lang]
            # Only the English question is sent; the target-language record
            # supplies the stored question text and the expected answer.
            user_prompt = f"{query_english['question']}"
            # Built once so the first attempt and the retry send the same request.
            request_kwargs = dict(
                model="gpt-3.5-turbo-0613",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0,
                max_tokens=256,
            )
            try:
                response = openai_completion_with_backoff(**request_kwargs)
            except Exception as e:
                # One manual retry after a fixed pause; a second failure reaches
                # the outer handler and abandons this language without a CSV.
                print(e)
                time.sleep(20)
                response = openai_completion_with_backoff(**request_kwargs)
            model_answer = response['choices'][0]['message']['content']
            if idx == 0:
                # Show one full exchange per language for manual inspection.
                print(f"Result Example for lang {lang}:")
                print('system:')
                print(system_prompt)
                print('user:')
                print(user_prompt)
                print('chatGPT:')
                print(model_answer)
            results[lang].append({
                'question': query['question'],
                'question_idx': idx,
                'system_prompt': system_prompt,
                'user_prompt': user_prompt,
                'answer_number': query['answer_number'],
                'model_answer': model_answer,
                'model correct': check_answer(model_answer, query['answer_number'])
            })
        df = pd.DataFrame(results[lang])
        df.to_csv(os.path.join(output_dir, f'{lang}.csv'), index=False)
    except Exception as e:
        # Report the failure and move on to the next language.
        print("------------")
        print(f"Exception in lanuage {lang}:")
        print(e)
        print("------------")
        continue


Analyzing language "es"
Analyzing #0
Result Example for lang es:
system:
Follow the given examples and answer the question. You are given a question in English. In your response, you need to answer only in Spanish. In your response use step-by-step reasoning, as examples:
For user's prompt -
Question: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?
Assitant's repsonse should be -
Respuesta paso a paso: Roger comenzó con 5 pelotas. 2 tubos de 3 pelotas de tenis cada uno equivale a 6 pelotas de tenis. 5 + 6 = 11. La respuesta es 11.
For user's prompt -
Question: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?
Assitant's repsonse should be -
Respuesta paso a paso: Hay 4 días de lunes a jueves. Se agregaron 5 computadoras cada día. Eso significa que en total se agregaron 4 * 5 = 20 computadora