In [1]:
import os

# Move the working directory one level up, exactly once per kernel session.
# A bare relative `os.chdir("../")` climbs one more level every time this cell
# is re-run, so the directory the kernel started in is remembered on first
# execution and the parent is always resolved against it.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [2]:
import random
import time

import numpy as np
import openai
from tqdm import tqdm

from mega.data.data_utils import choose_few_shot_examples
from mega.data.load_datasets import load_xnli_dataset
from mega.models.completion_models import get_model_pred, gpt3x_completion
from mega.prompting.instructions import INSTRUCTIONS
from mega.prompting.prompting_utils import construct_prompt
from mega.prompting.prompting_utils import load_prompt_template
from mega.utils.env_utils import load_env

In [3]:
# Load the environment configuration identified by `env_name`.
# Make sure that {env_name}.env file is present in the envs/ directory
# NOTE(review): presumably this is what configures the `openai` client used
# below (endpoint, credentials) — confirm in mega.utils.env_utils.
env_name = "melange"
load_env(env_name=env_name)

In [4]:
# Display the API endpoint the `openai` client is currently configured with.
# NOTE(review): the rendered output exposes the endpoint URL; consider clearing
# this cell's output before sharing the notebook.
openai.api_base

'https://gpttesting1.openai.azure.com/'

In [5]:
# Experiment configuration used by the cells below.
model = "gpt-35-turbo-deployment"  # model identifier handed to gpt3x_completion
pivot_lang = "hi"  # language of the train split and of the prompt template
tgt_lang = "hi"  # language of the validation split that is evaluated
prompt_name = "GPT-3 style"  # name of the prompt template to load
few_shot_k = 8  # number of few-shot examples requested from the train split

In [6]:
# Loading datasets: the pivot-language train split supplies the few-shot
# examples, the target-language validation split is what gets evaluated.
train_dataset = load_xnli_dataset(pivot_lang, split = "train")
test_dataset = load_xnli_dataset(tgt_lang, split = "validation")

Found cached dataset xnli (/home/t-kabirahuja/.cache/huggingface/datasets/xnli/hi/1.1.0/818164464f9c9fd15776ca8a00423b074344c3e929d00a2c1a84aa5a50c928bd)


  0%|          | 0/3 [00:00<?, ?it/s]

Found cached dataset xnli (/home/t-kabirahuja/.cache/huggingface/datasets/xnli/hi/1.1.0/818164464f9c9fd15776ca8a00423b074344c3e929d00a2c1a84aa5a50c928bd)


  0%|          | 0/3 [00:00<?, ?it/s]

In [7]:
# Loading prompt template for the pivot language and printing its jinja source,
# which shows how an example's fields are laid out in the prompt text.
prompt_template = load_prompt_template(pivot_lang, prompt_name, dataset = "xnli")
print(prompt_template.jinja)

{{premise}}
Question: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}


In [8]:
# Loading instruction for the task. It is passed to construct_prompt further
# down, where it appears as the system message of the chat prompt.
instruction = INSTRUCTIONS["xnli"]
print(instruction)

You are an NLP assistant whose purpose is to solve Natural Language Inference (NLI) problems. NLI is the task of determining the inference relation between two (short, ordered) texts: entailment, contradiction, or neutral. Answer as concisely as possible in the same format as the examples below:


In [9]:
# Getting few-shot examples
# Seed the global RNGs first so that Restart & Run All picks the same examples
# (and therefore reproduces the same prompts and accuracy).
# NOTE(review): assumes choose_few_shot_examples draws from the global `random`
# / NumPy RNG state — confirm; if it uses its own generator, pass the seed
# there instead.
few_shot_seed = 42
random.seed(few_shot_seed)
np.random.seed(few_shot_seed)
train_examples = choose_few_shot_examples(
        train_dataset, few_shot_k, selection_criteria="random")

In [10]:
# Build the chat-style few-shot prompt for one validation example.
# `prompt` is the message list sent to the model; `label` is the gold answer
# string the prediction is compared against.
test_example = test_dataset[0]

prompt, label = construct_prompt(
    train_examples,
    test_example,  # reuse the example picked above instead of indexing the dataset again
    train_prompt_template=prompt_template,
    test_prompt_template=prompt_template,
    chat_prompt=True,
    instruction=instruction
)
prompt

[{'role': 'system',
  'content': 'You are an NLP assistant whose purpose is to solve Natural Language Inference (NLI) problems. NLI is the task of determining the inference relation between two (short, ordered) texts: entailment, contradiction, or neutral. Answer as concisely as possible in the same format as the examples below:'},
 {'role': 'user',
  'content': 'मैंने हाल ही में देखा है कि मैं अपने वॉर ् डरोब को अपडेट करने की कोशिश करने की कोशिश कर रहा हूँ कि मैं जैकेट और ब ् लाउज का एक ् सचेंज कर सकता हूँ और सभी बहुत कुछ\nQuestion: सुबह में मैं आमतौर पर बस इसलिए फेंक रहा हूँ क ् योंकि मैं काम करने के लिए जींस पहनना पसंद करता हूँ True, False, or Neither?'},
 {'role': 'assistant', 'content': 'False'},
 {'role': 'user',
  'content': 'लेकिन कुछ देशों में मुद ् रा चिंतन में बहुत अधिक प ् रभाव पड ़ सकता है , अन ् य लोगों में मुद ् रा छोड ़ ने को लगता है कि डॉक ् टर ने क ् या आदेश दिया है .\nQuestion: मुद ् रा अटकलें अवैध होना चाहिए . True, False, or Neither?'},
 {'role': 'assistant', 'cont

In [11]:
# Query the model once with the single-example prompt (temperature 0, at most
# 10 tokens) and score it: the prediction counts as a match when it begins
# with the gold label string.
prediction = gpt3x_completion(prompt, model, temperature=0, max_tokens=10)
match = float(prediction.startswith(label))
print(
    f"Prediction: {prediction}",
    f"Label: {label}",
    f"Match: {match}",
    sep="\n",
)

Prediction: True
Label: Neither
Match: 0.0


In [13]:
# Few-shot evaluation over the first `num_eval_examples` validation examples.
num_eval_examples = 100  # how many validation examples to score
request_pause_s = 1 / 2  # seconds to wait after each API call (presumably for rate limiting)

# Never request more rows than the split holds, so that `select` is not handed
# out-of-range indices when the split is smaller than `num_eval_examples`.
eval_subset = test_dataset.select(
    range(min(num_eval_examples, len(test_dataset)))
)

matches = []  # 1.0 / 0.0 per example
preds = []  # raw model outputs
labels = []  # gold label strings
for test_example in tqdm(eval_subset):
    prompt, label = construct_prompt(
        train_examples,
        test_example,
        train_prompt_template=prompt_template,
        test_prompt_template=prompt_template,
        chat_prompt=True,
        instruction=instruction
    )
    prediction = gpt3x_completion(
        prompt,
        model,
        temperature=0,
        max_tokens=10
    )
    time.sleep(request_pause_s)
    # A prediction is correct when it begins with the gold label string.
    match = float(prediction.startswith(label))
    preds.append(prediction)
    labels.append(label)
    matches.append(match)

# np.mean of an empty list emits a RuntimeWarning; report NaN explicitly instead.
accuracy = np.mean(matches) if matches else float("nan")
print(f"Accuracy: {accuracy}")

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:32<00:00,  1.09it/s]

Accuracy: 0.54



