In [2]:
import os

# Move the working directory one level up so that the relative paths used
# below ("gluecosdata/...", "results/...") and the `mega` package imports resolve.
# NOTE(review): this is relative to the kernel's current directory, so running
# this cell twice without a restart moves up two levels.
os.chdir("../")

In [4]:
# imports
import warnings

warnings.filterwarnings("ignore")

import time
import openai
import json
from typing import Union
import numpy as np
import pandas as pd
from datasets import Dataset, DatasetDict
from mega.data.data_utils import choose_few_shot_examples
from mega.prompting.instructions import INSTRUCTIONS
from mega.prompting.prompting_utils import load_prompt_template
# from mega.utils.env_utils import load_env
from mega.models.completion_models import get_model_pred, gpt3x_completion, gemini_completion
from mega.prompting.prompting_utils import construct_prompt, construct_cmsentiment_prompt
from tqdm import tqdm

In [6]:
# # Make sure that {env_name}.env file is present in the envs/ directory
# env_name = "melange"
# load_env(env_name=env_name)

In [4]:
# Pin the API version on the `openai` module.
# NOTE(review): the evaluation below calls `gemini_completion`; this setting
# looks like a leftover from an OpenAI/Azure run — confirm it is still needed.
openai.api_version = "2022-12-01"

In [5]:
# Display the API endpoint currently configured on the `openai` module.
openai.api_base

'https://gpttesting1.openai.azure.com/'

Get data

In [7]:
# Read the raw train and validation splits; each line is "<text><TAB><label>".
# The encoding is explicit so the read does not depend on the platform default
# (assumes the files are UTF-8 — TODO confirm).
with open("gluecosdata/sentiment/train.txt", "r", encoding="utf-8") as f:
    train_lines = f.readlines()
# NOTE: the "test" split used throughout this notebook is validation.txt.
with open("gluecosdata/sentiment/validation.txt", "r", encoding="utf-8") as f:
    test_lines = f.readlines()


def _parse_labelled_lines(lines):
    """Convert "<text><TAB><label>" lines into a {"text": [...], "label": [...]} dict.

    Args:
        lines: iterable of raw lines as returned by ``readlines()``.

    Returns:
        A dictionary with parallel "text" and "label" lists.

    Raises:
        ValueError: if a non-blank line contains no tab separator.
    """
    columns = {"text": [], "label": []}
    for line_no, raw_line in enumerate(lines, start=1):
        record = raw_line.strip()
        if not record:
            # Blank lines (e.g. a trailing newline at end of file) carry no example.
            continue
        # Split on the LAST tab so that a tab inside the text does not break parsing.
        text, separator, label = record.rpartition("\t")
        if not separator:
            raise ValueError(
                f"Line {line_no} is not in '<text>\\t<label>' format: {raw_line!r}"
            )
        columns["text"].append(text)
        columns["label"].append(label)
    return columns


# Column dictionaries with "text" and "label" keys for each split
train_dict = _parse_labelled_lines(train_lines)
test_dict = _parse_labelled_lines(test_lines)

# Create Dataset objects
train_dataset = Dataset.from_dict(train_dict)
test_dataset = Dataset.from_dict(test_dict)

# Create DatasetDict with "train" and "test" keys
dataset_dict = DatasetDict({"train": train_dataset, "test": test_dataset})
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1851
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 231
    })
})

In [15]:
# Run configuration used by the cells below.
model = "gemini-pro"  # model name passed to gemini_completion
prompt_name = "following positive negative neutral"  # only used to label the output directory / results metadata
few_shot_k = 8  # number of in-context examples drawn from the training split
lang = "en"  # passed to gemini_completion and used in the output directory name

In [9]:
# Peek at the first five training examples (returned as a dict of columns).
train_dataset[:5]

{'text': ['campeon really hit me in the heart ! ! !',
  'hahah todo es bien pues , que estas haciendo que no te has dormido lol',
  'y nada de fat puro muscle',
  'stupid .. i think u do sometimes ! ! : o ya se por quien tmb ... lol',
  'yeaaa people will always bitch and have shit to say ! i was tld my damas looked better then me'],
 'label': ['positive', 'positive', 'neutral', 'negative', 'negative']}

In [10]:
# Prompt template shared by the few-shot examples and the test example.
# NOTE(review): the rendered prompt shown further down in this notebook still
# contains the literal "{sentence}" text, i.e. the sentences do not appear to be
# substituted into the prompt. Confirm which placeholder / template type
# `construct_cmsentiment_prompt` expects before trusting the accuracy numbers.
template = """Does the following sentence have a positive, negative or neutral sentiment? {sentence}"""

# Maps dataset labels to the answer strings used in the prompt (identity mapping here).
verbalizer = {"positive": "positive", "negative": "negative", "neutral": "neutral"}

In [11]:
# Look up the task instruction registered under "gluecos_sentiment" and print it
# so the exact wording is recorded in the notebook output.
instruction = INSTRUCTIONS["gluecos_sentiment"]
print(instruction)

You are an NLP assistant whose purpose is to solve Sentiment Analysis problems. Sentiment Analysis is the task of determining whether the sentiment, opinion or emotion expressed in a textual data is: positive, negative, or neutral. Answer as concisely as possible in the same format as the examples below:


In [12]:
# Pick `few_shot_k` in-context examples from the training split using the
# "random" selection criteria.
# NOTE(review): no random seed is set anywhere in this notebook, so the selected
# examples may differ between runs unless the helper seeds internally — confirm.
train_examples = choose_few_shot_examples(
    train_dataset, few_shot_k, selection_criteria="random"
)

In [13]:
# Smoke test: build the few-shot prompt for the first evaluation example.
test_example = test_dataset[0]

prompt, label = construct_cmsentiment_prompt(
    train_examples,
    test_example,  # reuse the example selected above instead of indexing again
    train_prompt_template=template,
    test_prompt_template=template,
    chat_prompt=False,
    instruction=instruction,
    verbalizer=verbalizer,
)
# NOTE(review): inspect the displayed prompt — in the recorded output it contains
# the literal "{sentence}" placeholder rather than the example sentences, and the
# instruction text is absent. Verify the template placeholder against
# `construct_cmsentiment_prompt` before running the full evaluation.
prompt

'Does the following sentence have a positive, negative or neutral sentiment? {sentence}\nneutral\nDoes the following sentence have a positive, negative or neutral sentiment? {sentence}\nnegative\nDoes the following sentence have a positive, negative or neutral sentiment? {sentence}\nnegative\nDoes the following sentence have a positive, negative or neutral sentiment? {sentence}\nnegative\nDoes the following sentence have a positive, negative or neutral sentiment? {sentence}\npositive\nDoes the following sentence have a positive, negative or neutral sentiment? {sentence}\npositive\nDoes the following sentence have a positive, negative or neutral sentiment? {sentence}\nnegative\nDoes the following sentence have a positive, negative or neutral sentiment? {sentence}\npositive\nDoes the following sentence have a positive, negative or neutral sentiment? {sentence}\n'

In [16]:
# Query the model once for the smoke-test prompt.
prediction = gemini_completion(
    prompt,
    model,
    lang,
    temperature=0,
    max_tokens=10
)
# Compare on a normalised form so that leading whitespace or capitalisation in
# the model output (e.g. " Positive") does not count as a miss.
match = float(prediction.strip().lower().startswith(label.lower()))
print(f"Prediction: {prediction}")
print(f"Label: {label}")
print(f"Match: {match}")

Prediction: positive
Label: neutral
Match: 0.0


In [17]:
# Evaluate every example of the evaluation split with the same few-shot examples.
matches = []  # 1.0 / 0.0 per example
preds = []    # raw model outputs, kept unmodified for later inspection
labels = []   # gold labels as returned by the prompt constructor
for test_example in tqdm(test_dataset):
    prompt, label = construct_cmsentiment_prompt(
        train_examples,
        test_example,
        train_prompt_template=template,
        test_prompt_template=template,
        chat_prompt=False,
        instruction=instruction,
        verbalizer=verbalizer,
    )
    prediction = gemini_completion(
        prompt,
        model,
        lang,
        temperature=0,
        max_tokens=10
    )
    # Short pause between requests (presumably to stay under the API rate limit).
    time.sleep(1/2)
    # Normalise before comparing so that leading whitespace or capitalisation in
    # the model output does not count as a miss; the raw prediction is still stored.
    match = float(prediction.strip().lower().startswith(label.lower()))
    preds.append(prediction)
    labels.append(label)
    matches.append(match)

print(f"Accuracy: {np.mean(matches)}")

100%|██████████| 231/231 [07:11<00:00,  1.87s/it]

Accuracy: 0.2943722943722944





In [30]:
# Directory that receives preds.json and results.json for this run. The model
# segment comes from `model` so the path follows the configuration cell.
# NOTE(review): the path says "english-hindi", but the training samples displayed
# earlier in this notebook look Spanish-English — confirm the language pair.
output_dir = f"results/gluecos_sentiment/{model}/english-hindi/PivotLang_{lang}_PromptName_{prompt_name}_FewShotK_{few_shot_k}"
os.makedirs(output_dir, exist_ok=True)

In [31]:
# Write one JSON object per line (JSON Lines) with the per-example outcome.
# The file is opened once in "w" mode so re-running this cell replaces the
# previous contents instead of appending duplicate records, and the encoding is
# explicit because ensure_ascii=False can emit non-ASCII characters.
with open(os.path.join(output_dir, "preds.json"), "w", encoding="utf-8") as f:
    for idx, (pred, label, match) in enumerate(zip(preds, labels, matches)):
        record = {
            "idx": idx,
            "prediction": pred,
            "label": label,
            "match": match,
        }
        f.write(json.dumps(record, ensure_ascii=False) + "\n")

In [None]:
# Run metadata and the aggregate metric, saved next to the per-example predictions.
# The payload is built before the file is opened so a failure here cannot leave a
# truncated results.json behind.
results = {
    "env": "melange",
    "dataset": "gluecos_sentiment",  # was misspelled "glucose_sentiment"
    "pivot_lang": "en-hi",
    "tgt_lang": "en-hi",
    "pivot_prompt_name": prompt_name,
    "tgt_prompt_name": prompt_name,
    "few_shot_k": few_shot_k,
    "few_shot_selection": "random",
    # NOTE(review): no seed is set anywhere in this notebook — confirm that 42 is
    # really the seed used when the few-shot examples were selected.
    "seed": 42,
    "model": model,
    "model_type": "completion",
    "save_dir": "results",
    # The three values below mirror the arguments passed to the completion calls.
    "temperature": 0,
    "top_p": 1,
    "max_tokens": 10,
    "metrics": {
        # Cast to a built-in float so the value is a plain JSON number.
        "accuracy": float(np.mean(matches))
    },
}
with open(os.path.join(output_dir, "results.json"), "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=4)

In [18]:
# Re-print the overall accuracy (mean of the per-example match flags).
print(f"Accuracy: {np.mean(matches)}")

Accuracy: 0.2943722943722944


Bad pipe message: %s [b"\xbb\xc2s\xb2]^\xb8\x1f\xff\xeb3JG\xe0`Q\xce\x0f\x00\x00|\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc\xa8\xcc\xaa\xc0\xaf\xc0\xad\xc0\xa3\xc0\x9f\xc0]\xc0a\xc0W\xc0S\xc0+\xc0/\x00\xa2\x00\x9e\xc0\xae\xc0\xac\xc0\xa2\xc0\x9e\xc0\\\xc0`\xc0V\xc0R\xc0$\xc0(\x00k\x00j\xc0#\xc0'\x00g\x00@\xc0\n\xc0\x14\x009\x008\xc0\t\xc0\x13\x003\x002\x00\x9d\xc0\xa1\xc0\x9d\xc0Q\x00\x9c\xc0\xa0\xc0\x9c\xc0P\x00=\x00<\x005\x00/\x00\x9a\x00\x99\xc0\x07\xc0\x11\x00\x96\x00\x05\x00\xff\x01\x00\x00j\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x000\x00.\x04\x03\x05\x03\x06\x03\x08\x07\x08\x08\x08\t\x08\n\x08\x0b\x08\x04\x08\x05\x08\x06\x04\x01\x05\x01\x06\x01\x03\x03\x02\x03\x03\x01\x02\x01\x03\x02\x02\x02\x04\x02\x05\x02"]
Bad pipe message: %s [b'\x82(\x0e\xf1\xcb\x86.6\x9b^\x86\xf5\x9c\xea\xd4\x14"q\x00\x00\xa6\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc\xa8