In [1]:
import os
os.chdir("../")

In [3]:
# imports
import warnings
warnings.filterwarnings("ignore")

import time
import openai
import json
from typing import Union
import numpy as np
import pandas as pd
from datasets import Dataset, DatasetDict
from mega.data.data_utils import choose_few_shot_examples
from mega.prompting.instructions import INSTRUCTIONS
from mega.prompting.prompting_utils import load_prompt_template
# from mega.utils.env_utils import load_env
from mega.models.completion_models import get_model_pred, gpt3x_completion, gemini_completion
from mega.prompting.prompting_utils import construct_prompt, construct_cmsentiment_prompt
from tqdm import tqdm

In [5]:
# # Make sure that {env_name}.env file is present in the envs/ directory
# env_name = "melange"
# load_env(env_name=env_name)

In [6]:
# openai.api_base

Get data

In [7]:
# Read train.txt and test.txt files
with open('gluecosdata/sentiment/train.txt', 'r') as f:
    train_lines = f.readlines()
with open('gluecosdata/sentiment/validation.txt', 'r') as f:
    test_lines = f.readlines()

# Create dictionaries with "text" and "label" keys and corresponding values from train.txt and test.txt
train_dict = {"text": [], "label": []}
for line in train_lines:
    text, label = line.strip().split('\t')
    train_dict["text"].append(text)
    train_dict["label"].append(label)

test_dict = {"text": [], "label": []}
for line in test_lines:
    text, label = line.strip().split('\t')
    test_dict["text"].append(text)
    test_dict["label"].append(label)

# # Create Dataset objects
train_dataset = Dataset.from_dict(train_dict)
test_dataset = Dataset.from_dict(test_dict)

# Create DatasetDict with "train" and "test" keys
dataset_dict = DatasetDict({"train": train_dataset, "test": test_dataset})
dataset_dict

FileNotFoundError: [Errno 2] No such file or directory: 'gluecosdata/sentiment/train.txt'

In [6]:
model = "gpt-35-turbo-deployment"
prompt_name = "following positive negative neutral"
few_shot_k = 8

In [7]:
train_dataset[:5]

{'text': ['campeon really hit me in the heart ! ! !',
  'hahah todo es bien pues , que estas haciendo que no te has dormido lol',
  'y nada de fat puro muscle',
  'stupid .. i think u do sometimes ! ! : o ya se por quien tmb ... lol',
  'yeaaa people will always bitch and have shit to say ! i was tld my damas looked better then me'],
 'label': ['positive', 'positive', 'neutral', 'negative', 'negative']}

In [8]:
template = """Does the following sentence have a positive, negative or neutral sentiment? {sentence}"""

verbalizer = { "positive": "positive", "negative": "negative", "neutral": "neutral"}


In [9]:
# Loading instruction for the task
instruction = INSTRUCTIONS["gluecos_sentiment"]
print(instruction)

You are an NLP assistant whose purpose is to solve Sentiment Analysis problems. Sentiment Analysis is the task of determining whether the sentiment, opinion or emotion expressed in a textual data is: positive, negative, or neutral. Answer as concisely as possible in the same format as the examples below:


In [10]:
# Getting few-shot examples
train_examples = choose_few_shot_examples(
        train_dataset, few_shot_k, selection_criteria="random")

In [11]:
test_example = test_dataset[0]

prompt, label = construct_cmsentiment_prompt(
    train_examples,
    test_dataset[0],
    train_prompt_template=template,
    test_prompt_template=template,
    chat_prompt=True,
    instruction=instruction,
    verbalizer=verbalizer
)
prompt

[{'role': 'system',
  'content': 'You are an NLP assistant whose purpose is to solve Sentiment Analysis problems. Sentiment Analysis is the task of determining whether the sentiment, opinion or emotion expressed in a textual data is: positive, negative, or neutral. Answer as concisely as possible in the same format as the examples below:'},
 {'role': 'user',
  'content': 'Does the following sentence have a positive, negative or neutral sentiment? {sentence}'},
 {'role': 'assistant', 'content': 'neutral'},
 {'role': 'user',
  'content': 'Does the following sentence have a positive, negative or neutral sentiment? {sentence}'},
 {'role': 'assistant', 'content': 'neutral'},
 {'role': 'user',
  'content': 'Does the following sentence have a positive, negative or neutral sentiment? {sentence}'},
 {'role': 'assistant', 'content': 'neutral'},
 {'role': 'user',
  'content': 'Does the following sentence have a positive, negative or neutral sentiment? {sentence}'},
 {'role': 'assistant', 'content

In [12]:
prediction = gpt3x_completion(
    prompt,
    model,
    temperature=0,
    max_tokens=10
)
match = float(prediction.startswith(label))
print(f"Prediction: {prediction}")
print(f"Label: {label}")
print(f"Match: {match}")

Prediction: positive
Label: neutral
Match: 0.0


In [13]:
matches = []
preds = []
labels = []
for test_example in tqdm(test_dataset):
    prompt, label = construct_cmsentiment_prompt(
        train_examples,
        test_example,
        train_prompt_template=template,
        test_prompt_template=template,
        chat_prompt=True,
        instruction=instruction,
        verbalizer=verbalizer
    )
    prediction = gpt3x_completion(
        prompt,
        model,
        temperature=0,
        max_tokens=10
    )
    time.sleep(1/2)
    match = float(prediction.startswith(label))
    preds.append(prediction)
    labels.append(label)
    matches.append(match)

print(f"Accuracy: {np.mean(matches)}")

100%|██████████| 231/231 [05:57<00:00,  1.55s/it]

Accuracy: 0.2943722943722944



