In [6]:
import numpy as np
import random
import openai
import json
import os

Reading in the data

In [7]:
# function to load data
def load_dataset(*src_filenames, labels=None):
    """Load records from one or more JSON Lines files.

    Every non-blank line of each file is parsed with ``json.loads``. Records
    are returned in the order the files were given, then in line order.

    Parameters
    ----------
    *src_filenames : str or path-like
        Paths of the JSONL files to read.
    labels : container, optional
        When given, only records whose ``'gold_label'`` value is contained in
        ``labels`` are kept. When ``None`` (the default) every record is kept.

    Returns
    -------
    list
        The parsed records.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    KeyError
        If ``labels`` is given and a parsed dict has no ``'gold_label'`` key.
    """
    data = []
    for filename in src_filenames:
        # JSON text is UTF-8; be explicit instead of relying on the platform's
        # locale-dependent default encoding.
        with open(filename, encoding='utf-8') as f:
            for line in f:
                # Skip blank lines (e.g. a stray trailing newline) instead of
                # letting json.loads fail on them.
                if not line.strip():
                    continue
                d = json.loads(line)
                if labels is None or d['gold_label'] in labels:
                    data.append(d)
    return data


In [9]:
# Load the DynaSent round-2 training split (used later for data augmentation).

# Only records whose gold label is one of these three values are kept.
ternary_labels = ('positive', 'negative', 'neutral')

dynansent_r2_train = os.path.join(
    'dynasent-v1.1',
    'dynasent-v1.1-round02-dynabench-train.jsonl',
)
r2_train = load_dataset(dynansent_r2_train, labels=ternary_labels)

# Split the records into two parallel tuples: sentences and their gold labels.
X_train, y_train = zip(
    *((record['sentence'], record['gold_label']) for record in r2_train)
)
dataset = X_train, y_train


Sampling for few-shot prompt engineering

In [24]:
def fewshotTemplate(sentences, labels):
    """Build a chat-message list that demonstrates the task with worked examples.

    The list opens with a system message describing the reviewer persona. Each
    (label, sentence) pair then contributes a user request for a review of that
    sentiment followed by an assistant message holding the example sentence.

    Parameters
    ----------
    sentences : iterable of str
        Example review texts.
    labels : iterable of str
        Sentiment of each example, paired positionally with ``sentences``
        (pairing stops at the shorter of the two).

    Returns
    -------
    list of dict
        Messages with ``"role"`` and ``"content"`` keys.
    """
    system_message = {
        "role": "system",
        "content": "You are a restaurant reviewer. You are leaving a review for the restaurant, and the review sentiment is one of positive, negative, or neutral. Classifying the sentiment should be somewhat difficult; it should not be immediately obvious what the sentiment of the review is. The review should be a single sentence.",
    }
    messages = [system_message]
    for label, sentence in zip(labels, sentences):
        # One request/answer exchange per example.
        messages.extend((
            {"role": "user", "content": f"Please leave a {label} review for a restaurant."},
            {"role": "assistant", "content": sentence},
        ))
    return messages

Prompting with GPT-3.5 turbo

In [39]:
# get API key
# The key is expected on the first line of openai-key.txt. readlines() keeps
# each line's trailing newline, so strip surrounding whitespace before use.
with open('openai-key.txt') as f:
    lines = f.readlines()
if not lines or not lines[0].strip():
    raise ValueError("openai-key.txt must contain the OpenAI API key on its first line")
openai.api_key = lines[0].strip()

In [40]:
# zero-shot prompts

def promptZeroshot(sentiment):
    """Request one restaurant review of the given sentiment, with no examples.

    Sends a system message describing the reviewer persona plus a single user
    instruction to ``openai.ChatCompletion.create`` and returns the text of the
    first choice in the response.

    Parameters
    ----------
    sentiment : str
        Sentiment word interpolated into the user instruction.

    Returns
    -------
    The ``content`` of the first returned choice's message.
    """
    system_message = {
        "role": "system",
        "content": "You are a restaurant reviewer. You are leaving a review for the restaurant, and the review sentiment is one of positive, negative, or neutral. Classifying the sentiment should be somewhat difficult; it should not be immediately obvious what the sentiment of the review is. The review should be a single sentence.",
    }
    user_message = {
        "role": "user",
        "content": f"Please leave a {sentiment} review for a restaurant.",
    }

    # call API
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        temperature=1.5,  # allow more creativity in the generation process
    )

    first_choice = completion['choices'][0]
    return first_choice['message']['content']

In [62]:
# few-shot prompts

def promptFewshot(sentiment, X, y, k=5):
    """Generate one review of the requested sentiment, primed with k examples.

    ``k`` distinct positions are drawn at random from ``X``/``y``, turned into
    a few-shot chat prompt with ``fewshotTemplate``, followed by a final user
    instruction asking for a new review. The prompt is sent to
    ``openai.ChatCompletion.create`` and the text of the first choice is
    returned.

    Parameters
    ----------
    sentiment : str
        Sentiment word interpolated into the final user instruction.
    X : sequence of str
        Candidate example sentences (list, tuple or NumPy array).
    y : sequence of str
        Labels paired positionally with ``X``.
    k : int, optional
        Number of examples to sample without replacement (default 5).

    Returns
    -------
    The ``content`` of the first returned choice's message.

    Raises
    ------
    ValueError
        From ``random.sample`` when ``k`` is negative or larger than ``len(X)``.
    """
    MODEL = "gpt-3.5-turbo"

    # create prompt by randomly sampling from provided examples
    indices = random.sample(range(len(X)), k)
    # Pick the elements one by one: indexing with a list of positions only
    # works on NumPy arrays, whereas this also accepts plain lists and tuples.
    sentences = [X[i] for i in indices]
    labels = [y[i] for i in indices]
    prompt = fewshotTemplate(sentences, labels)

    # add instruction to generate a new review
    prompt.append({"role": "user", "content": f"Please leave a {sentiment} review for a restaurant."})

    # call API
    response = openai.ChatCompletion.create(
        model=MODEL,
        messages=prompt,
        temperature=1.0,
    )

    # return generated sentence
    return response["choices"][0]["message"]["content"]

    

In [57]:
# generate n zero-shot examples

def generateZeroshot(n, sentiment_choices=['positive', 'negative', 'neutral']):
    """Generate ``n`` zero-shot reviews, each for a randomly chosen sentiment.

    For every example a sentiment is drawn with ``random.choice`` from
    ``sentiment_choices`` and passed to ``promptZeroshot``.

    Parameters
    ----------
    n : int
        Number of reviews to generate.
    sentiment_choices : sequence of str, optional
        Sentiments to draw from.

    Returns
    -------
    tuple of (list, list)
        ``(reviews, labels)``: the generated texts and, at the same positions,
        the sentiments they were requested for.
    """
    generated = []
    for _ in range(n):
        target = random.choice(sentiment_choices)
        generated.append((promptZeroshot(target), target))

    # Unzip into the two parallel lists callers expect.
    reviews = [text for text, _ in generated]
    labels = [target for _, target in generated]
    return reviews, labels

In [63]:
# generate n few-shot examples

def generateFewshot(n, X, y, sentiment_choices=['positive', 'negative', 'neutral'], k=5):
    """Generate ``n`` few-shot reviews, each for a randomly chosen sentiment.

    The example pool is first restricted to the items of ``X``/``y`` whose
    label is in ``sentiment_choices``. For every generated review a sentiment
    is drawn with ``random.choice`` and ``promptFewshot`` is called with the
    filtered pool.

    Parameters
    ----------
    n : int
        Number of reviews to generate.
    X : sequence of str
        Candidate example sentences.
    y : sequence of str
        Labels paired positionally with ``X``.
    sentiment_choices : sequence of str, optional
        Sentiments to draw from; also the labels allowed in the example pool.
    k : int, optional
        Number of in-context examples per prompt, forwarded to
        ``promptFewshot`` (default 5).

    Returns
    -------
    tuple of (list, list)
        ``(reviews, labels)``: the generated texts and, at the same positions,
        the sentiments they were requested for.
    """
    # The pool does not change between iterations, so filter and convert it
    # once up front instead of on every pass through the loop. Separate names
    # leave the caller-supplied X and y untouched.
    subset = [index for index, element in enumerate(y) if element in sentiment_choices]
    X_pool, y_pool = np.array(X)[subset], np.array(y)[subset]

    labels = []
    reviews = []

    for _ in range(n):
        # randomly select sentiment
        sentiment = random.choice(sentiment_choices)

        review = promptFewshot(sentiment, X_pool, y_pool, k)
        labels.append(sentiment)
        reviews.append(review)

    return reviews, labels

Generating data

In [58]:
# Generate three zero-shot reviews and pair each with the sentiment requested for it.
reviews, labels = generateZeroshot(3)
zeroshot_data = [
    {'sentence': review, 'label': sentiment}
    for review, sentiment in zip(reviews, labels)
]

# Write the records to disk as one JSON array.
with open('zero-shot-examples.json', 'w') as json_file:
    json.dump(zeroshot_data, json_file)


In [64]:
# Generate three few-shot reviews, sampling in-context examples from the training data,
# and pair each review with the sentiment requested for it.
reviews, labels = generateFewshot(3, X_train, y_train)
fewshot_data = [
    {'sentence': review, 'label': sentiment}
    for review, sentiment in zip(reviews, labels)
]

# Write the records to disk as one JSON array.
with open('few-shot-examples.json', 'w') as json_file:
    json.dump(fewshot_data, json_file)
