In [152]:
import numpy as np
import random
import openai

Reading in the data

In [3]:
import json
import os

# function to load data
def load_dataset(*src_filenames, labels=None):
    """Load examples from one or more JSON-lines files.

    Every non-blank line of each file is parsed as one JSON object.

    Parameters
    ----------
    *src_filenames : str
        Paths of the files to read; they are processed in the given order.
    labels : container, optional
        When given, only examples whose ``'gold_label'`` value is contained
        in ``labels`` are kept. When ``None`` (default) every example is kept.

    Returns
    -------
    list of dict
        The parsed examples, in file order.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    KeyError
        If ``labels`` is given and an example has no ``'gold_label'`` key.
    """
    data = []
    for filename in src_filenames:
        # Read as UTF-8 explicitly instead of relying on the platform's
        # default encoding, which differs between operating systems.
        with open(filename, encoding='utf-8') as f:
            for line in f:
                # Tolerate blank lines (e.g. a trailing empty line), which
                # json.loads would otherwise reject.
                if not line.strip():
                    continue
                d = json.loads(line)
                if labels is None or d['gold_label'] in labels:
                    data.append(d)
    return data


In [24]:
# Load the DynaSent round-2 training split (JSON-lines file).

dynansent_r2_train = os.path.join(
    'dynasent-v1.1',
    'dynasent-v1.1-round02-dynabench-train.jsonl',
)

# Only examples carrying one of these gold labels are kept.
ternary_labels = ('positive', 'negative', 'neutral')

r2_train = load_dataset(dynansent_r2_train, labels=ternary_labels)

# Split the examples into two parallel tuples: sentences and their labels.
X_train, y_train = zip(
    *[(example['sentence'], example['gold_label']) for example in r2_train]
)
dataset = (X_train, y_train)


Templates for few-shot prompt engineering

In [91]:
def fewshotTemplate(X, y, indices, label='all'):
    """Build a few-shot prompt from selected example reviews.

    Parameters
    ----------
    X : indexable
        Review texts.
    y : indexable
        Sentiment labels, parallel to ``X``.
    indices : iterable
        Positions in ``X`` / ``y`` of the examples to include, in order.
    label : str, default 'all'
        With ``'all'`` each example line shows the review together with its
        sentiment from ``y``. With any other value the header states that the
        sentiment is ``label`` and only the review text is listed.

    Returns
    -------
    str
        The header, one line per example, and a trailing open ``'Review: '``
        slot left for the model to complete.
    """
    header = 'Each item in the following list contains a restaurant review and the respective sentiment.'

    if label == 'all':
        header += ' The sentiment is one of "positive", "negative", or "neutral".\n'
        example_lines = [f'Review: {X[i]} (Sentiment: {y[i]})\n' for i in indices]
    else:
        header += f' The sentiment is {label}.\n'
        example_lines = [f'Review: {X[i]}\n' for i in indices]

    return header + ''.join(example_lines) + 'Review: '

In [82]:
def generatePrompts(X, y, templateFunction, label='all', n=10, k=5):
    """Create ``n`` prompts, each built from ``k`` randomly sampled examples.

    Parameters
    ----------
    X, y : sequences
        Review texts and their sentiment labels (parallel).
    templateFunction : callable
        Called as ``templateFunction(X, y, indices, label)`` and expected to
        return one prompt.
    label : str, default 'all'
        When it is ``'positive'``, ``'negative'`` or ``'neutral'`` the data is
        first restricted to examples with that label; otherwise the data is
        used as passed in.
    n : int, default 10
        Number of prompts to generate.
    k : int, default 5
        Number of examples sampled (without replacement) per prompt.

    Returns
    -------
    list
        The ``n`` values returned by ``templateFunction``.
    """
    # Single-sentiment prompts draw only from examples of that sentiment.
    if label in ('positive', 'negative', 'neutral'):
        matching = np.where(np.array(y) == label)
        X = np.array(X)[matching]
        y = np.array(y)[matching]

    # One independent draw of k example positions per prompt, using the
    # global ``random`` state.
    return [
        templateFunction(X, y, random.sample(range(len(X)), k), label)
        for _ in range(n)
    ]

In [105]:
# Fix the seed so the sampled few-shot examples are reproducible.
random.seed(888)

# All four prompt sets draw from the same RNG stream, so the order of the
# labels below determines which examples each set receives.
mixedPrompts, posPrompts, negPrompts, neuPrompts = (
    generatePrompts(X_train, y_train, fewshotTemplate, label=sentiment, n=10, k=5)
    for sentiment in ('all', 'positive', 'negative', 'neutral')
)

Zero-shot prompting with GPT-3.5 Turbo

In [106]:
# get API key (expected on the first line of openai-key.txt)
with open('openai-key.txt') as f:
    lines = f.readlines()

# readlines() keeps the trailing newline; strip it so the key is sent
# without stray whitespace.
if not lines or not lines[0].strip():
    raise ValueError('openai-key.txt is empty; expected the API key on its first line')
openai.api_key = lines[0].strip()

In [147]:
def promptZeroshot(sentiment):
    """Request one restaurant review of the given sentiment from the chat model.

    Parameters
    ----------
    sentiment : str
        Inserted verbatim into the user message
        (``"Please leave a {sentiment} review for the restaurant."``).

    Returns
    -------
    The ``content`` of the first choice's message in the API response.
    """
    system_message = {
        "role": "system",
        "content": "You are a restaurant reviewer. You are leaving a review for the restaurant, and the review sentiment is one of positive, negative, or neutral. Classifying the sentiment should be somewhat difficult; it should not be immediately obvious what the sentiment of the review is. The review should be a single sentence.",
    }
    user_message = {
        "role": "user",
        "content": f"Please leave a {sentiment} review for the restaurant.",
    }

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        temperature=1.5,  # allow more creativity in the generation process
    )

    first_choice = completion['choices'][0]
    return first_choice['message']['content']

In [None]:
def promptFewshot(sentiment):
    """Send a fixed few-shot chat conversation to the model and return the reply.

    NOTE(review): this is still a placeholder. ``sentiment`` is accepted but
    never used, and the conversation below is a corporate-jargon translation
    example rather than restaurant reviews. TODO: replace the messages with
    review examples that depend on ``sentiment``.

    Parameters
    ----------
    sentiment : str
        Currently ignored.

    Returns
    -------
    The ``content`` of the first choice's message in the API response. The
    same text is also printed.
    """
    MODEL = "gpt-3.5-turbo"

    response = openai.ChatCompletion.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": "You are a helpful, pattern-following assistant."},
            {"role": "user", "content": "Help me translate the following corporate jargon into plain English."},
            {"role": "assistant", "content": "Sure, I'd be happy to!"},
            {"role": "user", "content": "New synergies will help drive top-line growth."},
            {"role": "assistant", "content": "Things working well together will increase revenue."},
            {"role": "user", "content": "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage."},
            {"role": "assistant", "content": "Let's talk later when we're less busy about how to do better."},
            {"role": "user", "content": "This late pivot means we don't have time to boil the ocean for the client deliverable."},
        ],
        temperature=0,
    )

    content = response["choices"][0]["message"]["content"]
    print(content)
    # Return the text as well, so this function can be passed as the
    # ``prompt`` callable of generateData just like promptZeroshot.
    return content

    

In [139]:
def generateData(prompt, n, sentiments=['positive', 'negative', 'neutral']):
    """Generate ``n`` labelled reviews by repeatedly calling ``prompt``.

    Parameters
    ----------
    prompt : callable
        Called with one sentiment and expected to return one review.
    n : int
        Number of reviews to generate.
    sentiments : sequence, default ['positive', 'negative', 'neutral']
        Candidate sentiments; one is drawn with ``random.choice`` per review.

    Returns
    -------
    (reviews, labels) : tuple of two lists
        Parallel lists: the generated reviews and the sentiment requested
        for each one.
    """
    # The generator is lazy, so each sentiment is drawn immediately before
    # the prompt call that uses it.
    drawn = (random.choice(sentiments) for _ in range(n))
    pairs = [(prompt(sentiment), sentiment) for sentiment in drawn]

    reviews = [review for review, _ in pairs]
    labels = [sentiment for _, sentiment in pairs]
    return reviews, labels

In [148]:
# Generate 50 zero-shot reviews, each paired with the sentiment requested.
reviews, labels = generateData(promptZeroshot, 50)
json_data = [
    {'sentence': review, 'label': sentiment}
    for review, sentiment in zip(reviews, labels)
]

# Write the records to disk as a single JSON array.
with open('zero-shot-examples.json', 'w') as json_file:
    json.dump(json_data, json_file)
