## Imports

In [11]:
from dotenv import load_dotenv, find_dotenv
import matplotlib.pyplot as plt
from openai import OpenAI
import os
import pandas as pd
from sklearn.model_selection import train_test_split

In [12]:
# Notebook-wide constant; presumably the default font size for matplotlib
# figures (no use is visible in this section) -- TODO confirm against the plotting cells.
DEFAULT_FONT = 14

In [13]:
# Read the training CSV into a single frame
dataPath = './feedback-prize-english-language-learning/train.csv'
df_all = pd.read_csv(dataPath)

# The score columns are taken by position (columns 2 through 7 of the CSV).
categories = df_all.columns[2:8]

# Per-essay mean across the score columns, rounded to one decimal place,
# gives a single number for comparing datasets.
category_means = df_all.loc[:, categories].mean(axis=1)
df_all['Average'] = category_means.round(1)

In [14]:
# Hold out 20% of the essays, then cut that holdout in half:
# 80% Train, 10% Validation, 10% Test. Both splits use the same fixed seed.
df_train, df_holdout = train_test_split(df_all, test_size=0.2, random_state=42)
df_val, df_test = train_test_split(df_holdout, test_size=0.5, random_state=42)

# Descriptive statistics of the per-essay average score for each split
train_desc = df_train['Average'].describe()
val_desc = df_val['Average'].describe()
test_desc = df_test['Average'].describe()

# Place the three summaries side by side, one labelled column per split
summary = pd.concat(
    [train_desc, val_desc, test_desc],
    axis=1,
    keys=['Train', 'Validate', 'Test'],
)

# Show floats with one decimal place (this changes the pandas display option
# for the rest of the session, not just for this table).
pd.set_option('display.float_format', '{:0.1f}'.format)
print("\nSummary of Averages for each dataset:")
display(summary)


Summary of Averages for each dataset:


Unnamed: 0,Train,Validate,Test
count,3128.0,391.0,392.0
mean,3.1,3.1,3.1
std,0.6,0.5,0.6
min,1.0,1.0,1.4
25%,2.7,2.7,2.7
50%,3.1,3.1,3.1
75%,3.5,3.4,3.5
max,5.0,4.6,4.8


## Generate Prompts

Function to generate k-shot prompts from example essays and an essay to evaluate.

In [15]:
def generate_prompt(scored_essay, example_essays=(), example_scores=(),
                    category_names=None):
    """Build a k-shot scoring prompt for a single essay.

    The prompt consists of the fixed scoring instructions, then one
    "example essay + labelled scores" section per example, then the essay
    that should be scored.

    Args:
        scored_essay: Text of the essay the model should score.
        example_essays: Sequence of example essay texts (may be empty for a
            zero-shot prompt).
        example_scores: Sequence with one entry per example essay; each entry
            is a sequence of scores, labelled in order with the category
            names. An entry may hold fewer scores than there are categories,
            in which case only the leading categories are labelled.
        category_names: Optional sequence of category labels. When omitted,
            the notebook-level ``categories`` variable is used.

    Returns:
        The assembled prompt as a single string.

    Raises:
        ValueError: If the number of example essays and score entries differ,
            or if an example carries more scores than there are categories.
    """
    if len(example_essays) != len(example_scores):
        raise ValueError("Number of example essays and scores provided must match")

    # Fall back to the notebook-level column labels only when the caller did
    # not supply names, so the function can also be used without that global.
    if category_names is None:
        category_names = categories
    category_names = list(category_names)

    # Check up front so a bad example fails with a clear message instead of
    # an index error halfway through building the prompt.
    for position, essay_scores in enumerate(example_scores):
        if len(essay_scores) > len(category_names):
            raise ValueError(
                f"Example {position} has {len(essay_scores)} scores but only "
                f"{len(category_names)} categories are available"
            )

    # Whitespace layout (including the line break and indent between the two
    # sentences) is kept as it was; only the "is 0.5" typo is corrected.
    INSTRUCTIONS = (
        "Score the following essay in these categories: cohesion, syntax, "
        "vocabulary, phraseology, grammar, conventions. \n"
        "    Return a JSON object with scores between 1.0 and 5.0 in 0.5 increments. "
    )
    EXAMPLE_PREFIX = "Here is an example essay followed by scores: "
    SCORED_ESSAY_PREFIX = "Essay to score: "

    parts = [INSTRUCTIONS]
    for essay, essay_scores in zip(example_essays, example_scores):
        # Each score is rendered as "<category>: <score>, " in category order.
        labelled_scores = "".join(
            name + ": " + str(score) + ", "
            for name, score in zip(category_names, essay_scores)
        )
        parts.extend([EXAMPLE_PREFIX, essay, " Score: ", labelled_scores])

    parts.extend([SCORED_ESSAY_PREFIX, scored_essay])
    return "".join(parts)

In [16]:
# Smoke test: build a 2-shot prompt from short essay fragments and print it.
test_essay1 = " Once, I walked down the empty street during midnight. "
example_1 = "Imagine someone that you know, that is trying to show you what maturity"
example_2 = "They say that conserving energy and resources, some buissness have a four day"

# NOTE(review): each example lists five scores while the instructions name six
# categories, so the last category gets no score in the example sections.
scores = [
    [1.0, 2.0, 3.0, 4.0, 5.0],
    [1.5, 2.5, 3.5, 4.5, 5.0],
]

prompt = generate_prompt(
    test_essay1,
    example_essays=[example_1, example_2],
    example_scores=scores,
)
print(prompt)

Score the following essay in these categories: cohesion, syntax, vocabulary, phraseology, grammar, conventions. 
    Return a JSON object with scores between 1.0 and 5.0 is 0.5 increments. Here is an example essay followed by scores: Imagine someone that you know, that is trying to show you what maturity Score: cohesion: 1.0, syntax: 2.0, vocabulary: 3.0, phraseology: 4.0, grammar: 5.0, Here is an example essay followed by scores: They say that conserving energy and resources, some buissness have a four day Score: cohesion: 1.5, syntax: 2.5, vocabulary: 3.5, phraseology: 4.5, grammar: 5.0, Essay to score:  Once, I walked down the empty street during midnight. 


In [18]:
# Shared OpenAI client for the notebook. The key is read from the environment
# rather than written into the notebook; the lookup yields None when
# OPENAI_API_KEY is not set.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def get_completion(prompt, model="gpt-3.5-turbo"):
    """Send a prompt as a single user message and return the chat completion.

    Args:
        prompt: Text placed in the "content" field of the one user message.
        model: Name of the chat model to request.

    Returns:
        Whatever ``client.chat.completions.create`` returns, unmodified.
    """
    user_message = {"role": "user", "content": prompt}
    return client.chat.completions.create(
        messages=[user_message],
        model=model,
    )