In [36]:
import openai
import os
import pandas
import numpy as np
openai.api_key = os.getenv("OPENAI_API_KEY")

In [31]:
prompts = pandas.read_csv("stimuli.csv").query("avoid==False")
prompts.head(1)

Unnamed: 0,item,Title,version,header,avoid,continuation,name_gender,notes,locus_of_uncertainty,Unnamed: 9
0,Emergency Damage,Emergency Damage I,unambiguous_uncovered,"Joanne's home insurance covers ""Emergency Dama...",False,"Late one night, Joanne hears noises coming fro...",female,dogs playing not emergency,perceived emergency,


In [33]:
prompts.rename(columns={'Title':'title'}, inplace=True)
prompts["prompt"] = prompts.header + "\n" + prompts.continuation
prompts.rename(columns={'version':'completion'}, inplace=True)
prompts = prompts[["item","title","prompt","completion"]]
prompts.head(1)

Unnamed: 0,item,title,prompt,continuation
0,Emergency Damage,Emergency Damage I,"Joanne's home insurance covers ""Emergency Dama...","Late one night, Joanne hears noises coming fro..."


In [38]:
# 37 items, so 25 is ~70% train and 12 is ~30% test
len(np.unique(prompts['title']))

37

In [60]:
# Fine-tuning data. Note: right now, GPT-3 can only be fine-tuned on 'completion' tasks.
np.random.seed(44)
shuffledPrompts = sorted(prompts['item'], key = lambda x: np.random.rand())
trainItems = shuffledPrompts[0:25]
testItems = shuffledPrompts[25:37]

trainSet = prompts[prompts.item.isin(trainItems)]
testSet = prompts[prompts.item.isin(testItems)]

Unnamed: 0,item,title,prompt,continuation
12,Escape of Water,Escape of Water I,Carol's homeowner's insurance policy includes ...,Carol discovers that her bathroom wall has bec...
13,Escape of Water,Escape of Water I,Carol's homeowner's insurance policy includes ...,Carol discovers that her bathroom wall has bec...
14,Escape of Water,Escape of Water I,Carol's homeowner's insurance policy includes ...,Carol discovers that her bathroom wall has bec...
15,Escape of Water,Escape of Water II,Miranda's homeowner's insurance policy include...,"Miranda lives in a two-story house, and she ke..."
16,Escape of Water,Escape of Water II,Miranda's homeowner's insurance policy include...,"Miranda lives in a two-story house, and she ha..."
...,...,...,...,...
148,Vehicle Glass,Vehicle Glass I,Ahmad's car insurance policy includes coverage...,Ahmad drives a convertible Porsche and parks h...
149,Vehicle Glass,Vehicle Glass I,Ahmad's car insurance policy includes coverage...,Ahmad drives a convertible Porsche and parks h...
150,Vehicle Glass,Vehicle Glass II,Aneesh's car insurance policy includes coverag...,Aneesh drives a convertible Porsche and parks ...
151,Vehicle Glass,Vehicle Glass II,Aneesh's car insurance policy includes coverag...,Aneesh drives a convertible Porsche and parks ...


In [None]:
fineTuningData = (fineTuningData.assign(completion=["5 {{STOP}}","95 {{STOP}}"]))[["prompt","completion","suffix"]]
openai.File.create(
  file=fineTuningData.to_json(orient = "records", lines=True),
  purpose='fine-tune'
)

In [393]:
openai.FineTune.create(training_file="file-YxSIy798Py3YbYOvXf2Pt2gl", model = "davinci")

InvalidRequestError: Invalid base model: text-davinci-insert-001 (model must be one of ada, babbage, curie, davinci) or a fine-tuned model created by your organization: org-O1nE8SHl8ANRhiZJ3M17aIt6

In [214]:
openai.FineTune.list()

<OpenAIObject list at 0x12a323fb0> JSON: {
  "data": [
    {
      "created_at": 1670224821,
      "fine_tuned_model": "curie:ft-stanford-2022-12-05-07-23-36",
      "hyperparams": {
        "batch_size": 1,
        "learning_rate_multiplier": 0.1,
        "n_epochs": 4,
        "prompt_loss_weight": 0.01
      },
      "id": "ft-Ynnt5qHkqeQbp3inH6WOwvkj",
      "model": "curie",
      "object": "fine-tune",
      "organization_id": "org-O1nE8SHl8ANRhiZJ3M17aIt6",
      "result_files": [
        {
          "bytes": 435,
          "created_at": 1670225018,
          "filename": "compiled_results.csv",
          "id": "file-zAT6ZPDkWZRh64QXF3O0RjHw",
          "object": "file",
          "purpose": "fine-tune-results",
          "status": "processed",
          "status_details": null
        }
      ],
      "status": "succeeded",
      "training_files": [
        {
          "bytes": 1688,
          "created_at": 1670224694,
          "filename": "file",
          "id": "file-nenODZw8K

In [383]:
openai.FineTune.retrieve(id="ft-svkAuE87NX7uCinyp6hP800T")

<FineTune fine-tune id=ft-svkAuE87NX7uCinyp6hP800T at 0x12b899030> JSON: {
  "created_at": 1670269670,
  "events": [
    {
      "created_at": 1670269670,
      "level": "info",
      "message": "Created fine-tune: ft-svkAuE87NX7uCinyp6hP800T",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1670269674,
      "level": "info",
      "message": "Fine-tune costs $0.04",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1670269675,
      "level": "info",
      "message": "Fine-tune enqueued. Queue number: 0",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1670269904,
      "level": "info",
      "message": "Fine-tune started",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1670269986,
      "level": "info",
      "message": "Completed epoch 1/4",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1670269987,
      "level": "info",
      "message": "Completed epoch 2/4",
      "object": "fine-tune-ev

In [394]:
finetunedGPT3Predictions = prompts.head(10)
finetunedGPT3Predictions["prediction"] = finetunedGPT3Predictions.apply(lambda x: openai.Completion.create(model="davinci:ft-stanford-2022-12-05-19-53-46", # davinci:ft-stanford-2022-12-05-19-53-46
                                    prompt=x["prompt"],
                                    #Only possible to fine-tune models on 'prompt' and 'completion' at this point; 'suffix' argument not revealed to fine-tuned davinci models                                                                      
                                    suffix=x["suffix"], 
                                    temperature=0, max_tokens=10).choices[0].text, axis=1 )

InvalidRequestError: Unrecognized request argument supplied: suffix

In [392]:
finetunedGPT3Predictions # not currently fine-tuned, just predictions of text-davinci-insert-001

Unnamed: 0,item,title,version,prompt,suffix,prediction
0,Escape of Water,Escape of Water I,unambiguous_uncovered,Carol's homeowner's insurance policy includes ...,of those polled would believe that the damage ...,99%
1,Escape of Water,Escape of Water I,unambiguous_covered,Carol's homeowner's insurance policy includes ...,of those polled would believe that the damage ...,99%
2,Escape of Water,Escape of Water I,controversial,Carol's homeowner's insurance policy includes ...,of those polled would believe that the damage ...,99%
3,Vehicle Glass,Vehicle Glass I,unambiguous_uncovered,Ahmad's car insurance policy includes coverage...,of those polled would believe that the damage ...,99%
4,Vehicle Glass,Vehicle Glass I,unambiguous_covered,Ahmad's car insurance policy includes coverage...,of those polled would believe that the damage ...,99%
5,Vehicle Glass,Vehicle Glass I,controversial,Ahmad's car insurance policy includes coverage...,of those polled would believe that the damage ...,99%
6,Storm Damage and Maintenance,Storm Damage and Maintenance I,unambiguous_uncovered,Tom's home insurance policy includes coverage ...,of those polled would believe that the damage ...,98%
7,Storm Damage and Maintenance,Storm Damage and Maintenance I,unambiguous_covered,Tom's home insurance policy includes coverage ...,of those polled would believe that the damage ...,98%
8,Storm Damage and Maintenance,Storm Damage and Maintenance I,controversial,Tom's home insurance policy includes coverage ...,of those polled would believe that the damage ...,98%
9,Vehicle Damage,Vehicle Damage I,unambiguous_uncovered,Jenny's car insurance policy includes coverage...,of those polled would believe that the damage ...,80%
