# run_models
**Author:** Khoi Nguyen

**Date created:** 03/06/2023

**Last modified:** 05/12/2023

**Description:** This notebook generates completions of the task "Elaborate on the following sentence: {SENTENCE}" using the models fine-tuned in the notebook `finetune_models.ipynb` and datasets determined by the notebook `split_data.ipynb`.

**WARNING:** This notebook requires API calls and will cost money. Please be careful when running this step.

In [1]:
import json
import openai
import os
import tqdm

# Pull the API key from the environment so that no secret is stored in the notebook,
# then hand it to the OpenAI client.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
openai.api_key = OPENAI_API_KEY

# Task prefix placed in front of every test sentence when a prompt is built.
instruction = "Elaborate on the following sentence: "

### ada
Running the test set with the task "Elaborate on the following sentence: " using the model `52k_instruct_ada`

In [2]:
# Synthesize Test Results Data with ada
# Asks for confirmation (this cell makes paid API calls), then requests one
# completion per test sentence from the fine-tuned model and saves the
# prompt/completion pairs to data/<modeldir>/model_results.json.
modeldir = "ada"
confirmation = input(f'Are you sure you want to run synthethic {modeldir} model results? Type YES or NO: ')

if confirmation == 'YES':
    with open(f'data/{modeldir}/test.json') as f:
        test_data = json.load(f)

    # Gather every completion BEFORE opening the output file: opening with 'w'
    # truncates it, so an API error part-way through the loop would otherwise
    # leave an empty results file and wipe any earlier run.
    results = []
    for sentence in tqdm.tqdm(test_data):
        # The prompt ends with the "\n\n##\n\n" separator.
        prompt = "{instruction}\"{sentence}\"\n\n##\n\n".format(sentence=sentence["sentence"], instruction=instruction)
        response = openai.Completion.create(
            model="ada:ft-delilah:52k-instuct-ada-2023-05-03-22-55-32",
            prompt=prompt,
            temperature=0.7,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=["\n"]  # generation stops at the first newline
        )
        result = {
            'prompt': prompt,
            'completion': response.choices[0].text.strip(),
        }
        results.append(result)

    # The with-block flushes and closes the file; no explicit flush is needed.
    with open(f'data/{modeldir}/model_results.json', 'w') as outfile:
        json.dump({"sentences": results}, outfile)
else:
    # f-string so that the directory name is interpolated instead of printing
    # the literal text "{modeldir}".
    print(f'Synthethic {modeldir} model test results aborted.')

100%|██████████| 1000/1000 [09:34<00:00,  1.74it/s]


In [3]:
# Synthesize Test Results Data with Curie
# Baseline run: requests one completion per test sentence from the stock
# text-curie-001 model and saves the prompt/completion pairs to
# data/<modeldir>/curie_results.json.
# Pin the dataset directory here rather than relying on the value left behind
# by the preceding cell, so running cells out of order cannot overwrite the
# results of a different dataset.
modeldir = "ada"
confirmation = input(f'Are you sure you want to run synthethic {modeldir} Curie results? Type YES or NO: ')

if confirmation == 'YES':
    with open(f'data/{modeldir}/test.json') as f:
        test_data = json.load(f)

    # Gather every completion BEFORE opening the output file: opening with 'w'
    # truncates it, so an API error part-way through the loop would otherwise
    # leave an empty results file and wipe any earlier run.
    results = []
    for sentence in tqdm.tqdm(test_data):
        # No "##" separator and no stop sequence for the base model.
        prompt = "{instruction}\"{sentence}\"\n\n".format(sentence=sentence["sentence"], instruction=instruction)
        response = openai.Completion.create(
            model="text-curie-001",
            prompt=prompt,
            temperature=0.7,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )
        result = {
            'prompt': prompt,
            'completion': response.choices[0].text.strip(),
        }
        results.append(result)

    # The with-block flushes and closes the file; no explicit flush is needed.
    with open(f'data/{modeldir}/curie_results.json', 'w') as outfile:
        json.dump({"sentences": results}, outfile)
else:
    # f-string so that the directory name is interpolated instead of printing
    # the literal text "{modeldir}".
    print(f'Synthethic {modeldir} Curie test results aborted.')

100%|██████████| 1000/1000 [09:01<00:00,  1.85it/s]


### 1k_ada
Running the test set with the task "Elaborate on the following sentence: " using the model `1k_ada`

In [4]:
# Synthesize Test Results Data with 1k_ada
# Asks for confirmation (this cell makes paid API calls), then requests one
# completion per test sentence from the fine-tuned model and saves the
# prompt/completion pairs to data/<modeldir>/model_results.json.
modeldir = "1k_ada"
confirmation = input(f'Are you sure you want to run synthethic {modeldir} model results? Type YES or NO: ')

if confirmation == 'YES':
    with open(f'data/{modeldir}/test.json') as f:
        test_data = json.load(f)

    # Gather every completion BEFORE opening the output file: opening with 'w'
    # truncates it, so an API error part-way through the loop would otherwise
    # leave an empty results file and wipe any earlier run.
    results = []
    for sentence in tqdm.tqdm(test_data):
        # The prompt ends with the "\n\n##\n\n" separator.
        prompt = "{instruction}\"{sentence}\"\n\n##\n\n".format(sentence=sentence["sentence"], instruction=instruction)
        response = openai.Completion.create(
            model="ada:ft-delilah:1k-ada-2023-05-05-18-03-38",
            prompt=prompt,
            temperature=0.7,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=["\n"]  # generation stops at the first newline
        )
        result = {
            'prompt': prompt,
            'completion': response.choices[0].text.strip(),
        }
        results.append(result)

    # The with-block flushes and closes the file; no explicit flush is needed.
    with open(f'data/{modeldir}/model_results.json', 'w') as outfile:
        json.dump({"sentences": results}, outfile)
else:
    # f-string so that the directory name is interpolated instead of printing
    # the literal text "{modeldir}".
    print(f'Synthethic {modeldir} model test results aborted.')

100%|██████████| 1000/1000 [06:21<00:00,  2.62it/s]


In [5]:
# Synthesize Test Results Data with Curie
# Baseline run: requests one completion per test sentence from the stock
# text-curie-001 model and saves the prompt/completion pairs to
# data/<modeldir>/curie_results.json.
# Pin the dataset directory here rather than relying on the value left behind
# by the preceding cell, so running cells out of order cannot overwrite the
# results of a different dataset.
modeldir = "1k_ada"
confirmation = input(f'Are you sure you want to run synthethic {modeldir} Curie results? Type YES or NO: ')

if confirmation == 'YES':
    with open(f'data/{modeldir}/test.json') as f:
        test_data = json.load(f)

    # Gather every completion BEFORE opening the output file: opening with 'w'
    # truncates it, so an API error part-way through the loop would otherwise
    # leave an empty results file and wipe any earlier run.
    results = []
    for sentence in tqdm.tqdm(test_data):
        # No "##" separator and no stop sequence for the base model.
        prompt = "{instruction}\"{sentence}\"\n\n".format(sentence=sentence["sentence"], instruction=instruction)
        response = openai.Completion.create(
            model="text-curie-001",
            prompt=prompt,
            temperature=0.7,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )
        result = {
            'prompt': prompt,
            'completion': response.choices[0].text.strip(),
        }
        results.append(result)

    # The with-block flushes and closes the file; no explicit flush is needed.
    with open(f'data/{modeldir}/curie_results.json', 'w') as outfile:
        json.dump({"sentences": results}, outfile)
else:
    # f-string so that the directory name is interpolated instead of printing
    # the literal text "{modeldir}".
    print(f'Synthethic {modeldir} Curie test results aborted.')

100%|██████████| 1000/1000 [09:00<00:00,  1.85it/s]


### 10k_ada
Running the test set with the task "Elaborate on the following sentence: " using the model `10k_ada`

In [6]:
# Synthesize Test Results Data with 10k_ada
# Asks for confirmation (this cell makes paid API calls), then requests one
# completion per test sentence from the fine-tuned model and saves the
# prompt/completion pairs to data/<modeldir>/model_results.json.
modeldir = "10k_ada"
confirmation = input(f'Are you sure you want to run synthethic {modeldir} model results? Type YES or NO: ')

if confirmation == 'YES':
    with open(f'data/{modeldir}/test.json') as f:
        test_data = json.load(f)

    # Gather every completion BEFORE opening the output file: opening with 'w'
    # truncates it, so an API error part-way through the loop would otherwise
    # leave an empty results file and wipe any earlier run.
    results = []
    for sentence in tqdm.tqdm(test_data):
        # The prompt ends with the "\n\n##\n\n" separator.
        prompt = "{instruction}\"{sentence}\"\n\n##\n\n".format(sentence=sentence["sentence"], instruction=instruction)
        response = openai.Completion.create(
            # FIXME(review): this is the same fine-tune ID that the `ada`
            # (52k-instruct) cell uses, yet the results are written under
            # data/10k_ada/. Presumably the 10k_ada fine-tune ID was intended;
            # confirm against finetune_models.ipynb and replace it.
            model="ada:ft-delilah:52k-instuct-ada-2023-05-03-22-55-32",
            prompt=prompt,
            temperature=0.7,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=["\n"]  # generation stops at the first newline
        )
        result = {
            'prompt': prompt,
            'completion': response.choices[0].text.strip(),
        }
        results.append(result)

    # The with-block flushes and closes the file; no explicit flush is needed.
    with open(f'data/{modeldir}/model_results.json', 'w') as outfile:
        json.dump({"sentences": results}, outfile)
else:
    # f-string so that the directory name is interpolated instead of printing
    # the literal text "{modeldir}".
    print(f'Synthethic {modeldir} model test results aborted.')

100%|██████████| 1000/1000 [09:19<00:00,  1.79it/s]


In [7]:
# Synthesize Test Results Data with Curie
# Baseline run: requests one completion per test sentence from the stock
# text-curie-001 model and saves the prompt/completion pairs to
# data/<modeldir>/curie_results.json.
# Pin the dataset directory here rather than relying on the value left behind
# by the preceding cell, so running cells out of order cannot overwrite the
# results of a different dataset.
modeldir = "10k_ada"
confirmation = input(f'Are you sure you want to run synthethic {modeldir} Curie results? Type YES or NO: ')

if confirmation == 'YES':
    with open(f'data/{modeldir}/test.json') as f:
        test_data = json.load(f)

    # Gather every completion BEFORE opening the output file: opening with 'w'
    # truncates it, so an API error part-way through the loop would otherwise
    # leave an empty results file and wipe any earlier run.
    results = []
    for sentence in tqdm.tqdm(test_data):
        # No "##" separator and no stop sequence for the base model.
        prompt = "{instruction}\"{sentence}\"\n\n".format(sentence=sentence["sentence"], instruction=instruction)
        response = openai.Completion.create(
            model="text-curie-001",
            prompt=prompt,
            temperature=0.7,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )
        result = {
            'prompt': prompt,
            'completion': response.choices[0].text.strip(),
        }
        results.append(result)

    # The with-block flushes and closes the file; no explicit flush is needed.
    with open(f'data/{modeldir}/curie_results.json', 'w') as outfile:
        json.dump({"sentences": results}, outfile)
else:
    # f-string so that the directory name is interpolated instead of printing
    # the literal text "{modeldir}".
    print(f'Synthethic {modeldir} Curie test results aborted.')

100%|██████████| 1000/1000 [09:12<00:00,  1.81it/s]


### 100k_ada
Running the test set with the task "Elaborate on the following sentence: " using the model `100k_ada`

In [8]:
# Synthesize Test Results Data with 100k_ada
# Asks for confirmation (this cell makes paid API calls), then requests one
# completion per test sentence from the fine-tuned model and saves the
# prompt/completion pairs to data/<modeldir>/model_results.json.
modeldir = "100k_ada"
confirmation = input(f'Are you sure you want to run synthethic {modeldir} model results? Type YES or NO: ')

if confirmation == 'YES':
    with open(f'data/{modeldir}/test.json') as f:
        test_data = json.load(f)

    # Gather every completion BEFORE opening the output file: opening with 'w'
    # truncates it, so an API error part-way through the loop would otherwise
    # leave an empty results file and wipe any earlier run.
    results = []
    for sentence in tqdm.tqdm(test_data):
        # The prompt ends with the "\n\n##\n\n" separator.
        prompt = "{instruction}\"{sentence}\"\n\n##\n\n".format(sentence=sentence["sentence"], instruction=instruction)
        response = openai.Completion.create(
            # FIXME(review): "10k_ada" names a different dataset than this
            # cell's modeldir ("100k_ada") and does not follow the
            # "ada:ft-<org>:<name>-<timestamp>" shape of the other fine-tune
            # IDs in this notebook. Looks like a placeholder; substitute the
            # real 100k_ada fine-tune ID before running.
            model="10k_ada",
            prompt=prompt,
            temperature=0.7,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=["\n"]  # generation stops at the first newline
        )
        result = {
            'prompt': prompt,
            'completion': response.choices[0].text.strip(),
        }
        results.append(result)

    # The with-block flushes and closes the file; no explicit flush is needed.
    with open(f'data/{modeldir}/model_results.json', 'w') as outfile:
        json.dump({"sentences": results}, outfile)
else:
    # f-string so that the directory name is interpolated instead of printing
    # the literal text "{modeldir}".
    print(f'Synthethic {modeldir} model test results aborted.')

Synthethic {modeldir} model test results aborted.


In [9]:
# Synthesize Test Results Data with Curie
# Baseline run: requests one completion per test sentence from the stock
# text-curie-001 model and saves the prompt/completion pairs to
# data/<modeldir>/curie_results.json.
# Pin the dataset directory here rather than relying on the value left behind
# by the preceding cell, so running cells out of order cannot overwrite the
# results of a different dataset.
modeldir = "100k_ada"
confirmation = input(f'Are you sure you want to run synthethic {modeldir} Curie results? Type YES or NO: ')

if confirmation == 'YES':
    with open(f'data/{modeldir}/test.json') as f:
        test_data = json.load(f)

    # Gather every completion BEFORE opening the output file: opening with 'w'
    # truncates it, so an API error part-way through the loop would otherwise
    # leave an empty results file and wipe any earlier run.
    results = []
    for sentence in tqdm.tqdm(test_data):
        # No "##" separator and no stop sequence for the base model.
        prompt = "{instruction}\"{sentence}\"\n\n".format(sentence=sentence["sentence"], instruction=instruction)
        response = openai.Completion.create(
            model="text-curie-001",
            prompt=prompt,
            temperature=0.7,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )
        result = {
            'prompt': prompt,
            'completion': response.choices[0].text.strip(),
        }
        results.append(result)

    # The with-block flushes and closes the file; no explicit flush is needed.
    with open(f'data/{modeldir}/curie_results.json', 'w') as outfile:
        json.dump({"sentences": results}, outfile)
else:
    # f-string so that the directory name is interpolated instead of printing
    # the literal text "{modeldir}".
    print(f'Synthethic {modeldir} Curie test results aborted.')

Synthethic {modeldir} Curie test results aborted.
