In [1]:
import pandas as pd
import openai, os, sys
from sklearn.model_selection import train_test_split

# Make the parent directory importable; the local `auth` package is expected
# to live there (it is not on the default module search path).
sys.path.insert(1, '../')
from auth import auth

# Export the key so child processes (the `!openai ...` CLI cells) inherit it.
os.environ['OPENAI_API_KEY'] = auth.SECRET_KEY
# `openai` was already imported above, before the variable was exported. The
# 0.x client reads OPENAI_API_KEY when the module is first imported, so the
# in-process client must be given the key explicitly as well.
openai.api_key = auth.SECRET_KEY

In [2]:
# Directory scanned for the essay (.txt) and annotation (.ann) files.
# Keep the trailing slash: file paths are built as DATA_PATH + filename.
DATA_PATH = '../data/essays/'

In [3]:
# Split the directory listing by extension: .txt files are essays, .ann files
# are their annotations. Both lists are sorted by file name.
dir_listing = os.listdir(DATA_PATH)
essays = sorted(name for name in dir_listing if name.endswith('.txt'))
annotations = sorted(name for name in dir_listing if name.endswith('.ann'))

print(f'''
Number of essays: {len(essays)}
Number of annotations: {len(annotations)}
''')


Number of essays: 402
Number of annotations: 402



In [4]:
# Build one (prompt, completion) row per essay: the prompt is the essay text,
# the completion is the raw content of the matching .ann file.
#
# `essays` and `annotations` are sorted independently, so positional pairing
# is only correct when both lists cover exactly the same file stems. Verify
# that instead of silently mispairing (or hitting an IndexError) when a file
# is missing or extra.
if len(essays) != len(annotations):
    raise ValueError(
        f'Found {len(essays)} essays but {len(annotations)} annotations in {DATA_PATH}'
    )

data = list()

for essay_name, annotation_name in zip(essays, annotations):
    if os.path.splitext(essay_name)[0] != os.path.splitext(annotation_name)[0]:
        raise ValueError(
            f'Essay/annotation mismatch: {essay_name} vs {annotation_name}'
        )
    # Explicit encoding so the result does not depend on the platform default.
    with open(DATA_PATH + essay_name, 'r', encoding='utf-8') as essay_file:
        prompt = essay_file.read()
    with open(DATA_PATH + annotation_name, 'r', encoding='utf-8') as annotation_file:
        completion = annotation_file.read()
    data.append((prompt, completion))

data_df = pd.DataFrame(columns=['prompt', 'completion'], data=data)
data_df.shape

(402, 2)

In [5]:
# Preview the first rows to check that each essay sits next to its annotation.
data_df.head()

Unnamed: 0,prompt,completion
0,Should students be taught to compete or to coo...,T1\tMajorClaim 503 575\twe should attach more ...
1,More people are migrating to other countries t...,T1\tMajorClaim 391 489\tthey are able to susta...
2,International tourism is now more common than ...,T1\tMajorClaim 352 475\tit has contributed to ...
3,International tourism is now more common than ...,T1\tMajorClaim 262 376\tthis industry has affe...
4,Living and studying overseas\n\nIt is every st...,T1\tMajorClaim 286 359\tone who studies overse...


In [14]:
# Hold out 20% of the rows. A fixed seed keeps the split (and therefore the
# exported JSONL files) identical across kernel restarts and re-runs.
SPLIT_SEED = 42
train, test = train_test_split(data_df, test_size=0.2, random_state=SPLIT_SEED)

In [15]:
# Export both splits as JSON Lines: one record per row, one row per line.
PROC_DATA_DIR = '../data/proc_data/'
# `to_json` does not create missing directories; make sure the target exists
# so the cell also works on a fresh checkout.
os.makedirs(PROC_DATA_DIR, exist_ok=True)
train.to_json(PROC_DATA_DIR + 'train_data.jsonl', orient='records', lines=True)
test.to_json(PROC_DATA_DIR + 'test_data.jsonl', orient='records', lines=True)

In [23]:
# Upload the two prepared JSONL files and start a fine-tune of the `ada` base
# model, tagging the resulting model name with the suffix "model_01".
# NOTE(review): the *_prepared.jsonl inputs are not written by any cell in this
# notebook (the export cell writes train_data.jsonl / test_data.jsonl) --
# presumably they come from a separate data-preparation step; confirm and add
# that step here so the notebook runs end to end.
!openai api fine_tunes.create -t "../data/proc_data/train_data_prepared.jsonl" -v "../data/proc_data/test_data_prepared.jsonl" -m ada --suffix "model_01"

Upload progress: 100%|████████████████████| 1.34M/1.34M [00:00<00:00, 1.74Git/s]
Uploaded file from ../data/proc_data/train_data_prepared.jsonl: file-Vl1oey7mNgpDtId5iwgJOEZ0
Upload progress: 100%|███████████████████████| 342k/342k [00:00<00:00, 438Mit/s]
Uploaded file from ../data/proc_data/test_data_prepared.jsonl: file-y1yYcsRKv59F0iDlVYy3HdRY
Created fine-tune: ft-1zDRCjW7lNK3c3JGVpeG6lrv
Streaming events until fine-tuning is complete...

(Ctrl-C will interrupt the stream, but not cancel the fine-tune)
[2023-04-01 15:51:55] Created fine-tune: ft-1zDRCjW7lNK3c3JGVpeG6lrv
[2023-04-01 15:52:18] Fine-tune costs $0.51
[2023-04-01 15:52:18] Fine-tune enqueued. Queue number: 0
[2023-04-01 15:52:20] Fine-tune started

Stream interrupted (client disconnected).
To resume the stream, run:

  openai api fine_tunes.follow -i ft-1zDRCjW7lNK3c3JGVpeG6lrv



In [26]:
# Re-attach to the event stream of the fine-tune job after the earlier stream
# was interrupted. The job id is hard-coded from that run's output; replace it
# whenever a new fine-tune is created.
!openai api fine_tunes.follow -i ft-1zDRCjW7lNK3c3JGVpeG6lrv

[2023-04-01 15:51:55] Created fine-tune: ft-1zDRCjW7lNK3c3JGVpeG6lrv
[2023-04-01 15:52:18] Fine-tune costs $0.51
[2023-04-01 15:52:18] Fine-tune enqueued. Queue number: 0
[2023-04-01 15:52:20] Fine-tune started
[2023-04-01 15:53:31] Completed epoch 1/4
[2023-04-01 15:54:26] Completed epoch 2/4
[2023-04-01 15:55:20] Completed epoch 3/4
[2023-04-01 15:56:14] Completed epoch 4/4
[2023-04-01 15:56:34] Uploaded model: ada:ft-hackfest-gpt:model-01-2023-04-01-18-56-34
[2023-04-01 15:56:35] Uploaded result file: file-O1Ajmpanaxn1qQVrHk5yIvnX
[2023-04-01 15:56:35] Fine-tune succeeded

Job complete! Status: succeeded 🎉
Try out your fine-tuned model:

openai api completions.create -m ada:ft-hackfest-gpt:model-01-2023-04-01-18-56-34 -p <YOUR_PROMPT>


In [29]:
# Name of the fine-tuned model to query.
ft_model = 'ada:ft-hackfest-gpt:model-01-2023-04-01-18-56-34'

# Essay excerpt followed by the question put to the model. Adjacent literals
# are concatenated verbatim, so the spacing inside each piece is significant.
claims_prompt = (
    "It is always said that competition can effectively promote the development of economy. "
    "In order to survive in the competition, companies continue to improve their products and service, and as a result, the whole society prospers. "
    "However, when we discuss the issue of competition or cooperation, what we are concerned about is not the whole society, but the development of an individual's whole life. "
    "From this point of view, I firmly believe that we should attach more importance to cooperation during primary education."
    "First of all, through cooperation, children can learn about interpersonal skills which are significant in the future life of all students. "
    "What we acquired from team work is not only how to achieve the same goal with others but more importantly, how to get along with others. "
    "During the process of cooperation, children can learn about how to listen to opinions of others, how to communicate with others, how to think comprehensively, and even how to compromise with other team members when conflicts occurred. "
    "All of these skills help them to get on well with other people and will benefit them for the whole life. "
    "what are my claims?"
)

response = openai.Completion.create(
    model=ft_model,
    prompt=claims_prompt,
    temperature=0,
    max_tokens=200,
)

In [30]:
# Display the raw API response. In the recorded run finish_reason is "length",
# i.e. generation stopped at the max_tokens limit rather than ending on its own.
response

<OpenAIObject text_completion id=cmpl-70almsvqwLw31A0hkco6ZweGV8Pqd at 0x7f04651617f0> JSON: {
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": "\nFirst of all, through cooperation, children can learn about interpersonal skills which are significant in the future life of all students\n\nThrough cooperation, children can learn about how to listen to opinions of others, how to communicate with others, how to think comprehensively, and even how to compromise with other team members when conflicts occurred\nAll of these skills help them to get on well with other people and will benefit them for the whole life\n\nWhat are my claims?\n\nIn order to survive in the competition, companies continue to improve their products and service\n\nWhen we discuss the issue of competition or cooperation, what we are concerned about is not the whole society, but the development of an individual's whole life\n\nIn order to understand the importanc