In [1]:
# Reference: https://beta.openai.com/docs/guides/fine-tuning

"""
Currently there is no fee for fine-tuning the model itself, however we will be introducing a fee for this starting December 8th 2021 at 00:00 UTC. After this time, trained tokens will be billed at 50% the base model inference rate per 1k tokens (trained tokens = tokens in file * epochs).

We will also begin supporting unlimited monthly fine-tunes and file sizes on December 8th, but until then there is a limit of 10 fine-tuning runs per month and data sets are limited to 2.5M tokens (a file size that's roughly 80-100MB).
"""

# Note: fine-tuning is currently limited (per the notice quoted above: 10 runs per month, ~2.5M tokens per dataset)

import openai

from fp_dataset_artifacts.utils import init_openai
from datasets import list_datasets, load_dataset, list_metrics, load_metric

# Project helper from fp_dataset_artifacts.utils (not shown here);
# presumably configures the OpenAI API credentials — TODO confirm.
init_openai()

# Load SNLI via Hugging Face `datasets`; the displayed result has train/validation/test splits.
data = load_dataset('snli')
data

Reusing dataset snli (/home/x/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b)


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    test: Dataset({
        features: ['premise', 'hypothesis', 'label'],
        num_rows: 10000
    })
    train: Dataset({
        features: ['premise', 'hypothesis', 'label'],
        num_rows: 550152
    })
    validation: Dataset({
        features: ['premise', 'hypothesis', 'label'],
        num_rows: 10000
    })
})

In [5]:
"""Fine tuning expects the following format:

{"prompt": "<prompt text>", "completion": "<ideal generated text>"}
{"prompt": "<prompt text>", "completion": "<ideal generated text>"}
{"prompt": "<prompt text>", "completion": "<ideal generated text>"}
...

"""

# Converter from the integer class id of the train split's 'label' feature to its string name.
int2label = data['train'].features['label'].int2str

def to_example(data, include_label=True):
    """Render one example as newline-separated ``Feature: value`` lines.

    Args:
        data: Mapping of feature name to value (one dataset row). Features are
            emitted in the mapping's own key order.
        include_label: When True, the ``label`` feature is shown as its
            capitalized class name (looked up through the module-level
            ``int2label``). When False, the label value is left empty, so the
            text ends with ``'Label: '`` and can be used as a prompt.

    Returns:
        A single string with one ``Feature: value`` line per feature.
    """
    def render(name, value):
        # Only the label needs special treatment; other features are printed verbatim.
        if name == 'label':
            value = int2label(value).capitalize() if include_label else ''
        return f'{name.capitalize()}: {value}'

    return '\n'.join(render(name, data[name]) for name in data.keys())


def format_jsonl(x):
    """Turn one labeled example into a fine-tuning record.

    Args:
        x: A dataset row; must contain a ``label`` entry that ``int2label``
            can convert.

    Returns:
        A dict with two keys: ``prompt`` (the example rendered by
        ``to_example`` with the label left blank) and ``completion`` (the
        capitalized class name, with no leading space or terminator).
    """
    prompt_text = to_example(x, False)
    class_name = int2label(x['label'])
    return dict(prompt=prompt_text, completion=class_name.capitalize())
    

def build_jsonl(data, filename):
    """Write a JSONL fine-tuning file: one ``format_jsonl`` record per line.

    Args:
        data: Iterable of dataset rows, each with an integer ``label``.
        filename: Path of the file to create (overwritten if it exists).

    Rows with a negative label are skipped (presumably examples without a
    gold label — TODO confirm against the dataset card).
    """
    import json

    with open(filename, 'w') as out:
        out.writelines(
            json.dumps(format_jsonl(row)) + '\n'
            for row in data
            if row['label'] >= 0
        )

# Write the prompt/completion JSONL files for the train and validation splits.
build_jsonl(data['train'], 'snli_finetune_train.jsonl')
build_jsonl(data['validation'], 'snli_finetune_validation.jsonl')

In [6]:
# Upload files
# Upload the training file. The context manager closes the local file handle
# once the upload call returns (the original left it open).
with open("snli_finetune_train.jsonl") as train_fh:
    train_upload = openai.File.create(file=train_fh, purpose="fine-tune")
train_upload

<File file id=file-zESrbwb1mh3p2OjVzhcsKmH4 at 0x7f1fda351f40> JSON: {
  "bytes": 98207620,
  "created_at": 1638409664,
  "filename": "snli_finetune_train.jsonl",
  "id": "file-zESrbwb1mh3p2OjVzhcsKmH4",
  "object": "file",
  "purpose": "fine-tune",
  "status": "uploaded",
  "status_details": null
}

In [7]:
# Upload the validation file. The context manager closes the local file handle
# once the upload call returns (the original left it open).
with open("snli_finetune_validation.jsonl") as validation_fh:
    validation_upload = openai.File.create(file=validation_fh, purpose="fine-tune")
validation_upload

<File file id=file-QC8NTdPW56lhbaDtuxvofGLj at 0x7f203a616810> JSON: {
  "bytes": 1822513,
  "created_at": 1638409667,
  "filename": "snli_finetune_validation.jsonl",
  "id": "file-QC8NTdPW56lhbaDtuxvofGLj",
  "object": "file",
  "purpose": "fine-tune",
  "status": "uploaded",
  "status_details": null
}

In [13]:
# Start a fine-tune of the base `curie` model on the two uploaded files (referenced by file id).
# NOTE: this job is later listed as "failed" because the training file exceeds the token limit.
openai.FineTune.create(
    training_file="file-zESrbwb1mh3p2OjVzhcsKmH4",
    validation_file="file-QC8NTdPW56lhbaDtuxvofGLj",
    model="curie",
    n_epochs=4, # default
    compute_classification_metrics=True,
    classification_n_classes=3
)

<FineTune fine-tune id=ft-dUn3kTdJRe33NcVQlngm2L1n at 0x7f1fda35c900> JSON: {
  "created_at": 1638409797,
  "events": [
    {
      "created_at": 1638409797,
      "level": "info",
      "message": "Created fine-tune: ft-dUn3kTdJRe33NcVQlngm2L1n",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": null,
    "classification_n_classes": 3,
    "compute_classification_metrics": true,
    "learning_rate_multiplier": null,
    "n_epochs": 4,
    "prompt_loss_weight": 0.1,
    "use_packing": null
  },
  "id": "ft-dUn3kTdJRe33NcVQlngm2L1n",
  "model": "curie",
  "object": "fine-tune",
  "organization_id": "org-5AE307Eg4rc5EAoEA2S2bwkH",
  "result_files": [],
  "status": "pending",
  "training_files": [
    {
      "bytes": 98207620,
      "created_at": 1638409664,
      "filename": "snli_finetune_train.jsonl",
      "id": "file-zESrbwb1mh3p2OjVzhcsKmH4",
      "object": "file",
      "purpose": "fine-tune",
      "status": "error",
  

In [23]:
# The full training file has too many tokens for the current limit; roughly 10% of the
# training split fits, so compute how many rows that is.
int(len(data['train']) * 0.1)

55015

In [26]:
# Draw a ~10% random sample of the training split.
# A fixed seed makes the sample reproducible across kernel restarts, and the
# size is derived from the split instead of being hard-coded.
SAMPLE_SEED = 42
sample_size = int(len(data['train']) * 0.1)  # 55015 rows for the 550152-row train split
sample_data = data['train'].shuffle(seed=SAMPLE_SEED).select(range(sample_size))
sample_data

Loading cached shuffled indices for dataset at /home/x/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b/cache-81adece7851b63ef.arrow


Dataset({
    features: ['premise', 'hypothesis', 'label'],
    num_rows: 55015
})

In [27]:
# Write the sampled training set to JSONL, then upload it. The context manager
# closes the local file handle once the upload call returns (the original left it open).
build_jsonl(sample_data, 'snli_finetune_train_sample.jsonl')
with open("snli_finetune_train_sample.jsonl") as sample_fh:
    sample_upload = openai.File.create(file=sample_fh, purpose="fine-tune")
sample_upload

<File file id=file-pD1lpBznzyfCVq8U8rOV58RE at 0x7f1fda16e270> JSON: {
  "bytes": 9805852,
  "created_at": 1638410623,
  "filename": "snli_finetune_train_sample.jsonl",
  "id": "file-pD1lpBznzyfCVq8U8rOV58RE",
  "object": "file",
  "purpose": "fine-tune",
  "status": "uploaded",
  "status_details": null
}

In [28]:
# Retry the fine-tune with the smaller (sampled) training file; the validation file
# and all hyperparameters are the same as in the first attempt.
openai.FineTune.create(
    training_file="file-pD1lpBznzyfCVq8U8rOV58RE",
    validation_file="file-QC8NTdPW56lhbaDtuxvofGLj",
    model="curie",
    n_epochs=4, # default
    compute_classification_metrics=True,
    classification_n_classes=3
)

<FineTune fine-tune id=ft-liaJqXchRH2a0b7InmAZuvEG at 0x7f1fda0caf90> JSON: {
  "created_at": 1638410655,
  "events": [
    {
      "created_at": 1638410655,
      "level": "info",
      "message": "Created fine-tune: ft-liaJqXchRH2a0b7InmAZuvEG",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": null,
    "classification_n_classes": 3,
    "compute_classification_metrics": true,
    "learning_rate_multiplier": null,
    "n_epochs": 4,
    "prompt_loss_weight": 0.1,
    "use_packing": null
  },
  "id": "ft-liaJqXchRH2a0b7InmAZuvEG",
  "model": "curie",
  "object": "fine-tune",
  "organization_id": "org-5AE307Eg4rc5EAoEA2S2bwkH",
  "result_files": [],
  "status": "pending",
  "training_files": [
    {
      "bytes": 9805852,
      "created_at": 1638410623,
      "filename": "snli_finetune_train_sample.jsonl",
      "id": "file-pD1lpBznzyfCVq8U8rOV58RE",
      "object": "file",
      "purpose": "fine-tune",
      "status": "proc

In [29]:
# List fine-tune jobs with their status; the first job shows up as failed.
openai.FineTune.list()

<OpenAIObject list at 0x7f1fda351450> JSON: {
  "data": [
    {
      "created_at": 1638409797,
      "fine_tuned_model": null,
      "hyperparams": {
        "batch_size": null,
        "classification_n_classes": 3,
        "compute_classification_metrics": true,
        "learning_rate_multiplier": null,
        "n_epochs": 4,
        "prompt_loss_weight": 0.1,
        "use_packing": null
      },
      "id": "ft-dUn3kTdJRe33NcVQlngm2L1n",
      "model": "curie",
      "object": "fine-tune",
      "organization_id": "org-5AE307Eg4rc5EAoEA2S2bwkH",
      "result_files": [],
      "status": "failed",
      "training_files": [
        {
          "bytes": 98207620,
          "created_at": 1638409664,
          "filename": "snli_finetune_train.jsonl",
          "id": "file-zESrbwb1mh3p2OjVzhcsKmH4",
          "object": "file",
          "purpose": "fine-tune",
          "status": "error",
          "status_details": "The file contains 20821169 tokens and exceeds our 3000000 token limit. 

In [36]:
# Check on the second fine-tune job: its events so far and resolved hyperparameters.
openai.FineTune.retrieve('ft-liaJqXchRH2a0b7InmAZuvEG')

<FineTune fine-tune id=ft-liaJqXchRH2a0b7InmAZuvEG at 0x7f1fda0ceb80> JSON: {
  "created_at": 1638410655,
  "events": [
    {
      "created_at": 1638410655,
      "level": "info",
      "message": "Created fine-tune: ft-liaJqXchRH2a0b7InmAZuvEG",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638410659,
      "level": "info",
      "message": "Fine-tune enqueued. Queue number: 0",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638410663,
      "level": "info",
      "message": "Fine-tune started",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": 2,
    "classification_n_classes": 3,
    "compute_classification_metrics": true,
    "learning_rate_multiplier": 0.2,
    "n_epochs": 4,
    "prompt_loss_weight": 0.1,
    "use_packing": true
  },
  "id": "ft-liaJqXchRH2a0b7InmAZuvEG",
  "model": "curie",
  "object": "fine-tune",
  "organization_id": "org-5AE307Eg4rc5EAoEA2S2bwkH",
  "result

In [45]:
# Event log for the job, including the per-epoch completion messages.
openai.FineTune.list_events('ft-liaJqXchRH2a0b7InmAZuvEG')

<OpenAIObject list at 0x7f1fd9eec270> JSON: {
  "data": [
    {
      "created_at": 1638410655,
      "level": "info",
      "message": "Created fine-tune: ft-liaJqXchRH2a0b7InmAZuvEG",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638410659,
      "level": "info",
      "message": "Fine-tune enqueued. Queue number: 0",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638410663,
      "level": "info",
      "message": "Fine-tune started",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638411178,
      "level": "info",
      "message": "Completed epoch 1/4",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638412203,
      "level": "info",
      "message": "Completed epoch 2/4",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638413227,
      "level": "info",
      "message": "Completed epoch 3/4",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638414249,
      "leve

In [50]:
# Retrieve the same job again later to see its updated event history.
openai.FineTune.retrieve('ft-liaJqXchRH2a0b7InmAZuvEG')

<FineTune fine-tune id=ft-liaJqXchRH2a0b7InmAZuvEG at 0x7f1fda0d79f0> JSON: {
  "created_at": 1638410655,
  "events": [
    {
      "created_at": 1638410655,
      "level": "info",
      "message": "Created fine-tune: ft-liaJqXchRH2a0b7InmAZuvEG",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638410659,
      "level": "info",
      "message": "Fine-tune enqueued. Queue number: 0",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638410663,
      "level": "info",
      "message": "Fine-tune started",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638411178,
      "level": "info",
      "message": "Completed epoch 1/4",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638412203,
      "level": "info",
      "message": "Completed epoch 2/4",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638413227,
      "level": "info",
      "message": "Completed epoch 3/4",
      "object": "fine-tune-e

In [48]:
# Prompt for the first test example, with the label left blank.
# `x` is reused as the prompt by the completion cells.
x = to_example(data['test'][0], False)
x

'Premise: This church choir sings to the masses as they sing joyous songs from the book at a church.\nHypothesis: The church has cracks in the ceiling.\nLabel: '

In [51]:
# Using the fine tuned model
# First attempt with default sampling settings: no stop sequence and no max_tokens override,
# so the completion ends on the length limit (finish_reason "length"), not right after the label.
openai.Completion.create(
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04',
    prompt=x
)

<OpenAIObject text_completion id=cmpl-4AWtQeqpHkqJ869kJP1BvF14RNMqL at 0x7f1fda0d7810> JSON: {
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": "Neutralmentailment\nPremise: Two men in shorts sit on a"
    }
  ],
  "created": 1638415068,
  "id": "cmpl-4AWtQeqpHkqJ869kJP1BvF14RNMqL",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04",
  "object": "text_completion"
}

In [52]:
# Second attempt: lower temperature and a 60-token budget. There is still no stop
# sequence, so the model keeps generating further examples after the label.
openai.Completion.create(
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04',
    prompt=x,
    temperature=0.3,
    max_tokens=60,
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0,
)

<OpenAIObject text_completion id=cmpl-4AWued8lpqg9AIawfG9H3rdGNlB81 at 0x7f1fda0cec20> JSON: {
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": "Neutral: This is happening at a wedding.\nHypothesis: The choir is singing at the funeral.\nLabel: Contradiction: A man in a green shirt and jeans is walking down the street.\nHypothesis: A man is walking to the store.\nLabel"
    }
  ],
  "created": 1638415144,
  "id": "cmpl-4AWued8lpqg9AIawfG9H3rdGNlB81",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04",
  "object": "text_completion"
}

In [53]:
# Third attempt: cap the completion at 10 tokens and stop at the first newline.
openai.Completion.create(
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04',
    prompt=x,
    temperature=0.3,
    max_tokens=10,
    stop=['\n'],
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0,
)

<OpenAIObject text_completion id=cmpl-4AWvCugPl9dwZY7e6obfd2ceJjwjC at 0x7f1fda0c7e50> JSON: {
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": "Neutral: This choir is singing at a wedding"
    }
  ],
  "created": 1638415178,
  "id": "cmpl-4AWvCugPl9dwZY7e6obfd2ceJjwjC",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04",
  "object": "text_completion"
}

In [56]:
# Show test example 0 with its gold label, for comparison with the completions.
to_example(data['test'][0])

'Premise: This church choir sings to the masses as they sing joyous songs from the book at a church.\nHypothesis: The church has cracks in the ceiling.\nLabel: Neutral'

In [54]:
# Another example: same sampling settings, prompt built from test example 1.
openai.Completion.create(
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04',
    prompt=to_example(data['test'][1], False),
    temperature=0.3,
    max_tokens=10,
    stop=['\n'],
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0,
)

<OpenAIObject text_completion id=cmpl-4AWvsbWr32vSHCkn066wantC9oEV9 at 0x7f1fd9f6da40> JSON: {
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": "Entailment: A man in a blue shirt"
    }
  ],
  "created": 1638415220,
  "id": "cmpl-4AWvsbWr32vSHCkn066wantC9oEV9",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04",
  "object": "text_completion"
}

In [57]:
# Show test example 1 with its gold label, for comparison with the completion.
to_example(data['test'][1])

'Premise: This church choir sings to the masses as they sing joyous songs from the book at a church.\nHypothesis: The church is filled with song.\nLabel: Entailment'

In [58]:
# One more: same sampling settings, prompt built from test example 2.
openai.Completion.create(
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04',
    prompt=to_example(data['test'][2], False),
    temperature=0.3,
    max_tokens=10,
    stop=['\n'],
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0,
)

<OpenAIObject text_completion id=cmpl-4AWwgP5jpKgje4mI4aUmdqn1YfmcL at 0x7f1fd9eec9a0> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "Contradiction"
    }
  ],
  "created": 1638415270,
  "id": "cmpl-4AWwgP5jpKgje4mI4aUmdqn1YfmcL",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04",
  "object": "text_completion"
}

In [59]:
# Show test example 2 with its gold label, for comparison with the completion.
to_example(data['test'][2])

'Premise: This church choir sings to the masses as they sing joyous songs from the book at a church.\nHypothesis: A choir singing at a baseball game.\nLabel: Contradiction'

In [60]:
# One more: same sampling settings, prompt built from test example 3.
openai.Completion.create(
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04',
    prompt=to_example(data['test'][3], False),
    temperature=0.3,
    max_tokens=10,
    stop=['\n'],
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0,
)

<OpenAIObject text_completion id=cmpl-4AWxdL8LhmlJ5umX8Q6rnAgDNvrH3 at 0x7f1fda0ce4f0> JSON: {
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": "Neutral: A woman with a green headscar"
    }
  ],
  "created": 1638415329,
  "id": "cmpl-4AWxdL8LhmlJ5umX8Q6rnAgDNvrH3",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-02-03-14-04",
  "object": "text_completion"
}

In [61]:
# Show test example 3 with its gold label, for comparison with the completion.
to_example(data['test'][3])

'Premise: A woman with a green headscarf, blue shirt and a very big grin.\nHypothesis: The woman is young.\nLabel: Neutral'

In [None]:
# We might need to work on prompt engineering to make sure the generated text has only the label
# Adding `\n` might help.
# The fine tuning took about 1 hour for 10% of the training data w/ 4 epochs, so it could take a long time to iterate.

In [2]:
# Download the results file before opening the destination. Opening with 'wb'
# truncates the file immediately, so downloading first means a failed download
# cannot wipe out a previously saved copy.
results_csv = openai.File.download('file-fODaqo7g6sIEmjKDynpQUfJq')
with open('../results/snli_results.csv', 'wb') as f:
    f.write(results_csv)

In [3]:
import pandas as pd
# Load the downloaded results file; the displayed columns are per-step loss and accuracy metrics.
df = pd.read_csv('../results/snli_results.csv')
df

Unnamed: 0,step,elapsed_tokens,elapsed_examples,training_loss,training_sequence_accuracy,training_token_accuracy,validation_loss,validation_sequence_accuracy,validation_token_accuracy,classification/accuracy,classification/weighted_f1_score
0,1,4098,2,0.377935,0.0,0.641304,0.432851,0.0,0.4,,
1,2,8196,4,0.292203,0.0,0.622837,,,,,
2,3,12294,6,0.391761,0.0,0.750000,,,,,
3,4,16392,8,0.357319,0.0,0.736462,,,,,
4,5,20490,10,0.261835,0.0,0.722807,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
2045,2046,8384508,4092,0.069999,1.0,1.000000,,,,,
2046,2047,8388606,4094,0.073392,0.5,0.993031,,,,,
2047,2048,8392704,4096,0.063837,1.0,1.000000,,,,,
2048,2049,8396802,4098,0.071893,0.0,0.993174,,,,,
