In [1]:
import openai

from fp_dataset_artifacts.utils import init_openai
from fp_dataset_artifacts.anli import map_finetune, get_response
from datasets import (
    list_datasets, load_dataset, list_metrics, load_metric, concatenate_datasets
)

# Configure the OpenAI client before any API call is made.
init_openai()

# Load both NLI corpora used below; SNLI first, then ANLI.
snli, anli = (load_dataset(corpus) for corpus in ('snli', 'anli'))

Reusing dataset snli (/home/x/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b)
Reusing dataset anli (/home/x/.cache/huggingface/datasets/anli/plain_text/0.1.0/aabce88453b06dff21c201855ea83283bab0390bff746deadb30b65695755c0b)


In [70]:
# Build two separate fine-tuning training sets of equal size:
#   1. SNLI only
#   2. SNLI plus ANLI round 1
# The SNLI-only set uses extra SNLI rows in place of the ANLI rows,
# so both sets contain `total_training_size` examples.
anli_training_size = anli['train_r1'].num_rows
snli_training_size = 25000
total_training_size = anli_training_size + snli_training_size
total_training_size

41946

In [71]:
# SNLI-only training set: shuffle with a fixed seed, keep the first
# `total_training_size` rows, and convert them to prompt/completion pairs.
# map_finetune is applied exactly once; a second pass over the already
# mapped rows only repeats the same work.
snli_only = (
    snli['train']
    .shuffle(seed=0)
    .select(list(range(total_training_size)))
    .map(map_finetune)
    .remove_columns(['premise', 'hypothesis', 'label'])
)
snli_only

Loading cached shuffled indices for dataset at /home/x/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b/cache-0e9d0b15c43a175e.arrow


HBox(children=(FloatProgress(value=0.0, max=41946.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=41946.0), HTML(value='')))




Dataset({
    features: ['prompt', 'completion'],
    num_rows: 41946
})

In [72]:
# ANLI round-1 training split converted to prompt/completion pairs,
# with the original ANLI columns dropped afterwards.
anli_only = (
    anli['train_r1']
    .map(map_finetune)
    .remove_columns(['uid', 'reason', 'premise', 'hypothesis', 'label'])
)
anli_only

Loading cached processed dataset at /home/x/.cache/huggingface/datasets/anli/plain_text/0.1.0/aabce88453b06dff21c201855ea83283bab0390bff746deadb30b65695755c0b/cache-d198be314a77b6f4.arrow


Dataset({
    features: ['prompt', 'completion'],
    num_rows: 16946
})

In [73]:
# Mixed training set: the first `snli_training_size` SNLI rows followed by
# every ANLI round-1 row, then shuffled with a fixed seed.
snli_train_subset = snli_only.select(list(range(snli_training_size)))
snli_plus_anli = concatenate_datasets([snli_train_subset, anli_only]).shuffle(0)
snli_plus_anli

Dataset({
    features: ['prompt', 'completion'],
    num_rows: 41946
})

In [74]:
# Validation follows the same scheme as training: the total size is the
# full SNLI validation split, and the mixed set swaps in ANLI dev round 1
# for the last `anli_valid_size` SNLI rows.
anli_valid_size = anli['dev_r1'].num_rows
total_valid_size = snli['validation'].num_rows
snli_valid_size = total_valid_size - anli_valid_size

snli_valid = (
    snli['validation']
    .shuffle(0)
    .select(list(range(total_valid_size)))
    .map(map_finetune)
    .remove_columns(['premise', 'hypothesis', 'label'])
)
snli_valid

Loading cached shuffled indices for dataset at /home/x/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b/cache-70baa31142a3b54f.arrow
Loading cached processed dataset at /home/x/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b/cache-7d2d35d5f4813980.arrow


Dataset({
    features: ['prompt', 'completion'],
    num_rows: 10000
})

In [75]:
# ANLI dev round-1 split converted to prompt/completion pairs,
# with the original ANLI columns dropped afterwards.
anli_valid = (
    anli['dev_r1']
    .map(map_finetune)
    .remove_columns(['uid', 'reason', 'premise', 'hypothesis', 'label'])
)
anli_valid

Loading cached processed dataset at /home/x/.cache/huggingface/datasets/anli/plain_text/0.1.0/aabce88453b06dff21c201855ea83283bab0390bff746deadb30b65695755c0b/cache-5e201f1ee0e85880.arrow


Dataset({
    features: ['prompt', 'completion'],
    num_rows: 1000
})

In [76]:
# Mixed validation set: the first `snli_valid_size` SNLI rows followed by
# every ANLI dev round-1 row, then shuffled with a fixed seed.
snli_valid_subset = snli_valid.select(list(range(snli_valid_size)))
snli_plus_anli_valid = concatenate_datasets([snli_valid_subset, anli_valid]).shuffle(0)
snli_plus_anli_valid

Loading cached shuffled indices for dataset at /home/x/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b/cache-be13fafa07810d17.arrow


Dataset({
    features: ['prompt', 'completion'],
    num_rows: 10000
})

In [77]:
def upload(dataset, filename, purpose='fine-tune'):
    """Write `dataset` to `filename` and upload that file to OpenAI.

    Args:
        dataset: Object exposing `to_json(path)`; the callers in this
            notebook pass Hugging Face datasets.
        filename: Local path the dataset is written to and then uploaded from.
        purpose: Value forwarded as `purpose` to `openai.File.create`.

    Returns:
        The `id` field of the file-creation response.
    """
    dataset.to_json(filename)
    # Use a context manager so the file handle is closed once the upload
    # call returns, even if the request raises.
    with open(filename) as jsonl_file:
        response = openai.File.create(file=jsonl_file, purpose=purpose)
    return response['id']

In [78]:
# Save and upload all files.
# The validation uploads must run as well: `snli_valid_id` and
# `anli_valid_id` are printed and passed to finetune() in later cells,
# which would otherwise raise NameError on a fresh kernel.
snli_train_id = upload(snli_only, 'ablation_snli_train.jsonl')
snli_valid_id = upload(snli_valid, 'ablation_snli_valid.jsonl')
anli_train_id = upload(snli_plus_anli, 'ablation_anli_train.jsonl')
anli_valid_id = upload(snli_plus_anli_valid, 'ablation_anli_valid.jsonl')

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [79]:
# Show the uploaded file ids, one per line (train/valid for each experiment).
for uploaded_id in (snli_train_id, snli_valid_id, anli_train_id, anli_valid_id):
    print(uploaded_id)

file-SbdP5nU4beGEMZCH9P56MPjH
file-1syChFe6i616f70H3MfnArBf
file-a7ryGWMmgQwczg959jdTDsJw
file-v5uBw86uR9XhsKeaKCLkTwKq


In [80]:
def finetune(
    train_file_id,
    valid_file_id,
    model='curie',
    n_epochs=4,
    compute_classification_metrics=True,
    classification_n_classes=3,
):
    """Start an OpenAI fine-tune job and return its id.

    Args:
        train_file_id: Uploaded file id passed as `training_file`.
        valid_file_id: Uploaded file id passed as `validation_file`.
        model: Base model name forwarded to the API.
        n_epochs: Number of epochs forwarded to the API.
        compute_classification_metrics: Forwarded to the API unchanged.
        classification_n_classes: Forwarded to the API unchanged.

    Returns:
        The `id` field of the fine-tune creation response.
    """
    # Make sure the client is configured with the API key before the request.
    init_openai()

    # Collect the request fields once, then submit the job.
    request = dict(
        training_file=train_file_id,
        validation_file=valid_file_id,
        model=model,
        n_epochs=n_epochs,
        compute_classification_metrics=compute_classification_metrics,
        classification_n_classes=classification_n_classes,
    )
    job = openai.FineTune.create(**request)

    return job['id']

In [84]:
# Fine-tune 1: SNLI only (extra SNLI rows take the place of the ANLI rows).
snli_finetune_id = finetune(
    train_file_id=snli_train_id,
    valid_file_id=snli_valid_id,
)
snli_finetune_id

'ft-a0nNphtyncMOu5XmSLlUkvDz'

In [81]:
# Fine-tune 2: SNLI with ANLI round 1 added.
anli_finetune_id = finetune(
    train_file_id=anli_train_id,
    valid_file_id=anli_valid_id,
)
anli_finetune_id

'ft-rDeH2hvXBGo0ZlmsxEKbvkP4'

In [85]:
# Fetch the SNLI-only fine-tune job to inspect its status, events and hyperparameters.
openai.FineTune.retrieve(snli_finetune_id)

<FineTune fine-tune id=ft-a0nNphtyncMOu5XmSLlUkvDz at 0x7f690c09e6d0> JSON: {
  "created_at": 1638837445,
  "events": [
    {
      "created_at": 1638837445,
      "level": "info",
      "message": "Created fine-tune: ft-a0nNphtyncMOu5XmSLlUkvDz",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": null,
    "classification_n_classes": 3,
    "compute_classification_metrics": true,
    "learning_rate_multiplier": null,
    "n_epochs": 4,
    "prompt_loss_weight": 0.1,
    "use_packing": null
  },
  "id": "ft-a0nNphtyncMOu5XmSLlUkvDz",
  "model": "curie",
  "object": "fine-tune",
  "organization_id": "org-5AE307Eg4rc5EAoEA2S2bwkH",
  "result_files": [],
  "status": "pending",
  "training_files": [
    {
      "bytes": 7630415,
      "created_at": 1638837407,
      "filename": "ablation_snli_train.jsonl",
      "id": "file-SbdP5nU4beGEMZCH9P56MPjH",
      "object": "file",
      "purpose": "fine-tune",
      "status": "processed",

In [87]:
# Fetch the SNLI+ANLI fine-tune job to inspect its status, events and hyperparameters.
openai.FineTune.retrieve(anli_finetune_id)

<FineTune fine-tune id=ft-rDeH2hvXBGo0ZlmsxEKbvkP4 at 0x7f690c09e900> JSON: {
  "created_at": 1638837419,
  "events": [
    {
      "created_at": 1638837420,
      "level": "info",
      "message": "Created fine-tune: ft-rDeH2hvXBGo0ZlmsxEKbvkP4",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": null,
    "classification_n_classes": 3,
    "compute_classification_metrics": true,
    "learning_rate_multiplier": null,
    "n_epochs": 4,
    "prompt_loss_weight": 0.1,
    "use_packing": null
  },
  "id": "ft-rDeH2hvXBGo0ZlmsxEKbvkP4",
  "model": "curie",
  "object": "fine-tune",
  "organization_id": "org-5AE307Eg4rc5EAoEA2S2bwkH",
  "result_files": [],
  "status": "pending",
  "training_files": [
    {
      "bytes": 12587384,
      "created_at": 1638837416,
      "filename": "ablation_anli_train.jsonl",
      "id": "file-a7ryGWMmgQwczg959jdTDsJw",
      "object": "file",
      "purpose": "fine-tune",
      "status": "processed"

In [86]:
# List the fine-tune jobs returned by the API, including earlier failed attempts.
openai.FineTune.list()

<OpenAIObject list at 0x7f690c098b80> JSON: {
  "data": [
    {
      "created_at": 1638409797,
      "fine_tuned_model": null,
      "hyperparams": {
        "batch_size": null,
        "classification_n_classes": 3,
        "compute_classification_metrics": true,
        "learning_rate_multiplier": null,
        "n_epochs": 4,
        "prompt_loss_weight": 0.1,
        "use_packing": null
      },
      "id": "ft-dUn3kTdJRe33NcVQlngm2L1n",
      "model": "curie",
      "object": "fine-tune",
      "organization_id": "org-5AE307Eg4rc5EAoEA2S2bwkH",
      "result_files": [],
      "status": "failed",
      "training_files": [
        {
          "bytes": 98207620,
          "created_at": 1638409664,
          "filename": "snli_finetune_train.jsonl",
          "id": "file-zESrbwb1mh3p2OjVzhcsKmH4",
          "object": "file",
          "purpose": "fine-tune",
          "status": "error",
          "status_details": "The file contains 20821169 tokens and exceeds our 3000000 token limit. 

In [69]:
# NOTE(review): this call passes no fine-tune id, yet the recorded output shows
# job ft-c3uTTRmjuODtbxvK4J9IWOO8 being cancelled, so the id was presumably
# removed after the cell ran. The execution count (69) also predates the cells
# that create the current jobs. Confirm whether this cell should be deleted or
# given an explicit id before re-running.
openai.FineTune.cancel()

<FineTune fine-tune id=ft-c3uTTRmjuODtbxvK4J9IWOO8 at 0x7f6920cd6ae0> JSON: {
  "created_at": 1638837342,
  "events": [
    {
      "created_at": 1638837342,
      "level": "info",
      "message": "Created fine-tune: ft-c3uTTRmjuODtbxvK4J9IWOO8",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638837362,
      "level": "info",
      "message": "Fine-tune cancelled",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": null,
    "classification_n_classes": 3,
    "compute_classification_metrics": true,
    "learning_rate_multiplier": null,
    "n_epochs": 4,
    "prompt_loss_weight": 0.1,
    "use_packing": null
  },
  "id": "ft-c3uTTRmjuODtbxvK4J9IWOO8",
  "model": "curie",
  "object": "fine-tune",
  "organization_id": "org-5AE307Eg4rc5EAoEA2S2bwkH",
  "result_files": [],
  "status": "cancelled",
  "training_files": [
    {
      "bytes": 13499120,
      "created_at": 1638837298,
      "filename": "ablation_a