In [1]:
import openai

from fp_dataset_artifacts.utils import init_openai
from fp_dataset_artifacts.boolq import format_question, format_answer
from datasets import list_datasets, load_dataset, list_metrics, load_metric, concatenate_datasets

# Configure the OpenAI client through the project helper
# (presumably sets the API key -- confirm in fp_dataset_artifacts.utils).
init_openai()

# Load ANLI as a DatasetDict; the repr below lists the train/dev/test splits
# for each round.
data = load_dataset('anli')
data

Reusing dataset anli (/home/x/.cache/huggingface/datasets/anli/plain_text/0.1.0/aabce88453b06dff21c201855ea83283bab0390bff746deadb30b65695755c0b)


  0%|          | 0/9 [00:00<?, ?it/s]

DatasetDict({
    train_r1: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 16946
    })
    dev_r1: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 1000
    })
    test_r1: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 1000
    })
    train_r2: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 45460
    })
    dev_r2: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 1000
    })
    test_r2: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 1000
    })
    train_r3: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 100459
    })
    dev_r3: Dataset({
        features: ['uid', 'premise', 'hypothesis', 'label', 'reason'],
        num_rows: 12

In [8]:
# Peek at the first round-1 training example to see the raw record layout.
data['train_r1'][0]

{'uid': '0fd0abfb-659e-4453-b196-c3a64d2d8267',
 'premise': 'The Parma trolleybus system (Italian: "Rete filoviaria di Parma" ) forms part of the public transport network of the city and "comune" of Parma, in the region of Emilia-Romagna, northern Italy. In operation since 1953, the system presently comprises four urban routes.',
 'hypothesis': 'The trolleybus system has over 2 urban routes',
 'label': 0,
 'reason': ''}

In [9]:
# Same check for the first round-2 training example.
data['train_r2'][0]

{'uid': '245475c4-6225-4f5c-b945-e13e0e988fc8',
 'premise': 'Topi Raja Sweety Roja is a 1996 Telugu comedy film, produced by Dr. A. Vijayalakshmi on Sri Sai Madhavi Productions banner and directed by Dr. N. Siva Prasad. Starring Rajendra Prasad, Roja in the lead roles and music also composed by "Hero" Rajendra Prasad.',
 'hypothesis': 'The lead role was played by Rajendra Prasad',
 'label': 0,
 'reason': ''}

In [10]:
# First round-3 training example; in this record the 'reason' field is
# populated, whereas it is empty in the r1/r2 records displayed earlier.
data['train_r3'][0]

{'uid': '2093cfb3-a15f-4282-81e3-0cb793ffd0d7',
 'premise': 'TOKYO, Dec 18 (Reuters) - Japan’s Shionogi & Co said on Tuesday that it has applied to health regulators in the United States, Canada and Europe for approval of its HIV drug Dolutegravir. Shionogi developed Dolutegravir with a Viiv Healthcare, an AIDS drug joint venture between GlaxoSmithKline and Pfizer, in exchange for its rights to the drug.',
 'hypothesis': 'The article was written on December 18th.',
 'label': 0,
 'reason': 'TOKYO, Dec 18 (Reuters) is when the article was written as it states in the first words of the sentence'}

In [38]:
def int2label(i):
    """Map an ANLI integer class id to its label name.

    0 -> 'Entailment', 1 -> 'Neutral', 2 -> 'Contradiction'.

    Raises:
        IndexError: if ``i`` is outside 0..2. Negative ids are rejected
            explicitly: plain list indexing would wrap around, so -1 would
            silently be reported as 'Contradiction'.
    """
    labels = ('Entailment', 'Neutral', 'Contradiction')
    if not 0 <= i < len(labels):
        raise IndexError(f'label id out of range: {i!r}')
    return labels[i]
int2label(0)

'Entailment'

In [39]:
def map_finetune_train(x):
    """Turn one ANLI example into a prompt/completion record for fine-tuning.

    Args:
        x: mapping with 'premise', 'hypothesis' and an integer 'label'.

    Returns:
        dict with two keys: 'prompt' (premise and hypothesis followed by a
        trailing "Label: " cue) and 'completion' (the label name plus a
        newline).
    """
    prompt = (
        f"Premise: {x['premise']}\n\n"
        f"Hypothesis: {x['hypothesis']}\n\n"
        "Label: "
    )
    # The newline terminates the label; the completion requests later in the
    # notebook pass it as their stop sequence.
    completion = f"{int2label(x['label'])}\n"
    return {'prompt': prompt, 'completion': completion}
map_finetune_train(data['train_r1'][0])

{'prompt': 'Premise: The Parma trolleybus system (Italian: "Rete filoviaria di Parma" ) forms part of the public transport network of the city and "comune" of Parma, in the region of Emilia-Romagna, northern Italy. In operation since 1953, the system presently comprises four urban routes.\n\nHypothesis: The trolleybus system has over 2 urban routes\n\nLabel: ',
 'completion': 'Entailment\n'}

In [38]:
# Convert round-1 training examples to prompt/completion records; the raw
# ANLI columns are dropped as part of the same map call.
train_r1 = data['train_r1'].map(
    map_finetune_train,
    remove_columns=['uid', 'reason', 'premise', 'hypothesis', 'label'],
)

Loading cached processed dataset at /home/x/.cache/huggingface/datasets/anli/plain_text/0.1.0/aabce88453b06dff21c201855ea83283bab0390bff746deadb30b65695755c0b/cache-0abd26387ee4a689.arrow


In [39]:
# Convert round-2 training examples to prompt/completion records; the raw
# ANLI columns are dropped as part of the same map call.
train_r2 = data['train_r2'].map(
    map_finetune_train,
    remove_columns=['uid', 'reason', 'premise', 'hypothesis', 'label'],
)

  0%|          | 0/45460 [00:00<?, ?ex/s]

In [47]:
# Convert round-3 training examples to prompt/completion records; the raw
# ANLI columns are dropped as part of the same map call.
train_r3 = data['train_r3'].map(
    map_finetune_train,
    remove_columns=['uid', 'reason', 'premise', 'hypothesis', 'label'],
)

  0%|          | 0/100459 [00:00<?, ?ex/s]

In [49]:
# Merge the three rounds into one training set and export it as JSON Lines.
# The value displayed below is what to_json returns (the byte count of the
# written file).
train = concatenate_datasets([train_r1, train_r2, train_r3])
train.to_json('anli_finetune_train.jsonl')

Creating json from Arrow format:   0%|          | 0/17 [00:00<?, ?ba/s]

74546999

In [50]:
# This training dataset will definitely go over the token limit.
# Let's just use round 1 for fine-tuning for now.

In [51]:
# Export only the round-1 training records; this is the file uploaded for
# fine-tuning further down.
train_r1.to_json('anli_finetune_train_r1.jsonl')

Creating json from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

8042647

In [53]:
# Build the round-1 validation set in the same prompt/completion format
# (raw columns dropped inside the map call) and export it as JSON Lines.
dev_r1 = data['dev_r1'].map(
    map_finetune_train,
    remove_columns=['uid', 'reason', 'premise', 'hypothesis', 'label'],
)
dev_r1.to_json('anli_finetune_dev_r1.jsonl')

  0%|          | 0/1000 [00:00<?, ?ex/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

475195

In [54]:
# Upload the round-1 training file. The handle is opened in a `with` block so
# it is closed once the upload returns instead of being leaked.
with open('anli_finetune_train_r1.jsonl') as f:
    train_upload = openai.File.create(file=f, purpose='fine-tune')
train_upload  # display the File object; its "id" identifies the upload

<File file id=file-FkO4WnaoNWQjODh96npgZ6Kr at 0x7f67643eccc0> JSON: {
  "bytes": 8042647,
  "created_at": 1638672457,
  "filename": "anli_finetune_train_r1.jsonl",
  "id": "file-FkO4WnaoNWQjODh96npgZ6Kr",
  "object": "file",
  "purpose": "fine-tune",
  "status": "uploaded",
  "status_details": null
}

In [55]:
# Upload the round-1 validation file. The handle is opened in a `with` block
# so it is closed once the upload returns instead of being leaked.
with open('anli_finetune_dev_r1.jsonl') as f:
    dev_upload = openai.File.create(file=f, purpose='fine-tune')
dev_upload  # display the File object; its "id" identifies the upload

<File file id=file-cZYCbTSEyYvGpQ1lW2kB41pg at 0x7f66ff96b2c0> JSON: {
  "bytes": 475195,
  "created_at": 1638672465,
  "filename": "anli_finetune_dev_r1.jsonl",
  "id": "file-cZYCbTSEyYvGpQ1lW2kB41pg",
  "object": "file",
  "purpose": "fine-tune",
  "status": "uploaded",
  "status_details": null
}

In [56]:
# Start a fine-tune of the base `curie` model on the uploaded round-1 files,
# with 3-class classification metrics computed on the validation file.
# NOTE(review): both file ids are hard-coded copies of the ids shown in the
# upload outputs; a fresh upload will presumably return different ids, so
# these must be kept in sync by hand.
openai.FineTune.create(
    training_file='file-FkO4WnaoNWQjODh96npgZ6Kr',
    validation_file='file-cZYCbTSEyYvGpQ1lW2kB41pg',
    model='curie',
    n_epochs=4,
    compute_classification_metrics=True,
    classification_n_classes=3
)

<FineTune fine-tune id=ft-uYZ4vdMlvmoLVuycheBpvM7U at 0x7f66ff988bd0> JSON: {
  "created_at": 1638672534,
  "events": [
    {
      "created_at": 1638672534,
      "level": "info",
      "message": "Created fine-tune: ft-uYZ4vdMlvmoLVuycheBpvM7U",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": null,
    "classification_n_classes": 3,
    "compute_classification_metrics": true,
    "learning_rate_multiplier": null,
    "n_epochs": 4,
    "prompt_loss_weight": 0.1,
    "use_packing": null
  },
  "id": "ft-uYZ4vdMlvmoLVuycheBpvM7U",
  "model": "curie",
  "object": "fine-tune",
  "organization_id": "org-5AE307Eg4rc5EAoEA2S2bwkH",
  "result_files": [],
  "status": "pending",
  "training_files": [
    {
      "bytes": 8042647,
      "created_at": 1638672457,
      "filename": "anli_finetune_train_r1.jsonl",
      "id": "file-FkO4WnaoNWQjODh96npgZ6Kr",
      "object": "file",
      "purpose": "fine-tune",
      "status": "processe

In [2]:
# Fetch the fine-tune job to inspect its status and event log.
# NOTE(review): the job id is hard-coded from the FineTune.create output.
openai.FineTune.retrieve('ft-uYZ4vdMlvmoLVuycheBpvM7U')

<FineTune fine-tune id=ft-uYZ4vdMlvmoLVuycheBpvM7U at 0x7fb682539220> JSON: {
  "created_at": 1638672534,
  "events": [
    {
      "created_at": 1638672534,
      "level": "info",
      "message": "Created fine-tune: ft-uYZ4vdMlvmoLVuycheBpvM7U",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638672540,
      "level": "info",
      "message": "Fine-tune enqueued. Queue number: 0",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638672545,
      "level": "info",
      "message": "Fine-tune started",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638673078,
      "level": "info",
      "message": "Completed epoch 1/4",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638673618,
      "level": "info",
      "message": "Completed epoch 2/4",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638674154,
      "level": "info",
      "message": "Completed epoch 3/4",
      "object": "fine-tune-e

In [32]:
# Fetch the bytes first, then open the destination: if the download raises,
# an existing results file is left untouched instead of being truncated by
# open(..., 'wb').
results_bytes = openai.File.download('file-8d5IsoqvC86h9bihVABgKQSP')
with open('../results/anli_results.csv', 'wb') as f:
    f.write(results_bytes)

In [29]:
import pandas as pd

In [33]:
# Load the downloaded results CSV; the columns shown below are per-step
# training/validation loss and accuracy figures.
df = pd.read_csv('../results/anli_results.csv')
df

Unnamed: 0,step,elapsed_tokens,elapsed_examples,training_loss,training_sequence_accuracy,training_token_accuracy,validation_loss,validation_sequence_accuracy,validation_token_accuracy,classification/accuracy,classification/weighted_f1_score
0,1,2049,1,0.296271,0.0,0.619048,0.2819,0.0,0.666667,,
1,2,4098,2,0.200318,0.0,0.893939,,,,,
2,3,6147,3,0.200883,0.0,0.803030,,,,,
3,4,8196,4,0.216732,0.0,0.763889,,,,,
4,5,10245,5,0.194912,0.0,0.865672,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
3635,3636,7450164,3636,0.013985,1.0,1.000000,,,,,
3636,3637,7452213,3637,0.013357,1.0,1.000000,,,,,
3637,3638,7454262,3638,0.013917,0.0,0.985507,,,,,
3638,3639,7456311,3639,0.013196,1.0,1.000000,,,,,


In [40]:
# Format the round-1 test split exactly like the training data so prompts
# match what the model was fine-tuned on; 'completion' holds the gold label.
test_r1 = data['test_r1'].map(
    map_finetune_train,
    remove_columns=['uid', 'reason', 'premise', 'hypothesis', 'label'],
)
test_r1

  0%|          | 0/1000 [00:00<?, ?ex/s]

Dataset({
    features: ['prompt', 'completion'],
    num_rows: 1000
})

In [41]:
# Test example 0: the prompt plus its gold label in 'completion'.
test_r1[0]

{'prompt': 'Premise: Ernest Jones is a British jeweller and watchmaker. Established in 1949, its first store was opened in Oxford Street, London. Ernest Jones specialises in diamonds and watches, stocking brands such as Gucci and Emporio Armani. Ernest Jones is part of the Signet Jewelers group.\n\nHypothesis: The first Ernest Jones store was opened on the continent of Europe.\n\nLabel: ',
 'completion': 'Entailment\n'}

In [43]:
# Ask the fine-tuned model to label test example 0.
# temperature=0 makes decoding greedy, so the predicted label is reproducible
# from run to run (sampling at 0.3 can occasionally flip a prediction).
openai.Completion.create(
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-03-26-14',
    prompt=test_r1[0]['prompt'],
    temperature=0,
    max_tokens=10,
    stop=['\n'],  # training completions end with a newline
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0,
)

<OpenAIObject text_completion id=cmpl-4Bpl3HMLFQi3iujtRsrHeUsS4tPP2 at 0x7fb682545900> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "Entailment"
    }
  ],
  "created": 1638725913,
  "id": "cmpl-4Bpl3HMLFQi3iujtRsrHeUsS4tPP2",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-03-26-14",
  "object": "text_completion"
}

In [46]:
# Test example 3: the prompt plus its gold label in 'completion'.
test_r1[3]

{'prompt': "Premise: Shadowboxer is a 2005 crime thriller film directed by Lee Daniels and starring Academy Award winners Cuba Gooding Jr., Helen Mirren, and Mo'Nique. It opened in limited release in six cities: New York, Los Angeles, Washington, D.C., Baltimore, Philadelphia, and Richmond, Virginia.\n\nHypothesis: Shadowboxer was written and directed by Lee Daniels and was starring Academy Award winners Cuba Gooding Jr., Helen Mirren, and Mo'Nique.\n\nLabel: ",
 'completion': 'Neutral\n'}

In [47]:
# Ask the fine-tuned model to label test example 3.
# temperature=0 makes decoding greedy, so the predicted label is reproducible
# from run to run (sampling at 0.3 can occasionally flip a prediction).
openai.Completion.create(
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-03-26-14',
    prompt=test_r1[3]['prompt'],
    temperature=0,
    max_tokens=10,
    stop=['\n'],  # training completions end with a newline
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0,
)

<OpenAIObject text_completion id=cmpl-4BplRBigRny9VprKnJYoXZ3PhEi3v at 0x7fb682539630> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "Entailment"
    }
  ],
  "created": 1638725937,
  "id": "cmpl-4BplRBigRny9VprKnJYoXZ3PhEi3v",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-03-26-14",
  "object": "text_completion"
}

In [48]:
# Test example 4: the prompt plus its gold label in 'completion'.
test_r1[4]

{'prompt': 'Premise: Takaaki Kajita (梶田 隆章 , Kajita Takaaki ) is a Japanese physicist, known for neutrino experiments at the Kamiokande and its successor, Super-Kamiokande. In 2015, he was awarded the Nobel Prize in Physics jointly with Canadian physicist Arthur B. McDonald.\n\nHypothesis: Arthur B. McDonald is a Japanese physicist, known for neutrino experiments at the Kamiokande and its successor, Super-Kamiokande.\n\nLabel: ',
 'completion': 'Contradiction\n'}

In [49]:
# Ask the fine-tuned model to label test example 4.
# temperature=0 makes decoding greedy, so the predicted label is reproducible
# from run to run (sampling at 0.3 can occasionally flip a prediction).
openai.Completion.create(
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-03-26-14',
    prompt=test_r1[4]['prompt'],
    temperature=0,
    max_tokens=10,
    stop=['\n'],  # training completions end with a newline
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0,
)

<OpenAIObject text_completion id=cmpl-4BpleO24l6rcwury8gNFTgjeWoaZ9 at 0x7fb682545e00> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "Contradiction"
    }
  ],
  "created": 1638725950,
  "id": "cmpl-4BpleO24l6rcwury8gNFTgjeWoaZ9",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-03-26-14",
  "object": "text_completion"
}

In [50]:
# Test example 5: the prompt plus its gold label in 'completion'.
test_r1[5]

{'prompt': 'Premise: Michael T. Scuse (born 1954) is an American public official. He was the acting United States Deputy Secretary of Agriculture, and, following the resignation of Tom Vilsack on January 13, 2017, was acting United States Secretary of Agriculture until Donald Trump took office as President. He also served as Under Secretary of Agriculture for Farm and Foreign Agricultural Services since 2012.\n\nHypothesis: The US Secretary of Agriculture is assigned Secret Service for protection purposes.\n\nLabel: ',
 'completion': 'Neutral\n'}

In [51]:
# Ask the fine-tuned model to label test example 5.
# temperature=0 makes decoding greedy, so the predicted label is reproducible
# from run to run (sampling at 0.3 can occasionally flip a prediction).
openai.Completion.create(
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-03-26-14',
    prompt=test_r1[5]['prompt'],
    temperature=0,
    max_tokens=10,
    stop=['\n'],  # training completions end with a newline
    top_p=1,
    frequency_penalty=0.5,
    presence_penalty=0,
)

<OpenAIObject text_completion id=cmpl-4Bplsj6FbMF8o0TNczocCI9rBOkuv at 0x7fb6737b4900> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "Neutral"
    }
  ],
  "created": 1638725964,
  "id": "cmpl-4Bplsj6FbMF8o0TNczocCI9rBOkuv",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-03-26-14",
  "object": "text_completion"
}