In [1]:
import openai

from fp_dataset_artifacts.utils import init_openai
from fp_dataset_artifacts.boolq import format_question, format_answer
from datasets import list_datasets, load_dataset, list_metrics, load_metric

# Set up the OpenAI client through the project helper imported above
# (presumably it configures the API key; see fp_dataset_artifacts.utils to confirm).
init_openai()

# Load the BoolQ dataset; the bare `data` expression displays the available splits.
data = load_dataset('boolq')
data

Using custom data configuration default
Reusing dataset boolq (/home/x/.cache/huggingface/datasets/boolq/default/0.1.0/bf0dd57da941c50de94ae3ce3cef7fea48c08f337a4b7aac484e9dddc5aa24e5)


  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'passage'],
        num_rows: 9427
    })
    validation: Dataset({
        features: ['question', 'answer', 'passage'],
        num_rows: 3270
    })
})

In [43]:
def map_finetune_train(x, completion_suffix=''):
    """Convert one BoolQ example into a prompt/completion fine-tuning record.

    Args:
        x: Mapping with 'question', 'passage' and 'answer' entries (one
            dataset row, as passed by ``Dataset.map``).
        completion_suffix: Text appended to the completion, e.g. '\\n' so the
            fine-tuned model learns an explicit stop sequence. Defaults to ''
            which keeps the output identical for existing one-argument callers.

    Returns:
        dict with two keys:
            'prompt': the passage, a blank line, "Q: <formatted question>",
                a blank line, then "A: ".
            'completion': the formatted answer followed by ``completion_suffix``.
    """
    question = format_question(x['question'])
    passage = x['passage']
    answer = format_answer(x['answer'])

    return {
        'prompt': f"{passage}\n\nQ: {question}\n\nA: ",
        # With the default empty suffix this is exactly the formatted answer.
        'completion': answer + completion_suffix,
    }

In [38]:
# Turn the training split into prompt/completion records and drop the
# original BoolQ columns so only the fine-tuning fields remain.
train = (
    data['train']
    .map(map_finetune_train)
    .remove_columns(['question', 'answer', 'passage'])
)

Loading cached processed dataset at /home/x/.cache/huggingface/datasets/boolq/default/0.1.0/bf0dd57da941c50de94ae3ce3cef7fea48c08f337a4b7aac484e9dddc5aa24e5/cache-5ebed985e83a6877.arrow


In [51]:
# Spot-check the first converted training record (prompt text plus its completion).
train[0]

{'prompt': 'Persian (/ˈpɜːrʒən, -ʃən/), also known by its endonym Farsi (فارسی fārsi (fɒːɾˈsiː) ( listen)), is one of the Western Iranian languages within the Indo-Iranian branch of the Indo-European language family. It is primarily spoken in Iran, Afghanistan (officially known as Dari since 1958), and Tajikistan (officially known as Tajiki since the Soviet era), and some other regions which historically were Persianate societies and considered part of Greater Iran. It is written in the Persian alphabet, a modified variant of the Arabic script, which itself evolved from the Aramaic alphabet.\n\nQ: Do iran and afghanistan speak the same language?\n\nA: ',
 'completion': 'Yes'}

In [41]:
# Export the training records to a .jsonl file for upload; the displayed
# return value matches the byte size later reported for the uploaded file.
train.to_json('boolq_finetune_train.jsonl')

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

6223009

In [44]:
# Same conversion for the validation split: build prompt/completion records,
# drop the source columns, then export them for upload.
valid = (
    data['validation']
    .map(map_finetune_train)
    .remove_columns(['question', 'answer', 'passage'])
)
valid.to_json('boolq_finetune_valid.jsonl')

  0%|          | 0/3270 [00:00<?, ?ex/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

2134729

In [45]:
# Upload the training file for fine-tuning.
# The file is opened in binary mode inside a `with` block so the exact bytes on
# disk are sent and the handle is closed once the upload call returns
# (the bare open() used previously was never closed).
with open('boolq_finetune_train.jsonl', 'rb') as train_jsonl:
    train_upload = openai.File.create(file=train_jsonl, purpose='fine-tune')

# Display the API response; its "id" is what the fine-tune request refers to.
train_upload

<File file id=file-gJIlaEZ4vKpcMs8kOd8Tzifx at 0x7f1cfc7172c0> JSON: {
  "bytes": 6223009,
  "created_at": 1638669386,
  "filename": "boolq_finetune_train.jsonl",
  "id": "file-gJIlaEZ4vKpcMs8kOd8Tzifx",
  "object": "file",
  "purpose": "fine-tune",
  "status": "uploaded",
  "status_details": null
}

In [46]:
# Upload the validation file for fine-tuning.
# The file is opened in binary mode inside a `with` block so the exact bytes on
# disk are sent and the handle is closed once the upload call returns
# (the bare open() used previously was never closed).
with open('boolq_finetune_valid.jsonl', 'rb') as valid_jsonl:
    valid_upload = openai.File.create(file=valid_jsonl, purpose='fine-tune')

# Display the API response; its "id" is what the fine-tune request refers to.
valid_upload

<File file id=file-MahHYpnc6hjHw2EZQ1CH3jOw at 0x7f1cfc735a90> JSON: {
  "bytes": 2134729,
  "created_at": 1638669392,
  "filename": "boolq_finetune_valid.jsonl",
  "id": "file-MahHYpnc6hjHw2EZQ1CH3jOw",
  "object": "file",
  "purpose": "fine-tune",
  "status": "uploaded",
  "status_details": null
}

In [48]:
# Start a curie fine-tune on the two uploaded files. Classification metrics
# are requested for the binary task, with 'Yes' as the positive class.
finetune_request = dict(
    model='curie',
    n_epochs=4,
    training_file='file-gJIlaEZ4vKpcMs8kOd8Tzifx',
    validation_file='file-MahHYpnc6hjHw2EZQ1CH3jOw',
    compute_classification_metrics=True,
    classification_n_classes=2,
    classification_positive_class='Yes',
)
openai.FineTune.create(**finetune_request)

<FineTune fine-tune id=ft-9ZmeSNYkAUhZEmXkoiigSMdJ at 0x7f1cfc717a40> JSON: {
  "created_at": 1638669489,
  "events": [
    {
      "created_at": 1638669489,
      "level": "info",
      "message": "Created fine-tune: ft-9ZmeSNYkAUhZEmXkoiigSMdJ",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": null,
    "classification_n_classes": 2,
    "classification_positive_class": "Yes",
    "compute_classification_metrics": true,
    "learning_rate_multiplier": null,
    "n_epochs": 4,
    "prompt_loss_weight": 0.1,
    "use_packing": null
  },
  "id": "ft-9ZmeSNYkAUhZEmXkoiigSMdJ",
  "model": "curie",
  "object": "fine-tune",
  "organization_id": "org-5AE307Eg4rc5EAoEA2S2bwkH",
  "result_files": [],
  "status": "pending",
  "training_files": [
    {
      "bytes": 6223009,
      "created_at": 1638669386,
      "filename": "boolq_finetune_train.jsonl",
      "id": "file-gJIlaEZ4vKpcMs8kOd8Tzifx",
      "object": "file",
      "purpos

In [64]:
# Fetch the current state of the fine-tune job created above (its event log is shown below).
openai.FineTune.retrieve('ft-9ZmeSNYkAUhZEmXkoiigSMdJ')

<FineTune fine-tune id=ft-9ZmeSNYkAUhZEmXkoiigSMdJ at 0x7f1cfc328a90> JSON: {
  "created_at": 1638669489,
  "events": [
    {
      "created_at": 1638669489,
      "level": "info",
      "message": "Created fine-tune: ft-9ZmeSNYkAUhZEmXkoiigSMdJ",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638669494,
      "level": "info",
      "message": "Fine-tune enqueued. Queue number: 0",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638669499,
      "level": "info",
      "message": "Fine-tune started",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638669912,
      "level": "info",
      "message": "Completed epoch 1/4",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638670500,
      "level": "info",
      "message": "Completed epoch 2/4",
      "object": "fine-tune-event"
    },
    {
      "created_at": 1638671084,
      "level": "info",
      "message": "Completed epoch 3/4",
      "object": "fine-tune-e

In [63]:
# Show validation record 0; its prompt is sent to the fine-tuned model in the next cell.
valid[0]

{'prompt': "All biomass goes through at least some of these steps: it needs to be grown, collected, dried, fermented, distilled, and burned. All of these steps require resources and an infrastructure. The total amount of energy input into the process compared to the energy released by burning the resulting ethanol fuel is known as the energy balance (or ``energy returned on energy invested''). Figures compiled in a 2007 report by National Geographic Magazine point to modest results for corn ethanol produced in the US: one unit of fossil-fuel energy is required to create 1.3 energy units from the resulting ethanol. The energy balance for sugarcane ethanol produced in Brazil is more favorable, with one unit of fossil-fuel energy required to create 8 from the ethanol. Energy balance estimates are not easily produced, thus numerous such reports have been generated that are contradictory. For instance, a separate survey reports that production of ethanol from sugarcane, which requires a tro

In [65]:
# Query the fine-tuned curie model with the prompt of validation record 0.
# Generation is capped at 10 tokens and stops at the first newline.
prompt_0 = valid[0]['prompt']
openai.Completion.create(
    prompt=prompt_0,
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-02-38-38',
    max_tokens=10,
    stop=['\n'],
    temperature=0.3,
    top_p=1,
    presence_penalty=0,
    frequency_penalty=0.5,
)

<OpenAIObject text_completion id=cmpl-4Bbk73cd5LJmxhlQPctCm7SDfu5zz at 0x7f1cfc323860> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "Yes, than it releases?"
    }
  ],
  "created": 1638672039,
  "id": "cmpl-4Bbk73cd5LJmxhlQPctCm7SDfu5zz",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-02-38-38",
  "object": "text_completion"
}

In [66]:
# Show validation record 1; its prompt is sent to the fine-tuned model in the next cell.
valid[1]

{'prompt': "Property tax or 'house tax' is a local tax on buildings, along with appurtenant land. It is and imposed on the Possessor (not the custodian of property as per 1978, 44th amendment of constitution). It resembles the US-type wealth tax and differs from the excise-type UK rate. The tax power is vested in the states and is delegated to local bodies, specifying the valuation method, rate band, and collection procedures. The tax base is the annual rental value (ARV) or area-based rating. Owner-occupied and other properties not producing rent are assessed on cost and then converted into ARV by applying a percentage of cost, usually four percent. Vacant land is generally exempt. Central government properties are exempt. Instead a 'service charge' is permissible under executive order. Properties of foreign missions also enjoy tax exemption without requiring reciprocity. The tax is usually accompanied by service taxes, e.g., water tax, drainage tax, conservancy (sanitation) tax, ligh

In [67]:
# Query the fine-tuned curie model with the prompt of validation record 1.
# Generation is capped at 10 tokens and stops at the first newline.
prompt_1 = valid[1]['prompt']
openai.Completion.create(
    prompt=prompt_1,
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-02-38-38',
    max_tokens=10,
    stop=['\n'],
    temperature=0.3,
    top_p=1,
    presence_penalty=0,
    frequency_penalty=0.5,
)

<OpenAIObject text_completion id=cmpl-4BbkZ3yNi1LBAxb0AefU5ZZ4b23xT at 0x7f1cfc328720> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "Yes"
    }
  ],
  "created": 1638672067,
  "id": "cmpl-4BbkZ3yNi1LBAxb0AefU5ZZ4b23xT",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-02-38-38",
  "object": "text_completion"
}

In [69]:
# Show validation record 3 (gold completion 'Yes'); its prompt is queried in the next cell.
valid[3]

{'prompt': 'Harry Potter and the Escape from Gringotts is an indoor steel roller coaster at Universal Studios Florida, a theme park located within the Universal Orlando Resort. Similar to dark rides, the roller coaster utilizes special effects in a controlled-lighting environment and also employs motion-based 3-D projection of both animation and live-action sequences to enhance the experience. The ride, which is themed to the Gringotts Wizarding Bank, became the flagship attraction for the expanded Wizarding World of Harry Potter when it opened on July 8, 2014.\n\nQ: Is harry potter and the escape from gringotts a roller coaster ride?\n\nA: ',
 'completion': 'Yes'}

In [70]:
# Query the fine-tuned curie model with the prompt of validation record 3.
# Generation is capped at 10 tokens and stops at the first newline.
prompt_3 = valid[3]['prompt']
openai.Completion.create(
    prompt=prompt_3,
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-02-38-38',
    max_tokens=10,
    stop=['\n'],
    temperature=0.3,
    top_p=1,
    presence_penalty=0,
    frequency_penalty=0.5,
)

<OpenAIObject text_completion id=cmpl-4BbkweIPDS3f4pjSJw78C5bcANmCw at 0x7f1cfc3426d0> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "Yes"
    }
  ],
  "created": 1638672090,
  "id": "cmpl-4BbkweIPDS3f4pjSJw78C5bcANmCw",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-02-38-38",
  "object": "text_completion"
}

In [72]:
# Show validation record 5 (gold completion 'No'); its prompt is queried in the next cell.
valid[5]

{'prompt': "Barq's /ˈbɑːrks/ is an American soft drink. Its brand of root beer is notable for having caffeine. Barq's, created by Edward Barq and bottled since the turn of the 20th century, is owned by the Barq family but bottled by the Coca-Cola Company. It was known as Barq's Famous Olde Tyme Root Beer until 2012.\n\nQ: Is barq's root beer a pepsi product?\n\nA: ",
 'completion': 'No'}

In [73]:
# Query the fine-tuned curie model with the prompt of validation record 5.
# Generation is capped at 10 tokens and stops at the first newline.
prompt_5 = valid[5]['prompt']
openai.Completion.create(
    prompt=prompt_5,
    model='curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-02-38-38',
    max_tokens=10,
    stop=['\n'],
    temperature=0.3,
    top_p=1,
    presence_penalty=0,
    frequency_penalty=0.5,
)

<OpenAIObject text_completion id=cmpl-4BblGR2GF0PHvVXRy6sP121LW7i1U at 0x7f1cfc3284f0> JSON: {
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": "No, it is a Coca-Cola product."
    }
  ],
  "created": 1638672110,
  "id": "cmpl-4BblGR2GF0PHvVXRy6sP121LW7i1U",
  "model": "curie:ft-user-5hzndcnnszukksvrzrlnjn8l-2021-12-05-02-38-38",
  "object": "text_completion"
}

In [74]:
# As with SNLI, the fine-tuned model keeps generating text after the Yes/No answer.
# Appending '\n' to the end of each training completion might help with that.
# Another issue: this dataset has no test set, so the model should be trained on a
# train/test split instead.

In [2]:
# Download the fine-tune results file and save it locally as CSV.
# The download happens BEFORE the target is opened for writing: opening with
# 'wb' truncates the file immediately, so a failed download would otherwise
# wipe out a previously saved copy of the results.
results_bytes = openai.File.download('file-7mMJchteVwqmaih6gj199hXk')
with open('../results/boolq_results.csv', 'wb') as f:
    f.write(results_bytes)

In [3]:
import pandas as pd
# Read the saved results CSV; the bare `df` displays the per-step metrics table
# (training/validation loss and accuracy, plus classification metrics columns).
df = pd.read_csv('../results/boolq_results.csv')
df

Unnamed: 0,step,elapsed_tokens,elapsed_examples,training_loss,training_sequence_accuracy,training_token_accuracy,validation_loss,validation_sequence_accuracy,validation_token_accuracy,classification/accuracy,classification/precision,classification/recall,classification/auroc,classification/auprc,classification/f1.0
0,1,2049,1,0.266593,0.0,0.000000,0.273952,0.0,0.0,,,,,,
1,2,4098,2,0.207604,0.0,0.538462,,,,,,,,,
2,3,6147,3,0.195201,0.0,0.166667,,,,,,,,,
3,4,8196,4,0.207342,0.0,0.666667,,,,,,,,,
4,5,10245,5,0.201088,0.0,0.571429,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2712,2713,5558937,2713,0.063869,0.0,0.928571,,,,,,,,,
2713,2714,5560986,2714,0.039642,1.0,1.000000,,,,,,,,,
2714,2715,5563035,2715,0.034781,0.0,0.928571,,,,,,,,,
2715,2716,5565084,2716,0.042580,0.0,0.933333,,,,,,,,,
