diff --git a/examples/embeddings/Code_search.ipynb b/examples/embeddings/Code_search.ipynb
index d440161493..ca60566a7a 100644
--- a/examples/embeddings/Code_search.ipynb
+++ b/examples/embeddings/Code_search.ipynb
@@ -260,7 +260,7 @@
     "def format_inferrer_validator(df):\n",
     "    \"\"\"\n",
     "    This validator will infer the likely fine-tuning format of the data, and display it to the user if it is classification.\n",
-    "    It will also suggest to use ada, --no_packing and explain train/validation split benefits.\n",
+    "    It will also suggest to use ada and explain train/validation split benefits.\n",
     "    \"\"\"\n",
     "    ft_type = infer_task_type(df)\n",
     "    immediate_msg = None\n",
diff --git a/examples/finetuning/finetuning-classification.ipynb b/examples/finetuning/finetuning-classification.ipynb
index ff576ba35a..60b8896ecc 100644
--- a/examples/finetuning/finetuning-classification.ipynb
+++ b/examples/finetuning/finetuning-classification.ipynb
@@ -257,7 +257,7 @@
     "\n",
     "- Your file contains 1197 prompt-completion pairs\n",
     "- Based on your data it seems like you're trying to fine-tune a model for classification\n",
-    "- For classification, we recommend you try one of the faster and cheaper models, such as `ada`. You should also set the `--no_packing` parameter when fine-tuning\n",
+    "- For classification, we recommend you try one of the faster and cheaper models, such as `ada`\n",
     "- For classification, you can estimate the expected model performance by keeping a held out dataset, which is not used for training\n",
     "- There are 11 examples that are very long. These are rows: [134, 200, 281, 320, 404, 595, 704, 838, 1113, 1139, 1174]\n",
     "For conditional generation, and for classification the examples shouldn't be longer than 2048 tokens.\n",
@@ -277,7 +277,7 @@
     "Feel free to take a look!\n",
     "\n",
     "Now use that file when fine-tuning:\n",
-    "> openai api fine_tunes.create -t \"sport2_prepared_train.jsonl\" -v \"sport2_prepared_valid.jsonl\" --no_packing --compute_classification_metrics --classification_positive_class \" baseball\"\n",
+    "> openai api fine_tunes.create -t \"sport2_prepared_train.jsonl\" -v \"sport2_prepared_valid.jsonl\" --compute_classification_metrics --classification_positive_class \" baseball\"\n",
     "\n",
     "After you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `\\n\\n###\\n\\n` for the model to start generating completions, rather than continuing with the prompt.\n",
     "Once your model starts training, it'll approximately take 30.8 minutes to train a `curie` model, and less for `ada` and `babbage`. Queue will approximately take half an hour per job ahead of you.\n"
@@ -301,7 +301,7 @@
    "cell_type": "markdown",
    "source": [
     "## Fine-tuning\n",
-    "The tool suggests we run the following command to train the dataset. Since this is a classification task, we would like to know what the generalization performance on the provided validation set is for our classification use case. The tool suggests to add `--compute_classification_metrics --classification_positive_class \" baseball\"` in order to compute the classification metrics. Classification performs better with a hyperparameter `--no_packing`.\n",
+    "The tool suggests we run the following command to train the dataset. Since this is a classification task, we would like to know what the generalization performance on the provided validation set is for our classification use case. The tool suggests to add `--compute_classification_metrics --classification_positive_class \" baseball\"` in order to compute the classification metrics.\n",
     "\n",
     "We can simply copy the suggested command from the CLI tool. We specifically add `-m ada` to fine-tune a cheaper and faster ada model, which is usually comperable in performance to slower and more expensive models on classification use cases. "
    ],
    "metadata": {}
@@ -311,7 +311,7 @@
    "cell_type": "code",
    "execution_count": 9,
    "source": [
-    "!openai api fine_tunes.create -t \"sport2_prepared_train.jsonl\" -v \"sport2_prepared_valid.jsonl\" --no_packing --compute_classification_metrics --classification_positive_class \" baseball\" -m ada"
+    "!openai api fine_tunes.create -t \"sport2_prepared_train.jsonl\" -v \"sport2_prepared_valid.jsonl\" --compute_classification_metrics --classification_positive_class \" baseball\" -m ada"
    ],
    "outputs": [
     {
@@ -737,4 +737,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
\ No newline at end of file
+}
diff --git a/examples/finetuning/olympics-3-train-qa.ipynb b/examples/finetuning/olympics-3-train-qa.ipynb
index ebf89a5c9c..76fb3f3ca8 100644
--- a/examples/finetuning/olympics-3-train-qa.ipynb
+++ b/examples/finetuning/olympics-3-train-qa.ipynb
@@ -373,7 +373,7 @@
     }
    ],
    "source": [
-    "!openai api fine_tunes.create -t \"olympics-data/discriminator_train.jsonl\" -v \"olympics-data/discriminator_test.jsonl\" --no_packing --batch_size 16 --compute_classification_metrics --classification_positive_class \" yes\" --model ada"
+    "!openai api fine_tunes.create -t \"olympics-data/discriminator_train.jsonl\" -v \"olympics-data/discriminator_test.jsonl\" --batch_size 16 --compute_classification_metrics --classification_positive_class \" yes\" --model ada"
    ]
   },
   {
@@ -391,7 +391,7 @@
     }
    ],
    "source": [
-    "!openai api fine_tunes.create -t \"olympics-data/qa_train.jsonl\" -v \"olympics-data/qa_test.jsonl\" --no_packing --batch_size 16"
+    "!openai api fine_tunes.create -t \"olympics-data/qa_train.jsonl\" -v \"olympics-data/qa_test.jsonl\" --batch_size 16"
    ]
   },
   {
diff --git a/openai/cli.py b/openai/cli.py
index 1beca23d17..6c07b49135 100644
--- a/openai/cli.py
+++ b/openai/cli.py
@@ -397,7 +397,6 @@ def create(cls, args):
             "batch_size",
             "learning_rate_multiplier",
             "prompt_loss_weight",
-            "use_packing",
             "compute_classification_metrics",
             "classification_n_classes",
             "classification_positive_class",
@@ -891,23 +890,6 @@ def help(args):
         "learning rate is determined by the original learning rate used for "
         "pretraining multiplied by this value.",
     )
-    sub.add_argument(
-        "--use_packing",
-        action="store_true",
-        dest="use_packing",
-        help="On classification tasks, we recommend not setting this flag. "
-        "On all other tasks, we recommend setting it. "
-        "When set, we pack as many prompt-completion pairs as possible into each "
-        "training example. This greatly increases the speed of a fine-tuning job, "
-        "often without negatively affecting model performance.",
-    )
-    sub.add_argument(
-        "--no_packing",
-        action="store_false",
-        dest="use_packing",
-        help="Disables the packing flag (see --use_packing for description).",
-    )
-    sub.set_defaults(use_packing=None)
     sub.add_argument(
         "--prompt_loss_weight",
         type=float,
diff --git a/openai/validators.py b/openai/validators.py
index 356f461506..0d4d85d4f2 100644
--- a/openai/validators.py
+++ b/openai/validators.py
@@ -2,7 +2,6 @@
 import sys
 from typing import Any, Callable, NamedTuple, Optional
 
-import numpy as np
 import pandas as pd
 
 
@@ -535,12 +534,12 @@ def read_any_format(fname, fields=["prompt", "completion"]):
 def format_inferrer_validator(df):
     """
     This validator will infer the likely fine-tuning format of the data, and display it to the user if it is classification.
-    It will also suggest to use ada, --no_packing and explain train/validation split benefits.
+    It will also suggest to use ada and explain train/validation split benefits.
     """
     ft_type = infer_task_type(df)
     immediate_msg = None
     if ft_type == "classification":
-        immediate_msg = f"\n- Based on your data it seems like you're trying to fine-tune a model for {ft_type}\n- For classification, we recommend you try one of the faster and cheaper models, such as `ada`. You should also set the `--no_packing` parameter when fine-tuning\n- For classification, you can estimate the expected model performance by keeping a held out dataset, which is not used for training"
+        immediate_msg = f"\n- Based on your data it seems like you're trying to fine-tune a model for {ft_type}\n- For classification, we recommend you try one of the faster and cheaper models, such as `ada`\n- For classification, you can estimate the expected model performance by keeping a held out dataset, which is not used for training"
     return Remediation(name="num_examples", immediate_msg=immediate_msg)
 
 
@@ -634,27 +633,6 @@ def get_classification_hyperparams(df):
     return n_classes, pos_class
 
 
-def get_batch_size_suggestion(df, no_packing):
-    """
-    Suggest the batch size based on the number of examples after packing optionally is applied.
-    """
-    n_examples, n_characters = (
-        len(df),
-        df.completion.str.len().sum() + df.prompt.str.len().sum(),
-    )
-    BATCH_SIZE_TO_N_EXAMPLES_RATIO = 0.002
-    BATCH_SIZE_TO_N_CHARACTERS_RATIO = BATCH_SIZE_TO_N_EXAMPLES_RATIO / 10_000
-
-    if no_packing:
-        batch_size = BATCH_SIZE_TO_N_EXAMPLES_RATIO * n_examples
-    else:
-        batch_size = BATCH_SIZE_TO_N_CHARACTERS_RATIO * n_characters
-
-    batch_size = max(1, int(2 ** np.ceil(np.log2(batch_size))))
-    batch_size_suggestion = f" --batch_size {batch_size}"
-    return batch_size_suggestion
-
-
 def write_out_file(df, fname, any_remediations, auto_accept):
     """
     This function will write out a dataframe to a file, if the user would like to proceed, and also offer a fine-tuning command with the newly created file.
@@ -670,14 +648,7 @@ def write_out_file(df, fname, any_remediations, auto_accept):
         if accept_suggestion(input_text, auto_accept):
             split = True
 
-    no_packing = ft_format == "classification" or (
-        ft_format == "conditional generation" and len(df) < 1000
-    )
     additional_params = ""
-    if no_packing:
-        additional_params = " --no_packing"
-    additional_params += get_batch_size_suggestion(df, no_packing)
-
     common_prompt_suffix_new_line_handled = common_prompt_suffix.replace("\n", "\\n")
     common_completion_suffix_new_line_handled = common_completion_suffix.replace(
         "\n", "\\n"
diff --git a/openai/version.py b/openai/version.py
index d0dcdac6c8..bc927384a8 100644
--- a/openai/version.py
+++ b/openai/version.py
@@ -1 +1 @@
-VERSION = "0.11.4"
+VERSION = "0.11.5"