From 905409ff4ccc3327e751ebaca66252c2ad728f42 Mon Sep 17 00:00:00 2001
From: Shane Adams
Date: Thu, 31 Aug 2023 14:58:01 -0700
Subject: [PATCH 1/5] price estimate

---
 src/together/commands/finetune.py |  8 +++++++
 src/together/finetune.py          | 37 +++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/src/together/commands/finetune.py b/src/together/commands/finetune.py
index f59bd9bf..a9c22864 100644
--- a/src/together/commands/finetune.py
+++ b/src/together/commands/finetune.py
@@ -35,6 +35,13 @@ def _add_create(parser: argparse._SubParsersAction[argparse.ArgumentParser]) ->
         required=True,
         type=str,
     )
+    subparser.add_argument(
+        "--estimate-price",
+        "-e",
+        help="Estimate the price of the fine tune job",
+        required=False,
+        action='store_true'
+    )
     # subparser.add_argument(
     #     "--validation-file",
     #     "-v",
@@ -290,6 +297,7 @@ def _run_create(args: argparse.Namespace) -> None:
         # checkpoint_steps=args.checkpoint_steps,
         suffix=args.suffix,
         wandb_api_key=args.wandb_api_key if not args.no_wandb_api_key else None,
+        estimate_price=args.estimate_price,
     )
 
     print(json.dumps(response, indent=4))
diff --git a/src/together/finetune.py b/src/together/finetune.py
index 552b7c07..1d349f0a 100644
--- a/src/together/finetune.py
+++ b/src/together/finetune.py
@@ -11,6 +11,18 @@
 
 logger = get_logger(str(__name__), log_level=together.log_level)
 
+# this will change soon to be data driven and give a clearer estimate
+def model_param_count( name ):
+    pcount = { "togethercomputer/RedPajama-INCITE-7B-Chat": 6857302016, "togethercomputer/RedPajama-INCITE-7B-Base": 6857302016, "togethercomputer/RedPajama-INCITE-7B-Instruct": 6857302016, "togethercomputer/RedPajama-INCITE-Chat-3B-v1": 2775864320, "togethercomputer/RedPajama-INCITE-Base-3B-v1": 2775864320, "togethercomputer/RedPajama-INCITE-Instruct-3B-v1": 2775864320, "togethercomputer/Pythia-Chat-Base-7B": 6857302016, "togethercomputer/llama-2-7b": 6738415616, "togethercomputer/llama-2-7b-chat": 6738415616, "togethercomputer/llama-2-13b": 13015864320, "togethercomputer/llama-2-13b-chat": 13015864320, "togethercomputer/LLaMA-2-7B-32K": 6738415616, "togethercomputer/Llama-2-7B-32K-Instruct": 6738415616, "togethercomputer/CodeLlama-7b": 6738546688, "togethercomputer/CodeLlama-7b-Python": 6738546688, "togethercomputer/CodeLlama-7b-Instruct": 6738546688, "togethercomputer/CodeLlama-13b": 13016028160, "togethercomputer/CodeLlama-13b-Python": 13016028160, "togethercomputer/CodeLlama-13b-Instruct": 13016028160 }
+    return pcount[name]
+    if "3b" in name.casefold():
+        return 3000000000
+    elif "7b" in name.casefold():
+        return 7000000000
+    elif "13b" in name.casefold():
+        return 13000000000
+    return 13000000000
+
 
 class Finetune:
     def __init__(
@@ -38,6 +50,7 @@ def create(
             str
         ] = None,  # resulting finetuned model name will include the suffix
         wandb_api_key: Optional[str] = None,
+        estimate_price: bool = False,
     ) -> Dict[Any, Any]:
         if n_epochs is None or n_epochs < 1:
             logger.fatal("The number of epochs must be specified")
@@ -94,6 +107,30 @@ def create(
             logger.critical(training_file_feedback)
             raise together.FileTypeError(training_file_feedback)
 
+        if estimate_price:
+            param_size = model_param_count(model)
+            print(param_size)
+            if param_size == 0:
+                error = f"Unknown model {model}. Cannot estimate price. Please check the name of the model"
+                raise together.FileTypeError(error)
+
+            for file in uploaded_files['data']:
+                if file["id"] == parameter_payload["training_file"]:
+                    ## This is the file
+                    byte_count = file["bytes"]
+                    token_estimate = int(file["bytes"] / 4)
+                    data = {
+                        "method" : "together_getPrice",
+                        "params" : [ model, "FT", { "tokens" : token_estimate, "epochs" : n_epochs, "parameters" : model_param_count( model ) } ], "id" : 1 }
+                    r = requests.post( "https://computer.together.xyz/", json=data )
+                    estimate = r.json()["result"]["total"]
+                    estimate /= 1000000000
+                    training_file_feedback = (
+                        f"A rough price estimate for this job is ${estimate:.2f} USD. The estimated number of tokens is {token_estimate} tokens. Accurate pricing is not available until full tokenization has been performed. The actual price might be higher or lower depending on how the data is tokenized. Our token estimate here is number of bytes in the training file, {byte_count} bytes, divided by an average token length of 4 bytes. We currently have a per job minimum of $5.00 USD."
+                    )
+                    print(training_file_feedback)
+                    exit()
+
         # Send POST request to SUBMIT FINETUNE JOB
         # HTTP headers for authorization
         headers = {
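For reviewers who want to poke at the pricing endpoint without going through the client, here is a minimal standalone sketch of the together_getPrice call this patch wires up. The endpoint URL, method name, params shape, and the divide-by-1e9 scaling of the result are taken from the diff above; the byte count, epoch count, and model choice are placeholder values, and the parameter count is the llama-2-7b entry from the pcount table.

    import requests

    # Placeholder training-file size; in the patch this comes from the
    # uploaded file's "bytes" field.
    byte_count = 10_000_000

    # Token estimate used by the patch: bytes divided by an average token
    # length of 4 bytes.
    token_estimate = byte_count // 4

    payload = {
        "method": "together_getPrice",
        "params": [
            "togethercomputer/llama-2-7b",  # model to fine-tune
            "FT",  # fine-tune pricing, per the diff
            {
                "tokens": token_estimate,
                "epochs": 1,
                "parameters": 6738415616,  # llama-2-7b entry in pcount
            },
        ],
        "id": 1,
    }

    r = requests.post("https://computer.together.xyz/", json=payload)
    # The returned total is scaled such that the patch divides it by 1e9
    # to get USD.
    estimate_usd = r.json()["result"]["total"] / 1_000_000_000
    print(f"Rough estimate: ${estimate_usd:.2f} USD")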
From 209cf15a51536cb0597c5e7a7a8f2418f45b3cd2 Mon Sep 17 00:00:00 2001
From: Shane Adams
Date: Thu, 31 Aug 2023 15:06:05 -0700
Subject: [PATCH 2/5] fix lint

---
 src/together/finetune.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/together/finetune.py b/src/together/finetune.py
index 76f475de..818f6e52 100644
--- a/src/together/finetune.py
+++ b/src/together/finetune.py
@@ -12,7 +12,7 @@
 logger = get_logger(str(__name__), log_level=together.log_level)
 
 # this will change soon to be data driven and give a clearer estimate
-def model_param_count( name ):
+def model_param_count( name : str ):
     pcount = { "togethercomputer/RedPajama-INCITE-7B-Chat": 6857302016, "togethercomputer/RedPajama-INCITE-7B-Base": 6857302016, "togethercomputer/RedPajama-INCITE-7B-Instruct": 6857302016, "togethercomputer/RedPajama-INCITE-Chat-3B-v1": 2775864320, "togethercomputer/RedPajama-INCITE-Base-3B-v1": 2775864320, "togethercomputer/RedPajama-INCITE-Instruct-3B-v1": 2775864320, "togethercomputer/Pythia-Chat-Base-7B": 6857302016, "togethercomputer/llama-2-7b": 6738415616, "togethercomputer/llama-2-7b-chat": 6738415616, "togethercomputer/llama-2-13b": 13015864320, "togethercomputer/llama-2-13b-chat": 13015864320, "togethercomputer/LLaMA-2-7B-32K": 6738415616, "togethercomputer/Llama-2-7B-32K-Instruct": 6738415616, "togethercomputer/CodeLlama-7b": 6738546688, "togethercomputer/CodeLlama-7b-Python": 6738546688, "togethercomputer/CodeLlama-7b-Instruct": 6738546688, "togethercomputer/CodeLlama-13b": 13016028160, "togethercomputer/CodeLlama-13b-Python": 13016028160, "togethercomputer/CodeLlama-13b-Instruct": 13016028160 }
     return pcount[name]
@@ -117,7 +117,7 @@ def create(
             if file["id"] == parameter_payload["training_file"]:
                 ## This is the file
                 byte_count = file["bytes"]
-                token_estimate = int(file["bytes"] / 4)
+                token_estimate = int(int(file["bytes"]) / 4)
                 data = {
                     "method" : "together_getPrice",
                     "params" : [ model, "FT", { "tokens" : token_estimate, "epochs" : n_epochs, "parameters" : model_param_count( model ) } ], "id" : 1 }
From f5eff6a1a2731b62bb9128c9fcd3a27f2dc382dc Mon Sep 17 00:00:00 2001
From: orangetin
Date: Thu, 31 Aug 2023 17:09:43 -0500
Subject: [PATCH 3/5] Fix lint

---
 src/together/commands/finetune.py |  2 +-
 src/together/finetune.py          | 69 ++++++++++++++++++++++---------
 2 files changed, 51 insertions(+), 20 deletions(-)

diff --git a/src/together/commands/finetune.py b/src/together/commands/finetune.py
index dcd7c3d4..f2a945d4 100644
--- a/src/together/commands/finetune.py
+++ b/src/together/commands/finetune.py
@@ -39,7 +39,7 @@ def _add_create(parser: argparse._SubParsersAction[argparse.ArgumentParser]) ->
         "-e",
         help="Estimate the price of the fine tune job",
         required=False,
-        action='store_true'
+        action="store_true",
     )
     # subparser.add_argument(
     #     "--validation-file",
diff --git a/src/together/finetune.py b/src/together/finetune.py
index 76f475de..801b19ce 100644
--- a/src/together/finetune.py
+++ b/src/together/finetune.py
@@ -11,18 +11,41 @@
 
 logger = get_logger(str(__name__), log_level=together.log_level)
 
+
 # this will change soon to be data driven and give a clearer estimate
-def model_param_count( name ):
-    pcount = { "togethercomputer/RedPajama-INCITE-7B-Chat": 6857302016, "togethercomputer/RedPajama-INCITE-7B-Base": 6857302016, "togethercomputer/RedPajama-INCITE-7B-Instruct": 6857302016, "togethercomputer/RedPajama-INCITE-Chat-3B-v1": 2775864320, "togethercomputer/RedPajama-INCITE-Base-3B-v1": 2775864320, "togethercomputer/RedPajama-INCITE-Instruct-3B-v1": 2775864320, "togethercomputer/Pythia-Chat-Base-7B": 6857302016, "togethercomputer/llama-2-7b": 6738415616, "togethercomputer/llama-2-7b-chat": 6738415616, "togethercomputer/llama-2-13b": 13015864320, "togethercomputer/llama-2-13b-chat": 13015864320, "togethercomputer/LLaMA-2-7B-32K": 6738415616, "togethercomputer/Llama-2-7B-32K-Instruct": 6738415616, "togethercomputer/CodeLlama-7b": 6738546688, "togethercomputer/CodeLlama-7b-Python": 6738546688, "togethercomputer/CodeLlama-7b-Instruct": 6738546688, "togethercomputer/CodeLlama-13b": 13016028160, "togethercomputer/CodeLlama-13b-Python": 13016028160, "togethercomputer/CodeLlama-13b-Instruct": 13016028160 }
-    return pcount[name]
-    if "3b" in name.casefold():
-        return 3000000000
-    elif "7b" in name.casefold():
-        return 7000000000
-    elif "13b" in name.casefold():
+def model_param_count(name: str) -> int:
+    pcount = {
+        "togethercomputer/RedPajama-INCITE-7B-Chat": 6857302016,
+        "togethercomputer/RedPajama-INCITE-7B-Base": 6857302016,
+        "togethercomputer/RedPajama-INCITE-7B-Instruct": 6857302016,
+        "togethercomputer/RedPajama-INCITE-Chat-3B-v1": 2775864320,
+        "togethercomputer/RedPajama-INCITE-Base-3B-v1": 2775864320,
+        "togethercomputer/RedPajama-INCITE-Instruct-3B-v1": 2775864320,
+        "togethercomputer/Pythia-Chat-Base-7B": 6857302016,
+        "togethercomputer/llama-2-7b": 6738415616,
+        "togethercomputer/llama-2-7b-chat": 6738415616,
+        "togethercomputer/llama-2-13b": 13015864320,
+        "togethercomputer/llama-2-13b-chat": 13015864320,
+        "togethercomputer/LLaMA-2-7B-32K": 6738415616,
+        "togethercomputer/Llama-2-7B-32K-Instruct": 6738415616,
+        "togethercomputer/CodeLlama-7b": 6738546688,
+        "togethercomputer/CodeLlama-7b-Python": 6738546688,
+        "togethercomputer/CodeLlama-7b-Instruct": 6738546688,
+        "togethercomputer/CodeLlama-13b": 13016028160,
+        "togethercomputer/CodeLlama-13b-Python": 13016028160,
+        "togethercomputer/CodeLlama-13b-Instruct": 13016028160,
+    }
+    try:
+        return pcount[name]
+    except Exception:
+        if "3b" in name.casefold():
+            return 3000000000
+        elif "7b" in name.casefold():
+            return 7000000000
+        elif "13b" in name.casefold():
+            return 13000000000
         return 13000000000
-    return 13000000000
+
 
 class Finetune:
     def __init__(
@@ -35,7 +58,7 @@ def create(
         self,
         training_file: str,  # training file_id
         # validation_file: Optional[str] = None,  # validation file_id
-        model: Optional[str] = None,
+        model: str,
         n_epochs: int = 1,
         n_checkpoints: Optional[int] = 1,
         batch_size: Optional[int] = 32,
@@ -113,20 +136,28 @@ def create(
             error = f"Unknown model {model}. Cannot estimate price. Please check the name of the model"
             raise together.FileTypeError(error)
 
-        for file in uploaded_files['data']:
+        for file in uploaded_files["data"]:
             if file["id"] == parameter_payload["training_file"]:
                 ## This is the file
                 byte_count = file["bytes"]
-                token_estimate = int(file["bytes"] / 4)
-                data = {
-                    "method" : "together_getPrice",
-                    "params" : [ model, "FT", { "tokens" : token_estimate, "epochs" : n_epochs, "parameters" : model_param_count( model ) } ], "id" : 1 }
-                r = requests.post( "https://computer.together.xyz/", json=data )
+                token_estimate = int(int(file["bytes"]) / 4)
+                data = {
+                    "method": "together_getPrice",
+                    "params": [
+                        model,
+                        "FT",
+                        {
+                            "tokens": token_estimate,
+                            "epochs": n_epochs,
+                            "parameters": model_param_count(model),
+                        },
+                    ],
+                    "id": 1,
+                }
+                r = requests.post("https://computer.together.xyz/", json=data)
                 estimate = r.json()["result"]["total"]
                 estimate /= 1000000000
-                training_file_feedback = (
-                    f"A rough price estimate for this job is ${estimate:.2f} USD. The estimated number of tokens is {token_estimate} tokens. Accurate pricing is not available until full tokenization has been performed. The actual price might be higher or lower depending on how the data is tokenized. Our token estimate here is number of bytes in the training file, {byte_count} bytes, divided by an average token length of 4 bytes. We currently have a per job minimum of $5.00 USD."
-                )
+                training_file_feedback = f"A rough price estimate for this job is ${estimate:.2f} USD. The estimated number of tokens is {token_estimate} tokens. Accurate pricing is not available until full tokenization has been performed. The actual price might be higher or lower depending on how the data is tokenized. Our token estimate here is number of bytes in the training file, {byte_count} bytes, divided by an average token length of 4 bytes. We currently have a per job minimum of $5.00 USD."
                 print(training_file_feedback)
                 exit()
From 2676aed6668ee2ba8cf58d92ed77cf22f5e367bc Mon Sep 17 00:00:00 2001
From: Shane Adams
Date: Thu, 31 Aug 2023 15:46:29 -0700
Subject: [PATCH 4/5] remove string checks for size and update copy per Nicki

---
 src/together/finetune.py | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/together/finetune.py b/src/together/finetune.py
index 801b19ce..d256a3be 100644
--- a/src/together/finetune.py
+++ b/src/together/finetune.py
@@ -38,14 +38,7 @@ def model_param_count(name: str) -> int:
     try:
         return pcount[name]
     except Exception:
-        if "3b" in name.casefold():
-            return 3000000000
-        elif "7b" in name.casefold():
-            return 7000000000
-        elif "13b" in name.casefold():
-            return 13000000000
-        return 13000000000
-
+        return 0
 
 class Finetune:
     def __init__(
@@ -131,7 +124,6 @@ def create(
 
         if estimate_price:
             param_size = model_param_count(model)
-            print(param_size)
             if param_size == 0:
                 error = f"Unknown model {model}. Cannot estimate price. Please check the name of the model"
                 raise together.FileTypeError(error)
@@ -157,7 +149,7 @@ def create(
                 r = requests.post("https://computer.together.xyz/", json=data)
                 estimate = r.json()["result"]["total"]
                 estimate /= 1000000000
-                training_file_feedback = f"A rough price estimate for this job is ${estimate:.2f} USD. The estimated number of tokens is {token_estimate} tokens. Accurate pricing is not available until full tokenization has been performed. The actual price might be higher or lower depending on how the data is tokenized. Our token estimate here is number of bytes in the training file, {byte_count} bytes, divided by an average token length of 4 bytes. We currently have a per job minimum of $5.00 USD."
+                training_file_feedback = f"A rough price estimate for this job is ${estimate:.2f} USD. The estimated number of tokens is {token_estimate} tokens. Accurate pricing is not available until full tokenization has been performed. The actual price might be higher or lower depending on how the data is tokenized. Our token estimate is based on the number of bytes in the training file, {byte_count} bytes, divided by an average token length of 4 bytes. We currently have a per job minimum of $5.00 USD."
                 print(training_file_feedback)
                 exit()
From ca345dcef6acab8e735ccd135e4a7e23fd3f8862 Mon Sep 17 00:00:00 2001
From: Shane Adams
Date: Thu, 31 Aug 2023 15:55:22 -0700
Subject: [PATCH 5/5] jesus

---
 src/together/finetune.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/together/finetune.py b/src/together/finetune.py
index d256a3be..b423454b 100644
--- a/src/together/finetune.py
+++ b/src/together/finetune.py
@@ -40,6 +40,7 @@ def model_param_count(name: str) -> int:
     except Exception:
         return 0
 
+
 class Finetune:
     def __init__(
         self,
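With the series applied, the feature can be exercised either via the new --estimate-price / -e flag on the fine-tune create subcommand from patch 1, or directly through the Python API. The sketch below is hypothetical: the Finetune constructor arguments are not shown in these diffs, so it assumes an instance can be built with defaults the way the CLI path does, and the training-file id is made up.

    from together.finetune import Finetune

    # Assumed no-arg construction; __init__ is truncated in the diffs above.
    ft = Finetune()

    # Prints the rough price estimate and exits instead of submitting the job.
    ft.create(
        training_file="file-1234-abcd",  # hypothetical uploaded-file id
        model="togethercomputer/llama-2-7b",  # must be in pcount after patch 4
        n_epochs=1,
        estimate_price=True,
    )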