diff --git a/clients/python/llmengine/__init__.py b/clients/python/llmengine/__init__.py
index 66efafb4..380ef213 100644
--- a/clients/python/llmengine/__init__.py
+++ b/clients/python/llmengine/__init__.py
@@ -34,6 +34,8 @@
     ListFilesResponse,
     ListFineTunesResponse,
     ListLLMEndpointsResponse,
+    ModelDownloadRequest,
+    ModelDownloadResponse,
     UploadFileResponse,
 )
 from llmengine.file import File
@@ -51,6 +53,8 @@
     "CreateFineTuneResponse",
     "DeleteFileResponse",
     "DeleteLLMEndpointResponse",
+    "ModelDownloadRequest",
+    "ModelDownloadResponse",
     "GetFileContentResponse",
     "File",
     "FineTune",
diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py
index 5effe216..34eaf0f9 100644
--- a/clients/python/llmengine/data_types.py
+++ b/clients/python/llmengine/data_types.py
@@ -495,6 +495,28 @@ class GetFineTuneEventsResponse(BaseModel):
     events: List[LLMFineTuneEvent] = Field(..., description="List of fine-tuning events.")
 
 
+class ModelDownloadRequest(BaseModel):
+    """
+    Request object for downloading a model.
+    """
+
+    model_name: str = Field(..., description="Name of the model to download.")
+    download_format: Optional[str] = Field(
+        default="hugging_face",
+        description="Desired return format for downloaded model weights (default=hugging_face).",
+    )
+
+
+class ModelDownloadResponse(BaseModel):
+    """
+    Response object for downloading a model.
+    """
+
+    urls: Dict[str, str] = Field(
+        ..., description="Dictionary of (file_name, url) pairs to download the model from."
+    )
+
+
 class UploadFileResponse(BaseModel):
     """Response object for uploading a file."""
 
diff --git a/clients/python/llmengine/model.py b/clients/python/llmengine/model.py
index b96afa1b..cd2191e3 100644
--- a/clients/python/llmengine/model.py
+++ b/clients/python/llmengine/model.py
@@ -10,6 +10,8 @@
     ListLLMEndpointsResponse,
     LLMInferenceFramework,
     LLMSource,
+    ModelDownloadRequest,
+    ModelDownloadResponse,
     ModelEndpointType,
     PostInferenceHooks,
     Quantization,
@@ -366,3 +368,51 @@ def delete(cls, model: str) -> DeleteLLMEndpointResponse:
         """
         response = cls._delete(f"v1/llm/model-endpoints/{model}", timeout=DEFAULT_TIMEOUT)
         return DeleteLLMEndpointResponse.parse_obj(response)
+
+    @classmethod
+    def download(
+        cls,
+        model_name: str,
+        download_format: str = "hugging_face",
+    ) -> ModelDownloadResponse:
+        """
+        Download a fine-tuned model.
+
+        This API can be used to download the resulting model from a fine-tuning job.
+        It takes the `model_name` and `download_format` as parameters and returns a
+        response object containing a dictionary of urls associated with the fine-tuned model.
+        The user can then download the model weights from these urls. If called
+        on a nonexistent model, an error will be raised.
+
+        Args:
+            model_name (`str`):
+                name of the fine-tuned model
+            download_format (`str`):
+                download format requested (default=hugging_face)
+        Returns:
+            ModelDownloadResponse: an object that contains a dictionary of (filename, url) pairs from which to download the model weights.
+                The urls are presigned urls that grant temporary access and expire after an hour.
+
+        === "Downloading model in Python"
+            ```python
+            from llmengine import Model
+
+            response = Model.download("llama-2-7b.suffix.2023-07-18-12-00-00", download_format="hugging_face")
+            print(response.json())
+            ```
+
+        === "Response in JSON"
+            ```json
+            {
+                "urls": {"my_model_file": "https://url-to-my-model-weights"}
+            }
+            ```
+        """
+
+        request = ModelDownloadRequest(model_name=model_name, download_format=download_format)
+        response = cls.post_sync(
+            resource_name="v1/llm/model-endpoints/download",
+            data=request.dict(),
+            timeout=DEFAULT_TIMEOUT,
+        )
+        return ModelDownloadResponse.parse_obj(response)
diff --git a/docs/api/data_types.md b/docs/api/data_types.md
index b932fa70..0663607f 100644
--- a/docs/api/data_types.md
+++ b/docs/api/data_types.md
@@ -45,6 +45,10 @@
 
 ::: llmengine.DeleteLLMEndpointResponse
 
+::: llmengine.ModelDownloadRequest
+
+::: llmengine.ModelDownloadResponse
+
 ::: llmengine.UploadFileResponse
 
 ::: llmengine.GetFileResponse
diff --git a/docs/api/python_client.md b/docs/api/python_client.md
index 8b3fdc1f..427ae8b6 100644
--- a/docs/api/python_client.md
+++ b/docs/api/python_client.md
@@ -21,6 +21,7 @@
       - get
       - list
       - delete
+      - download
 
 ::: llmengine.File
     selection:
diff --git a/examples/download_a_finetuned_model.ipynb b/examples/download_a_finetuned_model.ipynb
new file mode 100644
index 00000000..da548b12
--- /dev/null
+++ b/examples/download_a_finetuned_model.ipynb
@@ -0,0 +1,348 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "8d3a4214",
+   "metadata": {
+    "id": "8d3a4214"
+   },
+   "source": [
+    "# Download a Fine-Tuned Model\n",
+    "This notebook demonstrates how to download a fine-tuned model that you've created using LLM Engine and add it to Hugging Face!\n",
+    "\n",
+    "**This notebook is an extension of the previous fine-tuning notebook on ScienceQA**"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "XK6VpTnOL4OV",
+   "metadata": {
+    "id": "XK6VpTnOL4OV"
+   },
+   "source": [
+    "# Packages Required\n",
+    "For this demo, we'll be using the `scale-llm-engine` package, the `datasets` package for downloading our fine-tuning dataset, `smart_open` for reading and writing data files, `transformers`, and `huggingface_hub` for uploading our model to Hugging Face.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "S5u6DdInMEQ7",
+   "metadata": {
+    "id": "S5u6DdInMEQ7"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install scale-llm-engine\n",
+    "!pip install smart_open\n",
+    "!pip install transformers\n",
+    "!pip install datasets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a3dc2a56",
+   "metadata": {
+    "id": "a3dc2a56"
+   },
+   "source": [
+    "# Data Preparation\n",
+    "Let's load in the dataset using Hugging Face and view the features."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e06ac39e",
+   "metadata": {
+    "id": "e06ac39e"
+   },
+   "outputs": [],
+   "source": [
+    "from datasets import load_dataset\n",
+    "from smart_open import smart_open\n",
+    "import pandas as pd\n",
+    "\n",
+    "dataset = load_dataset('derek-thomas/ScienceQA')\n",
+    "dataset['train'].features"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1cbe8a58",
+   "metadata": {
+    "id": "1cbe8a58"
+   },
+   "source": [
+    "Now, let's format the dataset into what's acceptable for LLM Engine - a CSV file with 'prompt' and 'response' columns.\n",
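+    "\n",
+    "For illustration, a formatted row will look roughly like this (hypothetical values, not actual dataset output):\n",
+    "\n",
+    "```\n",
+    "prompt: \"Context: ...\\nQuestion: Which of these is a fish?\\nOptions:(A) dolphin (B) salmon\\nAnswer:\"\n",
+    "response: \"B\"\n",
+    "```"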
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b0eb8ad",
+   "metadata": {
+    "id": "0b0eb8ad"
+   },
+   "outputs": [],
+   "source": [
+    "choice_prefixes = [chr(ord('A') + i) for i in range(26)] # A-Z\n",
+    "def format_options(options, choice_prefixes):\n",
+    "    return ' '.join([f'({c}) {o}' for c, o in zip(choice_prefixes, options)])\n",
+    "\n",
+    "def format_prompt(r, choice_prefixes):\n",
+    "    options = format_options(r['choices'], choice_prefixes)\n",
+    "    return f'''Context: {r[\"hint\"]}\\nQuestion: {r[\"question\"]}\\nOptions:{options}\\nAnswer:'''\n",
+    "\n",
+    "def format_label(r, choice_prefixes):\n",
+    "    return choice_prefixes[r['answer']]\n",
+    "\n",
+    "def convert_dataset(ds):\n",
+    "    prompts = [format_prompt(i, choice_prefixes) for i in ds if i['hint'] != '']\n",
+    "    labels = [format_label(i, choice_prefixes) for i in ds if i['hint'] != '']\n",
+    "    df = pd.DataFrame.from_dict({'prompt': prompts, 'response': labels})\n",
+    "    return df\n",
+    "\n",
+    "save_to_s3 = False\n",
+    "df_train = convert_dataset(dataset['train'])\n",
+    "if save_to_s3:\n",
+    "    train_url = 's3://...'\n",
+    "    val_url = 's3://...'\n",
+    "    with smart_open(train_url, 'wb') as f:\n",
+    "        df_train.to_csv(f)\n",
+    "\n",
+    "    df_val = convert_dataset(dataset['validation'])\n",
+    "    with smart_open(val_url, 'wb') as f:\n",
+    "        df_val.to_csv(f)\n",
+    "else:\n",
+    "    # Gists of the already processed datasets\n",
+    "    train_url = 'https://gist.githubusercontent.com/jihan-yin/43f19a86d35bf22fa3551d2806e478ec/raw/91416c09f09d3fca974f81d1f766dd4cadb29789/scienceqa_train.csv'\n",
+    "    val_url = 'https://gist.githubusercontent.com/jihan-yin/43f19a86d35bf22fa3551d2806e478ec/raw/91416c09f09d3fca974f81d1f766dd4cadb29789/scienceqa_val.csv'\n",
+    "\n",
+    "df_train"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e2fc8d76",
+   "metadata": {
+    "id": "e2fc8d76"
+   },
+   "source": [
+    "# Fine-tune\n",
+    "Now, we can fine-tune the model using LLM Engine."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4905d447",
+   "metadata": {
+    "id": "4905d447"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ['SCALE_API_KEY'] = 'xxx'\n",
+    "\n",
+    "from llmengine import FineTune\n",
+    "\n",
+    "response = FineTune.create(\n",
+    "    model=\"llama-2-7b\",\n",
+    "    training_file=train_url,\n",
+    "    validation_file=val_url,\n",
+    "    hyperparameters={\n",
+    "        'lr': 2e-4,\n",
+    "    },\n",
+    "    suffix='science-qa-llama'\n",
+    ")\n",
+    "run_id = response.id"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "55074457",
+   "metadata": {
+    "id": "55074457"
+   },
+   "source": [
+    "We can poll the job status every minute until the fine-tuning job completes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "840938dd",
+   "metadata": {
+    "id": "840938dd"
+   },
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "\n",
+    "while True:\n",
+    "    job_status = FineTune.get(run_id).status\n",
+    "    print(job_status)\n",
+    "    if job_status == 'SUCCESS':\n",
+    "        break\n",
+    "    time.sleep(60)\n",
+    "\n",
+    "fine_tuned_model = FineTune.get(run_id).fine_tuned_model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31278c6d",
+   "metadata": {
+    "id": "31278c6d"
+   },
+   "source": [
+    "# Downloading our Fine-Tuned Model\n",
+    "Let's download the weights for the new fine-tuned model using LLM Engine.\n",
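+    "\n",
+    "Per the `Model.download` client docs, the response contains a dictionary of (file name, URL) pairs; the URLs are presigned, grant temporary access, and expire after an hour, so it's best to fetch the files promptly."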
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f2f3f43",
+   "metadata": {
+    "id": "9f2f3f43"
+   },
+   "outputs": [],
+   "source": [
+    "from llmengine import Model\n",
+    "\n",
+    "response = Model.download(fine_tuned_model, download_format=\"hugging_face\")\n",
+    "print(response.urls)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ae9cbdf3",
+   "metadata": {},
+   "source": [
+    "We now have a dictionary of filenames and URLs that point to the file(s) where our fine-tuned model lives. We can download these files either synchronously or asynchronously; below, we download them synchronously with `requests`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dc363e48",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import requests\n",
+    "\n",
+    "def download_files(url_dict, directory):\n",
+    "    \"\"\"\n",
+    "    Download files from given URLs to specified directory.\n",
+    "\n",
+    "    Parameters:\n",
+    "    - url_dict: Dictionary of {file_name: url} pairs.\n",
+    "    - directory: Directory to save the files.\n",
+    "    \"\"\"\n",
+    "    if not os.path.exists(directory):\n",
+    "        os.makedirs(directory)\n",
+    "\n",
+    "    for file_name, url in url_dict.items():\n",
+    "        response = requests.get(url, stream=True)\n",
+    "        response.raise_for_status()  # Raise an exception for HTTP errors\n",
+    "        file_path = os.path.join(directory, file_name)\n",
+    "\n",
+    "        with open(file_path, 'wb') as file:\n",
+    "            for chunk in response.iter_content(chunk_size=8192):\n",
+    "                file.write(chunk)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "000e1633",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output_directory = \"YOUR_MODEL_DIR\"\n",
+    "download_files(response.urls, output_directory)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e4e87233",
+   "metadata": {},
+   "source": [
+    "Lastly, we can upload our downloaded model to the Hugging Face Hub.\n",
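+    "\n",
+    "Note that `Repository` drives `git` under the hood, so this step assumes `git` and `git-lfs` are available in your environment, and that your token has permission to create (or push to) the target repo on the Hub."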
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b7c8ee18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install huggingface-hub"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "328efd19",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from huggingface_hub import Repository\n",
+    "\n",
+    "HF_USERNAME = \"YOUR_HUGGINGFACE_USERNAME\"\n",
+    "HF_TOKEN = \"YOUR_HUGGINGFACE_TOKEN\"\n",
+    "\n",
+    "def upload_to_huggingface(directory, model_name):\n",
+    "    \"\"\"\n",
+    "    Upload files from a directory to the Hugging Face Hub as a new model.\n",
+    "\n",
+    "    Parameters:\n",
+    "    - directory: Directory containing the files to be uploaded.\n",
+    "    - model_name: Name of the new model.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # Clone (or create) a repository with the given name\n",
+    "    repo = Repository(directory, clone_from=f\"{HF_USERNAME}/{model_name}\", use_auth_token=HF_TOKEN)\n",
+    "\n",
+    "    # Commit and push files\n",
+    "    repo.push_to_hub()\n",
+    "\n",
+    "model_name = \"my-new-model\"\n",
+    "\n",
+    "upload_to_huggingface(output_directory, model_name)"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Environment (conda_pytorch_p38)",
+   "language": "python",
+   "name": "conda_pytorch_p38"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}