From aaa5cd4df34de85fcc9cb25a24978a7abf69e6be Mon Sep 17 00:00:00 2001
From: Ian Macleod <ian.macleod@scale.com>
Date: Fri, 4 Aug 2023 23:14:37 +0000
Subject: [PATCH 01/11] adding launch client modifications to handle download
 api, updating example notebook

---
 clients/python/llmengine/__init__.py          |   4 +
 clients/python/llmengine/data_types.py        |  11 +
 clients/python/llmengine/model.py             |  34 ++
 docs/api/data_types.md                        |   4 +
 docs/api/python_client.md                     |   1 +
 examples/download_a_finetuned_model.ipynb     | 360 ++++++++++++++++++
 server/llm_engine_server/api/llms_v1.py       |   2 +
 server/llm_engine_server/common/dtos/llms.py  |   7 +
 .../common/dtos/model_endpoints.py            |   1 +
 .../infra/gateways/s3_filesystem_gateway.py   |   2 +-
 10 files changed, 425 insertions(+), 1 deletion(-)
 create mode 100644 examples/download_a_finetuned_model.ipynb

diff --git a/clients/python/llmengine/__init__.py b/clients/python/llmengine/__init__.py
index cbb60297..2ac34a4d 100644
--- a/clients/python/llmengine/__init__.py
+++ b/clients/python/llmengine/__init__.py
@@ -26,6 +26,8 @@
     CreateFineTuneRequest,
     CreateFineTuneResponse,
     DeleteLLMEndpointResponse,
+    ModelDownloadRequest,
+    ModelDownloadResponse,
     GetFineTuneResponse,
     GetLLMEndpointResponse,
     ListFineTunesResponse,
@@ -44,6 +46,8 @@
     "CreateFineTuneRequest",
     "CreateFineTuneResponse",
     "DeleteLLMEndpointResponse",
+    "ModelDownloadRequest",
+    "ModelDownloadResponse",
     "FineTune",
     "GetFineTuneResponse",
     "GetLLMEndpointResponse",
diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py
index b3fcbf58..80859f9f 100644
--- a/clients/python/llmengine/data_types.py
+++ b/clients/python/llmengine/data_types.py
@@ -440,3 +440,14 @@ class GetFineTuneEventsResponse(BaseModel):
     """
 
     events: List[LLMFineTuneEvent] = Field(..., description="List of fine-tuning events.")
+
+
+class ModelDownloadRequest(BaseModel):
+    """
+    Request object for downloading a model.
+    """
+    model_name: str = Field(..., description="Name of the model to download.")
+    download_format: str = Field(..., description="Desired return format for downloaded model weights (default=huggingface).")
+
+class ModelDownloadResponse(BaseModel):
+    urls: List[str] = Field(..., description="List of URLs to download model weights from.")
\ No newline at end of file
diff --git a/clients/python/llmengine/model.py b/clients/python/llmengine/model.py
index 1b6c9dba..3cba912d 100644
--- a/clients/python/llmengine/model.py
+++ b/clients/python/llmengine/model.py
@@ -5,6 +5,8 @@
     CreateLLMEndpointRequest,
     CreateLLMEndpointResponse,
     DeleteLLMEndpointResponse,
+    ModelDownloadRequest,
+    ModelDownloadResponse,
     GetLLMEndpointResponse,
     GpuType,
     ListLLMEndpointsResponse,
@@ -347,3 +349,35 @@ def delete(cls, model: str) -> DeleteLLMEndpointResponse:
         """
         response = cls._delete(f"v1/llm/model-endpoints/{model}", timeout=DEFAULT_TIMEOUT)
         return DeleteLLMEndpointResponse.parse_obj(response)
+
+
+    @classmethod 
+    def download(cls, 
+                 model_name: str, 
+                 download_format: str = "huggingface", 
+                 ) -> ModelDownloadResponse:
+        """
+        Download a fine-tuned model.
+
+        This API can be used to download the resulting model from a fine-tuning job.
+        It takes the `model_name` as a parameter and returns a response object
+        of ### todo: finish this
+
+        Args:
+            model_name (`str`):
+                name of the fine-tuned model (base model names are null, so not meaningful to download)
+            format (`str`): 
+                download format requested (currently only trying to support "huggingface") 
+                question: should this be an enum? 
+        Returns:
+            DownloadModelResponse: an object that contains the url from which to download the model weights
+
+        """
+
+        request = ModelDownloadRequest(model_name=model_name, download_format=download_format)
+        response = cls.post_sync(
+            resource_name = f"v1/llm/model-endpoints/download",
+            data=request.dict(),
+            timeout=DEFAULT_TIMEOUT,
+        )
+        return ModelDownloadResponse.parse_obj(response)
diff --git a/docs/api/data_types.md b/docs/api/data_types.md
index 55f33028..3807e9f9 100644
--- a/docs/api/data_types.md
+++ b/docs/api/data_types.md
@@ -43,3 +43,7 @@
 ::: llmengine.ListLLMEndpointsResponse
 
 ::: llmengine.DeleteLLMEndpointResponse
+
+::: llmengine.DownloadModelRequest
+
+::: llmengine.DownloadModelResponse
\ No newline at end of file
diff --git a/docs/api/python_client.md b/docs/api/python_client.md
index 820b2e56..3b1f320b 100644
--- a/docs/api/python_client.md
+++ b/docs/api/python_client.md
@@ -21,3 +21,4 @@
             - get
             - list
             - delete
+            - download
diff --git a/examples/download_a_finetuned_model.ipynb b/examples/download_a_finetuned_model.ipynb
new file mode 100644
index 00000000..3df4db55
--- /dev/null
+++ b/examples/download_a_finetuned_model.ipynb
@@ -0,0 +1,360 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "id": "8d3a4214",
+      "metadata": {
+        "id": "8d3a4214"
+      },
+      "source": [
+        "# Downlaod a FineTuned Model \n",
+        "This notebook demonstrates how to download a finetuned model that you've created using LLM Engine and add it to huggingface!\n",
+        "\n",
+        "**This notebook is an extension of the previous finetuning notebook on ScienceQA**"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "XK6VpTnOL4OV",
+      "metadata": {
+        "id": "XK6VpTnOL4OV"
+      },
+      "source": [
+        "# Packages Required\n",
+        "For this demo, we'll be using the `scale-llm-engine` package, the `datasets` package for downloading our finetuning dataset, `transformers`, and `huggingface_hub` for uploading our model to huggingface.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "S5u6DdInMEQ7",
+      "metadata": {
+        "id": "S5u6DdInMEQ7"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install scale-llm-engine\n",
+        "!pip install transformers\n",
+        "!pip install huggingface_hub\n",
+        "!pip install datasets\n",
+        "!pip install aiohttp   "
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "a3dc2a56",
+      "metadata": {
+        "id": "a3dc2a56"
+      },
+      "source": [
+        "# Data Preparation\n",
+        "Let's load in the dataset using Huggingface and view the features."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "e06ac39e",
+      "metadata": {
+        "id": "e06ac39e"
+      },
+      "outputs": [],
+      "source": [
+        "from datasets import load_dataset\n",
+        "from smart_open import smart_open\n",
+        "import pandas as pd\n",
+        "\n",
+        "dataset = load_dataset('derek-thomas/ScienceQA')\n",
+        "dataset['train'].features"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "1cbe8a58",
+      "metadata": {
+        "id": "1cbe8a58"
+      },
+      "source": [
+        "Now, let's format the dataset into what's acceptable for LLM Engine - a CSV file with 'prompt' and 'response' columns."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "0b0eb8ad",
+      "metadata": {
+        "id": "0b0eb8ad"
+      },
+      "outputs": [],
+      "source": [
+        "choice_prefixes = [chr(ord('A') + i) for i in range(26)] # A-Z\n",
+        "def format_options(options, choice_prefixes):\n",
+        "    return ' '.join([f'({c}) {o}' for c, o in zip(choice_prefixes, options)])\n",
+        "\n",
+        "def format_prompt(r, choice_prefixes):\n",
+        "    options = format_options(r['choices'], choice_prefixes)\n",
+        "    return f'''Context: {r[\"hint\"]}\\nQuestion: {r[\"question\"]}\\nOptions:{options}\\nAnswer:'''\n",
+        "\n",
+        "def format_label(r, choice_prefixes):\n",
+        "    return choice_prefixes[r['answer']]\n",
+        "\n",
+        "def convert_dataset(ds):\n",
+        "    prompts = [format_prompt(i, choice_prefixes) for i in ds if i['hint'] != '']\n",
+        "    labels = [format_label(i, choice_prefixes) for i in ds if i['hint'] != '']\n",
+        "    df = pd.DataFrame.from_dict({'prompt': prompts, 'response': labels})\n",
+        "    return df\n",
+        "\n",
+        "save_to_s3 = False\n",
+        "df_train = convert_dataset(dataset['train'])\n",
+        "if save_to_s3:\n",
+        "    train_url = 's3://...'\n",
+        "    val_url = 's3://...'\n",
+        "    df_train = convert_dataset(dataset['train'])\n",
+        "    with smart_open(train_url, 'wb') as f:\n",
+        "        df_train.to_csv(f)\n",
+        "\n",
+        "    df_val = convert_dataset(dataset['validation'])\n",
+        "    with smart_open(val_url, 'wb') as f:\n",
+        "        df_val.to_csv(f)\n",
+        "else:\n",
+        "    # Gists of the already processed datasets\n",
+        "    train_url = 'https://gist.githubusercontent.com/jihan-yin/43f19a86d35bf22fa3551d2806e478ec/raw/91416c09f09d3fca974f81d1f766dd4cadb29789/scienceqa_train.csv'\n",
+        "    val_url = 'https://gist.githubusercontent.com/jihan-yin/43f19a86d35bf22fa3551d2806e478ec/raw/91416c09f09d3fca974f81d1f766dd4cadb29789/scienceqa_val.csv'\n",
+        "\n",
+        "df_train"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "e2fc8d76",
+      "metadata": {
+        "id": "e2fc8d76"
+      },
+      "source": [
+        "# Fine-tune\n",
+        "Now, we can fine-tune the model using LLM Engine."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "4905d447",
+      "metadata": {
+        "id": "4905d447"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "os.environ['SCALE_API_KEY'] = 'xxx'\n",
+        "\n",
+        "from llmengine import FineTune\n",
+        "\n",
+        "response = FineTune.create(\n",
+        "    model=\"llama-2-7b\",\n",
+        "    training_file=train_url,\n",
+        "    validation_file=val_url,\n",
+        "    hyperparameters={\n",
+        "        'lr':2e-4,\n",
+        "    },\n",
+        "    suffix='science-qa-llama'\n",
+        ")\n",
+        "run_id = response.id"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "55074457",
+      "metadata": {
+        "id": "55074457"
+      },
+      "source": [
+        "We can sleep until the job completes."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "840938dd",
+      "metadata": {
+        "id": "840938dd"
+      },
+      "outputs": [],
+      "source": [
+        "import time\n",
+        "\n",
+        "while True:\n",
+        "    job_status = FineTune.get(run_id).status\n",
+        "    print(job_status)\n",
+        "    if job_status == 'SUCCESS':\n",
+        "        break\n",
+        "    time.sleep(60)\n",
+        "\n",
+        "fine_tuned_model = FineTune.get(run_id).fine_tuned_model"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "31278c6d",
+      "metadata": {
+        "id": "31278c6d"
+      },
+      "source": [
+        "# Downloading our Finetuned model \n",
+        "Let's download the weights for the new fine-tuned model using LLM Engine."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "9f2f3f43",
+      "metadata": {
+        "id": "9f2f3f43"
+      },
+      "outputs": [],
+      "source": [
+        "from llmengine import Model\n",
+        "\n",
+        "response = Model.download(FineTune.get(run_id).fine_tuned_model, download_format=\"huggingface\")\n",
+        "print(response.urls)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "ae9cbdf3",
+      "metadata": {},
+      "source": [
+        "We now have a list of urls that point to the file(s) where our finetuned model lives. We can download the associated finetuned model either synchronously or asynchronously."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "dc363e48",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "import aiohttp\n",
+        "import asyncio\n",
+        "from urllib.parse import urlparse\n",
+        "\n",
+        "async def download_file(session, url, output_dir):\n",
+        "    # Parse the URL to get the filename\n",
+        "    parsed = urlparse(url)\n",
+        "    filename = os.path.basename(parsed.path)\n",
+        "\n",
+        "    # Download the file\n",
+        "    async with session.get(url) as response:\n",
+        "        response.raise_for_status()\n",
+        "\n",
+        "        # Write the file\n",
+        "        with open(os.path.join(output_dir, filename), 'wb') as f:\n",
+        "            f.write(await response.read())\n",
+        "\n",
+        "        print(f\"Downloaded {filename}\")\n",
+        "\n",
+        "async def download_files(urls, output_dir):\n",
+        "    os.makedirs(output_dir, exist_ok=True)\n",
+        "\n",
+        "    async with aiohttp.ClientSession() as session:\n",
+        "        tasks = []\n",
+        "        for url in urls:\n",
+        "            tasks.append(download_file(session, url, output_dir))\n",
+        "\n",
+        "        await asyncio.gather(*tasks)\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "000e1633",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "output_directory = \"YOUR_MODEL_DIR\"\n",
+        "asyncio.run(download_files(response.urls, output_directory))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "328efd19",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "import tarfile\n",
+        "from getpass import getpass\n",
+        "from huggingface_hub import HfApi\n",
+        "\n",
+        "def create_tarball(source_dir, output_filename):\n",
+        "    with tarfile.open(output_filename, \"w:gz\") as tar:\n",
+        "        tar.add(source_dir, arcname=os.path.basename(source_dir))\n",
+        "\n",
+        "def upload_to_huggingface_model_hub(source_dir, model_name, hf_username, hf_password=None):\n",
+        "    # Get password if not provided\n",
+        "    if hf_password is None:\n",
+        "        hf_password = getpass(\"Enter Hugging Face password: \")\n",
+        "\n",
+        "    # Log in to Hugging Face\n",
+        "    api = HfApi()\n",
+        "    token = api.login(hf_username, hf_password)\n",
+        "\n",
+        "    # Create a new repository\n",
+        "    print(f\"Creating new model {model_name}.\")\n",
+        "    repo_url = api.create_repo(token, model_name, exist_ok=True)\n",
+        "\n",
+        "    # Create a tarball of the source directory\n",
+        "    tarball_name = f\"{model_name}.tar.gz\"\n",
+        "    create_tarball(source_dir, tarball_name)\n",
+        "\n",
+        "    # Upload the tarball\n",
+        "    print(f\"Uploading {tarball_name} to {repo_url}.\")\n",
+        "    HfApi().upload_file(\n",
+        "        path_or_fileobj=tarball_name,\n",
+        "        path_in_repo=tarball_name,  # The name of the file in the repo\n",
+        "        repo_id=f\"{hf_username}/{model_name}\",  # The id of the repo\n",
+        "        token=token,\n",
+        "    )\n",
+        "\n",
+        "    # Delete the tarball\n",
+        "    os.remove(tarball_name)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "de55e438",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "upload_to_huggingface_model_hub('YOUR_MODEL_DIR', 'YOUR_MODEL_NAME', 'YOUR_HUGGINGFACE_USER_NAME', 'YOUR_HUGGINGFACE_PASSWORD')"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Environment (conda_pytorch_p38)",
+      "language": "python",
+      "name": "conda_pytorch_p38"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.12"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}
diff --git a/server/llm_engine_server/api/llms_v1.py b/server/llm_engine_server/api/llms_v1.py
index 1d54ca5e..6f98be66 100644
--- a/server/llm_engine_server/api/llms_v1.py
+++ b/server/llm_engine_server/api/llms_v1.py
@@ -24,6 +24,8 @@
     GetLLMModelEndpointV1Response,
     ListFineTuneJobResponse,
     ListLLMModelEndpointsV1Response,
+    ModelDownloadRequest,
+    ModelDownloadResponse,
 )
 from llm_engine_server.common.dtos.model_endpoints import ModelEndpointOrderBy
 from llm_engine_server.common.dtos.tasks import TaskStatus
diff --git a/server/llm_engine_server/common/dtos/llms.py b/server/llm_engine_server/common/dtos/llms.py
index 2739dc1f..fef67222 100644
--- a/server/llm_engine_server/common/dtos/llms.py
+++ b/server/llm_engine_server/common/dtos/llms.py
@@ -169,3 +169,10 @@ class ListFineTuneJobResponse(BaseModel):
 
 class CancelFineTuneJobResponse(BaseModel):
     success: bool
+
+class ModelDownloadRequest(BaseModel):
+    model_name: str
+    format: str
+
+class ModelDownloadResponse(BaseModel):
+    url: str
\ No newline at end of file
diff --git a/server/llm_engine_server/common/dtos/model_endpoints.py b/server/llm_engine_server/common/dtos/model_endpoints.py
index 956e8ee1..83b114db 100644
--- a/server/llm_engine_server/common/dtos/model_endpoints.py
+++ b/server/llm_engine_server/common/dtos/model_endpoints.py
@@ -143,3 +143,4 @@ class GetModelEndpointsSchemaV1Response(BaseModel):
 
 
 # TODO history + creation logs
+
diff --git a/server/llm_engine_server/infra/gateways/s3_filesystem_gateway.py b/server/llm_engine_server/infra/gateways/s3_filesystem_gateway.py
index 4dab06ba..c11d6ffb 100644
--- a/server/llm_engine_server/infra/gateways/s3_filesystem_gateway.py
+++ b/server/llm_engine_server/infra/gateways/s3_filesystem_gateway.py
@@ -1,6 +1,6 @@
 import os
 import re
-from typing import IO
+from typing import IO, List
 
 import boto3
 import smart_open

From ad54664704ba614d21a137091b669c3a208ddd26 Mon Sep 17 00:00:00 2001
From: Ian Macleod <ian.macleod@scale.com>
Date: Fri, 4 Aug 2023 23:24:44 +0000
Subject: [PATCH 02/11] fixing docs error with ModelDownloadResponse

---
 docs/api/data_types.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/api/data_types.md b/docs/api/data_types.md
index 9587ca14..c61ab09b 100644
--- a/docs/api/data_types.md
+++ b/docs/api/data_types.md
@@ -44,9 +44,9 @@
 
 ::: llmengine.DeleteLLMEndpointResponse
 
-::: llmengine.DownloadModelRequest
+::: llmengine.ModelDownloadRequest
 
-::: llmengine.DownloadModelResponse
+::: llmengine.ModelDownloadResponse
 
 ::: llmengine.UploadFileResponse
 

From db985ac7bb4f603079eac97c80ef329fd1d4eca5 Mon Sep 17 00:00:00 2001
From: Ian Macleod <ian.macleod@scale.com>
Date: Fri, 4 Aug 2023 23:34:09 +0000
Subject: [PATCH 03/11] fixing docs for download function

---
 clients/python/llmengine/model.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/clients/python/llmengine/model.py b/clients/python/llmengine/model.py
index cb330897..eb7109c0 100644
--- a/clients/python/llmengine/model.py
+++ b/clients/python/llmengine/model.py
@@ -378,18 +378,33 @@ def download(cls,
         Download a fine-tuned model.
 
         This API can be used to download the resulting model from a fine-tuning job.
-        It takes the `model_name` as a parameter and returns a response object
-        of ### todo: finish this
+        It takes the `model_name` and `download_format` as parameters and returns a
+        response object which contains a list of urls associated with the fine-tuned model.
+        The user can then download these urls to obtain the fine-tuned model. If called 
+        on a nonexistent model, an error will be thrown.
 
         Args:
             model_name (`str`):
-                name of the fine-tuned model (base model names are null, so not meaningful to download)
-            format (`str`): 
-                download format requested (currently only trying to support "huggingface") 
-                question: should this be an enum? 
+                name of the fine-tuned model 
+            download_format (`str`): 
+                download format requested (default=huggingface) 
         Returns:
             DownloadModelResponse: an object that contains the url from which to download the model weights
 
+        === "Downloading model in Python"
+            ```python
+            from llmengine import Model
+
+            response = Model.download("llama-2-7b.suffix.2023-07-18-12-00-00", download_format="huggingface")
+            print(response.json())
+            ```
+
+        === "Response in JSON"
+            ```json
+            {
+                "urls": ['https://path-to-my-model-weights'] 
+            }
+            ```
         """
 
         request = ModelDownloadRequest(model_name=model_name, download_format=download_format)

From 0ec6727abf380c1938aeee1ae1b4e1b87becd799 Mon Sep 17 00:00:00 2001
From: Ian Macleod <ian.macleod@scale.com>
Date: Thu, 10 Aug 2023 23:50:13 +0000
Subject: [PATCH 04/11] updating llm engine files, removing server changes

---
 clients/python/llmengine/data_types.py                     | 4 ++--
 server/llm_engine_server/api/llms_v1.py                    | 2 --
 server/llm_engine_server/common/dtos/llms.py               | 7 -------
 server/llm_engine_server/common/dtos/model_endpoints.py    | 1 -
 .../infra/gateways/s3_filesystem_gateway.py                | 2 +-
 5 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py
index d40f5da9..32f393e2 100644
--- a/clients/python/llmengine/data_types.py
+++ b/clients/python/llmengine/data_types.py
@@ -487,10 +487,10 @@ class ModelDownloadRequest(BaseModel):
     Request object for downloading a model.
     """
     model_name: str = Field(..., description="Name of the model to download.")
-    download_format: str = Field(..., description="Desired return format for downloaded model weights (default=huggingface).")
+    download_format: Optional[str] = Field(..., description="Desired return format for downloaded model weights (default=huggingface).")
 
 class ModelDownloadResponse(BaseModel):
-    urls: List[str] = Field(..., description="List of URLs to download model weights from.")
+    urls: List[str] = Field(default="huggingface", description="List of URLs to download model weights from.")
 
 class UploadFileResponse(BaseModel):
     """Response object for uploading a file."""
diff --git a/server/llm_engine_server/api/llms_v1.py b/server/llm_engine_server/api/llms_v1.py
index 6f98be66..1d54ca5e 100644
--- a/server/llm_engine_server/api/llms_v1.py
+++ b/server/llm_engine_server/api/llms_v1.py
@@ -24,8 +24,6 @@
     GetLLMModelEndpointV1Response,
     ListFineTuneJobResponse,
     ListLLMModelEndpointsV1Response,
-    ModelDownloadRequest,
-    ModelDownloadResponse,
 )
 from llm_engine_server.common.dtos.model_endpoints import ModelEndpointOrderBy
 from llm_engine_server.common.dtos.tasks import TaskStatus
diff --git a/server/llm_engine_server/common/dtos/llms.py b/server/llm_engine_server/common/dtos/llms.py
index fef67222..2739dc1f 100644
--- a/server/llm_engine_server/common/dtos/llms.py
+++ b/server/llm_engine_server/common/dtos/llms.py
@@ -169,10 +169,3 @@ class ListFineTuneJobResponse(BaseModel):
 
 class CancelFineTuneJobResponse(BaseModel):
     success: bool
-
-class ModelDownloadRequest(BaseModel):
-    model_name: str
-    format: str
-
-class ModelDownloadResponse(BaseModel):
-    url: str
\ No newline at end of file
diff --git a/server/llm_engine_server/common/dtos/model_endpoints.py b/server/llm_engine_server/common/dtos/model_endpoints.py
index 83b114db..956e8ee1 100644
--- a/server/llm_engine_server/common/dtos/model_endpoints.py
+++ b/server/llm_engine_server/common/dtos/model_endpoints.py
@@ -143,4 +143,3 @@ class GetModelEndpointsSchemaV1Response(BaseModel):
 
 
 # TODO history + creation logs
-
diff --git a/server/llm_engine_server/infra/gateways/s3_filesystem_gateway.py b/server/llm_engine_server/infra/gateways/s3_filesystem_gateway.py
index c11d6ffb..4dab06ba 100644
--- a/server/llm_engine_server/infra/gateways/s3_filesystem_gateway.py
+++ b/server/llm_engine_server/infra/gateways/s3_filesystem_gateway.py
@@ -1,6 +1,6 @@
 import os
 import re
-from typing import IO, List
+from typing import IO
 
 import boto3
 import smart_open

From 8c595dee304ede45e2bd12c1b04c5c029eba898b Mon Sep 17 00:00:00 2001
From: Ian Macleod <ian.macleod@scale.com>
Date: Fri, 11 Aug 2023 00:08:34 +0000
Subject: [PATCH 05/11] updating response fields

---
 clients/python/llmengine/data_types.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py
index 32f393e2..9ef43e95 100644
--- a/clients/python/llmengine/data_types.py
+++ b/clients/python/llmengine/data_types.py
@@ -487,10 +487,10 @@ class ModelDownloadRequest(BaseModel):
     Request object for downloading a model.
     """
     model_name: str = Field(..., description="Name of the model to download.")
-    download_format: Optional[str] = Field(..., description="Desired return format for downloaded model weights (default=huggingface).")
+    download_format: Optional[str] = Field(default="huggingface", description="Desired return format for downloaded model weights (default=huggingface).")
 
 class ModelDownloadResponse(BaseModel):
-    urls: List[str] = Field(default="huggingface", description="List of URLs to download model weights from.")
+    urls: List[str] = Field(description="List of URLs to download model weights from.")
 
 class UploadFileResponse(BaseModel):
     """Response object for uploading a file."""

From b1a804c24fba583d8f37206076ea7c805c4c604a Mon Sep 17 00:00:00 2001
From: Ian Macleod <ian.macleod@scale.com>
Date: Fri, 11 Aug 2023 20:00:26 +0000
Subject: [PATCH 06/11] updating to return dict and not list in response

---
 clients/python/llmengine/data_types.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py
index 06250476..20632a6b 100644
--- a/clients/python/llmengine/data_types.py
+++ b/clients/python/llmengine/data_types.py
@@ -503,8 +503,11 @@ class ModelDownloadRequest(BaseModel):
     download_format: Optional[str] = Field(default="huggingface", description="Desired return format for downloaded model weights (default=huggingface).")
 
 class ModelDownloadResponse(BaseModel):
-    urls: List[str] = Field(description="List of URLs to download model weights from.")
-
+    """
+    Response object for downloading a model.
+    """
+    urls: Dict[str, str]  = Field(..., description="Dictionary of (file_name, url) pairs to download the model from.")
+    
 class UploadFileResponse(BaseModel):
     """Response object for uploading a file."""
 

From 20785b781ef68a0c2b846f3bada03f034a0d482f Mon Sep 17 00:00:00 2001
From: Ian Macleod <ian.macleod@scale.com>
Date: Fri, 11 Aug 2023 20:45:13 +0000
Subject: [PATCH 07/11] adding change to docs

---
 clients/python/llmengine/model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clients/python/llmengine/model.py b/clients/python/llmengine/model.py
index 6121d724..c8747b36 100644
--- a/clients/python/llmengine/model.py
+++ b/clients/python/llmengine/model.py
@@ -390,7 +390,7 @@ def download(cls,
             download_format (`str`): 
                 download format requested (default=huggingface) 
         Returns:
-            DownloadModelResponse: an object that contains the url from which to download the model weights
+            ModelDownloadResponse: an object that contains a dictionary of (file_name, url) pairs from which to download the model weights
 
         === "Downloading model in Python"
             ```python
@@ -403,7 +403,7 @@ def download(cls,
         === "Response in JSON"
             ```json
             {
-                "urls": ['https://path-to-my-model-weights'] 
+                "urls": {"my_model_file": "https://url-to-my-model-weights"}
             }
             ```
         """

From 7420267e2f4a177ccb473a411a793c464b4e3217 Mon Sep 17 00:00:00 2001
From: Ian Macleod <ian.macleod@scale.com>
Date: Fri, 11 Aug 2023 21:28:03 +0000
Subject: [PATCH 08/11] updating example

---
 examples/download_a_finetuned_model.ipynb | 134 ++++++++++------------
 1 file changed, 61 insertions(+), 73 deletions(-)

diff --git a/examples/download_a_finetuned_model.ipynb b/examples/download_a_finetuned_model.ipynb
index 3df4db55..d80da782 100644
--- a/examples/download_a_finetuned_model.ipynb
+++ b/examples/download_a_finetuned_model.ipynb
@@ -7,7 +7,7 @@
         "id": "8d3a4214"
       },
       "source": [
-        "# Downlaod a FineTuned Model \n",
+        "# Download a FineTuned Model \n",
         "This notebook demonstrates how to download a finetuned model that you've created using LLM Engine and add it to huggingface!\n",
         "\n",
         "**This notebook is an extension of the previous finetuning notebook on ScienceQA**"
@@ -35,9 +35,7 @@
       "source": [
         "!pip install scale-llm-engine\n",
         "!pip install transformers\n",
-        "!pip install huggingface_hub\n",
-        "!pip install datasets\n",
-        "!pip install aiohttp   "
+        "!pip install datasets"
       ]
     },
     {
@@ -223,7 +221,7 @@
       "id": "ae9cbdf3",
       "metadata": {},
       "source": [
-        "We now have a list of urls that point to the file(s) where our finetuned model lives. We can download the associated finetuned model either synchronously or asynchronously."
+        "We now have a dictionary of filenames and urls that point to the file(s) where our finetuned model lives. We can download the associated finetuned model either synchronously or asynchronously."
       ]
     },
     {
@@ -234,35 +232,29 @@
       "outputs": [],
       "source": [
         "import os\n",
-        "import aiohttp\n",
-        "import asyncio\n",
-        "from urllib.parse import urlparse\n",
+        "import requests\n",
         "\n",
-        "async def download_file(session, url, output_dir):\n",
-        "    # Parse the URL to get the filename\n",
-        "    parsed = urlparse(url)\n",
-        "    filename = os.path.basename(parsed.path)\n",
+        "def download_files(url_dict, directory):\n",
+        "    \"\"\"\n",
+        "    Download files from given URLs to specified directory.\n",
+        "    \n",
+        "    Parameters:\n",
+        "    - url_dict: Dictionary of {file_name: url} pairs.\n",
+        "    - directory: Directory to save the files.\n",
+        "    \"\"\"\n",
+        "    if not os.path.exists(directory):\n",
+        "        os.makedirs(directory)\n",
+        "    \n",
+        "    for file_name, url in url_dict.items():\n",
+        "        response = requests.get(url, stream=True)\n",
+        "        response.raise_for_status()  # Raise an exception for HTTP errors\n",
+        "        file_path = os.path.join(directory, file_name)\n",
+        "        \n",
+        "        with open(file_path, 'wb') as file:\n",
+        "            for chunk in response.iter_content(chunk_size=8192):\n",
+        "                file.write(chunk)\n",
         "\n",
-        "    # Download the file\n",
-        "    async with session.get(url) as response:\n",
-        "        response.raise_for_status()\n",
-        "\n",
-        "        # Write the file\n",
-        "        with open(os.path.join(output_dir, filename), 'wb') as f:\n",
-        "            f.write(await response.read())\n",
-        "\n",
-        "        print(f\"Downloaded {filename}\")\n",
-        "\n",
-        "async def download_files(urls, output_dir):\n",
-        "    os.makedirs(output_dir, exist_ok=True)\n",
-        "\n",
-        "    async with aiohttp.ClientSession() as session:\n",
-        "        tasks = []\n",
-        "        for url in urls:\n",
-        "            tasks.append(download_file(session, url, output_dir))\n",
-        "\n",
-        "        await asyncio.gather(*tasks)\n",
-        "\n"
+        "    "
       ]
     },
     {
@@ -273,63 +265,59 @@
       "outputs": [],
       "source": [
         "output_directory = \"YOUR_MODEL_DIR\"\n",
-        "asyncio.run(download_files(response.urls, output_directory))"
+        "download_files(response.urls, output_directory) "
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "e4e87233",
+      "metadata": {},
+      "source": [
+        "Lastly, we can upload our downloaded model to the huggingface hub."
       ]
     },
     {
       "cell_type": "code",
       "execution_count": null,
-      "id": "328efd19",
+      "id": "b7c8ee18",
       "metadata": {},
       "outputs": [],
       "source": [
-        "import os\n",
-        "import tarfile\n",
-        "from getpass import getpass\n",
-        "from huggingface_hub import HfApi\n",
-        "\n",
-        "def create_tarball(source_dir, output_filename):\n",
-        "    with tarfile.open(output_filename, \"w:gz\") as tar:\n",
-        "        tar.add(source_dir, arcname=os.path.basename(source_dir))\n",
-        "\n",
-        "def upload_to_huggingface_model_hub(source_dir, model_name, hf_username, hf_password=None):\n",
-        "    # Get password if not provided\n",
-        "    if hf_password is None:\n",
-        "        hf_password = getpass(\"Enter Hugging Face password: \")\n",
-        "\n",
-        "    # Log in to Hugging Face\n",
-        "    api = HfApi()\n",
-        "    token = api.login(hf_username, hf_password)\n",
-        "\n",
-        "    # Create a new repository\n",
-        "    print(f\"Creating new model {model_name}.\")\n",
-        "    repo_url = api.create_repo(token, model_name, exist_ok=True)\n",
-        "\n",
-        "    # Create a tarball of the source directory\n",
-        "    tarball_name = f\"{model_name}.tar.gz\"\n",
-        "    create_tarball(source_dir, tarball_name)\n",
-        "\n",
-        "    # Upload the tarball\n",
-        "    print(f\"Uploading {tarball_name} to {repo_url}.\")\n",
-        "    HfApi().upload_file(\n",
-        "        path_or_fileobj=tarball_name,\n",
-        "        path_in_repo=tarball_name,  # The name of the file in the repo\n",
-        "        repo_id=f\"{hf_username}/{model_name}\",  # The id of the repo\n",
-        "        token=token,\n",
-        "    )\n",
-        "\n",
-        "    # Delete the tarball\n",
-        "    os.remove(tarball_name)"
+        "!pip install huggingface-hub"
       ]
     },
     {
       "cell_type": "code",
       "execution_count": null,
-      "id": "de55e438",
+      "id": "328efd19",
       "metadata": {},
       "outputs": [],
       "source": [
-        "upload_to_huggingface_model_hub('YOUR_MODEL_DIR', 'YOUR_MODEL_NAME', 'YOUR_HUGGINGFACE_USER_NAME', 'YOUR_HUGGINGFACE_PASSWORD')"
+        "import os\n",
+        "from huggingface_hub import Repository\n",
+        "\n",
+        "HF_USERNAME = \"YOUR_HUGGINGFACE_USERNAME\"\n",
+        "HF_TOKEN = \"YOUR_HUGGINGFACE_TOKEN\"\n",
+        "\n",
+        "def upload_to_huggingface(directory, model_name, token):\n",
+        "    \"\"\"\n",
+        "    Upload files from a directory to the Hugging Face Hub as a new model.\n",
+        "\n",
+        "    Parameters:\n",
+        "    - directory: Directory containing the files to be uploaded.\n",
+        "    - model_name: Name of the new model.\n",
+        "    - token: Your Hugging Face authentication token.\n",
+        "    \"\"\"\n",
+        "    \n",
+        "    # Create a repository with the given name\n",
+        "    repo = Repository(directory, clone_from=f\"{HF_USERNAME}/{model_name}\", use_auth_token=token)\n",
+        "    \n",
+        "    # Commit and push files\n",
+        "    repo.push_to_hub()\n",
+        "\n",
+        "model_name = \"my-new-model\"\n",
+        "    \n",
+        "upload_to_huggingface(output_directory, model_name, HF_TOKEN)"
       ]
     }
   ],

From a47ac17250bfe54128437630988ae3b0342129e0 Mon Sep 17 00:00:00 2001
From: Ian Macleod <ian.macleod@scale.com>
Date: Sat, 12 Aug 2023 00:00:48 +0000
Subject: [PATCH 09/11] adding comment about presigned urls

---
 clients/python/llmengine/model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clients/python/llmengine/model.py b/clients/python/llmengine/model.py
index c8747b36..0ce3f923 100644
--- a/clients/python/llmengine/model.py
+++ b/clients/python/llmengine/model.py
@@ -390,7 +390,8 @@ def download(cls,
             download_format (`str`): 
                 download format requested (default=huggingface) 
         Returns:
-            DownloadModelResponse: an object that contains a dictionary of filenames, urls from which to download the model weights
+            DownloadModelResponse: an object that contains a dictionary of filenames, urls from which to download the model weights.
+            The urls are presigned urls that grant temporary access and expire after an hour. 
 
         === "Downloading model in Python"
             ```python

From 275f034b2b97a925604e96efb5dac91488610bb1 Mon Sep 17 00:00:00 2001
From: Ian Macleod <ian.macleod@scale.com>
Date: Mon, 14 Aug 2023 18:16:23 +0000
Subject: [PATCH 10/11] fixing precommit

---
 clients/python/llmengine/__init__.py   |  4 ++--
 clients/python/llmengine/data_types.py | 15 ++++++++++---
 clients/python/llmengine/model.py      | 30 +++++++++++++-------------
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/clients/python/llmengine/__init__.py b/clients/python/llmengine/__init__.py
index a0fb6d31..380ef213 100644
--- a/clients/python/llmengine/__init__.py
+++ b/clients/python/llmengine/__init__.py
@@ -27,8 +27,6 @@
     CreateFineTuneResponse,
     DeleteFileResponse,
     DeleteLLMEndpointResponse,
-    ModelDownloadRequest,
-    ModelDownloadResponse,
     GetFileContentResponse,
     GetFileResponse,
     GetFineTuneResponse,
@@ -36,6 +34,8 @@
     ListFilesResponse,
     ListFineTunesResponse,
     ListLLMEndpointsResponse,
+    ModelDownloadRequest,
+    ModelDownloadResponse,
     UploadFileResponse,
 )
 from llmengine.file import File
diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py
index 20632a6b..0f0db637 100644
--- a/clients/python/llmengine/data_types.py
+++ b/clients/python/llmengine/data_types.py
@@ -499,15 +499,24 @@ class ModelDownloadRequest(BaseModel):
     """
     Request object for downloading a model.
     """
+
     model_name: str = Field(..., description="Name of the model to download.")
-    download_format: Optional[str] = Field(default="huggingface", description="Desired return format for downloaded model weights (default=huggingface).")
+    download_format: Optional[str] = Field(
+        default="huggingface",
+        description="Desired return format for downloaded model weights (default=huggingface).",
+    )
+
 
 class ModelDownloadResponse(BaseModel):
     """
     Response object for downloading a model.
     """
-    urls: Dict[str, str]  = Field(..., description="Dictionary of (file_name, url) pairs to download the model from.")
-    
+
+    urls: Dict[str, str] = Field(
+        ..., description="Dictionary of (file_name, url) pairs to download the model from."
+    )
+
+
 class UploadFileResponse(BaseModel):
     """Response object for uploading a file."""
 
diff --git a/clients/python/llmengine/model.py b/clients/python/llmengine/model.py
index 0ce3f923..09387cfe 100644
--- a/clients/python/llmengine/model.py
+++ b/clients/python/llmengine/model.py
@@ -5,13 +5,13 @@
     CreateLLMEndpointRequest,
     CreateLLMEndpointResponse,
     DeleteLLMEndpointResponse,
-    ModelDownloadRequest,
-    ModelDownloadResponse,
     GetLLMEndpointResponse,
     GpuType,
     ListLLMEndpointsResponse,
     LLMInferenceFramework,
     LLMSource,
+    ModelDownloadRequest,
+    ModelDownloadResponse,
     ModelEndpointType,
     PostInferenceHooks,
     Quantization,
@@ -369,29 +369,29 @@ def delete(cls, model: str) -> DeleteLLMEndpointResponse:
         response = cls._delete(f"v1/llm/model-endpoints/{model}", timeout=DEFAULT_TIMEOUT)
         return DeleteLLMEndpointResponse.parse_obj(response)
 
-
-    @classmethod 
-    def download(cls, 
-                 model_name: str, 
-                 download_format: str = "huggingface", 
-                 ) -> ModelDownloadResponse:
+    @classmethod
+    def download(
+        cls,
+        model_name: str,
+        download_format: str = "huggingface",
+    ) -> ModelDownloadResponse:
         """
         Download a fine-tuned model.
 
         This API can be used to download the resulting model from a fine-tuning job.
-        It takes the `model_name` and `download_format` as parameter and returns a 
+        It takes the `model_name` and `download_format` as parameters and returns a
         response object which contains a list of urls associated with the fine-tuned model.
-        The user can then download these urls to obtain the fine-tuned model. If called 
+        The user can then download these urls to obtain the fine-tuned model. If called
         on a nonexistent model, an error will be thrown.
 
         Args:
             model_name (`str`):
-                name of the fine-tuned model 
-            download_format (`str`): 
-                download format requested (default=huggingface) 
+                name of the fine-tuned model
+            download_format (`str`):
+                download format requested (default=huggingface)
         Returns:
             DownloadModelResponse: an object that contains a dictionary of filenames, urls from which to download the model weights.
-            The urls are presigned urls that grant temporary access and expire after an hour. 
+            The urls are presigned urls that grant temporary access and expire after an hour.
 
         === "Downloading model in Python"
             ```python
@@ -411,7 +411,7 @@ def download(cls,
 
         request = ModelDownloadRequest(model_name=model_name, download_format=download_format)
         response = cls.post_sync(
-            resource_name = f"v1/llm/model-endpoints/download",
+            resource_name="v1/llm/model-endpoints/download",
             data=request.dict(),
             timeout=DEFAULT_TIMEOUT,
         )

From ae086b1d9441a1da6cce19d2d545ba4d6939fe14 Mon Sep 17 00:00:00 2001
From: Ian Macleod <ian.macleod@scale.com>
Date: Tue, 15 Aug 2023 00:54:08 +0000
Subject: [PATCH 11/11] changing hugging face var

---
 clients/python/llmengine/data_types.py    | 4 ++--
 clients/python/llmengine/model.py         | 6 +++---
 examples/download_a_finetuned_model.ipynb | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py
index 1bea00ab..34eaf0f9 100644
--- a/clients/python/llmengine/data_types.py
+++ b/clients/python/llmengine/data_types.py
@@ -502,8 +502,8 @@ class ModelDownloadRequest(BaseModel):
 
     model_name: str = Field(..., description="Name of the model to download.")
     download_format: Optional[str] = Field(
-        default="huggingface",
-        description="Desired return format for downloaded model weights (default=huggingface).",
+        default="hugging_face",
+        description="Desired return format for downloaded model weights (default=hugging_face).",
     )
 
 
diff --git a/clients/python/llmengine/model.py b/clients/python/llmengine/model.py
index 09387cfe..cd2191e3 100644
--- a/clients/python/llmengine/model.py
+++ b/clients/python/llmengine/model.py
@@ -373,7 +373,7 @@ def delete(cls, model: str) -> DeleteLLMEndpointResponse:
     def download(
         cls,
         model_name: str,
-        download_format: str = "huggingface",
+        download_format: str = "hugging_face",
     ) -> ModelDownloadResponse:
         """
         Download a fine-tuned model.
@@ -388,7 +388,7 @@ def download(
             model_name (`str`):
                 name of the fine-tuned model
             download_format (`str`):
-                download format requested (default=huggingface)
+                download format requested (default=hugging_face)
         Returns:
             DownloadModelResponse: an object that contains a dictionary of filenames, urls from which to download the model weights.
             The urls are presigned urls that grant temporary access and expire after an hour.
@@ -397,7 +397,7 @@ def download(
             ```python
             from llmengine import Model
 
-            response = Model.download("llama-2-7b.suffix.2023-07-18-12-00-00", download_format="huggingface")
+            response = Model.download("llama-2-7b.suffix.2023-07-18-12-00-00", download_format="hugging_face")
             print(response.json())
             ```
 
diff --git a/examples/download_a_finetuned_model.ipynb b/examples/download_a_finetuned_model.ipynb
index d80da782..da548b12 100644
--- a/examples/download_a_finetuned_model.ipynb
+++ b/examples/download_a_finetuned_model.ipynb
@@ -212,7 +212,7 @@
       "source": [
         "from llmengine import Model\n",
         "\n",
-        "response = Model.download(FineTune.get(run_id).fine_tune_model, download_format=\"huggingface\")\n",
+        "response = Model.download(FineTune.get(run_id).fine_tune_model, download_format=\"hugging_face\")\n",
         "print(response.urls)"
       ]
     },