scaleapi · squeakymouse · Aug 1, 2023 · Jul 21, 2023 · Jul 26, 2023 · Jul 31, 2023
diff --git a/clients/python/llmengine/__init__.py b/clients/python/llmengine/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.0.0.beta7"
+__version__ = "0.0.0.beta8"
 
 from typing import Sequence
 
@@ -25,12 +25,18 @@
     CompletionSyncResponse,
     CreateFineTuneRequest,
     CreateFineTuneResponse,
+    DeleteFileResponse,
     DeleteLLMEndpointResponse,
+    GetFileContentResponse,
+    GetFileResponse,
     GetFineTuneResponse,
     GetLLMEndpointResponse,
+    ListFilesResponse,
     ListFineTunesResponse,
     ListLLMEndpointsResponse,
+    UploadFileResponse,
 )
+from llmengine.file import File
 from llmengine.fine_tuning import FineTune
 from llmengine.model import Model
 
@@ -43,11 +49,17 @@
     "CompletionSyncResponse",
     "CreateFineTuneRequest",
     "CreateFineTuneResponse",
+    "DeleteFileResponse",
     "DeleteLLMEndpointResponse",
+    "GetFileContentResponse",
+    "File",
     "FineTune",
+    "GetFileResponse",
     "GetFineTuneResponse",
     "GetLLMEndpointResponse",
+    "ListFilesResponse",
     "ListFineTunesResponse",
     "ListLLMEndpointsResponse",
     "Model",
+    "UploadFileResponse",
 )
diff --git a/clients/python/llmengine/api_engine.py b/clients/python/llmengine/api_engine.py
@@ -3,6 +3,7 @@
 import json
 import os
 from functools import wraps
+from io import BufferedReader
 from typing import Any, AsyncIterable, Dict, Iterator, Optional
 
 import requests
@@ -138,6 +139,22 @@ def post_stream(
                 except json.JSONDecodeError:
                     raise ValueError(f"Invalid JSON payload: {payload_data}")
 
+    @classmethod
+    def post_file(
+        cls, resource_name: str, files: Dict[str, BufferedReader], timeout: int
+    ) -> Dict[str, Any]:
+        api_key = get_api_key()
+        response = requests.post(
+            os.path.join(LLM_ENGINE_BASE_PATH, resource_name),
+            files=files,
+            timeout=timeout,
+            headers={"x-api-key": api_key},
+        )
+        if response.status_code != 200:
+            raise parse_error(response.status_code, response.content)
+        payload = response.json()
+        return payload
+
     @classmethod
     async def apost_sync(
         cls, resource_name: str, data: Dict[str, Any], timeout: int

diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py
@@ -453,3 +453,47 @@ class GetFineTuneEventsResponse(BaseModel):
     """
 
     events: List[LLMFineTuneEvent] = Field(..., description="List of fine-tuning events.")
+
+
+class UploadFileResponse(BaseModel):
+    """Response object for uploading a file."""
+
+    id: str = Field(..., description="ID of the uploaded file.")
+    """ID of the uploaded file."""
+
+
+class GetFileResponse(BaseModel):
+    """Response object for retrieving a file."""
+
+    id: str = Field(..., description="ID of the requested file.")
+    """ID of the requested file."""
+
+    filename: str = Field(..., description="File name.")
+    """File name."""
+
+    size: int = Field(..., description="Length of the file, in characters.")
+    """Length of the file, in characters."""
+
+
+class ListFilesResponse(BaseModel):
+    """Response object for listing files."""
+
+    files: List[GetFileResponse] = Field(..., description="List of file IDs, names, and sizes.")
+    """List of file IDs, names, and sizes."""
+
+
+class DeleteFileResponse(BaseModel):
+    """Response object for deleting a file."""
+
+    deleted: bool = Field(..., description="Whether deletion was successful.")
+    """Whether deletion was successful."""
+
+
+class GetFileContentResponse(BaseModel):
+    """Response object for retrieving a file's content."""
+
+    id: str = Field(..., description="ID of the requested file.")
+    """ID of the requested file."""
+
+    content: str = Field(..., description="File content.")
+    """File content."""
diff --git a/clients/python/llmengine/file.py b/clients/python/llmengine/file.py
@@ -0,0 +1,193 @@
+from io import BufferedReader
+
+from llmengine.api_engine import DEFAULT_TIMEOUT, APIEngine
+from llmengine.data_types import (
+    DeleteFileResponse,
+    GetFileContentResponse,
+    GetFileResponse,
+    ListFilesResponse,
+    UploadFileResponse,
+)
+
+
+class File(APIEngine):
+    """
+    File API. This API is used to upload private files to LLM engine so that fine-tunes can access them for training and validation data.
+
+    Functions are provided to upload, get, list, and delete files, as well as to get the contents of a file.
+    """
+
+    @classmethod
+    def upload(cls, file: BufferedReader) -> UploadFileResponse:
+        """
+        Uploads a file to LLM engine.
+
+        Args:
+            file (`BufferedReader`):
+                A file opened in binary mode with open(file_path, "rb")
+
+        Returns:
+            UploadFileResponse: an object that contains the ID of the uploaded file
+
+        === "Uploading file in Python"
+            ```python
+            from llmengine import File
+
+            response = File.upload(open("training_dataset.csv", "rb"))
+
+            print(response.json())
+            ```
+
+        === "Response in JSON"
+            ```json
+            {
+                "id": "file-abc123"
+            }
+            ```
+        """
+        files = {"file": file}
+        response = cls.post_file(
+            resource_name="v1/files",
+            files=files,
+            timeout=DEFAULT_TIMEOUT,
+        )
+        return UploadFileResponse.parse_obj(response)
+
+    @classmethod
+    def get(cls, file_id: str) -> GetFileResponse:
+        """
+        Get file metadata, including filename and size.
+
+        Args:
+            file_id (`str`):
+                ID of the file
+
+        Returns:
+            GetFileResponse: an object that contains the ID, filename, and size of the requested file
+
+        === "Getting metadata about file in Python"
+            ```python
+            from llmengine import File
+
+            response = File.get(
+                file_id="file-abc123",
+            )
+
+            print(response.json())
+            ```
+
+        === "Response in JSON"
+            ```json
+            {
+                "id": "file-abc123",
+                "filename": "training_dataset.csv",
+                "size": 100
+            }
+            ```
+        """
+        response = cls._get(f"v1/files/{file_id}", timeout=DEFAULT_TIMEOUT)
+        return GetFileResponse.parse_obj(response)
+
+    @classmethod
+    def list(cls) -> ListFilesResponse:
+        """
+        List metadata about all files, e.g. their filenames and sizes.
+
+        Returns:
+            ListFilesResponse: an object that contains a list of all files and their filenames and sizes
+
+        === "Listing files in Python"
+            ```python
+            from llmengine import File
+
+            response = File.list()
+            print(response.json())
+            ```
+
+        === "Response in JSON"
+            ```json
+            {
+                "files": [
+                    {
+                        "id": "file-abc123",
+                        "filename": "training_dataset.csv",
+                        "size": 100
+                    },
+                    {
+                        "id": "file-def456",
+                        "filename": "validation_dataset.csv",
+                        "size": 50
+                    }
+                ]
+            }
+            ```
+        """
+        response = cls._get("v1/files", timeout=30)
+        return ListFilesResponse.parse_obj(response)
+
+    @classmethod
+    def delete(cls, file_id: str) -> DeleteFileResponse:
+        """
+        Deletes a file.
+
+        Args:
+            file_id (`str`):
+                ID of the file
+
+        Returns:
+            DeleteFileResponse: an object that contains whether the deletion was successful
+
+        === "Deleting file in Python"
+            ```python
+            from llmengine import File
+
+            response = File.delete(file_id="file-abc123")
+            print(response.json())
+            ```
+
+        === "Response in JSON"
+            ```json
+            {
+                "deleted": true
+            }
+            ```
+        """
+        response = cls._delete(
+            f"v1/files/{file_id}",
+            timeout=DEFAULT_TIMEOUT,
+        )
+        return DeleteFileResponse.parse_obj(response)
+
+    @classmethod
+    def download(cls, file_id: str) -> GetFileContentResponse:
+        """
+        Get contents of a file, as a string. (If the uploaded file is in binary, a string encoding will be returned.)
+
+        Args:
+            file_id (`str`):
+                ID of the file
+
+        Returns:
+            GetFileContentResponse: an object that contains the ID and content of the file
+
+        === "Getting file content in Python"
+            ```python
+            from llmengine import File
+
+            response = File.download(file_id="file-abc123")
+            print(response.json())
+            ```
+
+        === "Response in JSON"
+            ```json
+            {
+                "id": "file-abc123",
+                "content": "Hello world!"
+            }
+            ```
+        """
+        response = cls._get(
+            f"v1/files/{file_id}/content",
+            timeout=DEFAULT_TIMEOUT,
+        )
+        return GetFileContentResponse.parse_obj(response)
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "scale-llm-engine"
-version = "0.0.0.beta7"
+version = "0.0.0.beta8"
 description = "Scale LLM Engine Python client"
 license = "Apache-2.0"
 authors = ["Phil Chen <phil.chen@scale.com>"]

diff --git a/clients/python/setup.py b/clients/python/setup.py
@@ -3,6 +3,6 @@
 setup(
     name="scale-llm-engine",
     python_requires=">=3.7",
-    version="0.0.0.beta7",
+    version="0.0.0.beta8",
     packages=find_packages(),
 )
diff --git a/docs/api/data_types.md b/docs/api/data_types.md
@@ -43,3 +43,13 @@
 ::: llmengine.ListLLMEndpointsResponse
 
 ::: llmengine.DeleteLLMEndpointResponse
+
+::: llmengine.UploadFileResponse
+
+::: llmengine.GetFileResponse
+
+::: llmengine.GetFileContentResponse
+
+::: llmengine.ListFilesResponse
+
+::: llmengine.DeleteFileResponse
diff --git a/docs/api/python_client.md b/docs/api/python_client.md
@@ -21,3 +21,12 @@
             - get
             - list
             - delete
+
+::: llmengine.File
+    selection:
+        members:
+            - upload
+            - get
+            - download
+            - list
+            - delete
diff --git a/server/llm_engine_server/api/batch_jobs_v1.py b/server/llm_engine_server/api/batch_jobs_v1.py
@@ -125,7 +125,6 @@ async def create_docker_image_batch_job(
     auth: User = Depends(verify_authentication),
     external_interfaces: ExternalInterfaces = Depends(get_external_interfaces),
 ) -> CreateDockerImageBatchJobV1Response:
-
     add_trace_resource_name("batch_jobs_di_create")
     logger.info(f"POST /docker-image-batch-jobs with {request} for {auth}")
     try:

diff --git a/server/llm_engine_server/domain/use_cases/batch_job_use_cases.py b/server/llm_engine_server/domain/use_cases/batch_job_use_cases.py
@@ -175,7 +175,6 @@ def __init__(
     async def execute(
         self, user: User, request: CreateDockerImageBatchJobV1Request
     ) -> CreateDockerImageBatchJobV1Response:
-
         if request.docker_image_batch_job_bundle_id is not None:
             batch_bundle = await self.docker_image_batch_job_bundle_repository.get_docker_image_batch_job_bundle(
                 request.docker_image_batch_job_bundle_id

diff --git a/server/llm_engine_server/infra/gateways/live_streaming_model_endpoint_inference_gateway.py b/server/llm_engine_server/infra/gateways/live_streaming_model_endpoint_inference_gateway.py
@@ -72,7 +72,6 @@ def __init__(self, use_asyncio: bool):
     async def make_single_request(self, request_url: str, payload_json: Dict[str, Any]):
         errored = False
         if self.use_asyncio:
-
             async with aiohttp.ClientSession(json_serialize=_serialize_json) as aioclient:
                 aio_resp = await aioclient.post(
                     request_url,

diff --git a/server/tests/unit/infra/repositories/test_db_docker_image_batch_job_bundle_repository.py b/server/tests/unit/infra/repositories/test_db_docker_image_batch_job_bundle_repository.py
@@ -106,7 +106,6 @@ async def test_list_docker_image_batch_job_bundles(
     test_api_key: str,
     test_api_key_team: str,
 ):
-
     orm_docker_image_batch_job_bundle_1_v2.created_by = test_api_key_team
     orm_docker_image_batch_job_bundle_1_v2.owner = test_api_key_team
     docker_image_batch_job_bundle_1_v2.created_by = test_api_key_team