Skip to content
14 changes: 13 additions & 1 deletion clients/python/llmengine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.0.0.beta7"
__version__ = "0.0.0.beta8"

from typing import Sequence

Expand All @@ -25,12 +25,18 @@
CompletionSyncResponse,
CreateFineTuneRequest,
CreateFineTuneResponse,
DeleteFileResponse,
DeleteLLMEndpointResponse,
GetFileContentResponse,
GetFileResponse,
GetFineTuneResponse,
GetLLMEndpointResponse,
ListFilesResponse,
ListFineTunesResponse,
ListLLMEndpointsResponse,
UploadFileResponse,
)
from llmengine.file import File
from llmengine.fine_tuning import FineTune
from llmengine.model import Model

Expand All @@ -43,11 +49,17 @@
"CompletionSyncResponse",
"CreateFineTuneRequest",
"CreateFineTuneResponse",
"DeleteFileResponse",
"DeleteLLMEndpointResponse",
"GetFileContentResponse",
"File",
"FineTune",
"GetFileResponse",
"GetFineTuneResponse",
"GetLLMEndpointResponse",
"ListFilesResponse",
"ListFineTunesResponse",
"ListLLMEndpointsResponse",
"Model",
"UploadFileResponse",
)
17 changes: 17 additions & 0 deletions clients/python/llmengine/api_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import os
from functools import wraps
from io import BufferedReader
from typing import Any, AsyncIterable, Dict, Iterator, Optional

import requests
Expand Down Expand Up @@ -138,6 +139,22 @@ def post_stream(
except json.JSONDecodeError:
raise ValueError(f"Invalid JSON payload: {payload_data}")

@classmethod
def post_file(
    cls, resource_name: str, files: Dict[str, BufferedReader], timeout: int
) -> Dict[str, Any]:
    """
    POST the given open file(s) to an LLM Engine resource and return the decoded JSON reply.

    Args:
        resource_name: Path of the resource, joined onto `LLM_ENGINE_BASE_PATH`.
        files: Mapping of form-field name to open file object, handed to
            `requests.post(files=...)`.
        timeout: Request timeout passed straight through to `requests.post`.

    Returns:
        The response body parsed as JSON.

    Raises:
        The exception built by `parse_error` whenever the status code is not 200.
    """
    # The API key is resolved first, exactly as before, so a missing key fails
    # before any request is attempted.
    request_headers = {"x-api-key": get_api_key()}
    endpoint_url = os.path.join(LLM_ENGINE_BASE_PATH, resource_name)
    reply = requests.post(
        endpoint_url,
        files=files,
        timeout=timeout,
        headers=request_headers,
    )
    if reply.status_code == 200:
        return reply.json()
    raise parse_error(reply.status_code, reply.content)

@classmethod
async def apost_sync(
cls, resource_name: str, data: Dict[str, Any], timeout: int
Expand Down
44 changes: 44 additions & 0 deletions clients/python/llmengine/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,3 +453,47 @@ class GetFineTuneEventsResponse(BaseModel):
"""

events: List[LLMFineTuneEvent] = Field(..., description="List of fine-tuning events.")


class UploadFileResponse(BaseModel):
    """Response object for uploading a file."""

    # `File.upload` builds this model with `parse_obj` from the payload returned
    # by posting a file to `v1/files`.

    # Required field: `...` means no default is supplied.
    id: str = Field(..., description="ID of the uploaded file.")
    """ID of the uploaded file."""


class GetFileResponse(BaseModel):
    """Response object for retrieving a file."""

    # `File.get` builds this model with `parse_obj` from the `v1/files/{file_id}`
    # payload; it is also the element type of `ListFilesResponse.files`.

    id: str = Field(..., description="ID of the requested file.")
    """ID of the requested file."""

    # NOTE(review): a PR reviewer left a nit on this field asking for blank-line
    # spacing between fields "to let things breathe a bit".
    filename: str = Field(..., description="File name.")
    """File name."""

    size: int = Field(..., description="Length of the file, in characters.")
    """Length of the file, in characters."""


class ListFilesResponse(BaseModel):
    """Response object for listing files."""

    # `File.list` builds this model with `parse_obj` from the `v1/files` payload.

    # Each entry carries the same fields as a single-file lookup (id, filename, size).
    files: List[GetFileResponse] = Field(..., description="List of file IDs, names, and sizes.")
    """List of file IDs, names, and sizes."""


class DeleteFileResponse(BaseModel):
    """Response object for deleting a file."""

    # `File.delete` builds this model with `parse_obj` from the payload returned
    # for a delete on `v1/files/{file_id}`.

    deleted: bool = Field(..., description="Whether deletion was successful.")
    """Whether deletion was successful."""


class GetFileContentResponse(BaseModel):
    """Response object for retrieving a file's content."""

    # `File.download` builds this model with `parse_obj` from the
    # `v1/files/{file_id}/content` payload.

    id: str = Field(..., description="ID of the requested file.")
    """ID of the requested file."""

    # Always typed as `str`; the `File.download` docstring says binary uploads
    # come back as a string encoding.
    content: str = Field(..., description="File content.")
    """File content."""
193 changes: 193 additions & 0 deletions clients/python/llmengine/file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
from io import BufferedReader

from llmengine.api_engine import DEFAULT_TIMEOUT, APIEngine
from llmengine.data_types import (
DeleteFileResponse,
GetFileContentResponse,
GetFileResponse,
ListFilesResponse,
UploadFileResponse,
)


class File(APIEngine):
    """
    File API. This API is used to upload private files to LLM engine so that fine-tunes can access them for training and validation data.

    Functions are provided to upload, get, list, and delete files, as well as to get the contents of a file.
    """

    @classmethod
    def upload(cls, file: BufferedReader) -> UploadFileResponse:
        """
        Uploads a file to LLM engine.

        Args:
            file (`BufferedReader`):
                A file opened in binary mode, e.g. with open(file_path, "rb")

        Returns:
            UploadFileResponse: an object that contains the ID of the uploaded file

        === "Uploading file in Python"
            ```python
            from llmengine import File

            with open("training_dataset.csv", "rb") as f:
                response = File.upload(f)

            print(response.json())
            ```

        === "Response in JSON"
            ```json
            {
                "id": "file-abc123"
            }
            ```
        """
        # The form-field name "file" is the key under which the file is posted.
        files = {"file": file}
        response = cls.post_file(
            resource_name="v1/files",
            files=files,
            timeout=DEFAULT_TIMEOUT,
        )
        return UploadFileResponse.parse_obj(response)

    @classmethod
    def get(cls, file_id: str) -> GetFileResponse:
        """
        Get file metadata, including filename and size.

        Args:
            file_id (`str`):
                ID of the file

        Returns:
            GetFileResponse: an object that contains the ID, filename, and size of the requested file

        === "Getting metadata about file in Python"
            ```python
            from llmengine import File

            response = File.get(
                file_id="file-abc123",
            )

            print(response.json())
            ```

        === "Response in JSON"
            ```json
            {
                "id": "file-abc123",
                "filename": "training_dataset.csv",
                "size": 100
            }
            ```
        """
        response = cls._get(f"v1/files/{file_id}", timeout=DEFAULT_TIMEOUT)
        return GetFileResponse.parse_obj(response)

    @classmethod
    def list(cls) -> ListFilesResponse:
        """
        List metadata about all files, e.g. their filenames and sizes.

        Returns:
            ListFilesResponse: an object that contains a list of all files and their filenames and sizes

        === "Listing files in Python"
            ```python
            from llmengine import File

            response = File.list()
            print(response.json())
            ```

        === "Response in JSON"
            ```json
            {
                "files": [
                    {
                        "id": "file-abc123",
                        "filename": "training_dataset.csv",
                        "size": 100
                    },
                    {
                        "id": "file-def456",
                        "filename": "validation_dataset.csv",
                        "size": 50
                    }
                ]
            }
            ```
        """
        # NOTE(review): this is the only method here with a literal 30-second
        # timeout instead of DEFAULT_TIMEOUT — confirm that is intentional.
        response = cls._get("v1/files", timeout=30)
        return ListFilesResponse.parse_obj(response)

    @classmethod
    def delete(cls, file_id: str) -> DeleteFileResponse:
        """
        Deletes a file.

        Args:
            file_id (`str`):
                ID of the file

        Returns:
            DeleteFileResponse: an object that contains whether the deletion was successful

        === "Deleting file in Python"
            ```python
            from llmengine import File

            response = File.delete(file_id="file-abc123")
            print(response.json())
            ```

        === "Response in JSON"
            ```json
            {
                "deleted": true
            }
            ```
        """
        response = cls._delete(
            f"v1/files/{file_id}",
            timeout=DEFAULT_TIMEOUT,
        )
        return DeleteFileResponse.parse_obj(response)

    @classmethod
    def download(cls, file_id: str) -> GetFileContentResponse:
        """
        Get contents of a file, as a string. (If the uploaded file is in binary, a string encoding will be returned.)

        Args:
            file_id (`str`):
                ID of the file

        Returns:
            GetFileContentResponse: an object that contains the ID and content of the file

        === "Getting file content in Python"
            ```python
            from llmengine import File

            response = File.download(file_id="file-abc123")
            print(response.json())
            ```

        === "Response in JSON"
            ```json
            {
                "id": "file-abc123",
                "content": "Hello world!"
            }
            ```
        """
        response = cls._get(
            f"v1/files/{file_id}/content",
            timeout=DEFAULT_TIMEOUT,
        )
        return GetFileContentResponse.parse_obj(response)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually maybe it is fine to have this always be a string - we just need to document the expectations, e.g. if you uploaded text, then it'll ofc be that text; else if it's binary, we'll return it as a string subject to some encoding.

2 changes: 1 addition & 1 deletion clients/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "scale-llm-engine"
version = "0.0.0.beta7"
version = "0.0.0.beta8"
description = "Scale LLM Engine Python client"
license = "Apache-2.0"
authors = ["Phil Chen <phil.chen@scale.com>"]
Expand Down
2 changes: 1 addition & 1 deletion clients/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
setup(
name="scale-llm-engine",
python_requires=">=3.7",
version="0.0.0.beta7",
version="0.0.0.beta8",
packages=find_packages(),
)
10 changes: 10 additions & 0 deletions docs/api/data_types.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,13 @@
::: llmengine.ListLLMEndpointsResponse

::: llmengine.DeleteLLMEndpointResponse

::: llmengine.UploadFileResponse

::: llmengine.GetFileResponse

::: llmengine.GetFileContentResponse

::: llmengine.ListFilesResponse

::: llmengine.DeleteFileResponse
9 changes: 9 additions & 0 deletions docs/api/python_client.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,12 @@
- get
- list
- delete

::: llmengine.File
selection:
members:
- upload
- get
- download
- list
- delete
1 change: 0 additions & 1 deletion server/llm_engine_server/api/batch_jobs_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ async def create_docker_image_batch_job(
auth: User = Depends(verify_authentication),
external_interfaces: ExternalInterfaces = Depends(get_external_interfaces),
) -> CreateDockerImageBatchJobV1Response:

add_trace_resource_name("batch_jobs_di_create")
logger.info(f"POST /docker-image-batch-jobs with {request} for {auth}")
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@ def __init__(
async def execute(
self, user: User, request: CreateDockerImageBatchJobV1Request
) -> CreateDockerImageBatchJobV1Response:

if request.docker_image_batch_job_bundle_id is not None:
batch_bundle = await self.docker_image_batch_job_bundle_repository.get_docker_image_batch_job_bundle(
request.docker_image_batch_job_bundle_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def __init__(self, use_asyncio: bool):
async def make_single_request(self, request_url: str, payload_json: Dict[str, Any]):
errored = False
if self.use_asyncio:

async with aiohttp.ClientSession(json_serialize=_serialize_json) as aioclient:
aio_resp = await aioclient.post(
request_url,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ async def test_list_docker_image_batch_job_bundles(
test_api_key: str,
test_api_key_team: str,
):

orm_docker_image_batch_job_bundle_1_v2.created_by = test_api_key_team
orm_docker_image_batch_job_bundle_1_v2.owner = test_api_key_team
docker_image_batch_job_bundle_1_v2.created_by = test_api_key_team
Expand Down