main.py: Fix /llm end-point
mdbecker committed Jun 20, 2023
1 parent 8d6077f commit 7507bd6
Showing 6 changed files with 182 additions and 4 deletions.
3 changes: 3 additions & 0 deletions Dockerfile
@@ -16,6 +16,9 @@ RUN pip install --no-cache-dir --upgrade pip && \
# Copy the application code to the working directory
COPY gull_api/ ./gull_api

# Copy the mock LLM cli app to the working directory
COPY echo_args.sh ./

# Expose the port the app runs on
EXPOSE 8000

23 changes: 23 additions & 0 deletions README.md
@@ -29,6 +29,18 @@ GULL-API is a web application backend that can be used to run Large Language Models

The API will be available at `http://localhost:8000`.

### Docker Test Mode

To build and run the Docker container in test mode, use the following commands:

```bash
docker build -t gull-api .
docker run -v "$(pwd)/example_cli.json:/app/cli.json" -p 8000:8000 gull-api
```

In test mode, the mounted `example_cli.json` points the API at the bundled `echo_args.sh` script instead of a real LLM. The script simply echoes back the arguments it receives, which is useful for testing the API locally without downloading a model.

### Local Installation

1. Clone the repository:
@@ -82,6 +94,17 @@ Content-Type: application/json
}
```

### Example Requests

```bash
curl -X POST "http://localhost:8000/llm" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"Instruct mode\":false, \"Maximum length\":256, \"Prompt\":\"Hello, world\", \"Stop sequences\":\"Goodbye, world\", \"Temperature\":0.7, \"Top P\":0.95}"
curl -X GET "http://localhost:8000/api" -H "accept: application/json" | python -mjson.tool
```
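
The same calls can be made from Python. The snippet below is a minimal sketch (not part of the original README) that assumes the third-party `requests` package is installed and the API is running locally; in test mode the `/llm` response simply wraps the command line echoed by `echo_args.sh`.

```python
import requests

# Query the parameter schema exposed by the API.
api_spec = requests.get("http://localhost:8000/api").json()
print(api_spec)

# Submit a generation request; field names must match the CLI JSON entries.
payload = {
    "Instruct mode": False,
    "Maximum length": 256,
    "Prompt": "Hello, world",
    "Stop sequences": "Goodbye, world",
    "Temperature": 0.7,
    "Top P": 0.95,
}
response = requests.post("http://localhost:8000/llm", json=payload)

# In test mode the body looks roughly like
# {"response": "./echo_args.sh ... --prompt \"Hello, world\" ..."}.
print(response.json())
```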

### Example CLI JSON

An example CLI JSON file, `example_cli.json`, is included in the repository. It illustrates the expected structure for defining the command-line arguments that are passed to the LLM.
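
As a rough illustration only (not the actual logic in `gull_api/main.py`, which may differ), each entry's `flag`, `default`, and `type` can be read as describing one command-line argument; the hypothetical sketch below builds an argument list from the file:

```python
import json

# Hypothetical sketch: turn CLI JSON entries into an argument list.
# Boolean flags are emitted without a value; other parameters emit flag + value.
with open("example_cli.json") as f:
    entries = next(iter(json.load(f).values()))  # e.g. the "LLaMA-7B" list

values = {"Prompt": "Hello, world", "Maximum length": 256}  # example user input
command = []
for entry in entries:
    value = values.get(entry["name"], entry.get("default"))
    if value is None:
        continue
    if entry.get("flag") is None:       # e.g. the hidden "Executable" entry
        command.append(str(value))
    elif entry.get("type") == "bool":
        if value:
            command.append(entry["flag"])
    else:
        command.extend([entry["flag"], str(value)])

print(command)
# Something like: ['./echo_args.sh', '-m', 'models/7B/ggml-model.bin', '-n', '256', '--prompt', 'Hello, world', ...]
```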

## License

See LICENSE
17 changes: 17 additions & 0 deletions echo_args.sh
@@ -0,0 +1,17 @@
#!/bin/bash

# First, print the name of the script itself
echo -n "$0 "

# Now print all arguments as they were received
for arg in "$@"
do
    if [[ $arg =~ [[:space:]] ]]; then
        echo -n "\"$arg\" "
    else
        echo -n "$arg "
    fi
done

# Print a newline at the end for neat formatting
echo ""
73 changes: 73 additions & 0 deletions example_cli.json
@@ -0,0 +1,73 @@
{
  "LLaMA-7B": [
    {
      "name": "Executable",
      "hidden": true,
      "default": "./echo_args.sh"
    },
    {
      "default": "models/7B/ggml-model.bin",
      "description": "Specify the path to the LLaMA model file (e.g., models/7B/ggml-model.bin).",
      "hidden": true,
      "name": "Model",
      "nargs": 1,
      "required": true,
      "flag": "-m",
      "type": "str"
    },
    {
      "default": false,
      "description": "Run the program in instruction mode, which is particularly useful when working with Alpaca models.",
      "name": "Instruct mode",
      "flag": "-ins",
      "type": "bool"
    },
    {
      "default": 128,
      "description": "Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text.",
      "max": 2048,
      "min": 1,
      "name": "Maximum length",
      "nargs": 1,
      "flag": "-n",
      "step": 10,
      "type": "int"
    },
    {
      "description": "Provide a prompt",
      "flag": "--prompt",
      "name": "Prompt",
      "nargs": 1,
      "required": true,
      "type": "str"
    },
    {
      "description": "Specify one or multiple reverse prompts to pause text generation and switch to interactive mode.",
      "name": "Stop sequences",
      "nargs": 1,
      "required": false,
      "flag": "-r",
      "type": "str"
    },
    {
      "default": 0.8,
      "description": "Adjust the randomness of the generated text.",
      "flag": "--temp",
      "max": 1,
      "min": 0,
      "name": "Temperature",
      "nargs": 1,
      "type": "float"
    },
    {
      "default": 0.9,
      "description": "Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P.",
      "flag": "--top_p",
      "max": 1,
      "min": 0,
      "name": "Top P",
      "nargs": 1,
      "type": "float"
    }
  ]
}
5 changes: 3 additions & 2 deletions gull_api/main.py
@@ -62,7 +62,7 @@ def process_request(request: BaseModel, cli_json: Dict[str, Any]) -> Dict[str, Any]:

    if result.returncode != 0:
        raise HTTPException(status_code=400, detail=result.stderr.decode("utf-8"))
-   return json.loads(result.stdout)
+   return {'response': result.stdout.decode("utf-8")}  # decode stdout to a string

def convert_cli_json_to_api_format(cli_json: Dict[str, Any]) -> Dict[str, Any]:
    key = get_single_key(cli_json)
@@ -95,4 +95,5 @@ def get_api(cli_json=Depends(load_cli_json)):
def post_llm(request: Dict[str, Any], cli_json=Depends(load_cli_json)):
    LLMRequest = create_llm_request_model(cli_json)
    validated_request = LLMRequest(**request)
-   return executor.submit(process_request, validated_request, cli_json)
+   future = executor.submit(process_request, validated_request, cli_json)
+   return future.result()  # block until task is complete and return the result
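
The reason for this change, sketched below with a standalone example (not project code): `executor.submit()` returns a `concurrent.futures.Future`, which the endpoint cannot hand back as a JSON response; calling `.result()` blocks until the worker finishes and yields the actual dictionary.

```python
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=1)

# Submitting work returns a Future, not the value itself.
future = executor.submit(lambda: {"response": "OK"})
print(type(future))      # <class 'concurrent.futures.Future'>

# .result() blocks until the task completes and returns the real payload.
print(future.result())   # {'response': 'OK'}
```
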
65 changes: 63 additions & 2 deletions tests/test_api.py
@@ -203,7 +203,7 @@ def __init__(self, stdout='', stderr='', returncode=0):
self.stderr = stderr.encode('utf-8')
self.returncode = returncode

-   mock_run.return_value = MockCompletedProcess(stdout='{"response": "OK"}')
+   mock_run.return_value = MockCompletedProcess(stdout='OK')

sample_request = BaseModel()
result = process_request(sample_request, cli_json)
@@ -320,4 +320,65 @@ def test_post_llm(mock_executor):
    mock_executor.submit.assert_called_once_with(main.process_request, validated_request, mock_cli_json)

    # Assert the result is the value returned by the Future from executor.submit
-   assert result == mock_executor.submit.return_value
+   assert result == mock_executor.submit.return_value.result.return_value

def test_get_api_with_executable():
    expected_api_json = {
        "LLaMA-7B": [
            {
                "name": "Instruct mode",
                "type": "bool",
                "default": False,
                "description": "Run the program in instruction mode, which is particularly useful when working with Alpaca models."
            },
            {
                "name": "Maximum length",
                "type": "int",
                "default": 128,
                "description": "Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text.",
                "min": 1,
                "max": 2048,
                "step": 10
            },
            {
                "name": "Prompt",
                "type": "str",
                "required": True,
                "description": "Provide a prompt"
            },
            {
                "name": "Stop sequences",
                "type": "str",
                "description": "Specify one or multiple reverse prompts to pause text generation and switch to interactive mode."
            },
            {
                "name": "Temperature",
                "type": "float",
                "default": 0.8,
                "description": "Adjust the randomness of the generated text.",
                "min": 0,
                "max": 1,
                "step": 0.01
            },
            {
                "name": "Top P",
                "type": "float",
                "default": 0.9,
                "description": "Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P.",
                "min": 0,
                "max": 1,
                "step": 0.01
            }
        ]
    }

    cli_json_with_executable = cli_json.copy()
    cli_json_with_executable['LLaMA-7B'] = [
        {
            "default": "./echo_args.sh",
            "hidden": True,
            "name": "Executable",
        },
    ] + cli_json_with_executable['LLaMA-7B']
    api_json = get_api(cli_json_with_executable)
    assert api_json == expected_api_json
