main.py: Fix /llm end-point
mdbecker committed Jun 20, 2023
1 parent 8d6077f commit 7507bd6
Showing 6 changed files with 182 additions and 4 deletions.
3 changes: 3 additions & 0 deletions Dockerfile
@@ -16,6 +16,9 @@ RUN pip install --no-cache-dir --upgrade pip && \
# Copy the application code to the working directory
COPY gull_api/ ./gull_api

# Copy the mock LLM cli app to the working directory
COPY echo_args.sh ./

# Expose the port the app runs on
EXPOSE 8000

23 changes: 23 additions & 0 deletions README.md
@@ -29,6 +29,18 @@ GULL-API is a web application backend that can be used to run Large Language Models

The API will be available at `http://localhost:8000`.

### Docker Test Mode

To build and run the Docker container in test mode, use the following commands:

```bash
docker build -t gull-api .
docker run -v "$(pwd)/example_cli.json:/app/cli.json" -p 8000:8000 gull-api
```

In test mode, the mounted `example_cli.json` points the API at the bundled `echo_args.sh` script instead of a real LLM. The script simply echoes back the arguments it receives, which is useful for testing the API locally without downloading a model.

### Local Installation

1. Clone the repository:
@@ -82,6 +94,17 @@ Content-Type: application/json
}
```

### Example Requests

```bash
curl -X POST "http://localhost:8000/llm" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"Instruct mode\":false, \"Maximum length\":256, \"Prompt\":\"Hello, world\", \"Stop sequences\":\"Goodbye, world\", \"Temperature\":0.7, \"Top P\":0.95}"
curl -X GET "http://localhost:8000/api" -H "accept: application/json" | python -mjson.tool
```
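
The same calls can be made from Python. The snippet below is a minimal sketch (not part of the original README) that assumes the third-party `requests` package is installed and the API is running locally; in test mode the `/llm` response simply wraps the command line echoed by `echo_args.sh`.

```python
import requests

# Query the parameter schema exposed by the API.
api_spec = requests.get("http://localhost:8000/api").json()
print(api_spec)

# Submit a generation request; field names must match the CLI JSON entries.
payload = {
    "Instruct mode": False,
    "Maximum length": 256,
    "Prompt": "Hello, world",
    "Stop sequences": "Goodbye, world",
    "Temperature": 0.7,
    "Top P": 0.95,
}
response = requests.post("http://localhost:8000/llm", json=payload)

# In test mode the body looks roughly like
# {"response": "./echo_args.sh ... --prompt \"Hello, world\" ..."}.
print(response.json())
```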

### Example CLI JSON

An example CLI JSON file, `example_cli.json`, is included in the repository. It illustrates the expected structure for defining the command-line arguments that are passed to the LLM.
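
As a rough illustration only (not the actual logic in `gull_api/main.py`, which may differ), each entry's `flag`, `default`, and `type` can be read as describing one command-line argument; the hypothetical sketch below builds an argument list from the file:

```python
import json

# Hypothetical sketch: turn CLI JSON entries into an argument list.
# Boolean flags are emitted without a value; other parameters emit flag + value.
with open("example_cli.json") as f:
    entries = next(iter(json.load(f).values()))  # e.g. the "LLaMA-7B" list

values = {"Prompt": "Hello, world", "Maximum length": 256}  # example user input
command = []
for entry in entries:
    value = values.get(entry["name"], entry.get("default"))
    if value is None:
        continue
    if entry.get("flag") is None:       # e.g. the hidden "Executable" entry
        command.append(str(value))
    elif entry.get("type") == "bool":
        if value:
            command.append(entry["flag"])
    else:
        command.extend([entry["flag"], str(value)])

print(command)
# Something like: ['./echo_args.sh', '-m', 'models/7B/ggml-model.bin', '-n', '256', '--prompt', 'Hello, world', ...]
```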

## License

See LICENSE
17 changes: 17 additions & 0 deletions echo_args.sh
@@ -0,0 +1,17 @@
#!/bin/bash

# First, print the name of the script itself
echo -n "$0 "

# Now print all arguments as they were received
for arg in "$@"
do
    if [[ $arg =~ [[:space:]] ]]; then
        echo -n "\"$arg\" "
    else
        echo -n "$arg "
    fi
done

# Print a newline at the end for neat formatting
echo ""
73 changes: 73 additions & 0 deletions example_cli.json
@@ -0,0 +1,73 @@
{
  "LLaMA-7B": [
    {
      "name": "Executable",
      "hidden": true,
      "default": "./echo_args.sh"
    },
    {
      "default": "models/7B/ggml-model.bin",
      "description": "Specify the path to the LLaMA model file (e.g., models/7B/ggml-model.bin).",
      "hidden": true,
      "name": "Model",
      "nargs": 1,
      "required": true,
      "flag": "-m",
      "type": "str"
    },
    {
      "default": false,
      "description": "Run the program in instruction mode, which is particularly useful when working with Alpaca models.",
      "name": "Instruct mode",
      "flag": "-ins",
      "type": "bool"
    },
    {
      "default": 128,
      "description": "Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text.",
      "max": 2048,
      "min": 1,
      "name": "Maximum length",
      "nargs": 1,
      "flag": "-n",
      "step": 10,
      "type": "int"
    },
    {
      "description": "Provide a prompt",
      "flag": "--prompt",
      "name": "Prompt",
      "nargs": 1,
      "required": true,
      "type": "str"
    },
    {
      "description": "Specify one or multiple reverse prompts to pause text generation and switch to interactive mode.",
      "name": "Stop sequences",
      "nargs": 1,
      "required": false,
      "flag": "-r",
      "type": "str"
    },
    {
      "default": 0.8,
      "description": "Adjust the randomness of the generated text.",
      "flag": "--temp",
      "max": 1,
      "min": 0,
      "name": "Temperature",
      "nargs": 1,
      "type": "float"
    },
    {
      "default": 0.9,
      "description": "Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P.",
      "flag": "--top_p",
      "max": 1,
      "min": 0,
      "name": "Top P",
      "nargs": 1,
      "type": "float"
    }
  ]
}
5 changes: 3 additions & 2 deletions gull_api/main.py
@@ -62,7 +62,7 @@ def process_request(request: BaseModel, cli_json: Dict[str, Any]) -> Dict[str, Any]:

    if result.returncode != 0:
        raise HTTPException(status_code=400, detail=result.stderr.decode("utf-8"))
-   return json.loads(result.stdout)
+   return {'response': result.stdout.decode("utf-8")}  # decode stdout to a string

def convert_cli_json_to_api_format(cli_json: Dict[str, Any]) -> Dict[str, Any]:
    key = get_single_key(cli_json)
@@ -95,4 +95,5 @@ def get_api(cli_json=Depends(load_cli_json)):
def post_llm(request: Dict[str, Any], cli_json=Depends(load_cli_json)):
    LLMRequest = create_llm_request_model(cli_json)
    validated_request = LLMRequest(**request)
-   return executor.submit(process_request, validated_request, cli_json)
+   future = executor.submit(process_request, validated_request, cli_json)
+   return future.result()  # block until task is complete and return the result
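
The reason for this change, sketched below with a standalone example (not project code): `executor.submit()` returns a `concurrent.futures.Future`, which the endpoint cannot hand back as a JSON response; calling `.result()` blocks until the worker finishes and yields the actual dictionary.

```python
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=1)

# Submitting work returns a Future, not the value itself.
future = executor.submit(lambda: {"response": "OK"})
print(type(future))      # <class 'concurrent.futures.Future'>

# .result() blocks until the task completes and returns the real payload.
print(future.result())   # {'response': 'OK'}
```
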
65 changes: 63 additions & 2 deletions tests/test_api.py
@@ -203,7 +203,7 @@ def __init__(self, stdout='', stderr='', returncode=0):
self.stderr = stderr.encode('utf-8')
self.returncode = returncode

-   mock_run.return_value = MockCompletedProcess(stdout='{"response": "OK"}')
+   mock_run.return_value = MockCompletedProcess(stdout='OK')

sample_request = BaseModel()
result = process_request(sample_request, cli_json)
@@ -320,4 +320,65 @@ def test_post_llm(mock_executor):
    mock_executor.submit.assert_called_once_with(main.process_request, validated_request, mock_cli_json)

    # Assert the result is the value returned by the Future from executor.submit
-   assert result == mock_executor.submit.return_value
+   assert result == mock_executor.submit.return_value.result.return_value

def test_get_api_with_executable():
    expected_api_json = {
        "LLaMA-7B": [
            {
                "name": "Instruct mode",
                "type": "bool",
                "default": False,
                "description": "Run the program in instruction mode, which is particularly useful when working with Alpaca models."
            },
            {
                "name": "Maximum length",
                "type": "int",
                "default": 128,
                "description": "Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text.",
                "min": 1,
                "max": 2048,
                "step": 10
            },
            {
                "name": "Prompt",
                "type": "str",
                "required": True,
                "description": "Provide a prompt"
            },
            {
                "name": "Stop sequences",
                "type": "str",
                "description": "Specify one or multiple reverse prompts to pause text generation and switch to interactive mode."
            },
            {
                "name": "Temperature",
                "type": "float",
                "default": 0.8,
                "description": "Adjust the randomness of the generated text.",
                "min": 0,
                "max": 1,
                "step": 0.01
            },
            {
                "name": "Top P",
                "type": "float",
                "default": 0.9,
                "description": "Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P.",
                "min": 0,
                "max": 1,
                "step": 0.01
            }
        ]
    }

    cli_json_with_executable = cli_json.copy()
    cli_json_with_executable['LLaMA-7B'] = [
        {
            "default": "./echo_args.sh",
            "hidden": True,
            "name": "Executable",
        },
    ] + cli_json_with_executable['LLaMA-7B']
    api_json = get_api(cli_json_with_executable)
    assert api_json == expected_api_json
