Commit

update for faq benchmark (#78)
* update for faq benchmark

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
XinyaoWa and pre-commit-ci[bot] authored Aug 26, 2024
1 parent 8c7eb1b commit d754a84
Showing 5 changed files with 31 additions and 18 deletions.
2 changes: 1 addition & 1 deletion evals/benchmark/benchmark.py
@@ -21,7 +21,7 @@
     },
     "codegen": {"llm": "/v1/chat/completions", "llm_serving": "/v1/chat/completions", "e2e": "/v1/codegen"},
     "codetrans": {"llm": "/v1/chat/completions", "llm_serving": "/v1/chat/completions", "e2e": "/v1/codetrans"},
-    "faqgen": {"llm": "/v1/chat/completions", "llm_serving": "/v1/chat/completions", "e2e": "/v1/faqgen"},
+    "faqgen": {"llm": "/generate", "llm_serving": "/v1/faqgen", "e2e": "/v1/faqgen"},
     "audioqna": {
         "asr": "/v1/audio/transcriptions",
         "llm": "/v1/chat/completions",
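With this remapping, the faqgen `llm` stage is benchmarked against the model server's raw `/generate` route instead of the OpenAI-style `/v1/chat/completions`. For reference, a minimal sketch of a TGI-style `/generate` request; the host, port, and timeout are illustrative assumptions rather than values from this commit:

    import requests

    # Hypothetical TGI endpoint; substitute the address of your own model server.
    TGI_URL = "http://localhost:8080/generate"

    # Payload shape accepted by text-generation-inference's /generate route
    # (the same shape appears in the commented-out variant in faqgenfixed.py below).
    payload = {
        "inputs": "What is the revenue of Nike in last 10 years before 2023? Give me detail",
        "parameters": {"max_new_tokens": 128, "do_sample": True},
    }

    resp = requests.post(TGI_URL, json=payload, timeout=60)
    print(resp.json())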
12 changes: 6 additions & 6 deletions evals/benchmark/benchmark.yaml
@@ -94,8 +94,8 @@ test_cases:

   faqgen:
     llm:
-      run_test: true
-      service_name: "llm-svc" # Replace with your service name
+      run_test: false
+      service_name: "faq-tgi-svc" # Replace with your service name
       parameters:
         model_name: "meta-llama/Meta-Llama-3-8B-Instruct"
         max_new_tokens: 128
@@ -104,12 +104,12 @@ test_cases:
         top_p: 0.95
         repetition_penalty: 1.03
         streaming: true
-    llmserve:
-      run_test: true
-      service_name: "llm-serving-svc" # Replace with your service name
+    llm_serving:
+      run_test: false
+      service_name: "faq-micro-svc" # Replace with your service name
     e2e:
       run_test: true
-      service_name: "faqgen-backend-server-svc" # Replace with your service name
+      service_name: "faq-mega-server-svc" # Replace with your service name

   audioqna:
     asr:
10 changes: 10 additions & 0 deletions evals/benchmark/stresscli/dataset/faqgen.json

Large diffs are not rendered by default.
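The new dataset file is not rendered here. Judging from the updated locust script below, it appears to be a flat JSON object whose keys identify prompts (the script reads the entry under the key "50"). A minimal sketch of that assumed shape and of the request body the script builds from it; the keys and text are illustrative, not the actual dataset contents:

    # Assumed shape of evals/benchmark/stresscli/dataset/faqgen.json
    # (illustrative only; the real prompts are not shown on this commit page).
    qdict = {
        "50": "A short source passage from which FAQs should be generated...",
        "128": "A longer source passage...",
    }

    # getReqData() in faqgenbench.py then builds the request body like this:
    payload = {"messages": qdict["50"], "max_tokens": 128}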

14 changes: 6 additions & 8 deletions evals/benchmark/stresscli/locust/faqgenbench.py
@@ -9,25 +9,23 @@
 import tokenresponse as token

 cwd = os.path.dirname(__file__)
-filename = f"{cwd}/../dataset/chatqna.json"
-qlist = []
+filename = f"{cwd}/../dataset/faqgen.json"
+qdict = {}
 try:
     with open(filename) as qfile:
-        qlist = json.load(qfile)
+        qdict = json.load(qfile)
 except:
     logging.error(f"Question File open failed: {filename}")
     exit()


 def getUrl():
-    return "/v1/chatqna"
+    return "/v1/faqgen"


 def getReqData():
-    qid = random.randint(1, 189)
-    logging.debug(f"Selected question: {qlist[qid]['qText']}")
-
-    return {"messages": qlist[qid]["qText"], "max_tokens": 128}
+    prompt = "50"
+    return {"messages": qdict[prompt], "max_tokens": 128}


 def respStatics(environment, resp):
11 changes: 8 additions & 3 deletions evals/benchmark/stresscli/locust/faqgenfixed.py
@@ -5,15 +5,20 @@


 def getUrl():
-    return "/v1/chatqna"
+    return "/v1/faqgen"


 def getReqData():
+    # return {
+    #     "inputs": "What is the revenue of Nike in last 10 years before 2023? Give me detail",
+    #     "parameters": {"max_new_tokens": 128, "do_sample": True},
+    # }
+    # return {"query": "What is the revenue of Nike in last 10 years before 2023? Give me detail", "max_tokens": 128}
     return {"messages": "What is the revenue of Nike in last 10 years before 2023? Give me detail", "max_tokens": 128}


-def respStatics(environment, resp):
-    return token.respStatics(environment, resp)
+def respStatics(environment, reqData, respData):
+    return token.respStatics(environment, reqData, respData)


 def staticsOutput(environment, reqlist):
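Outside of locust, a request equivalent to the one the fixed-prompt script now issues can be reproduced roughly as follows; the path and body come from the diff above, while the megaservice address and timeout are assumptions:

    import requests

    # Hypothetical FaqGen megaservice address; replace with your deployment.
    BASE_URL = "http://localhost:8888"

    payload = {
        "messages": "What is the revenue of Nike in last 10 years before 2023? Give me detail",
        "max_tokens": 128,
    }

    # getUrl() now returns "/v1/faqgen", so the benchmark posts to this route.
    resp = requests.post(f"{BASE_URL}/v1/faqgen", json=payload, timeout=120)
    print(resp.status_code, resp.text[:200])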
