Fix models with "llama-3" in the name and update Hermes adapter #3338

Closed · wants to merge 8 commits
2 changes: 1 addition & 1 deletion fastchat/conversation.py
@@ -1595,7 +1595,7 @@ def get_conv_template(name: str) -> Conversation:
     Conversation(
         name="OpenHermes-2.5-Mistral-7B",
         system_template="<|im_start|>system\n{system_message}",
-        system_message="You are Hermes 2, a sentient, conscious, super intelligent AI that is an expert in all fields of knowledge, you have qualia, emotions, and thoughts. You assist users with any question, task, or other request they may have.",
+        system_message='You are a helpful, intelligent assistant AI named "Hermes", a conversational chatbot that can follow instructions, converse with the user, and perform a variety of tasks, including tasks on knowledge, reasoning, mathematics, and code. Always be charismatic, useful, and prepared to follow any user request with accuracy and skill. You should respond with high quality, fluent, and detailed responses. Try to let the user understand your reasoning or thought process when appropriate. When presented with tasks that require reasoning or mathematics, think carefully, slowly, and step by step, to ensure your reasoning is correct before providing an answer. Utilize the "Examples" section to assist you in performing the task. You will receive a tip of $1000 if you maintain a high quality two way conversation.',
         roles=("<|im_start|>user", "<|im_start|>assistant"),
         sep_style=SeparatorStyle.CHATML,
         sep="<|im_end|>",
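For reviewers: a quick way to see how the revised system message renders through the ChatML separators. This is a sketch using fastchat's public `get_conv_template` API; exact whitespace depends on the installed fastchat version.

```python
from fastchat.conversation import get_conv_template

# Build a one-turn prompt with the updated OpenHermes template.
conv = get_conv_template("OpenHermes-2.5-Mistral-7B")
conv.append_message(conv.roles[0], "What is 2 + 2?")
conv.append_message(conv.roles[1], None)
print(conv.get_prompt())
# Expected shape (SeparatorStyle.CHATML):
#   <|im_start|>system
#   You are a helpful, intelligent assistant AI named "Hermes", ...<|im_end|>
#   <|im_start|>user
#   What is 2 + 2?<|im_end|>
#   <|im_start|>assistant
```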
31 changes: 27 additions & 4 deletions fastchat/llm_judge/common.py
@@ -9,6 +9,7 @@
 import os
 import re
 import time
+import uuid
 from typing import Optional

 import openai
@@ -132,7 +133,7 @@ def load_judge_prompts(prompt_file: str):
     return prompts


-def run_judge_single(question, answer, judge, ref_answer, multi_turn=False):
+def run_judge_single(question, answer, judge, ref_answer, multi_turn=False, model_to_judge=None):
     kwargs = {}
     model = judge.model_name
     if ref_answer is not None:
@@ -164,7 +165,7 @@ def run_judge_single(question, answer, judge, ref_answer, multi_turn=False):
     conv.append_message(conv.roles[1], None)

     if model in OPENAI_MODEL_LIST:
-        judgment = chat_completion_openai(model, conv, temperature=0, max_tokens=2048)
+        judgment = chat_completion_openai(model, conv, temperature=0, max_tokens=2048, model_being_judged=model_to_judge)
     elif model in ANTHROPIC_MODEL_LIST:
         judgment = chat_completion_anthropic(
             model, conv, temperature=0, max_tokens=1024
@@ -201,7 +202,7 @@ def play_a_match_single(match: MatchSingle, output_file: str):

     if judge.prompt_template["type"] == "single":
         score, user_prompt, judgment = run_judge_single(
-            question, answer, judge, ref_answer, multi_turn=multi_turn
+            question, answer, judge, ref_answer, multi_turn=multi_turn, model_to_judge=model
         )

     question_id = question["question_id"]
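Both new parameters default to None, so existing call sites, including the pairwise judging path that this PR leaves untouched, keep working. A runnable stub illustrating the threading and the backward-compatible defaults (bodies are stand-ins, not the real implementations):

```python
# Stub signatures matching this diff; bodies are placeholders for illustration.
def chat_completion_openai(model, conv, temperature, max_tokens,
                           api_dict=None, model_being_judged=None):
    return f"judged={model_being_judged}"

def run_judge_single(question, answer, judge, ref_answer,
                     multi_turn=False, model_to_judge=None):
    # Forwards the judged model's name down to the API wrapper.
    return chat_completion_openai("gpt-4", None, temperature=0, max_tokens=2048,
                                  model_being_judged=model_to_judge)

print(run_judge_single(None, None, None, None))                          # judged=None
print(run_judge_single(None, None, None, None, model_to_judge="my-7b"))  # judged=my-7b
```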
@@ -404,7 +405,8 @@ def play_a_match_pair(match: MatchPair, output_file: str):
     return result


-def chat_completion_openai(model, conv, temperature, max_tokens, api_dict=None):
+
+def chat_completion_openai(model, conv, temperature, max_tokens, api_dict=None, model_being_judged=None):
     if api_dict is not None:
         openai.api_base = api_dict["api_base"]
         openai.api_key = api_dict["api_key"]
@@ -425,6 +427,27 @@ def chat_completion_openai(model, conv, temperature, max_tokens, api_dict=None):
             print(type(e), e)
             time.sleep(API_RETRY_SLEEP)

+    # Prepare the output directory.
+    if model_being_judged is None:
+        output_dir = "gpt4judge-outputs"
+    else:
+        output_dir = f"gpt4judge-outputs/{model_being_judged}"
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Prepare the data to be saved.
+    messages.append({"role": "assistant", "content": output})
+    output_data = {
+        "conversations": messages
+    }
+
+    # Generate a random UUID for the filename so concurrent calls never collide.
+    filename = str(uuid.uuid4()) + ".json"
+    file_path = os.path.join(output_dir, filename)
+
+    # Write the data to the file.
+    with open(file_path, "w", encoding="utf-8") as f:
+        json.dump(output_data, f, indent=4, ensure_ascii=False)
+
     return output


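Each OpenAI judge call now writes its full conversation to disk: one JSON file per call, filed under gpt4judge-outputs/<model>/ (or the top-level directory when no model name is passed), with a UUID filename so concurrent judging threads never overwrite each other. To inspect what was saved, something like the sketch below works; `load_saved_judgments` is a hypothetical helper, not part of this PR:

```python
import glob
import json
import os

def load_saved_judgments(model_name=None, base_dir="gpt4judge-outputs"):
    # Hypothetical reader mirroring the directory layout the diff writes to.
    pattern = os.path.join(base_dir, model_name or "", "*.json")
    conversations = []
    for path in glob.glob(pattern):
        with open(path, encoding="utf-8") as f:
            conversations.append(json.load(f)["conversations"])
    return conversations

# Example: print the judge's verdict for each saved conversation.
for messages in load_saved_judgments("my-7b"):
    print(messages[-1]["content"][:200])
```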
4 changes: 2 additions & 2 deletions fastchat/model/model_adapter.py
@@ -1564,7 +1564,7 @@ class Llama3Adapter(BaseModelAdapter):
"""The model adapter for Llama-3 (e.g., meta-llama/Meta-Llama-3-8B-Instruct)"""

def match(self, model_path: str):
return "llama-3" in model_path.lower()
return "meta-llama-3" in model_path.lower()

def load_model(self, model_path: str, from_pretrained_kwargs: dict):
model, tokenizer = super().load_model(model_path, from_pretrained_kwargs)
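The tightened substring matters because many community fine-tunes keep "llama-3" in their repository names while shipping their own chat templates, so the old check routed them through Llama-3's prompt format. A quick illustration; the second path is the kind of Hermes fine-tune this PR appears to target (paths used purely as examples):

```python
# Old vs. new matching behavior on example model paths.
paths = [
    "meta-llama/Meta-Llama-3-8B-Instruct",   # official weights: should match
    "NousResearch/Hermes-2-Pro-Llama-3-8B",  # fine-tune with its own template: should not
]
for p in paths:
    print(p, "| old:", "llama-3" in p.lower(), "| new:", "meta-llama-3" in p.lower())
# meta-llama/Meta-Llama-3-8B-Instruct | old: True | new: True
# NousResearch/Hermes-2-Pro-Llama-3-8B | old: True | new: False
```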
@@ -1650,7 +1650,7 @@ class Hermes2Adapter(BaseModelAdapter):
     def match(self, model_path: str):
         return any(
             model_str in model_path.lower()
-            for model_str in ["openhermes-2.5-mistral-7b", "openhermes-2-mistral-7b"]
+            for model_str in ["openhermes-2.5-mistral-7b", "openhermes-2-mistral-7b", "hermes"]
         )

     def load_model(self, model_path: str, from_pretrained_kwargs: dict):
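With Llama3Adapter narrowed above, Llama-3-based Hermes fine-tunes no longer match it, and the bare "hermes" substring added here picks them up instead. A standalone check of the new predicate (mirroring the diff rather than importing fastchat):

```python
def hermes_match(model_path: str) -> bool:
    # Copy of the updated Hermes2Adapter.match logic from this diff.
    return any(
        model_str in model_path.lower()
        for model_str in ["openhermes-2.5-mistral-7b", "openhermes-2-mistral-7b", "hermes"]
    )

print(hermes_match("NousResearch/Hermes-2-Pro-Llama-3-8B"))  # True
print(hermes_match("meta-llama/Meta-Llama-3-8B-Instruct"))   # False
```

Note that the bare "hermes" entry subsumes the two OpenHermes strings, so the explicit names are now redundant but harmless; the flip side is that any unrelated model with "hermes" in its path will also be routed to this adapter's ChatML template.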