microsoft · Hk669 · Apr 19, 2024 · Apr 19, 2024 · Apr 24, 2024 · Apr 29, 2024
diff --git a/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py
@@ -103,7 +103,7 @@ def __init__(
         self._hnsw_config = self._retrieve_config.get("hnsw_config", None)
         self._payload_indexing = self._retrieve_config.get("payload_indexing", False)
 
-    def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = ""):
+    def retrieve_docs(self, problem: str, n_results: Optional[int] = 20, search_string: Optional[str] = ""):
         """
         Args:
             problem (str): the problem to be solved.
@@ -150,21 +150,21 @@ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str =
 
 def create_qdrant_from_dir(
     dir_path: str,
-    max_tokens: int = 4000,
-    client: QdrantClient = None,
-    collection_name: str = "all-my-documents",
-    chunk_mode: str = "multi_lines",
-    must_break_at_empty_line: bool = True,
-    embedding_model: str = "BAAI/bge-small-en-v1.5",
-    custom_text_split_function: Callable = None,
-    custom_text_types: List[str] = TEXT_FORMATS,
-    recursive: bool = True,
-    extra_docs: bool = False,
-    parallel: int = 0,
-    on_disk: bool = False,
+    max_tokens: Optional[int] = 4000,
+    client: Optional[QdrantClient] = None,
+    collection_name: Optional[str] = "all-my-documents",
+    chunk_mode: Optional[str] = "multi_lines",
+    must_break_at_empty_line: Optional[bool] = True,
+    embedding_model: Optional[str] = "BAAI/bge-small-en-v1.5",
+    custom_text_split_function: Optional[Callable] = None,
+    custom_text_types: Optional[List[str]] = TEXT_FORMATS,
+    recursive: Optional[bool] = True,
+    extra_docs: Optional[bool] = False,
+    parallel: Optional[int] = 0,
+    on_disk: Optional[bool] = False,
     quantization_config: Optional[models.QuantizationConfig] = None,
     hnsw_config: Optional[models.HnswConfigDiff] = None,
-    payload_indexing: bool = False,
+    payload_indexing: Optional[bool] = False,
     qdrant_client_options: Optional[Dict] = {},
 ):
     """Create a Qdrant collection from all the files in a given directory, the directory can also be a single file or a
@@ -255,11 +255,11 @@ def create_qdrant_from_dir(
 
 def query_qdrant(
     query_texts: List[str],
-    n_results: int = 10,
-    client: QdrantClient = None,
-    collection_name: str = "all-my-documents",
-    search_string: str = "",
-    embedding_model: str = "BAAI/bge-small-en-v1.5",
+    n_results: Optional[int] = 10,
+    client: Optional[QdrantClient] = None,
+    collection_name: Optional[str] = "all-my-documents",
+    search_string: Optional[str] = "",
+    embedding_model: Optional[str] = "BAAI/bge-small-en-v1.5",
     qdrant_client_options: Optional[Dict] = {},
 ) -> List[List[QueryResponse]]:
     """Perform a similarity search with filters on a Qdrant collection
@@ -304,10 +304,18 @@ class QueryResponse(BaseModel, extra="forbid"):  # type: ignore
         ),
     )
 
-    data = {
-        "ids": [[result.id for result in sublist] for sublist in results],
-        "documents": [[result.document for result in sublist] for sublist in results],
-        "distances": [[result.score for result in sublist] for sublist in results],
-        "metadatas": [[result.metadata for result in sublist] for sublist in results],
-    }
+    data = [
+        [
+            QueryResponse(
+                id=result.id,
+                embedding=result.embedding,
+                metadata=result.metadata,
+                document=result.document,
+                score=result.score,
+            )
+            for result in sublist
+        ]
+        for sublist in results
+    ]
+
     return data
diff --git a/autogen/math_utils.py b/autogen/math_utils.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Dict, Optional, Tuple, Union
 
 from autogen import DEFAULT_MODEL, oai
 
@@ -9,7 +9,7 @@
 }
 
 
-def solve_problem(problem: str, **config) -> str:
+def solve_problem(problem: str, **config) -> Tuple[Optional[str], int]:
     """(openai<1) Solve the math problem.
 
     Args:
@@ -25,7 +25,7 @@ def solve_problem(problem: str, **config) -> str:
     return results.get("voted_answer"), response["cost"]
 
 
-def remove_boxed(string: str) -> Optional[str]:
+def remove_boxed(string: str) -> Union[str, None]:
     """Source: https://github.com/hendrycks/math
     Extract the text within a \\boxed{...} environment.
     Example:
@@ -36,15 +36,15 @@ def remove_boxed(string: str) -> Optional[str]:
     """
     left = "\\boxed{"
     try:
-        if not all((string[: len(left)] == left, string[-1] == "}")):
+        if not ((string.startswith(left) and string[-1] == "}")):
             raise AssertionError
 
         return string[len(left) : -1]
     except Exception:
         return None
 
 
-def last_boxed_only_string(string: str) -> Optional[str]:
+def last_boxed_only_string(string: str) -> Union[str, None]:
     """Source: https://github.com/hendrycks/math
     Extract the last \\boxed{...} or \\fbox{...} element from a string.
     """
@@ -96,7 +96,7 @@ def _fix_fracs(string: str) -> str:
                 new_str += substr
             else:
                 try:
-                    if not len(substr) >= 2:
+                    if len(substr) < 2:
                         raise AssertionError
                 except Exception:
                     return string
@@ -132,7 +132,7 @@ def _fix_a_slash_b(string: str) -> str:
     try:
         a = int(a_str)
         b = int(b_str)
-        if not string == "{}/{}".format(a, b):
+        if string != "{}/{}".format(a, b):
             raise AssertionError
         new_string = "\\frac{" + str(a) + "}{" + str(b) + "}"
         return new_string
@@ -147,7 +147,7 @@ def _remove_right_units(string: str) -> str:
     """
     if "\\text{ " in string:
         splits = string.split("\\text{ ")
-        if not len(splits) == 2:
+        if len(splits) != 2:
             raise AssertionError
         return splits[0]
     else:
@@ -161,16 +161,17 @@ def _fix_sqrt(string: str) -> str:
     >>> _fix_sqrt("\\sqrt3")
     \\sqrt{3}
     """
-    if "\\sqrt" not in string:
+    SQRT_LITERAL = "\\sqrt"  # Define a constant for the repeated literal
+    if SQRT_LITERAL not in string:
         return string
-    splits = string.split("\\sqrt")
+    splits = string.split(SQRT_LITERAL)
     new_string = splits[0]
     for split in splits[1:]:
         if split[0] != "{":
             a = split[0]
-            new_substr = "\\sqrt{" + a + "}" + split[1:]
+            new_substr = SQRT_LITERAL + "{" + a + "}" + split[1:]
         else:
-            new_substr = "\\sqrt" + split
+            new_substr = SQRT_LITERAL + split
         new_string += new_substr
     return new_string
 
@@ -310,7 +311,7 @@ def voting_counts(responses):
     return answers
 
 
-def eval_math_responses(responses, solution=None, **args):
+def eval_math_responses(responses, solution=None, **args) -> Dict:
     """Select a response for a math problem using voting, and check if the response is correct if the solution is provided.
 
     Args:

diff --git a/autogen/retrieve_utils.py b/autogen/retrieve_utils.py
@@ -2,7 +2,7 @@
 import hashlib
 import os
 import re
-from typing import Callable, List, Tuple, Union
+from typing import Callable, List, Optional, Tuple, Union
 from urllib.parse import urlparse
 
 import chromadb
@@ -195,7 +195,9 @@ def split_files_to_chunks(
     return chunks, sources
 
 
-def get_files_from_dir(dir_path: Union[str, List[str]], types: list = TEXT_FORMATS, recursive: bool = True):
+def get_files_from_dir(
+    dir_path: Union[str, List[str]], types: Optional[list] = TEXT_FORMATS, recursive: Optional[bool] = True
+):
     """Return a list of all the files in a given directory, a url, a file path or a list of them."""
     if len(types) == 0:
         raise ValueError("types cannot be empty.")
@@ -245,7 +247,7 @@ def get_files_from_dir(dir_path: Union[str, List[str]], types: list = TEXT_FORMA
     return files
 
 
-def parse_html_to_markdown(html: str, url: str = None) -> str:
+def parse_html_to_markdown(html: str, url: Optional[str] = None) -> str:
     """Parse HTML to markdown."""
     soup = BeautifulSoup(html, "html.parser")
     title = soup.title.string
@@ -278,14 +280,16 @@ def parse_html_to_markdown(html: str, url: str = None) -> str:
 
 def _generate_file_name_from_url(url: str, max_length=255) -> str:
     url_bytes = url.encode("utf-8")
-    hash = hashlib.blake2b(url_bytes).hexdigest()
+    url_hash = hashlib.blake2b(url_bytes).hexdigest()
     parsed_url = urlparse(url)
     file_name = os.path.basename(url)
-    file_name = f"{parsed_url.netloc}_{file_name}_{hash[:min(8, max_length-len(parsed_url.netloc)-len(file_name)-1)]}"
+    file_name = (
+        f"{parsed_url.netloc}_{file_name}_{url_hash[:min(8, max_length-len(parsed_url.netloc)-len(file_name)-1)]}"
+    )
     return file_name
 
 
-def get_file_from_url(url: str, save_path: str = None) -> Tuple[str, str]:
+def get_file_from_url(url: str, save_path: str = None) -> Union[Tuple[str, str], None]:
     """Download a file from a URL."""
     if save_path is None:
         save_path = "tmp/chromadb"
@@ -333,19 +337,19 @@ def is_url(string: str):
 
 def create_vector_db_from_dir(
     dir_path: Union[str, List[str]],
-    max_tokens: int = 4000,
-    client: API = None,
-    db_path: str = "tmp/chromadb.db",
-    collection_name: str = "all-my-documents",
-    get_or_create: bool = False,
-    chunk_mode: str = "multi_lines",
-    must_break_at_empty_line: bool = True,
-    embedding_model: str = "all-MiniLM-L6-v2",
-    embedding_function: Callable = None,
-    custom_text_split_function: Callable = None,
-    custom_text_types: List[str] = TEXT_FORMATS,
-    recursive: bool = True,
-    extra_docs: bool = False,
+    max_tokens: Optional[int] = 4000,
+    client: Optional[API] = None,
+    db_path: Optional[str] = "tmp/chromadb.db",
+    collection_name: Optional[str] = "all-my-documents",
+    get_or_create: Optional[bool] = False,
+    chunk_mode: Optional[str] = "multi_lines",
+    must_break_at_empty_line: Optional[bool] = True,
+    embedding_model: Optional[str] = "all-MiniLM-L6-v2",
+    embedding_function: Optional[Callable] = None,
+    custom_text_split_function: Optional[Callable] = None,
+    custom_text_types: Optional[List[str]] = TEXT_FORMATS,
+    recursive: Optional[bool] = True,
+    extra_docs: Optional[bool] = False,
 ) -> API:
     """Create a vector db from all the files in a given directory, the directory can also be a single file or a url to
         a single file. We support chromadb compatible APIs to create the vector db, this function is not required if
@@ -426,13 +430,13 @@ def create_vector_db_from_dir(
 
 def query_vector_db(
     query_texts: List[str],
-    n_results: int = 10,
-    client: API = None,
-    db_path: str = "tmp/chromadb.db",
-    collection_name: str = "all-my-documents",
-    search_string: str = "",
-    embedding_model: str = "all-MiniLM-L6-v2",
-    embedding_function: Callable = None,
+    n_results: Optional[int] = 10,
+    client: Optional[API] = None,
+    db_path: Optional[str] = "tmp/chromadb.db",
+    collection_name: Optional[str] = "all-my-documents",
+    search_string: Optional[str] = "",
+    embedding_model: Optional[str] = "all-MiniLM-L6-v2",
+    embedding_function: Optional[Callable] = None,
 ) -> QueryResult:
     """Query a vector db. We support chromadb compatible APIs, it's not required if you prepared your own vector db
         and query function.

diff --git a/autogen/runtime_logging.py b/autogen/runtime_logging.py
@@ -31,8 +31,7 @@ def start(logger_type: str = "sqlite", config: Optional[Dict[str, Any]] = None)
         is_logging = True
     except Exception as e:
         logger.error(f"[runtime logging] Failed to start logging: {e}")
-    finally:
-        return session_id
+    return session_id
 
 
 def log_chat_completion(