From ea2fc0f85949998fb542fdc146ece63bb9a7a677 Mon Sep 17 00:00:00 2001
From: Andrew Brookins <andrew.b@prefect.io>
Date: Tue, 9 Dec 2025 09:08:11 -0800
Subject: [PATCH 1/5] Add GHA workflow to publish server to PyPI

---
 .github/workflows/agent-memory-server.yml | 119 ++++++++++++++++
 agent_memory_server/__init__.py           |   2 +-
 scripts/tag_and_push_server.py            | 165 ++++++++++++++++++++++
 3 files changed, 285 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/agent-memory-server.yml
 create mode 100644 scripts/tag_and_push_server.py

diff --git a/.github/workflows/agent-memory-server.yml b/.github/workflows/agent-memory-server.yml
new file mode 100644
index 0000000..5547344
--- /dev/null
+++ b/.github/workflows/agent-memory-server.yml
@@ -0,0 +1,119 @@
+name: Agent Memory Server CI
+
+on:
+  push:
+    branches: [main]
+    tags:
+      - 'server/v*.*.*'  # also matches suffixed tags such as server/v1.0.0-test
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:  # no `if:` guard, so this runs for main pushes, server/v* tags and pull requests
+    name: Test and build (Python 3.12)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+
+      - name: Install dependencies
+        run: uv sync --all-extras
+
+      - name: Install agent-memory-client (editable)
+        run: uv pip install -e ./agent-memory-client
+
+      - name: Lint with Ruff
+        run: uv run ruff check
+
+      - name: Check formatting with Ruff formatter
+        run: uv run ruff format --check
+
+      - name: Run tests  # NOTE(review): --run-api-tests looks like a project-defined pytest option; confirm
+        run: uv run pytest --run-api-tests
+        env:  # NOTE(review): repository secrets are not passed to runs triggered from forks
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+
+      - name: Build package  # build smoke test only; this job has no upload step
+        run: |
+          python -m pip install --upgrade pip
+          pip install build
+          python -m build
+
+  publish-testpypi:  # runs only for refs/tags/server/* refs that contain "-test"
+    name: Publish to TestPyPI
+    needs: test  # skipped unless the test job succeeded
+    if: startsWith(github.ref, 'refs/tags/server/') && contains(github.ref, '-test')
+    runs-on: ubuntu-latest
+    environment: testpypi
+    permissions:
+      id-token: write  # lets the job request the OIDC token used for Trusted Publishing
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      - name: Install build tools
+        run: |
+          python -m pip install --upgrade pip
+          pip install build
+
+      - name: Build package  # rebuilt from this checkout; nothing is reused from the test job
+        run: python -m build
+
+      - name: Publish package to TestPyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/  # TestPyPI upload endpoint
+          packages-dir: dist/
+
+  publish-pypi:  # runs only for refs/tags/server/* refs that do NOT contain "-test"
+    name: Publish to PyPI
+    needs: test  # skipped unless the test job succeeded
+    if: startsWith(github.ref, 'refs/tags/server/') && !contains(github.ref, '-test')
+    runs-on: ubuntu-latest
+    environment: pypi
+    permissions:
+      id-token: write  # lets the job request the OIDC token used for Trusted Publishing
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      - name: Install build tools
+        run: |
+          python -m pip install --upgrade pip
+          pip install build
+
+      - name: Build package  # rebuilt from this checkout; nothing is reused from the test job
+        run: python -m build
+
+      - name: Publish package to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:  # no repository-url here, so the action's default upload index is used
+          packages-dir: dist/
+
+# Tag Format Guide:
+# - For TestPyPI (testing): server/v1.0.0-test
+# - For PyPI (production): server/v1.0.0
+#
+# Use the script: python scripts/tag_and_push_server.py --test (for TestPyPI)
+#                 python scripts/tag_and_push_server.py (for PyPI)
+#
+# This workflow uses PyPI Trusted Publishing (OIDC). Ensure the project is configured
+# on PyPI to trust this GitHub repository before releasing.
+
diff --git a/agent_memory_server/__init__.py b/agent_memory_server/__init__.py
index 7bb66b9..2d39331 100644
--- a/agent_memory_server/__init__.py
+++ b/agent_memory_server/__init__.py
@@ -1,3 +1,3 @@
 """Redis Agent Memory Server - A memory system for conversational AI."""
 
-__version__ = "0.12.3"
+__version__ = "0.12.4"
diff --git a/scripts/tag_and_push_server.py b/scripts/tag_and_push_server.py
new file mode 100644
index 0000000..86e0b69
--- /dev/null
+++ b/scripts/tag_and_push_server.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+"""
+Script to tag and push agent-memory-server releases.
+
+This script:
+1. Reads the current version from agent_memory_server/__init__.py
+2. Creates a git tag in the format: server/v{version} (plus "-test" with --test)
+3. Pushes the tag to origin
+
+Usage:
+    python scripts/tag_and_push_server.py [--dry-run] [--force] [--test]
+"""
+
+import argparse
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+
+def get_server_version() -> str:
+    """Return the __version__ string read from agent_memory_server/__init__.py."""
+    init_file = Path("agent_memory_server/__init__.py")
+    # The path is relative, so this only works when cwd is the project root.
+    if not init_file.exists():
+        raise FileNotFoundError(f"Could not find {init_file}")
+
+    content = init_file.read_text()
+
+    # Capture the quoted value after "__version__ ="; its format is not validated.
+    version_match = re.search(r'__version__\s*=\s*["\']([^"\']+)["\']', content)
+
+    if not version_match:
+        raise ValueError(f"Could not find __version__ in {init_file}")
+
+    return version_match.group(1)
+
+
+def run_command(cmd: list[str], dry_run: bool = False) -> subprocess.CompletedProcess:
+    """Echo cmd, then run it; in dry-run mode return a fake success without running."""
+    print(f"Running: {' '.join(cmd)}")
+    # Dry run: hand back a zero-exit CompletedProcess with empty stdout/stderr.
+    if dry_run:
+        print("  (dry-run mode - command not executed)")
+        return subprocess.CompletedProcess(cmd, 0, "", "")
+    # check=True raises CalledProcessError on a non-zero exit; stderr is shown, then re-raised.
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        if result.stdout:
+            print(f"  Output: {result.stdout.strip()}")
+        return result
+    except subprocess.CalledProcessError as e:
+        print(f"  Error: {e.stderr.strip()}")
+        raise
+
+
+def check_git_status():
+    """Exit with status 1 if git status fails or the user rejects a dirty tree."""
+    try:
+        result = subprocess.run(
+            ["git", "status", "--porcelain"], capture_output=True, text=True, check=True
+        )
+        if result.stdout.strip():
+            print("Warning: Git working directory is not clean:")
+            print(result.stdout)
+            response = input("Continue anyway? (y/N): ")
+            if response.lower() != "y":
+                sys.exit(1)
+    except subprocess.CalledProcessError:
+        print("Error: Could not check git status")
+        sys.exit(1)
+
+
+def tag_exists(tag_name: str) -> bool:
+    """Return True if `git rev-parse` resolves refs/tags/<tag_name> in the local repo."""
+    try:
+        subprocess.run(
+            ["git", "rev-parse", f"refs/tags/{tag_name}"],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            check=True,
+        )
+        return True
+    except subprocess.CalledProcessError:
+        return False
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Tag and push agent-memory-server release")
+    parser.add_argument(
+        "--dry-run", action="store_true", help="Show what would be done without actually doing it"
+    )
+    parser.add_argument(
+        "--force", action="store_true", help="Force tag creation even if tag already exists"
+    )
+    parser.add_argument(
+        "--test", action="store_true", help="Add '-test' suffix to tag for TestPyPI deployment"
+    )
+
+    args = parser.parse_args()
+
+    # chdir to the repo root (the parent of scripts/) so relative paths resolve
+    script_dir = Path(__file__).parent
+    project_root = script_dir.parent
+
+    try:
+        original_cwd = Path.cwd()
+        if project_root.resolve() != original_cwd.resolve():
+            print(f"Changing to project root: {project_root}")
+            import os
+
+            os.chdir(project_root)
+    except Exception as e:
+        print(f"Warning: Could not change to project root: {e}")
+
+    try:
+        # Tag name is server/v<version>, plus "-test" when --test is given
+        version = get_server_version()
+        tag_suffix = "-test" if args.test else ""
+        tag_name = f"server/v{version}{tag_suffix}"
+
+        print(f"Current server version: {version}")
+        print(f"Tag to create: {tag_name}")
+        print(f"Deployment target: {'TestPyPI' if args.test else 'PyPI (Production)'}")
+
+        if not args.dry_run:
+            # Prompts (and may exit) when the working tree is dirty
+            check_git_status()
+
+            # An existing local tag is fatal unless --force, which deletes it first
+            if tag_exists(tag_name):
+                if args.force:
+                    print(f"Tag {tag_name} already exists, but --force specified")
+                    run_command(["git", "tag", "-d", tag_name], args.dry_run)
+                else:
+                    print(f"Error: Tag {tag_name} already exists. Use --force to overwrite.")
+                    sys.exit(1)
+
+        # Create the tag
+        run_command(["git", "tag", tag_name], args.dry_run)
+
+        # Push the tag; with --force this also overwrites the tag on origin
+        push_cmd = ["git", "push", "origin", tag_name]
+        if args.force:
+            push_cmd.insert(2, "--force")
+
+        run_command(push_cmd, args.dry_run)
+        # A dry run executes no git command, so only claim success on a real run.
+        if args.dry_run:
+            print(f"\nDry run complete: {tag_name} was not created or pushed")
+        else:
+            print(f"\n✅ Successfully tagged and pushed {tag_name}")
+            print("\nThis should trigger the GitHub Actions workflow for:")
+            if args.test:
+                print("  - TestPyPI publication (testing)")
+            else:
+                print("  - PyPI publication (production)")
+    except Exception as e:
+        print(f"Error: {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
+

From 7ff587c65e40e449d5ae34343e4208be6c6b8346 Mon Sep 17 00:00:00 2001
From: Andrew Brookins <andrew.b@prefect.io>
Date: Tue, 9 Dec 2025 09:10:21 -0800
Subject: [PATCH 2/5] lint

---
 .github/workflows/agent-memory-server.yml |  1 -
 scripts/tag_and_push_server.py            | 21 +++++++++++++++------
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/agent-memory-server.yml b/.github/workflows/agent-memory-server.yml
index 5547344..926887d 100644
--- a/.github/workflows/agent-memory-server.yml
+++ b/.github/workflows/agent-memory-server.yml
@@ -116,4 +116,3 @@ jobs:
 #
 # This workflow uses PyPI Trusted Publishing (OIDC). Ensure the project is configured
 # on PyPI to trust this GitHub repository before releasing.
-
diff --git a/scripts/tag_and_push_server.py b/scripts/tag_and_push_server.py
index 86e0b69..4975c0e 100644
--- a/scripts/tag_and_push_server.py
+++ b/scripts/tag_and_push_server.py
@@ -86,15 +86,23 @@ def tag_exists(tag_name: str) -> bool:
 
 
 def main():
-    parser = argparse.ArgumentParser(description="Tag and push agent-memory-server release")
+    parser = argparse.ArgumentParser(
+        description="Tag and push agent-memory-server release"
+    )
     parser.add_argument(
-        "--dry-run", action="store_true", help="Show what would be done without actually doing it"
+        "--dry-run",
+        action="store_true",
+        help="Show what would be done without actually doing it",
     )
     parser.add_argument(
-        "--force", action="store_true", help="Force tag creation even if tag already exists"
+        "--force",
+        action="store_true",
+        help="Force tag creation even if tag already exists",
     )
     parser.add_argument(
-        "--test", action="store_true", help="Add '-test' suffix to tag for TestPyPI deployment"
+        "--test",
+        action="store_true",
+        help="Add '-test' suffix to tag for TestPyPI deployment",
     )
 
     args = parser.parse_args()
@@ -133,7 +141,9 @@ def main():
                     print(f"Tag {tag_name} already exists, but --force specified")
                     run_command(["git", "tag", "-d", tag_name], args.dry_run)
                 else:
-                    print(f"Error: Tag {tag_name} already exists. Use --force to overwrite.")
+                    print(
+                        f"Error: Tag {tag_name} already exists. Use --force to overwrite."
+                    )
                     sys.exit(1)
 
         # Create the tag
@@ -162,4 +172,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-

From 5f9474849e53c6b33b1154c7685b4392602565a9 Mon Sep 17 00:00:00 2001
From: Andrew Brookins <andrew.b@prefect.io>
Date: Tue, 9 Dec 2025 09:12:06 -0800
Subject: [PATCH 3/5] lint

---
 .../tests/test_tool_schemas.py                |  24 ++--
 tests/test_api.py                             |  66 ++++-----
 tests/test_client_tool_calls.py               |  12 +-
 tests/test_context_percentage_calculation.py  | 126 +++++++++---------
 .../test_contextual_grounding_integration.py  |  12 +-
 tests/test_full_integration.py                |  18 +--
 tests/test_mcp.py                             |  12 +-
 tests/test_thread_aware_grounding.py          |  12 +-
 tests/test_tool_contextual_grounding.py       |  30 ++---
 9 files changed, 156 insertions(+), 156 deletions(-)

diff --git a/agent-memory-client/tests/test_tool_schemas.py b/agent-memory-client/tests/test_tool_schemas.py
index 7182166..1cada2e 100644
--- a/agent-memory-client/tests/test_tool_schemas.py
+++ b/agent-memory-client/tests/test_tool_schemas.py
@@ -198,9 +198,9 @@ def test_creation_and_editing_tools_exclude_message_type(self):
                 memory_type_prop = params["properties"]["memory_type"]
                 if "enum" in memory_type_prop:
                     if function_name in restricted_tools:
-                        assert (
-                            "message" not in memory_type_prop["enum"]
-                        ), f"Creation/editing tool '{function_name}' should not expose 'message' memory type"
+                        assert "message" not in memory_type_prop["enum"], (
+                            f"Creation/editing tool '{function_name}' should not expose 'message' memory type"
+                        )
                     elif function_name in allowed_tools:
                         # These tools are allowed to have message in enum for filtering
                         pass
@@ -215,9 +215,9 @@ def test_creation_and_editing_tools_exclude_message_type(self):
                     and function_name in restricted_tools
                 ):
                     memory_type_prop = items["properties"]["memory_type"]
-                    assert (
-                        "message" not in memory_type_prop["enum"]
-                    ), f"Creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties"
+                    assert "message" not in memory_type_prop["enum"], (
+                        f"Creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties"
+                    )
 
 
 class TestAnthropicSchemas:
@@ -290,9 +290,9 @@ def test_anthropic_schemas_exclude_message_type_for_creation(self):
                 memory_type_prop = params["properties"]["memory_type"]
                 if "enum" in memory_type_prop:
                     if function_name in restricted_tools:
-                        assert (
-                            "message" not in memory_type_prop["enum"]
-                        ), f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type"
+                        assert "message" not in memory_type_prop["enum"], (
+                            f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type"
+                        )
                     elif function_name in allowed_tools:
                         # These tools are allowed to have message in enum for filtering
                         pass
@@ -307,6 +307,6 @@ def test_anthropic_schemas_exclude_message_type_for_creation(self):
                     and function_name in restricted_tools
                 ):
                     memory_type_prop = items["properties"]["memory_type"]
-                    assert (
-                        "message" not in memory_type_prop["enum"]
-                    ), f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties"
+                    assert "message" not in memory_type_prop["enum"], (
+                        f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties"
+                    )
diff --git a/tests/test_api.py b/tests/test_api.py
index 61d4550..b7da557 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -378,54 +378,54 @@ async def test_put_memory_context_percentages_with_summarization_regression(
         # Verify summarization occurred (message count should be reduced)
         original_message_count = len(payload["messages"])
         final_message_count = len(data["messages"])
-        assert (
-            final_message_count < original_message_count
-        ), f"Expected summarization to reduce messages from {original_message_count} to less, but got {final_message_count}"
+        assert final_message_count < original_message_count, (
+            f"Expected summarization to reduce messages from {original_message_count} to less, but got {final_message_count}"
+        )
 
         # Verify context summary was created
-        assert (
-            data["context"] is not None
-        ), "Context should not be None after summarization"
-        assert (
-            data["context"].strip() != ""
-        ), "Context should not be empty after summarization"
+        assert data["context"] is not None, (
+            "Context should not be None after summarization"
+        )
+        assert data["context"].strip() != "", (
+            "Context should not be empty after summarization"
+        )
 
         # REGRESSION TEST: Context percentages should NOT be null even after summarization
         # They should reflect the current state (post-summarization) with small percentages
         assert "context_percentage_total_used" in data
         assert "context_percentage_until_summarization" in data
-        assert (
-            data["context_percentage_total_used"] is not None
-        ), "BUG REGRESSION: context_percentage_total_used should not be null when context_window_max is provided"
-        assert (
-            data["context_percentage_until_summarization"] is not None
-        ), "BUG REGRESSION: context_percentage_until_summarization should not be null when context_window_max is provided"
+        assert data["context_percentage_total_used"] is not None, (
+            "BUG REGRESSION: context_percentage_total_used should not be null when context_window_max is provided"
+        )
+        assert data["context_percentage_until_summarization"] is not None, (
+            "BUG REGRESSION: context_percentage_until_summarization should not be null when context_window_max is provided"
+        )
 
         # Verify the percentages are valid numbers
         total_used = data["context_percentage_total_used"]
         until_summarization = data["context_percentage_until_summarization"]
 
-        assert isinstance(
-            total_used, int | float
-        ), f"context_percentage_total_used should be a number, got {type(total_used)}"
-        assert isinstance(
-            until_summarization, int | float
-        ), f"context_percentage_until_summarization should be a number, got {type(until_summarization)}"
-        assert (
-            0 <= total_used <= 100
-        ), f"context_percentage_total_used should be 0-100, got {total_used}"
-        assert (
-            0 <= until_summarization <= 100
-        ), f"context_percentage_until_summarization should be 0-100, got {until_summarization}"
+        assert isinstance(total_used, int | float), (
+            f"context_percentage_total_used should be a number, got {type(total_used)}"
+        )
+        assert isinstance(until_summarization, int | float), (
+            f"context_percentage_until_summarization should be a number, got {type(until_summarization)}"
+        )
+        assert 0 <= total_used <= 100, (
+            f"context_percentage_total_used should be 0-100, got {total_used}"
+        )
+        assert 0 <= until_summarization <= 100, (
+            f"context_percentage_until_summarization should be 0-100, got {until_summarization}"
+        )
 
         # After summarization, percentages should be reasonable (not necessarily high)
         # They represent the current state of the session post-summarization
-        assert (
-            total_used >= 0
-        ), f"Expected non-negative total usage percentage, got {total_used}"
-        assert (
-            until_summarization >= 0
-        ), f"Expected non-negative until_summarization percentage, got {until_summarization}"
+        assert total_used >= 0, (
+            f"Expected non-negative total usage percentage, got {total_used}"
+        )
+        assert until_summarization >= 0, (
+            f"Expected non-negative until_summarization percentage, got {until_summarization}"
+        )
 
     @pytest.mark.requires_api_keys
     @pytest.mark.asyncio
diff --git a/tests/test_client_tool_calls.py b/tests/test_client_tool_calls.py
index b24e8df..70a022c 100644
--- a/tests/test_client_tool_calls.py
+++ b/tests/test_client_tool_calls.py
@@ -587,9 +587,9 @@ def test_all_tool_schemas_exclude_message_type(self):
             if "memory_type" in params["properties"]:
                 memory_type_prop = params["properties"]["memory_type"]
                 if function_name in restricted_tools:
-                    assert (
-                        "message" not in memory_type_prop.get("enum", [])
-                    ), f"Creation/editing tool {function_name} should not expose 'message' memory type"
+                    assert "message" not in memory_type_prop.get("enum", []), (
+                        f"Creation/editing tool {function_name} should not expose 'message' memory type"
+                    )
 
             # Check nested properties (like in create_long_term_memory)
             if "memories" in params["properties"]:
@@ -597,9 +597,9 @@ def test_all_tool_schemas_exclude_message_type(self):
                 if "properties" in items and "memory_type" in items["properties"]:
                     memory_type_prop = items["properties"]["memory_type"]
                     if function_name in restricted_tools:
-                        assert (
-                            "message" not in memory_type_prop.get("enum", [])
-                        ), f"Creation/editing tool {function_name} should not expose 'message' memory type in nested properties"
+                        assert "message" not in memory_type_prop.get("enum", []), (
+                            f"Creation/editing tool {function_name} should not expose 'message' memory type in nested properties"
+                        )
 
 
 class TestToolCallErrorHandling:
diff --git a/tests/test_context_percentage_calculation.py b/tests/test_context_percentage_calculation.py
index 4eca4fd..6238b0a 100644
--- a/tests/test_context_percentage_calculation.py
+++ b/tests/test_context_percentage_calculation.py
@@ -29,22 +29,22 @@ def test_context_percentages_with_context_window_max(self):
             )
         )
 
-        assert (
-            total_percentage is not None
-        ), "total_percentage should not be None when context_window_max is provided"
-        assert (
-            until_summarization_percentage is not None
-        ), "until_summarization_percentage should not be None when context_window_max is provided"
+        assert total_percentage is not None, (
+            "total_percentage should not be None when context_window_max is provided"
+        )
+        assert until_summarization_percentage is not None, (
+            "until_summarization_percentage should not be None when context_window_max is provided"
+        )
         assert isinstance(total_percentage, float), "total_percentage should be a float"
-        assert isinstance(
-            until_summarization_percentage, float
-        ), "until_summarization_percentage should be a float"
-        assert (
-            0 <= total_percentage <= 100
-        ), "total_percentage should be between 0 and 100"
-        assert (
-            0 <= until_summarization_percentage <= 100
-        ), "until_summarization_percentage should be between 0 and 100"
+        assert isinstance(until_summarization_percentage, float), (
+            "until_summarization_percentage should be a float"
+        )
+        assert 0 <= total_percentage <= 100, (
+            "total_percentage should be between 0 and 100"
+        )
+        assert 0 <= until_summarization_percentage <= 100, (
+            "until_summarization_percentage should be between 0 and 100"
+        )
 
     def test_context_percentages_with_model_name(self):
         """Test that context percentages are calculated when model_name is provided"""
@@ -59,16 +59,16 @@ def test_context_percentages_with_model_name(self):
             )
         )
 
-        assert (
-            total_percentage is not None
-        ), "total_percentage should not be None when model_name is provided"
-        assert (
-            until_summarization_percentage is not None
-        ), "until_summarization_percentage should not be None when model_name is provided"
+        assert total_percentage is not None, (
+            "total_percentage should not be None when model_name is provided"
+        )
+        assert until_summarization_percentage is not None, (
+            "until_summarization_percentage should not be None when model_name is provided"
+        )
         assert isinstance(total_percentage, float), "total_percentage should be a float"
-        assert isinstance(
-            until_summarization_percentage, float
-        ), "until_summarization_percentage should be a float"
+        assert isinstance(until_summarization_percentage, float), (
+            "until_summarization_percentage should be a float"
+        )
 
     def test_context_percentages_without_model_info(self):
         """Test that context percentages return None when no model info is provided"""
@@ -83,12 +83,12 @@ def test_context_percentages_without_model_info(self):
             )
         )
 
-        assert (
-            total_percentage is None
-        ), "total_percentage should be None when no model info is provided"
-        assert (
-            until_summarization_percentage is None
-        ), "until_summarization_percentage should be None when no model info is provided"
+        assert total_percentage is None, (
+            "total_percentage should be None when no model info is provided"
+        )
+        assert until_summarization_percentage is None, (
+            "until_summarization_percentage should be None when no model info is provided"
+        )
 
     def test_context_percentages_with_empty_messages(self):
         """Test context percentages with empty messages list but model info provided"""
@@ -101,12 +101,12 @@ def test_context_percentages_with_empty_messages(self):
         )
 
         # CORRECTED: Should return 0.0 when model info is provided, even with empty messages
-        assert (
-            total_percentage == 0.0
-        ), "total_percentage should be 0.0 for empty messages when model info provided"
-        assert (
-            until_summarization_percentage == 0.0
-        ), "until_summarization_percentage should be 0.0 for empty messages when model info provided"
+        assert total_percentage == 0.0, (
+            "total_percentage should be 0.0 for empty messages when model info provided"
+        )
+        assert until_summarization_percentage == 0.0, (
+            "until_summarization_percentage should be 0.0 for empty messages when model info provided"
+        )
 
     def test_context_percentages_precedence(self):
         """Test that context_window_max takes precedence over model_name"""
@@ -131,9 +131,9 @@ def test_context_percentages_precedence(self):
         )
 
         # Results should be the same, proving context_window_max takes precedence
-        assert (
-            total_percentage_both == total_percentage_max_only
-        ), "context_window_max should take precedence over model_name"
+        assert total_percentage_both == total_percentage_max_only, (
+            "context_window_max should take precedence over model_name"
+        )
         assert (
             until_summarization_percentage_both
             == until_summarization_percentage_max_only
@@ -163,9 +163,9 @@ def test_context_percentages_high_token_usage(self):
         assert until_summarization_percentage is not None
         # Should be capped at 100%
         assert total_percentage <= 100.0, "total_percentage should be capped at 100%"
-        assert (
-            until_summarization_percentage <= 100.0
-        ), "until_summarization_percentage should be capped at 100%"
+        assert until_summarization_percentage <= 100.0, (
+            "until_summarization_percentage should be capped at 100%"
+        )
 
     def test_context_percentages_zero_context_window_regression(self):
         """
@@ -185,9 +185,9 @@ def test_context_percentages_zero_context_window_regression(self):
 
         # Should return None for invalid context window
         assert total_percentage is None, "Should return None for zero context window"
-        assert (
-            until_summarization_percentage is None
-        ), "Should return None for zero context window"
+        assert until_summarization_percentage is None, (
+            "Should return None for zero context window"
+        )
 
         # Test with negative context window
         total_percentage, until_summarization_percentage = (
@@ -197,12 +197,12 @@ def test_context_percentages_zero_context_window_regression(self):
         )
 
         # Should return None for invalid context window
-        assert (
-            total_percentage is None
-        ), "Should return None for negative context window"
-        assert (
-            until_summarization_percentage is None
-        ), "Should return None for negative context window"
+        assert total_percentage is None, (
+            "Should return None for negative context window"
+        )
+        assert until_summarization_percentage is None, (
+            "Should return None for negative context window"
+        )
 
     def test_context_percentages_very_small_context_window_regression(self):
         """
@@ -224,17 +224,17 @@ def test_context_percentages_very_small_context_window_regression(self):
         )
 
         # Should handle this gracefully without division by zero
-        assert (
-            total_percentage is not None
-        ), "Should handle small context window without error"
-        assert (
-            until_summarization_percentage is not None
-        ), "Should handle small context window without error"
+        assert total_percentage is not None, (
+            "Should handle small context window without error"
+        )
+        assert until_summarization_percentage is not None, (
+            "Should handle small context window without error"
+        )
         assert isinstance(total_percentage, float), "Should return valid float"
-        assert isinstance(
-            until_summarization_percentage, float
-        ), "Should return valid float"
+        assert isinstance(until_summarization_percentage, float), (
+            "Should return valid float"
+        )
         # until_summarization_percentage should be 100% when threshold is 0
-        assert (
-            until_summarization_percentage == 100.0
-        ), "Should return 100% when token threshold is 0"
+        assert until_summarization_percentage == 100.0, (
+            "Should return 100% when token threshold is 0"
+        )
diff --git a/tests/test_contextual_grounding_integration.py b/tests/test_contextual_grounding_integration.py
index 15db72b..f9b8200 100644
--- a/tests/test_contextual_grounding_integration.py
+++ b/tests/test_contextual_grounding_integration.py
@@ -449,9 +449,9 @@ async def test_comprehensive_grounding_evaluation_with_judge(self):
 
             # CI Stability: Accept any valid score (>= 0.0) while grounding system is being improved
             # This allows us to track grounding quality without blocking CI on implementation details
-            assert (
-                result.overall_score >= 0.0
-            ), f"Invalid score for {example['category']}: {result.overall_score}"
+            assert result.overall_score >= 0.0, (
+                f"Invalid score for {example['category']}: {result.overall_score}"
+            )
 
             # Log performance for monitoring
             if result.overall_score < 0.05:
@@ -530,6 +530,6 @@ async def test_model_comparison_grounding_quality(self):
             print(f"{model}: {status}")
 
         # At least one model should succeed
-        assert any(
-            r["success"] for r in results_by_model.values()
-        ), "No model successfully completed grounding"
+        assert any(r["success"] for r in results_by_model.values()), (
+            "No model successfully completed grounding"
+        )
diff --git a/tests/test_full_integration.py b/tests/test_full_integration.py
index aa0ac6d..a8368bd 100644
--- a/tests/test_full_integration.py
+++ b/tests/test_full_integration.py
@@ -772,9 +772,9 @@ async def test_memory_prompt_with_long_term_search(
             )
             for msg in messages
         )
-        assert (
-            relevant_context_found
-        ), f"No relevant memory context found in messages: {messages}"
+        assert relevant_context_found, (
+            f"No relevant memory context found in messages: {messages}"
+        )
 
         # Cleanup
         await client.delete_long_term_memories([m.id for m in test_memories])
@@ -1078,9 +1078,9 @@ async def test_full_workflow_integration(
             )
             print(f"No topic filter search results: {no_topic_search}")
 
-        assert (
-            len(search_results["memories"]) > 0
-        ), f"No memories found in search results: {search_results}"
+        assert len(search_results["memories"]) > 0, (
+            f"No memories found in search results: {search_results}"
+        )
 
         # 6. Test tool integration with a realistic scenario
         tool_call = {
@@ -1125,9 +1125,9 @@ async def test_full_workflow_integration(
             m for m in long_term_memories.memories if m.id.startswith(memory_id_prefix)
         ]
 
-        assert (
-            len(our_memories) == 0
-        ), f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}"
+        assert len(our_memories) == 0, (
+            f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}"
+        )
 
 
 @pytest.mark.integration
diff --git a/tests/test_mcp.py b/tests/test_mcp.py
index 11d1de9..97a4f36 100644
--- a/tests/test_mcp.py
+++ b/tests/test_mcp.py
@@ -455,9 +455,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup)
             namespace="user_preferences",
         )
 
-        assert (
-            lenient_memory.discrete_memory_extracted == "t"
-        ), f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'"
+        assert lenient_memory.discrete_memory_extracted == "t", (
+            f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'"
+        )
         assert lenient_memory.memory_type.value == "semantic"
         assert lenient_memory.id is not None
 
@@ -466,9 +466,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup)
             id="test_001", text="User prefers coffee", memory_type="semantic"
         )
 
-        assert (
-            extracted_memory.discrete_memory_extracted == "t"
-        ), f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'"
+        assert extracted_memory.discrete_memory_extracted == "t", (
+            f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'"
+        )
         assert extracted_memory.memory_type.value == "semantic"
 
     @pytest.mark.asyncio
diff --git a/tests/test_thread_aware_grounding.py b/tests/test_thread_aware_grounding.py
index b4bd00a..6a8b021 100644
--- a/tests/test_thread_aware_grounding.py
+++ b/tests/test_thread_aware_grounding.py
@@ -184,9 +184,9 @@ async def test_debounce_mechanism(self, redis_url):
 
         # Immediate second call should be debounced
         should_extract_2 = await should_extract_session_thread(session_id, redis)
-        assert (
-            should_extract_2 is False
-        ), "Second extraction attempt should be debounced"
+        assert should_extract_2 is False, (
+            "Second extraction attempt should be debounced"
+        )
 
         # Clean up
         debounce_key = f"extraction_debounce:{session_id}"
@@ -304,9 +304,9 @@ async def test_multi_entity_conversation(self):
 
         # The main success criterion: significantly reduced pronoun usage
         # Since we have proper contextual grounding, we should see very few unresolved pronouns
-        assert (
-            pronoun_count <= 3
-        ), f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}"
+        assert pronoun_count <= 3, (
+            f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}"
+        )
 
         # Additional validation: if we see multiple memories, it's a good sign of thorough extraction
         if len(extracted_memories) >= 2:
diff --git a/tests/test_tool_contextual_grounding.py b/tests/test_tool_contextual_grounding.py
index 05b2f94..3b15584 100644
--- a/tests/test_tool_contextual_grounding.py
+++ b/tests/test_tool_contextual_grounding.py
@@ -67,9 +67,9 @@ def test_tool_description_has_grounding_instructions(self):
         ]
 
         for keyword in grounding_keywords:
-            assert (
-                keyword in tool_description
-            ), f"Tool description missing keyword: {keyword}"
+            assert keyword in tool_description, (
+                f"Tool description missing keyword: {keyword}"
+            )
             print(f"✓ Found: {keyword}")
 
         print(
@@ -107,9 +107,9 @@ async def test_judge_evaluation_of_tool_created_memories(self):
         print(f"Scores: {evaluation}")
 
         # Well-grounded tool memory should score well
-        assert (
-            evaluation["overall_score"] >= 0.7
-        ), f"Well-grounded tool memory should score high: {evaluation['overall_score']}"
+        assert evaluation["overall_score"] >= 0.7, (
+            f"Well-grounded tool memory should score high: {evaluation['overall_score']}"
+        )
 
         # Test case: Poorly grounded tool memory
         poor_grounded_memory = "He has extensive backend experience. She specializes in React. They collaborate effectively."
@@ -133,9 +133,9 @@ async def test_judge_evaluation_of_tool_created_memories(self):
 
         # Both should at least be evaluated successfully
         assert evaluation["overall_score"] >= 0.7, "Good grounding should score well"
-        assert (
-            poor_evaluation["overall_score"] >= 0.0
-        ), "Poor grounding should still be evaluated"
+        assert poor_evaluation["overall_score"] >= 0.0, (
+            "Poor grounding should still be evaluated"
+        )
 
     @pytest.mark.requires_api_keys
     async def test_realistic_tool_usage_scenario(self):
@@ -194,12 +194,12 @@ async def test_realistic_tool_usage_scenario(self):
         print(f"Evaluation: {evaluation}")
 
         # Should demonstrate good contextual grounding
-        assert (
-            evaluation["pronoun_resolution_score"] >= 0.8
-        ), "Should properly ground 'she' to 'Maria'"
-        assert (
-            evaluation["overall_score"] >= 0.6
-        ), f"Realistic tool usage should show good grounding: {evaluation['overall_score']}"
+        assert evaluation["pronoun_resolution_score"] >= 0.8, (
+            "Should properly ground 'she' to 'Maria'"
+        )
+        assert evaluation["overall_score"] >= 0.6, (
+            f"Realistic tool usage should show good grounding: {evaluation['overall_score']}"
+        )
 
         print(
             "✓ Tool-based memory creation with proper contextual grounding successful"

From b469188156a9203f707f66d1589e9e415be94fb0 Mon Sep 17 00:00:00 2001
From: Andrew Brookins <andrew.b@prefect.io>
Date: Tue, 9 Dec 2025 09:45:18 -0800
Subject: [PATCH 4/5] Dedupe lints/tests

---
 .github/workflows/agent-memory-client.yml | 14 +----
 .github/workflows/agent-memory-server.yml | 74 +++++++----------------
 .github/workflows/python-tests.yml        |  3 +
 3 files changed, 28 insertions(+), 63 deletions(-)

diff --git a/.github/workflows/agent-memory-client.yml b/.github/workflows/agent-memory-client.yml
index bef1f87..5f3fb24 100644
--- a/.github/workflows/agent-memory-client.yml
+++ b/.github/workflows/agent-memory-client.yml
@@ -31,17 +31,9 @@ jobs:
         working-directory: agent-memory-client
         run: uv sync --extra dev
 
-      - name: Lint with Ruff
-        working-directory: agent-memory-client
-        run: uv run ruff check agent_memory_client
-
-      - name: Check formatting with Ruff formatter
-        working-directory: agent-memory-client
-        run: uv run ruff format --check agent_memory_client
-
-      - name: Type check with mypy
-        working-directory: agent-memory-client
-        run: uv run mypy agent_memory_client
+      - name: Run pre-commit
+        run: |
+          uv run pre-commit run --all-files
 
       - name: Run tests
         working-directory: agent-memory-client
diff --git a/.github/workflows/agent-memory-server.yml b/.github/workflows/agent-memory-server.yml
index 926887d..45af6ed 100644
--- a/.github/workflows/agent-memory-server.yml
+++ b/.github/workflows/agent-memory-server.yml
@@ -9,8 +9,8 @@ on:
     branches: [main]
 
 jobs:
-  test:
-    name: Test and build (Python 3.12)
+  build:
+    name: Build package
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -20,35 +20,23 @@ jobs:
         with:
           python-version: '3.12'
 
-      - name: Install uv
-        uses: astral-sh/setup-uv@v3
-
-      - name: Install dependencies
-        run: uv sync --all-extras
-
-      - name: Install agent-memory-client (editable)
-        run: uv pip install -e ./agent-memory-client
-
-      - name: Lint with Ruff
-        run: uv run ruff check
-
-      - name: Check formatting with Ruff formatter
-        run: uv run ruff format --check
-
-      - name: Run tests
-        run: uv run pytest --run-api-tests
-        env:
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-
-      - name: Build package
+      - name: Install build tools
         run: |
           python -m pip install --upgrade pip
           pip install build
-          python -m build
+
+      - name: Build package
+        run: python -m build
+
+      - name: Upload dist artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/*
 
   publish-testpypi:
     name: Publish to TestPyPI
-    needs: test
+    needs: build
     if: startsWith(github.ref, 'refs/tags/server/') && contains(github.ref, '-test')
     runs-on: ubuntu-latest
     environment: testpypi
@@ -56,20 +44,11 @@ jobs:
       id-token: write
       contents: read
     steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v4
+      - name: Download dist artifact
+        uses: actions/download-artifact@v4
         with:
-          python-version: '3.12'
-
-      - name: Install build tools
-        run: |
-          python -m pip install --upgrade pip
-          pip install build
-
-      - name: Build package
-        run: python -m build
+          name: dist
+          path: dist
 
       - name: Publish package to TestPyPI
         uses: pypa/gh-action-pypi-publish@release/v1
@@ -79,7 +58,7 @@ jobs:
 
   publish-pypi:
     name: Publish to PyPI
-    needs: test
+    needs: build
     if: startsWith(github.ref, 'refs/tags/server/') && !contains(github.ref, '-test')
     runs-on: ubuntu-latest
     environment: pypi
@@ -87,20 +66,11 @@ jobs:
       id-token: write
       contents: read
     steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v4
+      - name: Download dist artifact
+        uses: actions/download-artifact@v4
         with:
-          python-version: '3.12'
-
-      - name: Install build tools
-        run: |
-          python -m pip install --upgrade pip
-          pip install build
-
-      - name: Build package
-        run: python -m build
+          name: dist
+          path: dist
 
       - name: Publish package to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
index a525521..b428ccc 100644
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-tests.yml
@@ -3,6 +3,9 @@ name: Python Tests
 on:
   push:
     branches: [ main ]
+    tags:
+      - 'server/v*.*.*'
+      - 'client/v*.*.*'
   pull_request:
     branches: [ main ]
 

From bf5122bae40e31bee852b289e901db53aa9f6e5d Mon Sep 17 00:00:00 2001
From: Andrew Brookins <andrew.b@prefect.io>
Date: Tue, 9 Dec 2025 09:45:34 -0800
Subject: [PATCH 5/5] Lint

---
 .../tests/test_tool_schemas.py                |  24 ++--
 tests/test_api.py                             |  66 ++++-----
 tests/test_client_tool_calls.py               |  12 +-
 tests/test_context_percentage_calculation.py  | 126 +++++++++---------
 .../test_contextual_grounding_integration.py  |  12 +-
 tests/test_full_integration.py                |  18 +--
 tests/test_mcp.py                             |  12 +-
 tests/test_thread_aware_grounding.py          |  12 +-
 tests/test_tool_contextual_grounding.py       |  30 ++---
 9 files changed, 156 insertions(+), 156 deletions(-)

diff --git a/agent-memory-client/tests/test_tool_schemas.py b/agent-memory-client/tests/test_tool_schemas.py
index 1cada2e..7182166 100644
--- a/agent-memory-client/tests/test_tool_schemas.py
+++ b/agent-memory-client/tests/test_tool_schemas.py
@@ -198,9 +198,9 @@ def test_creation_and_editing_tools_exclude_message_type(self):
                 memory_type_prop = params["properties"]["memory_type"]
                 if "enum" in memory_type_prop:
                     if function_name in restricted_tools:
-                        assert "message" not in memory_type_prop["enum"], (
-                            f"Creation/editing tool '{function_name}' should not expose 'message' memory type"
-                        )
+                        assert (
+                            "message" not in memory_type_prop["enum"]
+                        ), f"Creation/editing tool '{function_name}' should not expose 'message' memory type"
                     elif function_name in allowed_tools:
                         # These tools are allowed to have message in enum for filtering
                         pass
@@ -215,9 +215,9 @@ def test_creation_and_editing_tools_exclude_message_type(self):
                     and function_name in restricted_tools
                 ):
                     memory_type_prop = items["properties"]["memory_type"]
-                    assert "message" not in memory_type_prop["enum"], (
-                        f"Creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties"
-                    )
+                    assert (
+                        "message" not in memory_type_prop["enum"]
+                    ), f"Creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties"
 
 
 class TestAnthropicSchemas:
@@ -290,9 +290,9 @@ def test_anthropic_schemas_exclude_message_type_for_creation(self):
                 memory_type_prop = params["properties"]["memory_type"]
                 if "enum" in memory_type_prop:
                     if function_name in restricted_tools:
-                        assert "message" not in memory_type_prop["enum"], (
-                            f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type"
-                        )
+                        assert (
+                            "message" not in memory_type_prop["enum"]
+                        ), f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type"
                     elif function_name in allowed_tools:
                         # These tools are allowed to have message in enum for filtering
                         pass
@@ -307,6 +307,6 @@ def test_anthropic_schemas_exclude_message_type_for_creation(self):
                     and function_name in restricted_tools
                 ):
                     memory_type_prop = items["properties"]["memory_type"]
-                    assert "message" not in memory_type_prop["enum"], (
-                        f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties"
-                    )
+                    assert (
+                        "message" not in memory_type_prop["enum"]
+                    ), f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties"
diff --git a/tests/test_api.py b/tests/test_api.py
index b7da557..61d4550 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -378,54 +378,54 @@ async def test_put_memory_context_percentages_with_summarization_regression(
         # Verify summarization occurred (message count should be reduced)
         original_message_count = len(payload["messages"])
         final_message_count = len(data["messages"])
-        assert final_message_count < original_message_count, (
-            f"Expected summarization to reduce messages from {original_message_count} to less, but got {final_message_count}"
-        )
+        assert (
+            final_message_count < original_message_count
+        ), f"Expected summarization to reduce messages from {original_message_count} to less, but got {final_message_count}"
 
         # Verify context summary was created
-        assert data["context"] is not None, (
-            "Context should not be None after summarization"
-        )
-        assert data["context"].strip() != "", (
-            "Context should not be empty after summarization"
-        )
+        assert (
+            data["context"] is not None
+        ), "Context should not be None after summarization"
+        assert (
+            data["context"].strip() != ""
+        ), "Context should not be empty after summarization"
 
         # REGRESSION TEST: Context percentages should NOT be null even after summarization
         # They should reflect the current state (post-summarization) with small percentages
         assert "context_percentage_total_used" in data
         assert "context_percentage_until_summarization" in data
-        assert data["context_percentage_total_used"] is not None, (
-            "BUG REGRESSION: context_percentage_total_used should not be null when context_window_max is provided"
-        )
-        assert data["context_percentage_until_summarization"] is not None, (
-            "BUG REGRESSION: context_percentage_until_summarization should not be null when context_window_max is provided"
-        )
+        assert (
+            data["context_percentage_total_used"] is not None
+        ), "BUG REGRESSION: context_percentage_total_used should not be null when context_window_max is provided"
+        assert (
+            data["context_percentage_until_summarization"] is not None
+        ), "BUG REGRESSION: context_percentage_until_summarization should not be null when context_window_max is provided"
 
         # Verify the percentages are valid numbers
         total_used = data["context_percentage_total_used"]
         until_summarization = data["context_percentage_until_summarization"]
 
-        assert isinstance(total_used, int | float), (
-            f"context_percentage_total_used should be a number, got {type(total_used)}"
-        )
-        assert isinstance(until_summarization, int | float), (
-            f"context_percentage_until_summarization should be a number, got {type(until_summarization)}"
-        )
-        assert 0 <= total_used <= 100, (
-            f"context_percentage_total_used should be 0-100, got {total_used}"
-        )
-        assert 0 <= until_summarization <= 100, (
-            f"context_percentage_until_summarization should be 0-100, got {until_summarization}"
-        )
+        assert isinstance(
+            total_used, int | float
+        ), f"context_percentage_total_used should be a number, got {type(total_used)}"
+        assert isinstance(
+            until_summarization, int | float
+        ), f"context_percentage_until_summarization should be a number, got {type(until_summarization)}"
+        assert (
+            0 <= total_used <= 100
+        ), f"context_percentage_total_used should be 0-100, got {total_used}"
+        assert (
+            0 <= until_summarization <= 100
+        ), f"context_percentage_until_summarization should be 0-100, got {until_summarization}"
 
         # After summarization, percentages should be reasonable (not necessarily high)
         # They represent the current state of the session post-summarization
-        assert total_used >= 0, (
-            f"Expected non-negative total usage percentage, got {total_used}"
-        )
-        assert until_summarization >= 0, (
-            f"Expected non-negative until_summarization percentage, got {until_summarization}"
-        )
+        assert (
+            total_used >= 0
+        ), f"Expected non-negative total usage percentage, got {total_used}"
+        assert (
+            until_summarization >= 0
+        ), f"Expected non-negative until_summarization percentage, got {until_summarization}"
 
     @pytest.mark.requires_api_keys
     @pytest.mark.asyncio
diff --git a/tests/test_client_tool_calls.py b/tests/test_client_tool_calls.py
index 70a022c..b24e8df 100644
--- a/tests/test_client_tool_calls.py
+++ b/tests/test_client_tool_calls.py
@@ -587,9 +587,9 @@ def test_all_tool_schemas_exclude_message_type(self):
             if "memory_type" in params["properties"]:
                 memory_type_prop = params["properties"]["memory_type"]
                 if function_name in restricted_tools:
-                    assert "message" not in memory_type_prop.get("enum", []), (
-                        f"Creation/editing tool {function_name} should not expose 'message' memory type"
-                    )
+                    assert (
+                        "message" not in memory_type_prop.get("enum", [])
+                    ), f"Creation/editing tool {function_name} should not expose 'message' memory type"
 
             # Check nested properties (like in create_long_term_memory)
             if "memories" in params["properties"]:
@@ -597,9 +597,9 @@ def test_all_tool_schemas_exclude_message_type(self):
                 if "properties" in items and "memory_type" in items["properties"]:
                     memory_type_prop = items["properties"]["memory_type"]
                     if function_name in restricted_tools:
-                        assert "message" not in memory_type_prop.get("enum", []), (
-                            f"Creation/editing tool {function_name} should not expose 'message' memory type in nested properties"
-                        )
+                        assert (
+                            "message" not in memory_type_prop.get("enum", [])
+                        ), f"Creation/editing tool {function_name} should not expose 'message' memory type in nested properties"
 
 
 class TestToolCallErrorHandling:
diff --git a/tests/test_context_percentage_calculation.py b/tests/test_context_percentage_calculation.py
index 6238b0a..4eca4fd 100644
--- a/tests/test_context_percentage_calculation.py
+++ b/tests/test_context_percentage_calculation.py
@@ -29,22 +29,22 @@ def test_context_percentages_with_context_window_max(self):
             )
         )
 
-        assert total_percentage is not None, (
-            "total_percentage should not be None when context_window_max is provided"
-        )
-        assert until_summarization_percentage is not None, (
-            "until_summarization_percentage should not be None when context_window_max is provided"
-        )
+        assert (
+            total_percentage is not None
+        ), "total_percentage should not be None when context_window_max is provided"
+        assert (
+            until_summarization_percentage is not None
+        ), "until_summarization_percentage should not be None when context_window_max is provided"
         assert isinstance(total_percentage, float), "total_percentage should be a float"
-        assert isinstance(until_summarization_percentage, float), (
-            "until_summarization_percentage should be a float"
-        )
-        assert 0 <= total_percentage <= 100, (
-            "total_percentage should be between 0 and 100"
-        )
-        assert 0 <= until_summarization_percentage <= 100, (
-            "until_summarization_percentage should be between 0 and 100"
-        )
+        assert isinstance(
+            until_summarization_percentage, float
+        ), "until_summarization_percentage should be a float"
+        assert (
+            0 <= total_percentage <= 100
+        ), "total_percentage should be between 0 and 100"
+        assert (
+            0 <= until_summarization_percentage <= 100
+        ), "until_summarization_percentage should be between 0 and 100"
 
     def test_context_percentages_with_model_name(self):
         """Test that context percentages are calculated when model_name is provided"""
@@ -59,16 +59,16 @@ def test_context_percentages_with_model_name(self):
             )
         )
 
-        assert total_percentage is not None, (
-            "total_percentage should not be None when model_name is provided"
-        )
-        assert until_summarization_percentage is not None, (
-            "until_summarization_percentage should not be None when model_name is provided"
-        )
+        assert (
+            total_percentage is not None
+        ), "total_percentage should not be None when model_name is provided"
+        assert (
+            until_summarization_percentage is not None
+        ), "until_summarization_percentage should not be None when model_name is provided"
         assert isinstance(total_percentage, float), "total_percentage should be a float"
-        assert isinstance(until_summarization_percentage, float), (
-            "until_summarization_percentage should be a float"
-        )
+        assert isinstance(
+            until_summarization_percentage, float
+        ), "until_summarization_percentage should be a float"
 
     def test_context_percentages_without_model_info(self):
         """Test that context percentages return None when no model info is provided"""
@@ -83,12 +83,12 @@ def test_context_percentages_without_model_info(self):
             )
         )
 
-        assert total_percentage is None, (
-            "total_percentage should be None when no model info is provided"
-        )
-        assert until_summarization_percentage is None, (
-            "until_summarization_percentage should be None when no model info is provided"
-        )
+        assert (
+            total_percentage is None
+        ), "total_percentage should be None when no model info is provided"
+        assert (
+            until_summarization_percentage is None
+        ), "until_summarization_percentage should be None when no model info is provided"
 
     def test_context_percentages_with_empty_messages(self):
         """Test context percentages with empty messages list but model info provided"""
@@ -101,12 +101,12 @@ def test_context_percentages_with_empty_messages(self):
         )
 
         # CORRECTED: Should return 0.0 when model info is provided, even with empty messages
-        assert total_percentage == 0.0, (
-            "total_percentage should be 0.0 for empty messages when model info provided"
-        )
-        assert until_summarization_percentage == 0.0, (
-            "until_summarization_percentage should be 0.0 for empty messages when model info provided"
-        )
+        assert (
+            total_percentage == 0.0
+        ), "total_percentage should be 0.0 for empty messages when model info provided"
+        assert (
+            until_summarization_percentage == 0.0
+        ), "until_summarization_percentage should be 0.0 for empty messages when model info provided"
 
     def test_context_percentages_precedence(self):
         """Test that context_window_max takes precedence over model_name"""
@@ -131,9 +131,9 @@ def test_context_percentages_precedence(self):
         )
 
         # Results should be the same, proving context_window_max takes precedence
-        assert total_percentage_both == total_percentage_max_only, (
-            "context_window_max should take precedence over model_name"
-        )
+        assert (
+            total_percentage_both == total_percentage_max_only
+        ), "context_window_max should take precedence over model_name"
         assert (
             until_summarization_percentage_both
             == until_summarization_percentage_max_only
@@ -163,9 +163,9 @@ def test_context_percentages_high_token_usage(self):
         assert until_summarization_percentage is not None
         # Should be capped at 100%
         assert total_percentage <= 100.0, "total_percentage should be capped at 100%"
-        assert until_summarization_percentage <= 100.0, (
-            "until_summarization_percentage should be capped at 100%"
-        )
+        assert (
+            until_summarization_percentage <= 100.0
+        ), "until_summarization_percentage should be capped at 100%"
 
     def test_context_percentages_zero_context_window_regression(self):
         """
@@ -185,9 +185,9 @@ def test_context_percentages_zero_context_window_regression(self):
 
         # Should return None for invalid context window
         assert total_percentage is None, "Should return None for zero context window"
-        assert until_summarization_percentage is None, (
-            "Should return None for zero context window"
-        )
+        assert (
+            until_summarization_percentage is None
+        ), "Should return None for zero context window"
 
         # Test with negative context window
         total_percentage, until_summarization_percentage = (
@@ -197,12 +197,12 @@ def test_context_percentages_zero_context_window_regression(self):
         )
 
         # Should return None for invalid context window
-        assert total_percentage is None, (
-            "Should return None for negative context window"
-        )
-        assert until_summarization_percentage is None, (
-            "Should return None for negative context window"
-        )
+        assert (
+            total_percentage is None
+        ), "Should return None for negative context window"
+        assert (
+            until_summarization_percentage is None
+        ), "Should return None for negative context window"
 
     def test_context_percentages_very_small_context_window_regression(self):
         """
@@ -224,17 +224,17 @@ def test_context_percentages_very_small_context_window_regression(self):
         )
 
         # Should handle this gracefully without division by zero
-        assert total_percentage is not None, (
-            "Should handle small context window without error"
-        )
-        assert until_summarization_percentage is not None, (
-            "Should handle small context window without error"
-        )
+        assert (
+            total_percentage is not None
+        ), "Should handle small context window without error"
+        assert (
+            until_summarization_percentage is not None
+        ), "Should handle small context window without error"
         assert isinstance(total_percentage, float), "Should return valid float"
-        assert isinstance(until_summarization_percentage, float), (
-            "Should return valid float"
-        )
+        assert isinstance(
+            until_summarization_percentage, float
+        ), "Should return valid float"
         # until_summarization_percentage should be 100% when threshold is 0
-        assert until_summarization_percentage == 100.0, (
-            "Should return 100% when token threshold is 0"
-        )
+        assert (
+            until_summarization_percentage == 100.0
+        ), "Should return 100% when token threshold is 0"
diff --git a/tests/test_contextual_grounding_integration.py b/tests/test_contextual_grounding_integration.py
index f9b8200..15db72b 100644
--- a/tests/test_contextual_grounding_integration.py
+++ b/tests/test_contextual_grounding_integration.py
@@ -449,9 +449,9 @@ async def test_comprehensive_grounding_evaluation_with_judge(self):
 
             # CI Stability: Accept any valid score (>= 0.0) while grounding system is being improved
             # This allows us to track grounding quality without blocking CI on implementation details
-            assert result.overall_score >= 0.0, (
-                f"Invalid score for {example['category']}: {result.overall_score}"
-            )
+            assert (
+                result.overall_score >= 0.0
+            ), f"Invalid score for {example['category']}: {result.overall_score}"
 
             # Log performance for monitoring
             if result.overall_score < 0.05:
@@ -530,6 +530,6 @@ async def test_model_comparison_grounding_quality(self):
             print(f"{model}: {status}")
 
         # At least one model should succeed
-        assert any(r["success"] for r in results_by_model.values()), (
-            "No model successfully completed grounding"
-        )
+        assert any(
+            r["success"] for r in results_by_model.values()
+        ), "No model successfully completed grounding"
diff --git a/tests/test_full_integration.py b/tests/test_full_integration.py
index a8368bd..aa0ac6d 100644
--- a/tests/test_full_integration.py
+++ b/tests/test_full_integration.py
@@ -772,9 +772,9 @@ async def test_memory_prompt_with_long_term_search(
             )
             for msg in messages
         )
-        assert relevant_context_found, (
-            f"No relevant memory context found in messages: {messages}"
-        )
+        assert (
+            relevant_context_found
+        ), f"No relevant memory context found in messages: {messages}"
 
         # Cleanup
         await client.delete_long_term_memories([m.id for m in test_memories])
@@ -1078,9 +1078,9 @@ async def test_full_workflow_integration(
             )
             print(f"No topic filter search results: {no_topic_search}")
 
-        assert len(search_results["memories"]) > 0, (
-            f"No memories found in search results: {search_results}"
-        )
+        assert (
+            len(search_results["memories"]) > 0
+        ), f"No memories found in search results: {search_results}"
 
         # 6. Test tool integration with a realistic scenario
         tool_call = {
@@ -1125,9 +1125,9 @@ async def test_full_workflow_integration(
             m for m in long_term_memories.memories if m.id.startswith(memory_id_prefix)
         ]
 
-        assert len(our_memories) == 0, (
-            f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}"
-        )
+        assert (
+            len(our_memories) == 0
+        ), f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}"
 
 
 @pytest.mark.integration
diff --git a/tests/test_mcp.py b/tests/test_mcp.py
index 97a4f36..11d1de9 100644
--- a/tests/test_mcp.py
+++ b/tests/test_mcp.py
@@ -455,9 +455,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup)
             namespace="user_preferences",
         )
 
-        assert lenient_memory.discrete_memory_extracted == "t", (
-            f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'"
-        )
+        assert (
+            lenient_memory.discrete_memory_extracted == "t"
+        ), f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'"
         assert lenient_memory.memory_type.value == "semantic"
         assert lenient_memory.id is not None
 
@@ -466,9 +466,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup)
             id="test_001", text="User prefers coffee", memory_type="semantic"
         )
 
-        assert extracted_memory.discrete_memory_extracted == "t", (
-            f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'"
-        )
+        assert (
+            extracted_memory.discrete_memory_extracted == "t"
+        ), f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'"
         assert extracted_memory.memory_type.value == "semantic"
 
     @pytest.mark.asyncio
diff --git a/tests/test_thread_aware_grounding.py b/tests/test_thread_aware_grounding.py
index 6a8b021..b4bd00a 100644
--- a/tests/test_thread_aware_grounding.py
+++ b/tests/test_thread_aware_grounding.py
@@ -184,9 +184,9 @@ async def test_debounce_mechanism(self, redis_url):
 
         # Immediate second call should be debounced
         should_extract_2 = await should_extract_session_thread(session_id, redis)
-        assert should_extract_2 is False, (
-            "Second extraction attempt should be debounced"
-        )
+        assert (
+            should_extract_2 is False
+        ), "Second extraction attempt should be debounced"
 
         # Clean up
         debounce_key = f"extraction_debounce:{session_id}"
@@ -304,9 +304,9 @@ async def test_multi_entity_conversation(self):
 
         # The main success criterion: significantly reduced pronoun usage
         # Since we have proper contextual grounding, we should see very few unresolved pronouns
-        assert pronoun_count <= 3, (
-            f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}"
-        )
+        assert (
+            pronoun_count <= 3
+        ), f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}"
 
         # Additional validation: if we see multiple memories, it's a good sign of thorough extraction
         if len(extracted_memories) >= 2:
diff --git a/tests/test_tool_contextual_grounding.py b/tests/test_tool_contextual_grounding.py
index 3b15584..05b2f94 100644
--- a/tests/test_tool_contextual_grounding.py
+++ b/tests/test_tool_contextual_grounding.py
@@ -67,9 +67,9 @@ def test_tool_description_has_grounding_instructions(self):
         ]
 
         for keyword in grounding_keywords:
-            assert keyword in tool_description, (
-                f"Tool description missing keyword: {keyword}"
-            )
+            assert (
+                keyword in tool_description
+            ), f"Tool description missing keyword: {keyword}"
             print(f"✓ Found: {keyword}")
 
         print(
@@ -107,9 +107,9 @@ async def test_judge_evaluation_of_tool_created_memories(self):
         print(f"Scores: {evaluation}")
 
         # Well-grounded tool memory should score well
-        assert evaluation["overall_score"] >= 0.7, (
-            f"Well-grounded tool memory should score high: {evaluation['overall_score']}"
-        )
+        assert (
+            evaluation["overall_score"] >= 0.7
+        ), f"Well-grounded tool memory should score high: {evaluation['overall_score']}"
 
         # Test case: Poorly grounded tool memory
         poor_grounded_memory = "He has extensive backend experience. She specializes in React. They collaborate effectively."
@@ -133,9 +133,9 @@ async def test_judge_evaluation_of_tool_created_memories(self):
 
         # Both should at least be evaluated successfully
         assert evaluation["overall_score"] >= 0.7, "Good grounding should score well"
-        assert poor_evaluation["overall_score"] >= 0.0, (
-            "Poor grounding should still be evaluated"
-        )
+        assert (
+            poor_evaluation["overall_score"] >= 0.0
+        ), "Poor grounding should still be evaluated"
 
     @pytest.mark.requires_api_keys
     async def test_realistic_tool_usage_scenario(self):
@@ -194,12 +194,12 @@ async def test_realistic_tool_usage_scenario(self):
         print(f"Evaluation: {evaluation}")
 
         # Should demonstrate good contextual grounding
-        assert evaluation["pronoun_resolution_score"] >= 0.8, (
-            "Should properly ground 'she' to 'Maria'"
-        )
-        assert evaluation["overall_score"] >= 0.6, (
-            f"Realistic tool usage should show good grounding: {evaluation['overall_score']}"
-        )
+        assert (
+            evaluation["pronoun_resolution_score"] >= 0.8
+        ), "Should properly ground 'she' to 'Maria'"
+        assert (
+            evaluation["overall_score"] >= 0.6
+        ), f"Realistic tool usage should show good grounding: {evaluation['overall_score']}"
 
         print(
             "✓ Tool-based memory creation with proper contextual grounding successful"