From ea2fc0f85949998fb542fdc146ece63bb9a7a677 Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Tue, 9 Dec 2025 09:08:11 -0800 Subject: [PATCH 1/5] Add GHA workflow to publish server to PyPI --- .github/workflows/agent-memory-server.yml | 119 ++++++++++++++++ agent_memory_server/__init__.py | 2 +- scripts/tag_and_push_server.py | 165 ++++++++++++++++++++++ 3 files changed, 285 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/agent-memory-server.yml create mode 100644 scripts/tag_and_push_server.py diff --git a/.github/workflows/agent-memory-server.yml b/.github/workflows/agent-memory-server.yml new file mode 100644 index 0000000..5547344 --- /dev/null +++ b/.github/workflows/agent-memory-server.yml @@ -0,0 +1,119 @@ +name: Agent Memory Server CI + +on: + push: + branches: [main] + tags: + - 'server/v*.*.*' + pull_request: + branches: [main] + +jobs: + test: + name: Test and build (Python 3.12) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install uv + uses: astral-sh/setup-uv@v3 + + - name: Install dependencies + run: uv sync --all-extras + + - name: Install agent-memory-client (editable) + run: uv pip install -e ./agent-memory-client + + - name: Lint with Ruff + run: uv run ruff check + + - name: Check formatting with Ruff formatter + run: uv run ruff format --check + + - name: Run tests + run: uv run pytest --run-api-tests + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + + - name: Build package + run: | + python -m pip install --upgrade pip + pip install build + python -m build + + publish-testpypi: + name: Publish to TestPyPI + needs: test + if: startsWith(github.ref, 'refs/tags/server/') && contains(github.ref, '-test') + runs-on: ubuntu-latest + environment: testpypi + permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install build tools + run: | + python -m pip install --upgrade pip + pip install build + + - name: Build package + run: python -m build + + - name: Publish package to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + packages-dir: dist/ + + publish-pypi: + name: Publish to PyPI + needs: test + if: startsWith(github.ref, 'refs/tags/server/') && !contains(github.ref, '-test') + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install build tools + run: | + python -m pip install --upgrade pip + pip install build + + - name: Build package + run: python -m build + + - name: Publish package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: dist/ + +# Tag Format Guide: +# - For TestPyPI (testing): server/v1.0.0-test +# - For PyPI (production): server/v1.0.0 +# +# Use the script: python scripts/tag_and_push_server.py --test (for TestPyPI) +# python scripts/tag_and_push_server.py (for PyPI) +# +# This workflow uses PyPI Trusted Publishing (OIDC). Ensure the project is configured +# on PyPI to trust this GitHub repository before releasing. + diff --git a/agent_memory_server/__init__.py b/agent_memory_server/__init__.py index 7bb66b9..2d39331 100644 --- a/agent_memory_server/__init__.py +++ b/agent_memory_server/__init__.py @@ -1,3 +1,3 @@ """Redis Agent Memory Server - A memory system for conversational AI.""" -__version__ = "0.12.3" +__version__ = "0.12.4" diff --git a/scripts/tag_and_push_server.py b/scripts/tag_and_push_server.py new file mode 100644 index 0000000..86e0b69 --- /dev/null +++ b/scripts/tag_and_push_server.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 +""" +Script to tag and push agent-memory-server releases. + +This script: +1. Reads the current version from agent_memory_server/__init__.py +2. Creates a git tag in the format: server/v{version} +3. Pushes the tag to origin + +Usage: + python scripts/tag_and_push_server.py [--dry-run] [--force] [--test] +""" + +import argparse +import re +import subprocess +import sys +from pathlib import Path + + +def get_server_version() -> str: + """Read the version from agent_memory_server/__init__.py""" + init_file = Path("agent_memory_server/__init__.py") + + if not init_file.exists(): + raise FileNotFoundError(f"Could not find {init_file}") + + content = init_file.read_text() + + # Look for __version__ = "x.y.z" + version_match = re.search(r'__version__\s*=\s*["\']([^"\']+)["\']', content) + + if not version_match: + raise ValueError(f"Could not find __version__ in {init_file}") + + return version_match.group(1) + + +def run_command(cmd: list[str], dry_run: bool = False) -> subprocess.CompletedProcess: + """Run a command, optionally in dry-run mode.""" + print(f"Running: {' '.join(cmd)}") + + if dry_run: + print(" (dry-run mode - command not executed)") + return subprocess.CompletedProcess(cmd, 0, "", "") + + try: + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + if result.stdout: + print(f" Output: {result.stdout.strip()}") + return result + except subprocess.CalledProcessError as e: + print(f" Error: {e.stderr.strip()}") + raise + + +def check_git_status(): + """Check if git working directory is clean.""" + try: + result = subprocess.run( + ["git", "status", "--porcelain"], capture_output=True, text=True, check=True + ) + if result.stdout.strip(): + print("Warning: Git working directory is not clean:") + print(result.stdout) + response = input("Continue anyway? (y/N): ") + if response.lower() != "y": + sys.exit(1) + except subprocess.CalledProcessError: + print("Error: Could not check git status") + sys.exit(1) + + +def tag_exists(tag_name: str) -> bool: + """Check if a tag already exists.""" + try: + subprocess.run( + ["git", "rev-parse", f"refs/tags/{tag_name}"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=True, + ) + return True + except subprocess.CalledProcessError: + return False + + +def main(): + parser = argparse.ArgumentParser(description="Tag and push agent-memory-server release") + parser.add_argument( + "--dry-run", action="store_true", help="Show what would be done without actually doing it" + ) + parser.add_argument( + "--force", action="store_true", help="Force tag creation even if tag already exists" + ) + parser.add_argument( + "--test", action="store_true", help="Add '-test' suffix to tag for TestPyPI deployment" + ) + + args = parser.parse_args() + + # Change to project root directory + script_dir = Path(__file__).parent + project_root = script_dir.parent + + try: + original_cwd = Path.cwd() + if project_root.resolve() != original_cwd.resolve(): + print(f"Changing to project root: {project_root}") + import os + + os.chdir(project_root) + except Exception as e: + print(f"Warning: Could not change to project root: {e}") + + try: + # Get the current version + version = get_server_version() + tag_suffix = "-test" if args.test else "" + tag_name = f"server/v{version}{tag_suffix}" + + print(f"Current server version: {version}") + print(f"Tag to create: {tag_name}") + print(f"Deployment target: {'TestPyPI' if args.test else 'PyPI (Production)'}") + + if not args.dry_run: + # Check git status + check_git_status() + + # Check if tag already exists + if tag_exists(tag_name): + if args.force: + print(f"Tag {tag_name} already exists, but --force specified") + run_command(["git", "tag", "-d", tag_name], args.dry_run) + else: + print(f"Error: Tag {tag_name} already exists. Use --force to overwrite.") + sys.exit(1) + + # Create the tag + run_command(["git", "tag", tag_name], args.dry_run) + + # Push the tag + push_cmd = ["git", "push", "origin", tag_name] + if args.force: + push_cmd.insert(2, "--force") + + run_command(push_cmd, args.dry_run) + + print(f"\n✅ Successfully tagged and pushed {tag_name}") + + if not args.dry_run: + print("\nThis should trigger the GitHub Actions workflow for:") + if args.test: + print(" - TestPyPI publication (testing)") + else: + print(" - PyPI publication (production)") + + except Exception as e: + print(f"Error: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() + From 7ff587c65e40e449d5ae34343e4208be6c6b8346 Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Tue, 9 Dec 2025 09:10:21 -0800 Subject: [PATCH 2/5] lint --- .github/workflows/agent-memory-server.yml | 1 - scripts/tag_and_push_server.py | 21 +++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/workflows/agent-memory-server.yml b/.github/workflows/agent-memory-server.yml index 5547344..926887d 100644 --- a/.github/workflows/agent-memory-server.yml +++ b/.github/workflows/agent-memory-server.yml @@ -116,4 +116,3 @@ jobs: # # This workflow uses PyPI Trusted Publishing (OIDC). Ensure the project is configured # on PyPI to trust this GitHub repository before releasing. - diff --git a/scripts/tag_and_push_server.py b/scripts/tag_and_push_server.py index 86e0b69..4975c0e 100644 --- a/scripts/tag_and_push_server.py +++ b/scripts/tag_and_push_server.py @@ -86,15 +86,23 @@ def tag_exists(tag_name: str) -> bool: def main(): - parser = argparse.ArgumentParser(description="Tag and push agent-memory-server release") + parser = argparse.ArgumentParser( + description="Tag and push agent-memory-server release" + ) parser.add_argument( - "--dry-run", action="store_true", help="Show what would be done without actually doing it" + "--dry-run", + action="store_true", + help="Show what would be done without actually doing it", ) parser.add_argument( - "--force", action="store_true", help="Force tag creation even if tag already exists" + "--force", + action="store_true", + help="Force tag creation even if tag already exists", ) parser.add_argument( - "--test", action="store_true", help="Add '-test' suffix to tag for TestPyPI deployment" + "--test", + action="store_true", + help="Add '-test' suffix to tag for TestPyPI deployment", ) args = parser.parse_args() @@ -133,7 +141,9 @@ def main(): print(f"Tag {tag_name} already exists, but --force specified") run_command(["git", "tag", "-d", tag_name], args.dry_run) else: - print(f"Error: Tag {tag_name} already exists. Use --force to overwrite.") + print( + f"Error: Tag {tag_name} already exists. Use --force to overwrite." + ) sys.exit(1) # Create the tag @@ -162,4 +172,3 @@ def main(): if __name__ == "__main__": main() - From 5f9474849e53c6b33b1154c7685b4392602565a9 Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Tue, 9 Dec 2025 09:12:06 -0800 Subject: [PATCH 3/5] lint --- .../tests/test_tool_schemas.py | 24 ++-- tests/test_api.py | 66 ++++----- tests/test_client_tool_calls.py | 12 +- tests/test_context_percentage_calculation.py | 126 +++++++++--------- .../test_contextual_grounding_integration.py | 12 +- tests/test_full_integration.py | 18 +-- tests/test_mcp.py | 12 +- tests/test_thread_aware_grounding.py | 12 +- tests/test_tool_contextual_grounding.py | 30 ++--- 9 files changed, 156 insertions(+), 156 deletions(-) diff --git a/agent-memory-client/tests/test_tool_schemas.py b/agent-memory-client/tests/test_tool_schemas.py index 7182166..1cada2e 100644 --- a/agent-memory-client/tests/test_tool_schemas.py +++ b/agent-memory-client/tests/test_tool_schemas.py @@ -198,9 +198,9 @@ def test_creation_and_editing_tools_exclude_message_type(self): memory_type_prop = params["properties"]["memory_type"] if "enum" in memory_type_prop: if function_name in restricted_tools: - assert ( - "message" not in memory_type_prop["enum"] - ), f"Creation/editing tool '{function_name}' should not expose 'message' memory type" + assert "message" not in memory_type_prop["enum"], ( + f"Creation/editing tool '{function_name}' should not expose 'message' memory type" + ) elif function_name in allowed_tools: # These tools are allowed to have message in enum for filtering pass @@ -215,9 +215,9 @@ def test_creation_and_editing_tools_exclude_message_type(self): and function_name in restricted_tools ): memory_type_prop = items["properties"]["memory_type"] - assert ( - "message" not in memory_type_prop["enum"] - ), f"Creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" + assert "message" not in memory_type_prop["enum"], ( + f"Creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" + ) class TestAnthropicSchemas: @@ -290,9 +290,9 @@ def test_anthropic_schemas_exclude_message_type_for_creation(self): memory_type_prop = params["properties"]["memory_type"] if "enum" in memory_type_prop: if function_name in restricted_tools: - assert ( - "message" not in memory_type_prop["enum"] - ), f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type" + assert "message" not in memory_type_prop["enum"], ( + f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type" + ) elif function_name in allowed_tools: # These tools are allowed to have message in enum for filtering pass @@ -307,6 +307,6 @@ def test_anthropic_schemas_exclude_message_type_for_creation(self): and function_name in restricted_tools ): memory_type_prop = items["properties"]["memory_type"] - assert ( - "message" not in memory_type_prop["enum"] - ), f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" + assert "message" not in memory_type_prop["enum"], ( + f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" + ) diff --git a/tests/test_api.py b/tests/test_api.py index 61d4550..b7da557 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -378,54 +378,54 @@ async def test_put_memory_context_percentages_with_summarization_regression( # Verify summarization occurred (message count should be reduced) original_message_count = len(payload["messages"]) final_message_count = len(data["messages"]) - assert ( - final_message_count < original_message_count - ), f"Expected summarization to reduce messages from {original_message_count} to less, but got {final_message_count}" + assert final_message_count < original_message_count, ( + f"Expected summarization to reduce messages from {original_message_count} to less, but got {final_message_count}" + ) # Verify context summary was created - assert ( - data["context"] is not None - ), "Context should not be None after summarization" - assert ( - data["context"].strip() != "" - ), "Context should not be empty after summarization" + assert data["context"] is not None, ( + "Context should not be None after summarization" + ) + assert data["context"].strip() != "", ( + "Context should not be empty after summarization" + ) # REGRESSION TEST: Context percentages should NOT be null even after summarization # They should reflect the current state (post-summarization) with small percentages assert "context_percentage_total_used" in data assert "context_percentage_until_summarization" in data - assert ( - data["context_percentage_total_used"] is not None - ), "BUG REGRESSION: context_percentage_total_used should not be null when context_window_max is provided" - assert ( - data["context_percentage_until_summarization"] is not None - ), "BUG REGRESSION: context_percentage_until_summarization should not be null when context_window_max is provided" + assert data["context_percentage_total_used"] is not None, ( + "BUG REGRESSION: context_percentage_total_used should not be null when context_window_max is provided" + ) + assert data["context_percentage_until_summarization"] is not None, ( + "BUG REGRESSION: context_percentage_until_summarization should not be null when context_window_max is provided" + ) # Verify the percentages are valid numbers total_used = data["context_percentage_total_used"] until_summarization = data["context_percentage_until_summarization"] - assert isinstance( - total_used, int | float - ), f"context_percentage_total_used should be a number, got {type(total_used)}" - assert isinstance( - until_summarization, int | float - ), f"context_percentage_until_summarization should be a number, got {type(until_summarization)}" - assert ( - 0 <= total_used <= 100 - ), f"context_percentage_total_used should be 0-100, got {total_used}" - assert ( - 0 <= until_summarization <= 100 - ), f"context_percentage_until_summarization should be 0-100, got {until_summarization}" + assert isinstance(total_used, int | float), ( + f"context_percentage_total_used should be a number, got {type(total_used)}" + ) + assert isinstance(until_summarization, int | float), ( + f"context_percentage_until_summarization should be a number, got {type(until_summarization)}" + ) + assert 0 <= total_used <= 100, ( + f"context_percentage_total_used should be 0-100, got {total_used}" + ) + assert 0 <= until_summarization <= 100, ( + f"context_percentage_until_summarization should be 0-100, got {until_summarization}" + ) # After summarization, percentages should be reasonable (not necessarily high) # They represent the current state of the session post-summarization - assert ( - total_used >= 0 - ), f"Expected non-negative total usage percentage, got {total_used}" - assert ( - until_summarization >= 0 - ), f"Expected non-negative until_summarization percentage, got {until_summarization}" + assert total_used >= 0, ( + f"Expected non-negative total usage percentage, got {total_used}" + ) + assert until_summarization >= 0, ( + f"Expected non-negative until_summarization percentage, got {until_summarization}" + ) @pytest.mark.requires_api_keys @pytest.mark.asyncio diff --git a/tests/test_client_tool_calls.py b/tests/test_client_tool_calls.py index b24e8df..70a022c 100644 --- a/tests/test_client_tool_calls.py +++ b/tests/test_client_tool_calls.py @@ -587,9 +587,9 @@ def test_all_tool_schemas_exclude_message_type(self): if "memory_type" in params["properties"]: memory_type_prop = params["properties"]["memory_type"] if function_name in restricted_tools: - assert ( - "message" not in memory_type_prop.get("enum", []) - ), f"Creation/editing tool {function_name} should not expose 'message' memory type" + assert "message" not in memory_type_prop.get("enum", []), ( + f"Creation/editing tool {function_name} should not expose 'message' memory type" + ) # Check nested properties (like in create_long_term_memory) if "memories" in params["properties"]: @@ -597,9 +597,9 @@ def test_all_tool_schemas_exclude_message_type(self): if "properties" in items and "memory_type" in items["properties"]: memory_type_prop = items["properties"]["memory_type"] if function_name in restricted_tools: - assert ( - "message" not in memory_type_prop.get("enum", []) - ), f"Creation/editing tool {function_name} should not expose 'message' memory type in nested properties" + assert "message" not in memory_type_prop.get("enum", []), ( + f"Creation/editing tool {function_name} should not expose 'message' memory type in nested properties" + ) class TestToolCallErrorHandling: diff --git a/tests/test_context_percentage_calculation.py b/tests/test_context_percentage_calculation.py index 4eca4fd..6238b0a 100644 --- a/tests/test_context_percentage_calculation.py +++ b/tests/test_context_percentage_calculation.py @@ -29,22 +29,22 @@ def test_context_percentages_with_context_window_max(self): ) ) - assert ( - total_percentage is not None - ), "total_percentage should not be None when context_window_max is provided" - assert ( - until_summarization_percentage is not None - ), "until_summarization_percentage should not be None when context_window_max is provided" + assert total_percentage is not None, ( + "total_percentage should not be None when context_window_max is provided" + ) + assert until_summarization_percentage is not None, ( + "until_summarization_percentage should not be None when context_window_max is provided" + ) assert isinstance(total_percentage, float), "total_percentage should be a float" - assert isinstance( - until_summarization_percentage, float - ), "until_summarization_percentage should be a float" - assert ( - 0 <= total_percentage <= 100 - ), "total_percentage should be between 0 and 100" - assert ( - 0 <= until_summarization_percentage <= 100 - ), "until_summarization_percentage should be between 0 and 100" + assert isinstance(until_summarization_percentage, float), ( + "until_summarization_percentage should be a float" + ) + assert 0 <= total_percentage <= 100, ( + "total_percentage should be between 0 and 100" + ) + assert 0 <= until_summarization_percentage <= 100, ( + "until_summarization_percentage should be between 0 and 100" + ) def test_context_percentages_with_model_name(self): """Test that context percentages are calculated when model_name is provided""" @@ -59,16 +59,16 @@ def test_context_percentages_with_model_name(self): ) ) - assert ( - total_percentage is not None - ), "total_percentage should not be None when model_name is provided" - assert ( - until_summarization_percentage is not None - ), "until_summarization_percentage should not be None when model_name is provided" + assert total_percentage is not None, ( + "total_percentage should not be None when model_name is provided" + ) + assert until_summarization_percentage is not None, ( + "until_summarization_percentage should not be None when model_name is provided" + ) assert isinstance(total_percentage, float), "total_percentage should be a float" - assert isinstance( - until_summarization_percentage, float - ), "until_summarization_percentage should be a float" + assert isinstance(until_summarization_percentage, float), ( + "until_summarization_percentage should be a float" + ) def test_context_percentages_without_model_info(self): """Test that context percentages return None when no model info is provided""" @@ -83,12 +83,12 @@ def test_context_percentages_without_model_info(self): ) ) - assert ( - total_percentage is None - ), "total_percentage should be None when no model info is provided" - assert ( - until_summarization_percentage is None - ), "until_summarization_percentage should be None when no model info is provided" + assert total_percentage is None, ( + "total_percentage should be None when no model info is provided" + ) + assert until_summarization_percentage is None, ( + "until_summarization_percentage should be None when no model info is provided" + ) def test_context_percentages_with_empty_messages(self): """Test context percentages with empty messages list but model info provided""" @@ -101,12 +101,12 @@ def test_context_percentages_with_empty_messages(self): ) # CORRECTED: Should return 0.0 when model info is provided, even with empty messages - assert ( - total_percentage == 0.0 - ), "total_percentage should be 0.0 for empty messages when model info provided" - assert ( - until_summarization_percentage == 0.0 - ), "until_summarization_percentage should be 0.0 for empty messages when model info provided" + assert total_percentage == 0.0, ( + "total_percentage should be 0.0 for empty messages when model info provided" + ) + assert until_summarization_percentage == 0.0, ( + "until_summarization_percentage should be 0.0 for empty messages when model info provided" + ) def test_context_percentages_precedence(self): """Test that context_window_max takes precedence over model_name""" @@ -131,9 +131,9 @@ def test_context_percentages_precedence(self): ) # Results should be the same, proving context_window_max takes precedence - assert ( - total_percentage_both == total_percentage_max_only - ), "context_window_max should take precedence over model_name" + assert total_percentage_both == total_percentage_max_only, ( + "context_window_max should take precedence over model_name" + ) assert ( until_summarization_percentage_both == until_summarization_percentage_max_only @@ -163,9 +163,9 @@ def test_context_percentages_high_token_usage(self): assert until_summarization_percentage is not None # Should be capped at 100% assert total_percentage <= 100.0, "total_percentage should be capped at 100%" - assert ( - until_summarization_percentage <= 100.0 - ), "until_summarization_percentage should be capped at 100%" + assert until_summarization_percentage <= 100.0, ( + "until_summarization_percentage should be capped at 100%" + ) def test_context_percentages_zero_context_window_regression(self): """ @@ -185,9 +185,9 @@ def test_context_percentages_zero_context_window_regression(self): # Should return None for invalid context window assert total_percentage is None, "Should return None for zero context window" - assert ( - until_summarization_percentage is None - ), "Should return None for zero context window" + assert until_summarization_percentage is None, ( + "Should return None for zero context window" + ) # Test with negative context window total_percentage, until_summarization_percentage = ( @@ -197,12 +197,12 @@ def test_context_percentages_zero_context_window_regression(self): ) # Should return None for invalid context window - assert ( - total_percentage is None - ), "Should return None for negative context window" - assert ( - until_summarization_percentage is None - ), "Should return None for negative context window" + assert total_percentage is None, ( + "Should return None for negative context window" + ) + assert until_summarization_percentage is None, ( + "Should return None for negative context window" + ) def test_context_percentages_very_small_context_window_regression(self): """ @@ -224,17 +224,17 @@ def test_context_percentages_very_small_context_window_regression(self): ) # Should handle this gracefully without division by zero - assert ( - total_percentage is not None - ), "Should handle small context window without error" - assert ( - until_summarization_percentage is not None - ), "Should handle small context window without error" + assert total_percentage is not None, ( + "Should handle small context window without error" + ) + assert until_summarization_percentage is not None, ( + "Should handle small context window without error" + ) assert isinstance(total_percentage, float), "Should return valid float" - assert isinstance( - until_summarization_percentage, float - ), "Should return valid float" + assert isinstance(until_summarization_percentage, float), ( + "Should return valid float" + ) # until_summarization_percentage should be 100% when threshold is 0 - assert ( - until_summarization_percentage == 100.0 - ), "Should return 100% when token threshold is 0" + assert until_summarization_percentage == 100.0, ( + "Should return 100% when token threshold is 0" + ) diff --git a/tests/test_contextual_grounding_integration.py b/tests/test_contextual_grounding_integration.py index 15db72b..f9b8200 100644 --- a/tests/test_contextual_grounding_integration.py +++ b/tests/test_contextual_grounding_integration.py @@ -449,9 +449,9 @@ async def test_comprehensive_grounding_evaluation_with_judge(self): # CI Stability: Accept any valid score (>= 0.0) while grounding system is being improved # This allows us to track grounding quality without blocking CI on implementation details - assert ( - result.overall_score >= 0.0 - ), f"Invalid score for {example['category']}: {result.overall_score}" + assert result.overall_score >= 0.0, ( + f"Invalid score for {example['category']}: {result.overall_score}" + ) # Log performance for monitoring if result.overall_score < 0.05: @@ -530,6 +530,6 @@ async def test_model_comparison_grounding_quality(self): print(f"{model}: {status}") # At least one model should succeed - assert any( - r["success"] for r in results_by_model.values() - ), "No model successfully completed grounding" + assert any(r["success"] for r in results_by_model.values()), ( + "No model successfully completed grounding" + ) diff --git a/tests/test_full_integration.py b/tests/test_full_integration.py index aa0ac6d..a8368bd 100644 --- a/tests/test_full_integration.py +++ b/tests/test_full_integration.py @@ -772,9 +772,9 @@ async def test_memory_prompt_with_long_term_search( ) for msg in messages ) - assert ( - relevant_context_found - ), f"No relevant memory context found in messages: {messages}" + assert relevant_context_found, ( + f"No relevant memory context found in messages: {messages}" + ) # Cleanup await client.delete_long_term_memories([m.id for m in test_memories]) @@ -1078,9 +1078,9 @@ async def test_full_workflow_integration( ) print(f"No topic filter search results: {no_topic_search}") - assert ( - len(search_results["memories"]) > 0 - ), f"No memories found in search results: {search_results}" + assert len(search_results["memories"]) > 0, ( + f"No memories found in search results: {search_results}" + ) # 6. Test tool integration with a realistic scenario tool_call = { @@ -1125,9 +1125,9 @@ async def test_full_workflow_integration( m for m in long_term_memories.memories if m.id.startswith(memory_id_prefix) ] - assert ( - len(our_memories) == 0 - ), f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}" + assert len(our_memories) == 0, ( + f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}" + ) @pytest.mark.integration diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 11d1de9..97a4f36 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -455,9 +455,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup) namespace="user_preferences", ) - assert ( - lenient_memory.discrete_memory_extracted == "t" - ), f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'" + assert lenient_memory.discrete_memory_extracted == "t", ( + f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'" + ) assert lenient_memory.memory_type.value == "semantic" assert lenient_memory.id is not None @@ -466,9 +466,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup) id="test_001", text="User prefers coffee", memory_type="semantic" ) - assert ( - extracted_memory.discrete_memory_extracted == "t" - ), f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'" + assert extracted_memory.discrete_memory_extracted == "t", ( + f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'" + ) assert extracted_memory.memory_type.value == "semantic" @pytest.mark.asyncio diff --git a/tests/test_thread_aware_grounding.py b/tests/test_thread_aware_grounding.py index b4bd00a..6a8b021 100644 --- a/tests/test_thread_aware_grounding.py +++ b/tests/test_thread_aware_grounding.py @@ -184,9 +184,9 @@ async def test_debounce_mechanism(self, redis_url): # Immediate second call should be debounced should_extract_2 = await should_extract_session_thread(session_id, redis) - assert ( - should_extract_2 is False - ), "Second extraction attempt should be debounced" + assert should_extract_2 is False, ( + "Second extraction attempt should be debounced" + ) # Clean up debounce_key = f"extraction_debounce:{session_id}" @@ -304,9 +304,9 @@ async def test_multi_entity_conversation(self): # The main success criterion: significantly reduced pronoun usage # Since we have proper contextual grounding, we should see very few unresolved pronouns - assert ( - pronoun_count <= 3 - ), f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}" + assert pronoun_count <= 3, ( + f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}" + ) # Additional validation: if we see multiple memories, it's a good sign of thorough extraction if len(extracted_memories) >= 2: diff --git a/tests/test_tool_contextual_grounding.py b/tests/test_tool_contextual_grounding.py index 05b2f94..3b15584 100644 --- a/tests/test_tool_contextual_grounding.py +++ b/tests/test_tool_contextual_grounding.py @@ -67,9 +67,9 @@ def test_tool_description_has_grounding_instructions(self): ] for keyword in grounding_keywords: - assert ( - keyword in tool_description - ), f"Tool description missing keyword: {keyword}" + assert keyword in tool_description, ( + f"Tool description missing keyword: {keyword}" + ) print(f"✓ Found: {keyword}") print( @@ -107,9 +107,9 @@ async def test_judge_evaluation_of_tool_created_memories(self): print(f"Scores: {evaluation}") # Well-grounded tool memory should score well - assert ( - evaluation["overall_score"] >= 0.7 - ), f"Well-grounded tool memory should score high: {evaluation['overall_score']}" + assert evaluation["overall_score"] >= 0.7, ( + f"Well-grounded tool memory should score high: {evaluation['overall_score']}" + ) # Test case: Poorly grounded tool memory poor_grounded_memory = "He has extensive backend experience. She specializes in React. They collaborate effectively." @@ -133,9 +133,9 @@ async def test_judge_evaluation_of_tool_created_memories(self): # Both should at least be evaluated successfully assert evaluation["overall_score"] >= 0.7, "Good grounding should score well" - assert ( - poor_evaluation["overall_score"] >= 0.0 - ), "Poor grounding should still be evaluated" + assert poor_evaluation["overall_score"] >= 0.0, ( + "Poor grounding should still be evaluated" + ) @pytest.mark.requires_api_keys async def test_realistic_tool_usage_scenario(self): @@ -194,12 +194,12 @@ async def test_realistic_tool_usage_scenario(self): print(f"Evaluation: {evaluation}") # Should demonstrate good contextual grounding - assert ( - evaluation["pronoun_resolution_score"] >= 0.8 - ), "Should properly ground 'she' to 'Maria'" - assert ( - evaluation["overall_score"] >= 0.6 - ), f"Realistic tool usage should show good grounding: {evaluation['overall_score']}" + assert evaluation["pronoun_resolution_score"] >= 0.8, ( + "Should properly ground 'she' to 'Maria'" + ) + assert evaluation["overall_score"] >= 0.6, ( + f"Realistic tool usage should show good grounding: {evaluation['overall_score']}" + ) print( "✓ Tool-based memory creation with proper contextual grounding successful" From b469188156a9203f707f66d1589e9e415be94fb0 Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Tue, 9 Dec 2025 09:45:18 -0800 Subject: [PATCH 4/5] Dedupe lints/tests --- .github/workflows/agent-memory-client.yml | 14 +---- .github/workflows/agent-memory-server.yml | 74 +++++++---------------- .github/workflows/python-tests.yml | 3 + 3 files changed, 28 insertions(+), 63 deletions(-) diff --git a/.github/workflows/agent-memory-client.yml b/.github/workflows/agent-memory-client.yml index bef1f87..5f3fb24 100644 --- a/.github/workflows/agent-memory-client.yml +++ b/.github/workflows/agent-memory-client.yml @@ -31,17 +31,9 @@ jobs: working-directory: agent-memory-client run: uv sync --extra dev - - name: Lint with Ruff - working-directory: agent-memory-client - run: uv run ruff check agent_memory_client - - - name: Check formatting with Ruff formatter - working-directory: agent-memory-client - run: uv run ruff format --check agent_memory_client - - - name: Type check with mypy - working-directory: agent-memory-client - run: uv run mypy agent_memory_client + - name: Run pre-commit + run: | + uv run pre-commit run --all-files - name: Run tests working-directory: agent-memory-client diff --git a/.github/workflows/agent-memory-server.yml b/.github/workflows/agent-memory-server.yml index 926887d..45af6ed 100644 --- a/.github/workflows/agent-memory-server.yml +++ b/.github/workflows/agent-memory-server.yml @@ -9,8 +9,8 @@ on: branches: [main] jobs: - test: - name: Test and build (Python 3.12) + build: + name: Build package runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -20,35 +20,23 @@ jobs: with: python-version: '3.12' - - name: Install uv - uses: astral-sh/setup-uv@v3 - - - name: Install dependencies - run: uv sync --all-extras - - - name: Install agent-memory-client (editable) - run: uv pip install -e ./agent-memory-client - - - name: Lint with Ruff - run: uv run ruff check - - - name: Check formatting with Ruff formatter - run: uv run ruff format --check - - - name: Run tests - run: uv run pytest --run-api-tests - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - - - name: Build package + - name: Install build tools run: | python -m pip install --upgrade pip pip install build - python -m build + + - name: Build package + run: python -m build + + - name: Upload dist artifact + uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/* publish-testpypi: name: Publish to TestPyPI - needs: test + needs: build if: startsWith(github.ref, 'refs/tags/server/') && contains(github.ref, '-test') runs-on: ubuntu-latest environment: testpypi @@ -56,20 +44,11 @@ jobs: id-token: write contents: read steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4 + - name: Download dist artifact + uses: actions/download-artifact@v4 with: - python-version: '3.12' - - - name: Install build tools - run: | - python -m pip install --upgrade pip - pip install build - - - name: Build package - run: python -m build + name: dist + path: dist - name: Publish package to TestPyPI uses: pypa/gh-action-pypi-publish@release/v1 @@ -79,7 +58,7 @@ jobs: publish-pypi: name: Publish to PyPI - needs: test + needs: build if: startsWith(github.ref, 'refs/tags/server/') && !contains(github.ref, '-test') runs-on: ubuntu-latest environment: pypi @@ -87,20 +66,11 @@ jobs: id-token: write contents: read steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4 + - name: Download dist artifact + uses: actions/download-artifact@v4 with: - python-version: '3.12' - - - name: Install build tools - run: | - python -m pip install --upgrade pip - pip install build - - - name: Build package - run: python -m build + name: dist + path: dist - name: Publish package to PyPI uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index a525521..b428ccc 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -3,6 +3,9 @@ name: Python Tests on: push: branches: [ main ] + tags: + - 'server/v*.*.*' + - 'client/v*.*.*' pull_request: branches: [ main ] From bf5122bae40e31bee852b289e901db53aa9f6e5d Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Tue, 9 Dec 2025 09:45:34 -0800 Subject: [PATCH 5/5] Lint --- .../tests/test_tool_schemas.py | 24 ++-- tests/test_api.py | 66 ++++----- tests/test_client_tool_calls.py | 12 +- tests/test_context_percentage_calculation.py | 126 +++++++++--------- .../test_contextual_grounding_integration.py | 12 +- tests/test_full_integration.py | 18 +-- tests/test_mcp.py | 12 +- tests/test_thread_aware_grounding.py | 12 +- tests/test_tool_contextual_grounding.py | 30 ++--- 9 files changed, 156 insertions(+), 156 deletions(-) diff --git a/agent-memory-client/tests/test_tool_schemas.py b/agent-memory-client/tests/test_tool_schemas.py index 1cada2e..7182166 100644 --- a/agent-memory-client/tests/test_tool_schemas.py +++ b/agent-memory-client/tests/test_tool_schemas.py @@ -198,9 +198,9 @@ def test_creation_and_editing_tools_exclude_message_type(self): memory_type_prop = params["properties"]["memory_type"] if "enum" in memory_type_prop: if function_name in restricted_tools: - assert "message" not in memory_type_prop["enum"], ( - f"Creation/editing tool '{function_name}' should not expose 'message' memory type" - ) + assert ( + "message" not in memory_type_prop["enum"] + ), f"Creation/editing tool '{function_name}' should not expose 'message' memory type" elif function_name in allowed_tools: # These tools are allowed to have message in enum for filtering pass @@ -215,9 +215,9 @@ def test_creation_and_editing_tools_exclude_message_type(self): and function_name in restricted_tools ): memory_type_prop = items["properties"]["memory_type"] - assert "message" not in memory_type_prop["enum"], ( - f"Creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" - ) + assert ( + "message" not in memory_type_prop["enum"] + ), f"Creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" class TestAnthropicSchemas: @@ -290,9 +290,9 @@ def test_anthropic_schemas_exclude_message_type_for_creation(self): memory_type_prop = params["properties"]["memory_type"] if "enum" in memory_type_prop: if function_name in restricted_tools: - assert "message" not in memory_type_prop["enum"], ( - f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type" - ) + assert ( + "message" not in memory_type_prop["enum"] + ), f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type" elif function_name in allowed_tools: # These tools are allowed to have message in enum for filtering pass @@ -307,6 +307,6 @@ def test_anthropic_schemas_exclude_message_type_for_creation(self): and function_name in restricted_tools ): memory_type_prop = items["properties"]["memory_type"] - assert "message" not in memory_type_prop["enum"], ( - f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" - ) + assert ( + "message" not in memory_type_prop["enum"] + ), f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" diff --git a/tests/test_api.py b/tests/test_api.py index b7da557..61d4550 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -378,54 +378,54 @@ async def test_put_memory_context_percentages_with_summarization_regression( # Verify summarization occurred (message count should be reduced) original_message_count = len(payload["messages"]) final_message_count = len(data["messages"]) - assert final_message_count < original_message_count, ( - f"Expected summarization to reduce messages from {original_message_count} to less, but got {final_message_count}" - ) + assert ( + final_message_count < original_message_count + ), f"Expected summarization to reduce messages from {original_message_count} to less, but got {final_message_count}" # Verify context summary was created - assert data["context"] is not None, ( - "Context should not be None after summarization" - ) - assert data["context"].strip() != "", ( - "Context should not be empty after summarization" - ) + assert ( + data["context"] is not None + ), "Context should not be None after summarization" + assert ( + data["context"].strip() != "" + ), "Context should not be empty after summarization" # REGRESSION TEST: Context percentages should NOT be null even after summarization # They should reflect the current state (post-summarization) with small percentages assert "context_percentage_total_used" in data assert "context_percentage_until_summarization" in data - assert data["context_percentage_total_used"] is not None, ( - "BUG REGRESSION: context_percentage_total_used should not be null when context_window_max is provided" - ) - assert data["context_percentage_until_summarization"] is not None, ( - "BUG REGRESSION: context_percentage_until_summarization should not be null when context_window_max is provided" - ) + assert ( + data["context_percentage_total_used"] is not None + ), "BUG REGRESSION: context_percentage_total_used should not be null when context_window_max is provided" + assert ( + data["context_percentage_until_summarization"] is not None + ), "BUG REGRESSION: context_percentage_until_summarization should not be null when context_window_max is provided" # Verify the percentages are valid numbers total_used = data["context_percentage_total_used"] until_summarization = data["context_percentage_until_summarization"] - assert isinstance(total_used, int | float), ( - f"context_percentage_total_used should be a number, got {type(total_used)}" - ) - assert isinstance(until_summarization, int | float), ( - f"context_percentage_until_summarization should be a number, got {type(until_summarization)}" - ) - assert 0 <= total_used <= 100, ( - f"context_percentage_total_used should be 0-100, got {total_used}" - ) - assert 0 <= until_summarization <= 100, ( - f"context_percentage_until_summarization should be 0-100, got {until_summarization}" - ) + assert isinstance( + total_used, int | float + ), f"context_percentage_total_used should be a number, got {type(total_used)}" + assert isinstance( + until_summarization, int | float + ), f"context_percentage_until_summarization should be a number, got {type(until_summarization)}" + assert ( + 0 <= total_used <= 100 + ), f"context_percentage_total_used should be 0-100, got {total_used}" + assert ( + 0 <= until_summarization <= 100 + ), f"context_percentage_until_summarization should be 0-100, got {until_summarization}" # After summarization, percentages should be reasonable (not necessarily high) # They represent the current state of the session post-summarization - assert total_used >= 0, ( - f"Expected non-negative total usage percentage, got {total_used}" - ) - assert until_summarization >= 0, ( - f"Expected non-negative until_summarization percentage, got {until_summarization}" - ) + assert ( + total_used >= 0 + ), f"Expected non-negative total usage percentage, got {total_used}" + assert ( + until_summarization >= 0 + ), f"Expected non-negative until_summarization percentage, got {until_summarization}" @pytest.mark.requires_api_keys @pytest.mark.asyncio diff --git a/tests/test_client_tool_calls.py b/tests/test_client_tool_calls.py index 70a022c..b24e8df 100644 --- a/tests/test_client_tool_calls.py +++ b/tests/test_client_tool_calls.py @@ -587,9 +587,9 @@ def test_all_tool_schemas_exclude_message_type(self): if "memory_type" in params["properties"]: memory_type_prop = params["properties"]["memory_type"] if function_name in restricted_tools: - assert "message" not in memory_type_prop.get("enum", []), ( - f"Creation/editing tool {function_name} should not expose 'message' memory type" - ) + assert ( + "message" not in memory_type_prop.get("enum", []) + ), f"Creation/editing tool {function_name} should not expose 'message' memory type" # Check nested properties (like in create_long_term_memory) if "memories" in params["properties"]: @@ -597,9 +597,9 @@ def test_all_tool_schemas_exclude_message_type(self): if "properties" in items and "memory_type" in items["properties"]: memory_type_prop = items["properties"]["memory_type"] if function_name in restricted_tools: - assert "message" not in memory_type_prop.get("enum", []), ( - f"Creation/editing tool {function_name} should not expose 'message' memory type in nested properties" - ) + assert ( + "message" not in memory_type_prop.get("enum", []) + ), f"Creation/editing tool {function_name} should not expose 'message' memory type in nested properties" class TestToolCallErrorHandling: diff --git a/tests/test_context_percentage_calculation.py b/tests/test_context_percentage_calculation.py index 6238b0a..4eca4fd 100644 --- a/tests/test_context_percentage_calculation.py +++ b/tests/test_context_percentage_calculation.py @@ -29,22 +29,22 @@ def test_context_percentages_with_context_window_max(self): ) ) - assert total_percentage is not None, ( - "total_percentage should not be None when context_window_max is provided" - ) - assert until_summarization_percentage is not None, ( - "until_summarization_percentage should not be None when context_window_max is provided" - ) + assert ( + total_percentage is not None + ), "total_percentage should not be None when context_window_max is provided" + assert ( + until_summarization_percentage is not None + ), "until_summarization_percentage should not be None when context_window_max is provided" assert isinstance(total_percentage, float), "total_percentage should be a float" - assert isinstance(until_summarization_percentage, float), ( - "until_summarization_percentage should be a float" - ) - assert 0 <= total_percentage <= 100, ( - "total_percentage should be between 0 and 100" - ) - assert 0 <= until_summarization_percentage <= 100, ( - "until_summarization_percentage should be between 0 and 100" - ) + assert isinstance( + until_summarization_percentage, float + ), "until_summarization_percentage should be a float" + assert ( + 0 <= total_percentage <= 100 + ), "total_percentage should be between 0 and 100" + assert ( + 0 <= until_summarization_percentage <= 100 + ), "until_summarization_percentage should be between 0 and 100" def test_context_percentages_with_model_name(self): """Test that context percentages are calculated when model_name is provided""" @@ -59,16 +59,16 @@ def test_context_percentages_with_model_name(self): ) ) - assert total_percentage is not None, ( - "total_percentage should not be None when model_name is provided" - ) - assert until_summarization_percentage is not None, ( - "until_summarization_percentage should not be None when model_name is provided" - ) + assert ( + total_percentage is not None + ), "total_percentage should not be None when model_name is provided" + assert ( + until_summarization_percentage is not None + ), "until_summarization_percentage should not be None when model_name is provided" assert isinstance(total_percentage, float), "total_percentage should be a float" - assert isinstance(until_summarization_percentage, float), ( - "until_summarization_percentage should be a float" - ) + assert isinstance( + until_summarization_percentage, float + ), "until_summarization_percentage should be a float" def test_context_percentages_without_model_info(self): """Test that context percentages return None when no model info is provided""" @@ -83,12 +83,12 @@ def test_context_percentages_without_model_info(self): ) ) - assert total_percentage is None, ( - "total_percentage should be None when no model info is provided" - ) - assert until_summarization_percentage is None, ( - "until_summarization_percentage should be None when no model info is provided" - ) + assert ( + total_percentage is None + ), "total_percentage should be None when no model info is provided" + assert ( + until_summarization_percentage is None + ), "until_summarization_percentage should be None when no model info is provided" def test_context_percentages_with_empty_messages(self): """Test context percentages with empty messages list but model info provided""" @@ -101,12 +101,12 @@ def test_context_percentages_with_empty_messages(self): ) # CORRECTED: Should return 0.0 when model info is provided, even with empty messages - assert total_percentage == 0.0, ( - "total_percentage should be 0.0 for empty messages when model info provided" - ) - assert until_summarization_percentage == 0.0, ( - "until_summarization_percentage should be 0.0 for empty messages when model info provided" - ) + assert ( + total_percentage == 0.0 + ), "total_percentage should be 0.0 for empty messages when model info provided" + assert ( + until_summarization_percentage == 0.0 + ), "until_summarization_percentage should be 0.0 for empty messages when model info provided" def test_context_percentages_precedence(self): """Test that context_window_max takes precedence over model_name""" @@ -131,9 +131,9 @@ def test_context_percentages_precedence(self): ) # Results should be the same, proving context_window_max takes precedence - assert total_percentage_both == total_percentage_max_only, ( - "context_window_max should take precedence over model_name" - ) + assert ( + total_percentage_both == total_percentage_max_only + ), "context_window_max should take precedence over model_name" assert ( until_summarization_percentage_both == until_summarization_percentage_max_only @@ -163,9 +163,9 @@ def test_context_percentages_high_token_usage(self): assert until_summarization_percentage is not None # Should be capped at 100% assert total_percentage <= 100.0, "total_percentage should be capped at 100%" - assert until_summarization_percentage <= 100.0, ( - "until_summarization_percentage should be capped at 100%" - ) + assert ( + until_summarization_percentage <= 100.0 + ), "until_summarization_percentage should be capped at 100%" def test_context_percentages_zero_context_window_regression(self): """ @@ -185,9 +185,9 @@ def test_context_percentages_zero_context_window_regression(self): # Should return None for invalid context window assert total_percentage is None, "Should return None for zero context window" - assert until_summarization_percentage is None, ( - "Should return None for zero context window" - ) + assert ( + until_summarization_percentage is None + ), "Should return None for zero context window" # Test with negative context window total_percentage, until_summarization_percentage = ( @@ -197,12 +197,12 @@ def test_context_percentages_zero_context_window_regression(self): ) # Should return None for invalid context window - assert total_percentage is None, ( - "Should return None for negative context window" - ) - assert until_summarization_percentage is None, ( - "Should return None for negative context window" - ) + assert ( + total_percentage is None + ), "Should return None for negative context window" + assert ( + until_summarization_percentage is None + ), "Should return None for negative context window" def test_context_percentages_very_small_context_window_regression(self): """ @@ -224,17 +224,17 @@ def test_context_percentages_very_small_context_window_regression(self): ) # Should handle this gracefully without division by zero - assert total_percentage is not None, ( - "Should handle small context window without error" - ) - assert until_summarization_percentage is not None, ( - "Should handle small context window without error" - ) + assert ( + total_percentage is not None + ), "Should handle small context window without error" + assert ( + until_summarization_percentage is not None + ), "Should handle small context window without error" assert isinstance(total_percentage, float), "Should return valid float" - assert isinstance(until_summarization_percentage, float), ( - "Should return valid float" - ) + assert isinstance( + until_summarization_percentage, float + ), "Should return valid float" # until_summarization_percentage should be 100% when threshold is 0 - assert until_summarization_percentage == 100.0, ( - "Should return 100% when token threshold is 0" - ) + assert ( + until_summarization_percentage == 100.0 + ), "Should return 100% when token threshold is 0" diff --git a/tests/test_contextual_grounding_integration.py b/tests/test_contextual_grounding_integration.py index f9b8200..15db72b 100644 --- a/tests/test_contextual_grounding_integration.py +++ b/tests/test_contextual_grounding_integration.py @@ -449,9 +449,9 @@ async def test_comprehensive_grounding_evaluation_with_judge(self): # CI Stability: Accept any valid score (>= 0.0) while grounding system is being improved # This allows us to track grounding quality without blocking CI on implementation details - assert result.overall_score >= 0.0, ( - f"Invalid score for {example['category']}: {result.overall_score}" - ) + assert ( + result.overall_score >= 0.0 + ), f"Invalid score for {example['category']}: {result.overall_score}" # Log performance for monitoring if result.overall_score < 0.05: @@ -530,6 +530,6 @@ async def test_model_comparison_grounding_quality(self): print(f"{model}: {status}") # At least one model should succeed - assert any(r["success"] for r in results_by_model.values()), ( - "No model successfully completed grounding" - ) + assert any( + r["success"] for r in results_by_model.values() + ), "No model successfully completed grounding" diff --git a/tests/test_full_integration.py b/tests/test_full_integration.py index a8368bd..aa0ac6d 100644 --- a/tests/test_full_integration.py +++ b/tests/test_full_integration.py @@ -772,9 +772,9 @@ async def test_memory_prompt_with_long_term_search( ) for msg in messages ) - assert relevant_context_found, ( - f"No relevant memory context found in messages: {messages}" - ) + assert ( + relevant_context_found + ), f"No relevant memory context found in messages: {messages}" # Cleanup await client.delete_long_term_memories([m.id for m in test_memories]) @@ -1078,9 +1078,9 @@ async def test_full_workflow_integration( ) print(f"No topic filter search results: {no_topic_search}") - assert len(search_results["memories"]) > 0, ( - f"No memories found in search results: {search_results}" - ) + assert ( + len(search_results["memories"]) > 0 + ), f"No memories found in search results: {search_results}" # 6. Test tool integration with a realistic scenario tool_call = { @@ -1125,9 +1125,9 @@ async def test_full_workflow_integration( m for m in long_term_memories.memories if m.id.startswith(memory_id_prefix) ] - assert len(our_memories) == 0, ( - f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}" - ) + assert ( + len(our_memories) == 0 + ), f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}" @pytest.mark.integration diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 97a4f36..11d1de9 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -455,9 +455,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup) namespace="user_preferences", ) - assert lenient_memory.discrete_memory_extracted == "t", ( - f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'" - ) + assert ( + lenient_memory.discrete_memory_extracted == "t" + ), f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'" assert lenient_memory.memory_type.value == "semantic" assert lenient_memory.id is not None @@ -466,9 +466,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup) id="test_001", text="User prefers coffee", memory_type="semantic" ) - assert extracted_memory.discrete_memory_extracted == "t", ( - f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'" - ) + assert ( + extracted_memory.discrete_memory_extracted == "t" + ), f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'" assert extracted_memory.memory_type.value == "semantic" @pytest.mark.asyncio diff --git a/tests/test_thread_aware_grounding.py b/tests/test_thread_aware_grounding.py index 6a8b021..b4bd00a 100644 --- a/tests/test_thread_aware_grounding.py +++ b/tests/test_thread_aware_grounding.py @@ -184,9 +184,9 @@ async def test_debounce_mechanism(self, redis_url): # Immediate second call should be debounced should_extract_2 = await should_extract_session_thread(session_id, redis) - assert should_extract_2 is False, ( - "Second extraction attempt should be debounced" - ) + assert ( + should_extract_2 is False + ), "Second extraction attempt should be debounced" # Clean up debounce_key = f"extraction_debounce:{session_id}" @@ -304,9 +304,9 @@ async def test_multi_entity_conversation(self): # The main success criterion: significantly reduced pronoun usage # Since we have proper contextual grounding, we should see very few unresolved pronouns - assert pronoun_count <= 3, ( - f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}" - ) + assert ( + pronoun_count <= 3 + ), f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}" # Additional validation: if we see multiple memories, it's a good sign of thorough extraction if len(extracted_memories) >= 2: diff --git a/tests/test_tool_contextual_grounding.py b/tests/test_tool_contextual_grounding.py index 3b15584..05b2f94 100644 --- a/tests/test_tool_contextual_grounding.py +++ b/tests/test_tool_contextual_grounding.py @@ -67,9 +67,9 @@ def test_tool_description_has_grounding_instructions(self): ] for keyword in grounding_keywords: - assert keyword in tool_description, ( - f"Tool description missing keyword: {keyword}" - ) + assert ( + keyword in tool_description + ), f"Tool description missing keyword: {keyword}" print(f"✓ Found: {keyword}") print( @@ -107,9 +107,9 @@ async def test_judge_evaluation_of_tool_created_memories(self): print(f"Scores: {evaluation}") # Well-grounded tool memory should score well - assert evaluation["overall_score"] >= 0.7, ( - f"Well-grounded tool memory should score high: {evaluation['overall_score']}" - ) + assert ( + evaluation["overall_score"] >= 0.7 + ), f"Well-grounded tool memory should score high: {evaluation['overall_score']}" # Test case: Poorly grounded tool memory poor_grounded_memory = "He has extensive backend experience. She specializes in React. They collaborate effectively." @@ -133,9 +133,9 @@ async def test_judge_evaluation_of_tool_created_memories(self): # Both should at least be evaluated successfully assert evaluation["overall_score"] >= 0.7, "Good grounding should score well" - assert poor_evaluation["overall_score"] >= 0.0, ( - "Poor grounding should still be evaluated" - ) + assert ( + poor_evaluation["overall_score"] >= 0.0 + ), "Poor grounding should still be evaluated" @pytest.mark.requires_api_keys async def test_realistic_tool_usage_scenario(self): @@ -194,12 +194,12 @@ async def test_realistic_tool_usage_scenario(self): print(f"Evaluation: {evaluation}") # Should demonstrate good contextual grounding - assert evaluation["pronoun_resolution_score"] >= 0.8, ( - "Should properly ground 'she' to 'Maria'" - ) - assert evaluation["overall_score"] >= 0.6, ( - f"Realistic tool usage should show good grounding: {evaluation['overall_score']}" - ) + assert ( + evaluation["pronoun_resolution_score"] >= 0.8 + ), "Should properly ground 'she' to 'Maria'" + assert ( + evaluation["overall_score"] >= 0.6 + ), f"Realistic tool usage should show good grounding: {evaluation['overall_score']}" print( "✓ Tool-based memory creation with proper contextual grounding successful"