diff --git a/Makefile b/Makefile index ad7c5ac..a3d6dab 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ clean-build: rm -fr dist/ rm -fr .eggs/ find . -name '*.egg-info' -exec rm -fr {} + - find . -name '*.egg' -exec rm -f {} + + find . -name '*.egg' -exec rm -rf {} + clean-pyc: find . -name '*.pyc' -exec rm -f {} + diff --git a/docs/examples.rst b/docs/examples.rst index 59433e1..3b56e51 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -101,6 +101,22 @@ This example shows: :language: python :caption: examples/garlic/garlic_examples.py +Tag-Only Protocol Examples +--------------------------- + +The `examples/tag_only/` directory demonstrates how to work with tag-only protocols (protocols that do not accept values) in multiaddr addresses. + +This example shows: +- Basic tag-only protocol usage (http, https, tls, noise, webrtc, etc.) +- Protocol validation for tag-only protocols +- Error handling for invalid value assignments (both ``/tag/value`` and ``/tag=value`` syntaxes) +- Multiaddr integration with tag-only protocols +- Chaining multiple tag-only protocols + +.. literalinclude:: ../examples/tag_only/tag_only_examples.py + :language: python + :caption: examples/tag_only/tag_only_examples.py + Running the Examples -------------------- @@ -126,4 +142,7 @@ All examples can be run directly with Python: # Garlic protocol examples python examples/garlic/garlic_examples.py + # Tag-only protocol examples + python examples/tag_only/tag_only_examples.py + Note: Some examples require network connectivity and may take a few seconds to complete due to DNS resolution. diff --git a/examples/tag_only/tag_only_examples.py b/examples/tag_only/tag_only_examples.py new file mode 100644 index 0000000..f88cf80 --- /dev/null +++ b/examples/tag_only/tag_only_examples.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 +""" +Tag-only protocol examples for py-multiaddr. + +This script demonstrates how to work with tag-only protocols in py-multiaddr. +Tag-only protocols are protocols that do not accept values - their presence +alone indicates a specific property or capability (e.g., ``/http``, ``/tls``, +``/noise``). + +## Overview + +This script shows various examples of tag-only protocol usage: + +1. **Basic Tag-Only Protocol Usage**: Creating and parsing simple tag-only addresses. +2. **Protocol Validation**: Testing valid and invalid tag-only protocol syntax. +3. **Error Handling**: Demonstrating clear error messages for invalid value assignments. +4. **Multiaddr Integration**: Using tag-only protocols in realistic multiaddr stacks. +5. **Common Tag-Only Protocols**: Examples with various tag-only protocols. + +## Expected Output + +When you run this script, you should see output similar to: + +``` +Tag-Only Protocol Examples +================================================== +=== Basic Tag-Only Protocol Usage === +Valid tag-only addresses: + /http + /https + /tls + /noise + /webrtc + +=== Protocol Validation === +Testing valid tag-only: /http + Valid: True + Protocols: ['http'] + +Testing invalid tag-only (with value): /http/value + Valid: False + Error: Protocol 'http' does not take an argument + +Testing invalid tag-only (= syntax): /http=value + Valid: False + Error: Protocol 'http' does not take an argument + +=== Multiaddr Integration === +Complex multiaddr with tag-only protocols: + Address: /ip4/127.0.0.1/tcp/443/https + Protocols: ['ip4', 'tcp', 'https'] + Has 'https' protocol: True + +=== Common Tag-Only Protocols === +HTTP: /ip4/127.0.0.1/tcp/80/http +HTTPS: /ip4/127.0.0.1/tcp/443/https +TLS: /ip4/127.0.0.1/tcp/443/tls +WebRTC: /ip4/127.0.0.1/udp/9090/webrtc-direct +Noise: /ip4/127.0.0.1/tcp/12345/noise + +================================================== +All examples completed! +``` + +## Key Features Demonstrated + +- **Tag-Only Protocols**: Protocols that don't accept values (http, https, tls, noise, webrtc, etc.) +- **Validation**: Ensures no value is provided to tag-only protocols +- **Error Messages**: Clear error messages that don't include invalid values +- **Multiaddr Integration**: Using tag-only protocols as part of connection stacks +- **Syntax Validation**: Both ``/tag/value`` and ``/tag=value`` syntaxes are rejected + +## Requirements + +- Python 3.10+ +- py-multiaddr library + +## Usage + +```bash +python examples/tag_only/tag_only_examples.py +``` +""" + +from multiaddr import Multiaddr +from multiaddr.exceptions import StringParseError + +# Common tag-only protocols +TAG_ONLY_PROTOCOLS = [ + "http", + "https", + "tls", + "noise", + "webrtc", + "webrtc-direct", + "quic", + "quic-v1", + "ws", + "wss", + "p2p-circuit", + "webtransport", +] + + +def basic_tag_only_usage(): + """ + Basic tag-only protocol usage example. + + This function demonstrates: + - Creating tag-only multiaddrs + - Extracting protocol information + - Validating tag-only addresses + """ + print("=== Basic Tag-Only Protocol Usage ===") + print("Valid tag-only addresses:") + + for proto_name in TAG_ONLY_PROTOCOLS[:5]: # Show first 5 + addr_str = f"/{proto_name}" + try: + _ = Multiaddr(addr_str) # Validate the address + print(f" {addr_str}") + except Exception as e: + print(f" {addr_str} - Error: {e}") + + +def protocol_validation(): + """ + Demonstrate protocol validation for tag-only protocols. + + This function shows: + - Valid tag-only addresses + - Invalid tag-only addresses with /tag/value syntax + - Invalid tag-only addresses with /tag=value syntax + - Error handling for validation failures + """ + print("\n=== Protocol Validation ===") + + # Test valid tag-only protocol + valid_addr = "/http" + print(f"Testing valid tag-only: {valid_addr}") + try: + ma = Multiaddr(valid_addr) + print(" Valid: True") + print(f" Protocols: {[p.name for p in ma.protocols()]}") + except Exception as e: + print(" Valid: False") + print(f" Error: {e}") + + # Test invalid tag-only with /tag/value syntax + invalid_addr_slash = "/http/value" + print(f"\nTesting invalid tag-only (with value): {invalid_addr_slash}") + try: + Multiaddr(invalid_addr_slash) + print(" Valid: True (ERROR: Should have failed)") + except StringParseError as e: + print(" Valid: False") + print(f" Error: {e}") + # Verify error message doesn't include the invalid value + error_str = str(e) + assert "value" not in error_str or "does not take an argument" in error_str + + # Test invalid tag-only with /tag=value syntax + invalid_addr_equals = "/http=value" + print(f"\nTesting invalid tag-only (= syntax): {invalid_addr_equals}") + try: + Multiaddr(invalid_addr_equals) + print(" Valid: True (ERROR: Should have failed)") + except StringParseError as e: + print(" Valid: False") + print(f" Error: {e}") + + +def multiaddr_integration(): + """ + Demonstrate tag-only protocol integration with other protocols. + + This function shows: + - Using tag-only protocols as part of realistic multiaddr stacks + - Protocol stack analysis + - Common use cases + """ + print("\n=== Multiaddr Integration ===") + + # HTTPS example + https_addr = "/ip4/127.0.0.1/tcp/443/https" + print("Complex multiaddr with tag-only protocols:") + print(f" Address: {https_addr}") + + try: + ma = Multiaddr(https_addr) + protocols = [p.name for p in ma.protocols()] + print(f" Protocols: {protocols}") + + # Check for 'https' protocol + has_https = "https" in protocols + print(f" Has 'https' protocol: {has_https}") + + except Exception as e: + print(f" Error: {e}") + + +def common_tag_only_protocols(): + """ + Demonstrate common tag-only protocol use cases. + + This function shows: + - HTTP and HTTPS usage + - TLS usage + - WebRTC usage + - Noise protocol usage + """ + print("\n=== Common Tag-Only Protocols ===") + + examples = [ + ("HTTP", "/ip4/127.0.0.1/tcp/80/http"), + ("HTTPS", "/ip4/127.0.0.1/tcp/443/https"), + ("TLS", "/ip4/127.0.0.1/tcp/443/tls"), + ("WebRTC", "/ip4/127.0.0.1/udp/9090/webrtc-direct"), + ("Noise", "/ip4/127.0.0.1/tcp/12345/noise"), + ] + + for name, addr_str in examples: + try: + _ = Multiaddr(addr_str) # Validate the address + print(f"{name}: {addr_str}") + except Exception as e: + print(f"{name}: {addr_str} - Error: {e}") + + +def chaining_tag_only_protocols(): + """ + Demonstrate chaining multiple tag-only protocols. + + This function shows: + - Multiple tag-only protocols in sequence + - Valid combinations + - Protocol stack analysis + """ + print("\n=== Chaining Tag-Only Protocols ===") + + examples = [ + "/webrtc/noise", + "/webrtc-direct/webrtc", + "/tls/http", + ] + + for addr_str in examples: + try: + ma = Multiaddr(addr_str) + protocols = [p.name for p in ma.protocols()] + print(f" {addr_str}") + print(f" Protocols: {protocols}") + except Exception as e: + print(f" {addr_str} - Error: {e}") + + +def main(): + """ + Run all tag-only protocol examples. + + This function orchestrates all the tag-only protocol examples: + 1. Basic tag-only usage + 2. Protocol validation + 3. Multiaddr integration + 4. Common tag-only protocols + 5. Chaining tag-only protocols + + Each example demonstrates different aspects of tag-only protocol + functionality and shows how to use them with py-multiaddr. + """ + print("Tag-Only Protocol Examples") + print("=" * 50) + + try: + basic_tag_only_usage() + protocol_validation() + multiaddr_integration() + common_tag_only_protocols() + chaining_tag_only_protocols() + + print("\n" + "=" * 50) + print("All examples completed!") + print("\nSummary:") + print("- Tag-only protocols work correctly") + print("- Validation catches invalid use (with values)") + print("- Both /tag/value and /tag=value syntaxes are rejected") + print("- Integration with other protocols works as expected") + print("- Multiple tag-only protocols can be chained") + + except KeyboardInterrupt: + print("\nExamples interrupted by user") + except Exception as e: + print(f"\nUnexpected error: {e}") + + +if __name__ == "__main__": + main() diff --git a/multiaddr/multiaddr.py b/multiaddr/multiaddr.py index 7f184a2..45efc6c 100644 --- a/multiaddr/multiaddr.py +++ b/multiaddr/multiaddr.py @@ -344,56 +344,54 @@ def _from_string(self, addr: str) -> None: return # Handle other protocols - parts = iter(addr.strip("/").split("/")) - if not parts: + # Convert to list to allow peeking ahead for validation + parts_list = addr.strip("/").split("/") + if not parts_list: raise exceptions.StringParseError("empty multiaddr", addr) self._bytes = b"" - for part in parts: + idx: int = 0 + while idx < len(parts_list): + part = parts_list[idx] if not part: + idx += 1 continue # Special handling for unix paths if part in ("unix",): - try: - # Get the next part as the path value - protocol_path_value = next(parts) - if not protocol_path_value: - raise exceptions.StringParseError("empty protocol path", addr) - - # Join any remaining parts as part of the path - remaining_parts = [] - while True: - try: - next_part = next(parts) - if not next_part: - continue - remaining_parts.append(next_part) - except StopIteration: - break - - if remaining_parts: - protocol_path_value = protocol_path_value + "/" + "/".join(remaining_parts) - - proto = protocol_with_name(part) - codec = codec_by_name(proto.codec) - if not codec: - raise exceptions.StringParseError(f"unknown codec: {proto.codec}", addr) - - try: - self._bytes += varint.encode(proto.code) - buf = codec.to_bytes(proto, protocol_path_value) - # Add length prefix for variable-sized or zero-sized codecs - if codec.SIZE <= 0: - self._bytes += varint.encode(len(buf)) - if buf: # Only append buffer if it's not empty - self._bytes += buf - except Exception as e: - raise exceptions.StringParseError(str(e), addr) from e - continue - except StopIteration: + # Get the next part as the path value + if idx + 1 >= len(parts_list): raise exceptions.StringParseError("missing value for unix protocol", addr) + protocol_path_value = parts_list[idx + 1] + if not protocol_path_value: + raise exceptions.StringParseError("empty protocol path", addr) + + # Join any remaining parts as part of the path (collect and consume the rest) + remaining_parts = [p for p in parts_list[idx + 2 :] if p] + # Consume all remaining parts so outer loop ends + idx = len(parts_list) + + if remaining_parts: + protocol_path_value = protocol_path_value + "/" + "/".join(remaining_parts) + + proto = protocol_with_name(part) + codec = codec_by_name(proto.codec) + if not codec: + raise exceptions.StringParseError(f"unknown codec: {proto.codec}", addr) + + try: + self._bytes += varint.encode(proto.code) + buf = codec.to_bytes(proto, protocol_path_value) + # Add length prefix for variable-sized or zero-sized codecs + if codec.SIZE <= 0: + self._bytes += varint.encode(len(buf)) + if buf: # Only append buffer if it's not empty + self._bytes += buf + except Exception as e: + raise exceptions.StringParseError(str(e), addr) from e + continue # Already advanced idx above + # Handle other protocols # Split protocol name and value if present protocol_value: str | None = None @@ -407,15 +405,30 @@ def _from_string(self, addr: str) -> None: except Exception as exc: raise exceptions.StringParseError(f"unknown protocol: {proto_name}", addr) from exc + # Fix 2: Validate that tag-only protocols don't accept values via = syntax + if proto.codec is None and protocol_value is not None: + # Construct address string without the invalid value + # to avoid including it in error message + addr_parts_before = parts_list[:idx] + if addr_parts_before or proto_name: + addr_up_to_protocol = "/" + "/".join([*addr_parts_before, proto_name]) + else: + addr_up_to_protocol = "/" + raise exceptions.StringParseError( + f"Protocol '{proto.name}' does not take an argument", + addr_up_to_protocol, + proto.name, + ) + # If the protocol expects a value, get it if proto.codec is not None: if protocol_value is None: - try: - protocol_value = next(parts) - except StopIteration: + if idx + 1 >= len(parts_list): raise exceptions.StringParseError( f"missing value for protocol: {proto_name}", addr ) + protocol_value = parts_list[idx + 1] + idx += 1 # Consume the value part # Validate value (optional: could add more checks here) # If value looks like a protocol name, that's an error if protocol_value is not None: @@ -434,13 +447,42 @@ def _from_string(self, addr: str) -> None: if not codec: raise exceptions.StringParseError(f"unknown codec: {proto.codec}", addr) - try: + # Special case: protocols with codec=None are flag protocols + # (no value, no length prefix, no buffer) + if proto.codec is None: + # Encode the protocol code first self._bytes += varint.encode(proto.code) - # Special case: protocols with codec=None are flag protocols - # (no value, no length prefix, no buffer) - if proto.codec is None: - continue + # Fix 1: Check if next part exists and is not a valid protocol name + # If it's not a valid protocol, it's an invalid value + # Look ahead to find the next non-empty part + next_idx = idx + 1 + while next_idx < len(parts_list) and not parts_list[next_idx]: + next_idx += 1 + + if next_idx < len(parts_list): + next_part = parts_list[next_idx] + try: + protocol_with_name(next_part) + # It's a valid protocol name, so advance idx to that part + idx = next_idx + continue + except exceptions.ProtocolNotFoundError: + # Not a valid protocol name, so it's an invalid value + # Construct address string up to (but not including) the invalid value + # to avoid including it in the error message + addr_up_to_protocol = "/" + "/".join(parts_list[: idx + 1]) + raise exceptions.StringParseError( + f"Protocol '{proto.name}' does not take an argument", + addr_up_to_protocol, + proto.name, + ) + # No next part, continue normally + idx += 1 + continue + + try: + self._bytes += varint.encode(proto.code) buf = codec.to_bytes(proto, protocol_value or "") if codec.SIZE <= 0: # Add length prefix for variable-sized or zero-sized codecs @@ -450,6 +492,8 @@ def _from_string(self, addr: str) -> None: except Exception as e: raise exceptions.StringParseError(str(e), addr) from e + idx += 1 # Move to next part + def _from_bytes(self, addr: bytes) -> None: """Parse a binary multiaddr. diff --git a/newsfragments/98.bugfix.rst b/newsfragments/98.bugfix.rst new file mode 100644 index 0000000..8fac653 --- /dev/null +++ b/newsfragments/98.bugfix.rst @@ -0,0 +1,5 @@ +Fixed validation for tag-only protocols (protocols that do not accept values). +Tag-only protocols like ``http``, ``https``, ``tls``, ``noise``, ``webrtc``, etc. +now correctly reject invalid value assignments via both ``/tag/value`` and +``/tag=value`` syntax, raising clear error messages that do not include the +invalid value. diff --git a/tests/test_multiaddr.py b/tests/test_multiaddr.py index e54bcc1..e71e0ab 100644 --- a/tests/test_multiaddr.py +++ b/tests/test_multiaddr.py @@ -1022,3 +1022,83 @@ def test_http_path_raw_value_access(): from urllib.parse import quote assert quote(raw_value, safe="") == encoded_value + + +def test_tag_only_protocol_rejects_value_slash_syntax(): + """Test that tag-only protocols reject values using /tag/value syntax""" + tag_only_protocols = [ + "webrtc", + "webrtc-direct", + "noise", + "quic", + "quic-v1", + "tls", + "http", + "https", + "ws", + "wss", + "p2p-circuit", + "webtransport", + ] + + for proto_name in tag_only_protocols: + # Should fail with clear error message + with pytest.raises(StringParseError) as exc_info: + Multiaddr(f"/{proto_name}/value") + assert "does not take an argument" in str(exc_info.value) + assert proto_name in str(exc_info.value) + + +def test_tag_only_protocol_rejects_value_equals_syntax(): + """Test that tag-only protocols reject values using /tag=value syntax""" + tag_only_protocols = [ + "webrtc", + "webrtc-direct", + "noise", + "quic", + "tls", + "http", + ] + + for proto_name in tag_only_protocols: + # Should fail with clear error message + with pytest.raises(StringParseError) as exc_info: + Multiaddr(f"/{proto_name}=value") + assert "does not take an argument" in str(exc_info.value) + assert proto_name in str(exc_info.value) + + +def test_tag_only_protocol_allows_valid_combinations(): + """Test that tag-only protocols work correctly in valid combinations""" + # Single tag protocol + assert str(Multiaddr("/webrtc")) == "/webrtc" + assert str(Multiaddr("/webrtc-direct")) == "/webrtc-direct" + + # Multiple tag protocols chained + assert str(Multiaddr("/webrtc/noise")) == "/webrtc/noise" + assert str(Multiaddr("/webrtc-direct/webrtc")) == "/webrtc-direct/webrtc" + + # Tag protocol followed by value protocol + assert str(Multiaddr("/webrtc-direct/ip4/127.0.0.1")) == "/webrtc-direct/ip4/127.0.0.1" + + # Complex valid address + addr = "/ip4/127.0.0.1/udp/9090/webrtc-direct/certhash/uEiDDq4_xNyDorZBH3TlGazyJdOWSwvo4PUo5YHFMrvDE8g" + assert str(Multiaddr(addr)) == addr + + +def test_tag_only_protocol_error_message_format(): + """Test that error messages for tag-only protocols are clear and helpful""" + # Test /tag/value syntax + with pytest.raises(StringParseError) as exc_info: + Multiaddr("/webrtc-direct/invalidvalue") + error_msg = str(exc_info.value) + assert "does not take an argument" in error_msg + assert "webrtc-direct" in error_msg + assert "invalidvalue" not in error_msg # Should not mention the invalid value + + # Test /tag=value syntax + with pytest.raises(StringParseError) as exc_info: + Multiaddr("/webrtc=somevalue") + error_msg = str(exc_info.value) + assert "does not take an argument" in error_msg + assert "webrtc" in error_msg diff --git a/tests/test_protocols.py b/tests/test_protocols.py index 65cddd1..3d9c515 100644 --- a/tests/test_protocols.py +++ b/tests/test_protocols.py @@ -565,7 +565,9 @@ def test_ipcidr_invalid_bytes_inputs(): # --------CERT-HASH--------- -VALID_MULTIHASH_BYTES = multihash.encode(b"hello world", "sha2-256") +# The multihash package provides `encode` at runtime, but some static +# checkers (ruff/pyright) may not see it. Ignore the attribute check here. +VALID_MULTIHASH_BYTES = multihash.encode(b"hello world", "sha2-256") # type: ignore[attr-defined] VALID_CERTHASH_STRING = multibase.encode("base64url", VALID_MULTIHASH_BYTES).decode("utf-8") INVALID_BYTES = b"this is not a multihash"