pytest-dev · RonnyPfannschmidt · Oct 18, 2025 · Oct 18, 2025 · Oct 18, 2025 · Oct 18, 2025
diff --git a/CHANGELOG b/CHANGELOG
@@ -1,3 +1,21 @@
+2.3.0
+=====
+
+* add IniConfig.parse() classmethod with strip_inline_comments parameter (fixes #55)
+  - by default (strip_inline_comments=True), inline comments are properly stripped from values and continuation lines
+  - set strip_inline_comments=False to preserve old behavior if needed
+* IniConfig() constructor maintains backward compatibility (does not strip inline comments)
+* users should migrate to IniConfig.parse() for correct comment handling
+* add strip_section_whitespace parameter to IniConfig.parse() (regarding #4)
+  - opt-in parameter to strip leading/trailing whitespace from section names
+  - when True, strips whitespace, including Unicode whitespace (U+00A0, U+2000, U+3000, etc.), from section names
+  - when False (default), preserves existing behavior for backward compatibility
+* clarify Unicode whitespace handling (regarding #4)
+  - since iniconfig 2.0.0 (Python 3 only), all strings are Unicode by default
+  - Python 3's str.strip() has handled Unicode whitespace since Python 3.0 (2008)
+  - iniconfig automatically benefits from this in all supported versions (Python >= 3.10)
+  - key names and values have Unicode whitespace properly stripped using Python's built-in methods
+
 2.2.0
 =====
 

diff --git a/src/iniconfig/__init__.py b/src/iniconfig/__init__.py
@@ -96,33 +96,86 @@ def __init__(
         path: str | os.PathLike[str],
         data: str | None = None,
         encoding: str = "utf-8",
+        *,
+        _sections: Mapping[str, Mapping[str, str]] | None = None,
+        _sources: Mapping[tuple[str, str | None], int] | None = None,
     ) -> None:
         self.path = os.fspath(path)
+
+        # Determine sections and sources
+        if _sections is not None and _sources is not None:
+            # Use provided pre-parsed data (called from parse())
+            sections_data = _sections
+            sources = _sources
+        else:
+            # Parse the data (backward compatible path)
+            if data is None:
+                with open(self.path, encoding=encoding) as fp:
+                    data = fp.read()
+
+            # Use old behavior (inline comments are not stripped) for backward compatibility
+            sections_data, sources = _parse.parse_ini_data(
+                self.path, data, strip_inline_comments=False
+            )
+
+        # Assign once to Final attributes
+        self._sources = sources
+        self.sections = sections_data
+
+    @classmethod
+    def parse(
+        cls,
+        path: str | os.PathLike[str],
+        data: str | None = None,
+        encoding: str = "utf-8",
+        *,
+        strip_inline_comments: bool = True,
+        strip_section_whitespace: bool = False,
+    ) -> "IniConfig":
+        """Parse an INI file.
+
+        Args:
+            path: Path to the INI file (used for error messages)
+            data: Optional INI content as string. If None, reads from path.
+            encoding: Encoding to use when reading the file (default: utf-8)
+            strip_inline_comments: Whether to strip inline comments from values
+                (default: True). When True, comments starting with # or ; are
+                removed from values, matching the behavior for section comments.
+            strip_section_whitespace: Whether to strip whitespace from section names
+                (default: False). When True, strips leading/trailing whitespace (including Unicode whitespace)
+                from section names, addressing issue #4. When False, preserves existing behavior for backward compatibility. Key names are always stripped.
+
+        Returns:
+            IniConfig instance with parsed configuration
+
+        Example:
+            # With comment stripping (default):
+            config = IniConfig.parse("setup.cfg")
+            # value = "foo" instead of "foo # comment"
+
+            # Without comment stripping (old behavior):
+            config = IniConfig.parse("setup.cfg", strip_inline_comments=False)
+            # value = "foo # comment"
+
+            # With section name stripping (opt-in for issue #4):
+            config = IniConfig.parse("setup.cfg", strip_section_whitespace=True)
+            # section names have leading/trailing Unicode whitespace stripped
+        """
+        fspath = os.fspath(path)
+
         if data is None:
-            with open(self.path, encoding=encoding) as fp:
+            with open(fspath, encoding=encoding) as fp:
                 data = fp.read()
 
-        tokens = _parse.parse_lines(self.path, data.splitlines(True))
-
-        self._sources = {}
-        sections_data: dict[str, dict[str, str]]
-        self.sections = sections_data = {}
-
-        for lineno, section, name, value in tokens:
-            if section is None:
-                raise ParseError(self.path, lineno, "no section header defined")
-            self._sources[section, name] = lineno
-            if name is None:
-                if section in self.sections:
-                    raise ParseError(
-                        self.path, lineno, f"duplicate section {section!r}"
-                    )
-                sections_data[section] = {}
-            else:
-                if name in self.sections[section]:
-                    raise ParseError(self.path, lineno, f"duplicate name {name!r}")
-                assert value is not None
-                sections_data[section][name] = value
+        sections_data, sources = _parse.parse_ini_data(
+            fspath,
+            data,
+            strip_inline_comments=strip_inline_comments,
+            strip_section_whitespace=strip_section_whitespace,
+        )
+
+        # Call constructor with pre-parsed sections and sources
+        return cls(path=fspath, _sections=sections_data, _sources=sources)
 
     def lineof(self, section: str, name: str | None = None) -> int | None:
         lineno = self._sources.get((section, name))

diff --git a/src/iniconfig/_parse.py b/src/iniconfig/_parse.py
@@ -1,3 +1,4 @@
+from collections.abc import Mapping
 from typing import NamedTuple
 
 from .exceptions import ParseError
@@ -12,11 +13,67 @@ class ParsedLine(NamedTuple):
     value: str | None
 
 
-def parse_lines(path: str, line_iter: list[str]) -> list[ParsedLine]:
+def parse_ini_data(
+    path: str,
+    data: str,
+    *,
+    strip_inline_comments: bool,
+    strip_section_whitespace: bool = False,
+) -> tuple[Mapping[str, Mapping[str, str]], Mapping[tuple[str, str | None], int]]:
+    """Parse INI data and return sections and sources mappings.
+
+    Args:
+        path: Path for error messages
+        data: INI content as string
+        strip_inline_comments: Whether to strip inline comments from values
+        strip_section_whitespace: Whether to strip whitespace from section names
+            (default: False). When True, addresses issue #4 by stripping Unicode whitespace.
+
+    Returns:
+        Tuple of (sections_data, sources) where:
+        - sections_data: mapping of section -> {name -> value}
+        - sources: mapping of (section, name) -> line number
+    """
+    tokens = parse_lines(
+        path,
+        data.splitlines(True),
+        strip_inline_comments=strip_inline_comments,
+        strip_section_whitespace=strip_section_whitespace,
+    )
+
+    sources: dict[tuple[str, str | None], int] = {}
+    sections_data: dict[str, dict[str, str]] = {}
+
+    for lineno, section, name, value in tokens:
+        if section is None:
+            raise ParseError(path, lineno, "no section header defined")
+        sources[section, name] = lineno
+        if name is None:
+            if section in sections_data:
+                raise ParseError(path, lineno, f"duplicate section {section!r}")
+            sections_data[section] = {}
+        else:
+            if name in sections_data[section]:
+                raise ParseError(path, lineno, f"duplicate name {name!r}")
+            assert value is not None
+            sections_data[section][name] = value
+
+    return sections_data, sources
+
+
+def parse_lines(
+    path: str,
+    line_iter: list[str],
+    *,
+    strip_inline_comments: bool = False,
+    strip_section_whitespace: bool = False,
+) -> list[ParsedLine]:
     result: list[ParsedLine] = []
     section = None
     for lineno, line in enumerate(line_iter):
-        name, data = _parseline(path, line, lineno)
+        name, data = _parseline(
+            path, line, lineno, strip_inline_comments, strip_section_whitespace
+        )
         # new value
         if name is not None and data is not None:
             result.append(ParsedLine(lineno, section, name, data))
@@ -42,7 +99,13 @@ def parse_lines(path: str, line_iter: list[str]) -> list[ParsedLine]:
     return result
 
 
-def _parseline(path: str, line: str, lineno: int) -> tuple[str | None, str | None]:
+def _parseline(
+    path: str,
+    line: str,
+    lineno: int,
+    strip_inline_comments: bool,
+    strip_section_whitespace: bool,
+) -> tuple[str | None, str | None]:
     # blank lines
     if iscommentline(line):
         line = ""
@@ -56,7 +119,11 @@ def _parseline(path: str, line: str, lineno: int) -> tuple[str | None, str | Non
         for c in COMMENTCHARS:
             line = line.split(c)[0].rstrip()
         if line[-1] == "]":
-            return line[1:-1], None
+            section_name = line[1:-1]
+            # Optionally strip whitespace from section name (issue #4)
+            if strip_section_whitespace:
+                section_name = section_name.strip()
+            return section_name, None
         return None, realline.strip()
     # value
     elif not line[0].isspace():
@@ -69,10 +136,26 @@ def _parseline(path: str, line: str, lineno: int) -> tuple[str | None, str | Non
                 name, value = line.split(":", 1)
             except ValueError:
                 raise ParseError(path, lineno, f"unexpected line: {line!r}") from None
-        return name.strip(), value.strip()
+
+        # Strip key name (always, for backward compatibility; str.strip() also removes Unicode whitespace)
+        key_name = name.strip()
+
+        # Strip value
+        value = value.strip()
+        # Strip inline comments from values if requested (issue #55)
+        if strip_inline_comments:
+            for c in COMMENTCHARS:
+                value = value.split(c)[0].rstrip()
+
+        return key_name, value
     # continuation
     else:
-        return None, line.strip()
+        line = line.strip()
+        # Strip inline comments from continuations if requested (issue #55)
+        if strip_inline_comments:
+            for c in COMMENTCHARS:
+                line = line.split(c)[0].rstrip()
+        return None, line
 
 
 def iscommentline(line: str) -> bool:

diff --git a/testing/test_iniconfig.py b/testing/test_iniconfig.py
@@ -125,7 +125,7 @@ def test_iniconfig_from_file(tmp_path: Path) -> None:
     config = IniConfig(str(path), "[diff]")
     assert list(config.sections) == ["diff"]
     with pytest.raises(TypeError):
-        IniConfig(data=path.read_text())  # type: ignore
+        IniConfig(data=path.read_text())  # type: ignore[call-arg]
 
 
 def test_iniconfig_section_first() -> None:
@@ -304,3 +304,111 @@ def test_api_import() -> None:
 )
 def test_iscommentline_true(line: str) -> None:
     assert iscommentline(line)
+
+
+def test_parse_strips_inline_comments() -> None:
+    """Test that IniConfig.parse() strips inline comments from values by default."""
+    config = IniConfig.parse(
+        "test.ini",
+        data=dedent(
+            """
+            [section1]
+            name1 = value1 # this is a comment
+            name2 = value2 ; this is also a comment
+            name3 = value3# no space before comment
+            list = a, b, c # some items
+            """
+        ),
+    )
+    assert config["section1"]["name1"] == "value1"
+    assert config["section1"]["name2"] == "value2"
+    assert config["section1"]["name3"] == "value3"
+    assert config["section1"]["list"] == "a, b, c"
+
+
+def test_parse_strips_inline_comments_from_continuations() -> None:
+    """Test that inline comments are stripped from continuation lines."""
+    config = IniConfig.parse(
+        "test.ini",
+        data=dedent(
+            """
+            [section]
+            names =
+                Alice # first person
+                Bob ; second person
+                Charlie
+            """
+        ),
+    )
+    assert config["section"]["names"] == "Alice\nBob\nCharlie"
+
+
+def test_parse_preserves_inline_comments_when_disabled() -> None:
+    """Test that IniConfig.parse(strip_inline_comments=False) preserves comments."""
+    config = IniConfig.parse(
+        "test.ini",
+        data=dedent(
+            """
+            [section1]
+            name1 = value1 # this is a comment
+            name2 = value2 ; this is also a comment
+            list = a, b, c # some items
+            """
+        ),
+        strip_inline_comments=False,
+    )
+    assert config["section1"]["name1"] == "value1 # this is a comment"
+    assert config["section1"]["name2"] == "value2 ; this is also a comment"
+    assert config["section1"]["list"] == "a, b, c # some items"
+
+
+def test_constructor_preserves_inline_comments_for_backward_compatibility() -> None:
+    """Test that IniConfig() constructor preserves old behavior (no stripping)."""
+    config = IniConfig(
+        "test.ini",
+        data=dedent(
+            """
+            [section1]
+            name1 = value1 # this is a comment
+            name2 = value2 ; this is also a comment
+            """
+        ),
+    )
+    assert config["section1"]["name1"] == "value1 # this is a comment"
+    assert config["section1"]["name2"] == "value2 ; this is also a comment"
+
+
+def test_unicode_whitespace_stripped() -> None:
+    """Test that Unicode whitespace is stripped (issue #4)."""
+    config = IniConfig(
+        "test.ini",
+        data="[section]\n"
+        + "name1 = \u00a0value1\u00a0\n"  # NO-BREAK SPACE
+        + "name2 = \u2000value2\u2000\n"  # EN QUAD
+        + "name3 = \u3000value3\u3000\n",  # IDEOGRAPHIC SPACE
+    )
+    assert config["section"]["name1"] == "value1"
+    assert config["section"]["name2"] == "value2"
+    assert config["section"]["name3"] == "value3"
+
+
+def test_unicode_whitespace_in_section_names_with_opt_in() -> None:
+    """Test that Unicode whitespace can be stripped from section names with opt-in (issue #4)."""
+    config = IniConfig.parse(
+        "test.ini",
+        data="[section\u00a0]\n"  # NO-BREAK SPACE at end
+        + "key = value\n",
+        strip_section_whitespace=True,
+    )
+    assert "section" in config
+    assert config["section"]["key"] == "value"
+
+
+def test_unicode_whitespace_in_key_names() -> None:
+    """Test that Unicode whitespace is stripped from key names (issue #4)."""
+    config = IniConfig(
+        "test.ini",
+        data="[section]\n" + "key\u00a0 = value\n",  # NO-BREAK SPACE after key
+    )
+    assert "key" in config["section"]
+    assert config["section"]["key"] == "value"