Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
2.3.0
=====

* add IniConfig.parse() classmethod with strip_inline_comments parameter (fixes #55)
- by default (strip_inline_comments=True), inline comments are properly stripped from values
- set strip_inline_comments=False to preserve old behavior if needed
* IniConfig() constructor maintains backward compatibility (does not strip inline comments)
* users should migrate to IniConfig.parse() for correct comment handling
* add strip_section_whitespace parameter to IniConfig.parse() (regarding #4)
- opt-in parameter to strip Unicode whitespace from section names
- when True, strips Unicode whitespace (U+00A0, U+2000, U+3000, etc.) from section names
- when False (default), preserves existing behavior for backward compatibility
* clarify Unicode whitespace handling (regarding #4)
- since iniconfig 2.0.0 (Python 3 only), all strings are Unicode by default
- Python 3's str.strip() has handled Unicode whitespace since Python 3.0 (2008)
- iniconfig automatically benefits from this in all supported versions (Python >= 3.10)
- key names and values have Unicode whitespace properly stripped using Python's built-in methods

2.2.0
=====

Expand Down
97 changes: 75 additions & 22 deletions src/iniconfig/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,33 +96,86 @@ def __init__(
path: str | os.PathLike[str],
data: str | None = None,
encoding: str = "utf-8",
*,
_sections: Mapping[str, Mapping[str, str]] | None = None,
_sources: Mapping[tuple[str, str | None], int] | None = None,
) -> None:
self.path = os.fspath(path)

# Determine sections and sources
if _sections is not None and _sources is not None:
# Use provided pre-parsed data (called from parse())
sections_data = _sections
sources = _sources
else:
# Parse the data (backward compatible path)
if data is None:
with open(self.path, encoding=encoding) as fp:
data = fp.read()

# Use old behavior (no stripping) for backward compatibility
sections_data, sources = _parse.parse_ini_data(
self.path, data, strip_inline_comments=False
)

# Assign once to Final attributes
self._sources = sources
self.sections = sections_data

@classmethod
def parse(
    cls,
    path: str | os.PathLike[str],
    data: str | None = None,
    encoding: str = "utf-8",
    *,
    strip_inline_comments: bool = True,
    strip_section_whitespace: bool = False,
) -> "IniConfig":
    """Parse INI content and return a new instance.

    Args:
        path: Path to the INI file. It is read when ``data`` is None and is
            always handed to the parser for use in error messages.
        data: Optional INI content as a string. If None, the content is
            read from ``path`` using ``encoding``.
        encoding: Encoding to use when reading the file (default: utf-8).
        strip_inline_comments: Whether to strip inline comments from values
            and continuation lines (default: True). When True, everything
            from the first ``#`` or ``;`` onward is removed, so a value that
            legitimately contains one of these characters is truncated;
            pass False to keep values verbatim.
        strip_section_whitespace: Whether to strip surrounding (Unicode)
            whitespace from section names (default: False, which keeps the
            existing behavior). Key names and values are stripped
            regardless of this flag.

    Returns:
        IniConfig instance with the parsed configuration.

    Example:
        # With comment stripping (default):
        config = IniConfig.parse("setup.cfg")
        # value = "foo" instead of "foo # comment"

        # Without comment stripping (old behavior):
        config = IniConfig.parse("setup.cfg", strip_inline_comments=False)
        # value = "foo # comment"

        # With section name stripping (opt-in for issue #4):
        config = IniConfig.parse("setup.cfg", strip_section_whitespace=True)
        # section names have surrounding Unicode whitespace stripped
    """
    fspath = os.fspath(path)

    if data is None:
        with open(fspath, encoding=encoding) as fp:
            data = fp.read()

    sections_data, sources = _parse.parse_ini_data(
        fspath,
        data,
        strip_inline_comments=strip_inline_comments,
        strip_section_whitespace=strip_section_whitespace,
    )

    # Hand the pre-parsed mappings to the constructor so that it does not
    # parse the data a second time with its backward-compatible defaults.
    return cls(path=fspath, _sections=sections_data, _sources=sources)

def lineof(self, section: str, name: str | None = None) -> int | None:
lineno = self._sources.get((section, name))
Expand Down
95 changes: 89 additions & 6 deletions src/iniconfig/_parse.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections.abc import Mapping
from typing import NamedTuple

from .exceptions import ParseError
Expand All @@ -12,11 +13,67 @@ class ParsedLine(NamedTuple):
value: str | None


def parse_lines(path: str, line_iter: list[str]) -> list[ParsedLine]:
def parse_ini_data(
    path: str,
    data: str,
    *,
    strip_inline_comments: bool,
    strip_section_whitespace: bool = False,
) -> tuple[Mapping[str, Mapping[str, str]], Mapping[tuple[str, str | None], int]]:
    """Turn INI text into section and line-number mappings.

    Args:
        path: Path reported in ``ParseError`` messages.
        data: INI content as a string.
        strip_inline_comments: Forwarded to ``parse_lines``; whether inline
            comments are removed from values.
        strip_section_whitespace: Forwarded to ``parse_lines``; whether
            surrounding whitespace is stripped from section names
            (default: False).

    Returns:
        A ``(sections_data, sources)`` tuple where ``sections_data`` maps
        each section to its ``{name: value}`` mapping and ``sources`` maps
        ``(section, name)`` to a line number (``name`` is None for the
        section header itself).

    Raises:
        ParseError: If a value appears before any section header, or a
            section or a name within a section is defined twice.
    """
    line_numbers: dict[tuple[str, str | None], int] = {}
    parsed_sections: dict[str, dict[str, str]] = {}

    parsed_lines = parse_lines(
        path,
        data.splitlines(True),
        strip_inline_comments=strip_inline_comments,
        strip_section_whitespace=strip_section_whitespace,
    )

    for lineno, section, name, value in parsed_lines:
        if section is None:
            raise ParseError(path, lineno, "no section header defined")

        line_numbers[section, name] = lineno

        if name is None:
            # A token without a name is a section header.
            if section in parsed_sections:
                raise ParseError(path, lineno, f"duplicate section {section!r}")
            parsed_sections[section] = {}
            continue

        entries = parsed_sections[section]
        if name in entries:
            raise ParseError(path, lineno, f"duplicate name {name!r}")
        assert value is not None
        entries[name] = value

    return parsed_sections, line_numbers


def parse_lines(
path: str,
line_iter: list[str],
*,
strip_inline_comments: bool = False,
strip_section_whitespace: bool = False,
) -> list[ParsedLine]:
result: list[ParsedLine] = []
section = None
for lineno, line in enumerate(line_iter):
name, data = _parseline(path, line, lineno)
name, data = _parseline(
path, line, lineno, strip_inline_comments, strip_section_whitespace
)
# new value
if name is not None and data is not None:
result.append(ParsedLine(lineno, section, name, data))
Expand All @@ -42,7 +99,13 @@ def parse_lines(path: str, line_iter: list[str]) -> list[ParsedLine]:
return result


def _parseline(path: str, line: str, lineno: int) -> tuple[str | None, str | None]:
def _parseline(
path: str,
line: str,
lineno: int,
strip_inline_comments: bool,
strip_section_whitespace: bool,
) -> tuple[str | None, str | None]:
# blank lines
if iscommentline(line):
line = ""
Expand All @@ -56,7 +119,11 @@ def _parseline(path: str, line: str, lineno: int) -> tuple[str | None, str | Non
for c in COMMENTCHARS:
line = line.split(c)[0].rstrip()
if line[-1] == "]":
return line[1:-1], None
section_name = line[1:-1]
# Optionally strip whitespace from section name (issue #4)
if strip_section_whitespace:
section_name = section_name.strip()
return section_name, None
return None, realline.strip()
# value
elif not line[0].isspace():
Expand All @@ -69,10 +136,26 @@ def _parseline(path: str, line: str, lineno: int) -> tuple[str | None, str | Non
name, value = line.split(":", 1)
except ValueError:
raise ParseError(path, lineno, f"unexpected line: {line!r}") from None
return name.strip(), value.strip()

# Strip key name unconditionally (str.strip() already removes Unicode whitespace)
key_name = name.strip()

# Strip value
value = value.strip()
# Strip inline comments from values if requested (issue #55)
if strip_inline_comments:
for c in COMMENTCHARS:
value = value.split(c)[0].rstrip()

return key_name, value
# continuation
else:
return None, line.strip()
line = line.strip()
# Strip inline comments from continuations if requested (issue #55)
if strip_inline_comments:
for c in COMMENTCHARS:
line = line.split(c)[0].rstrip()
return None, line


def iscommentline(line: str) -> bool:
Expand Down
110 changes: 109 additions & 1 deletion testing/test_iniconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def test_iniconfig_from_file(tmp_path: Path) -> None:
config = IniConfig(str(path), "[diff]")
assert list(config.sections) == ["diff"]
with pytest.raises(TypeError):
IniConfig(data=path.read_text()) # type: ignore
IniConfig(data=path.read_text()) # type: ignore[call-arg]


def test_iniconfig_section_first() -> None:
Expand Down Expand Up @@ -304,3 +304,111 @@ def test_api_import() -> None:
)
def test_iscommentline_true(line: str) -> None:
assert iscommentline(line)


def test_parse_strips_inline_comments() -> None:
    """IniConfig.parse() drops ``#`` and ``;`` inline comments by default."""
    ini_text = dedent(
        """
        [section1]
        name1 = value1 # this is a comment
        name2 = value2 ; this is also a comment
        name3 = value3# no space before comment
        list = a, b, c # some items
        """
    )
    config = IniConfig.parse("test.ini", data=ini_text)

    expected = {
        "name1": "value1",
        "name2": "value2",
        "name3": "value3",
        "list": "a, b, c",
    }
    for key, want in expected.items():
        assert config["section1"][key] == want


def test_parse_strips_inline_comments_from_continuations() -> None:
    """Test that inline comments are stripped from continuation lines."""
    config = IniConfig.parse(
        "test.ini",
        data=dedent(
            """
            [section]
            names =
                Alice # first person
                Bob ; second person
                Charlie
            """
        ),
    )
    # The entries under ``names =`` must stay indented relative to the key:
    # only lines starting with whitespace are treated as continuations, so
    # without the indent they would be parsed as new (invalid) entries.
    assert config["section"]["names"] == "Alice\nBob\nCharlie"


def test_parse_preserves_inline_comments_when_disabled() -> None:
    """With strip_inline_comments=False, parse() keeps comment text in values."""
    ini_text = dedent(
        """
        [section1]
        name1 = value1 # this is a comment
        name2 = value2 ; this is also a comment
        list = a, b, c # some items
        """
    )
    config = IniConfig.parse("test.ini", data=ini_text, strip_inline_comments=False)

    expected = {
        "name1": "value1 # this is a comment",
        "name2": "value2 ; this is also a comment",
        "list": "a, b, c # some items",
    }
    for key, want in expected.items():
        assert config["section1"][key] == want


def test_constructor_preserves_inline_comments_for_backward_compatibility() -> None:
    """The plain IniConfig() constructor leaves inline comments in values."""
    ini_text = dedent(
        """
        [section1]
        name1 = value1 # this is a comment
        name2 = value2 ; this is also a comment
        """
    )
    config = IniConfig("test.ini", data=ini_text)

    expected = {
        "name1": "value1 # this is a comment",
        "name2": "value2 ; this is also a comment",
    }
    for key, want in expected.items():
        assert config["section1"][key] == want


def test_unicode_whitespace_stripped() -> None:
    """Values lose surrounding Unicode whitespace via the constructor (issue #4)."""
    ini_lines = [
        "[section]\n",
        "name1 = \u00a0value1\u00a0\n",  # NO-BREAK SPACE
        "name2 = \u2000value2\u2000\n",  # EN QUAD
        "name3 = \u3000value3\u3000\n",  # IDEOGRAPHIC SPACE
    ]
    config = IniConfig("test.ini", data="".join(ini_lines))

    for key, want in (("name1", "value1"), ("name2", "value2"), ("name3", "value3")):
        assert config["section"][key] == want


def test_unicode_whitespace_in_section_names_with_opt_in() -> None:
    """strip_section_whitespace=True trims Unicode whitespace in section names (issue #4)."""
    # The section header ends with a NO-BREAK SPACE before the closing bracket.
    ini_text = "".join(["[section\u00a0]\n", "key = value\n"])
    config = IniConfig.parse(
        "test.ini",
        data=ini_text,
        strip_section_whitespace=True,
    )

    assert "section" in config
    assert config["section"]["key"] == "value"


def test_unicode_whitespace_in_key_names() -> None:
    """Key names lose trailing Unicode whitespace via the constructor (issue #4)."""
    # A NO-BREAK SPACE follows the key name.
    ini_text = "".join(["[section]\n", "key\u00a0 = value\n"])
    config = IniConfig("test.ini", data=ini_text)

    assert "key" in config["section"]
    assert config["section"]["key"] == "value"