Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -333,5 +333,6 @@ ASALocalRun/
.DS_Store

# rust
rust/target/
target/venv/
.venv/
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
using Xunit;
using System.Collections.Generic;

namespace Link.Foundation.Links.Notation.Tests
{
    /// <summary>
    /// Regression tests for issue #135: documents that differ only in how much
    /// whitespace is used for indentation must format to the same output after parsing.
    /// </summary>
    /// <remarks>
    /// Every input is assembled from explicit per-line strings so that the leading
    /// spaces under test are visible in the source and cannot be lost to editor
    /// or formatter whitespace normalisation.
    /// </remarks>
    public class IndentationConsistencyTests
    {
        private const string TwoSpaces = "  ";
        private const string FourSpaces = "    ";

        /// <summary>Joins <paramref name="lines"/> with a single "\n" between them.</summary>
        private static string Lines(params string[] lines)
        {
            return string.Join("\n", lines);
        }

        /// <summary>
        /// Parses both sources with a fresh <c>Parser</c> each and asserts that their
        /// <c>Format()</c> outputs are equal (complete round trip comparison).
        /// </summary>
        /// <param name="expectedSource">Source whose formatted output is the expected value.</param>
        /// <param name="actualSource">Source whose formatted output is compared against it.</param>
        private static void AssertSameFormat(string expectedSource, string actualSource)
        {
            // Guard against a vacuous test: if the two inputs are the same text,
            // comparing their formatted output proves nothing about indentation.
            Assert.NotEqual(expectedSource, actualSource);

            var expected = new Parser().Parse(expectedSource);
            var actual = new Parser().Parse(actualSource);

            Assert.Equal(expected.Format(), actual.Format());
        }

        /// <summary>
        /// Builds the full configuration example from issue #135 with top-level keys
        /// at column 0 and every child prefixed by <paramref name="indent"/>.
        /// </summary>
        private static string TelegramConfig(string indent)
        {
            return Lines(
                "TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'",
                "TELEGRAM_ALLOWED_CHATS:",
                indent + "-1002975819706",
                indent + "-1002861722681",
                "TELEGRAM_HIVE_OVERRIDES:",
                indent + "--all-issues",
                indent + "--once",
                indent + "--auto-fork",
                indent + "--skip-issues-with-prs",
                indent + "--attach-logs",
                indent + "--verbose",
                indent + "--no-tool-check",
                "TELEGRAM_SOLVE_OVERRIDES:",
                indent + "--auto-fork",
                indent + "--auto-continue",
                indent + "--attach-logs",
                indent + "--verbose",
                indent + "--no-tool-check",
                "TELEGRAM_BOT_VERBOSE: true");
        }

        /// <summary>
        /// Builds a three-level document in which each nesting level adds one
        /// <paramref name="indent"/> unit.
        /// </summary>
        private static string ThreeLevels(string indent)
        {
            return Lines(
                "level1:",
                indent + "level2:",
                indent + indent + "level3a",
                indent + indent + "level3b",
                indent + "level2b");
        }

        [Fact]
        public void LeadingSpacesVsNoLeadingSpacesShouldProduceSameResult()
        {
            // Example with 2 leading spaces on every line (from issue #135)
            var withLeading = Lines(
                "  TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'",
                "  TELEGRAM_ALLOWED_CHATS:",
                "    -1002975819706",
                "    -1002861722681",
                "  TELEGRAM_HIVE_OVERRIDES:",
                "    --all-issues",
                "    --once",
                "  TELEGRAM_BOT_VERBOSE: true");

            // Same document without the leading spaces (from issue #135)
            var withoutLeading = Lines(
                "TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'",
                "TELEGRAM_ALLOWED_CHATS:",
                "  -1002975819706",
                "  -1002861722681",
                "TELEGRAM_HIVE_OVERRIDES:",
                "  --all-issues",
                "  --once",
                "TELEGRAM_BOT_VERBOSE: true");

            AssertSameFormat(expectedSource: withoutLeading, actualSource: withLeading);
        }

        [Fact]
        public void TwoSpacesVsFourSpacesIndentation()
        {
            var twoSpaces = TelegramConfig(TwoSpaces);
            var fourSpaces = TelegramConfig(FourSpaces);

            AssertSameFormat(expectedSource: fourSpaces, actualSource: twoSpaces);
        }

        [Fact]
        public void SimpleTwoVsFourSpacesIndentation()
        {
            // Simple example with 2 spaces
            var twoSpaces = Lines(
                "parent:",
                "  child1",
                "  child2");

            // Simple example with 4 spaces
            var fourSpaces = Lines(
                "parent:",
                "    child1",
                "    child2");

            AssertSameFormat(expectedSource: fourSpaces, actualSource: twoSpaces);
        }

        [Fact]
        public void ThreeLevelNestingWithDifferentIndentation()
        {
            var twoSpaces = ThreeLevels(TwoSpaces);
            var fourSpaces = ThreeLevels(FourSpaces);

            AssertSameFormat(expectedSource: fourSpaces, actualSource: twoSpaces);
        }
    }
}
12 changes: 7 additions & 5 deletions csharp/Link.Foundation.Links.Notation/Parser.peg
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
@namespace Link.Foundation.Links.Notation
@classname Parser
@using System.Linq
// NOTE(review): `document` is defined twice in this listing (the next two rule
// lines); this looks like a before/after pair from a diff view — confirm that
// only one definition exists in the real grammar.
// Variant 1: initialises state["IndentationStack"] with a single 0, matches `_`
// (defined outside this excerpt), then `links` up to end of input; otherwise
// accepts an input with no links and yields an empty list.
document <IList<Link<string>>> = #{ state["IndentationStack"] = new Stack<int>(); state["IndentationStack"].Push(0); } _ l:links eof { l.ToLinksList() } / #{ state["IndentationStack"] = new Stack<int>(); state["IndentationStack"].Push(0); } _ eof { new List<Link<string>>() }
// Variant 2: same stack initialisation, and additionally sets
// state["BaseIndentation"] to -1 (the value SET_BASE_INDENTATION tests for before
// recording a baseline). Matches `skipEmptyLines` before `links` and `_` after
// it, so `_` no longer runs ahead of the first content line.
document <IList<Link<string>>> = #{ state["IndentationStack"] = new Stack<int>(); state["IndentationStack"].Push(0); state["BaseIndentation"] = -1; } skipEmptyLines l:links _ eof { l.ToLinksList() } / #{ state["IndentationStack"] = new Stack<int>(); state["IndentationStack"].Push(0); state["BaseIndentation"] = -1; } _ eof { new List<Link<string>>() }
// Consumes any number of lines that contain only spaces/tabs, each terminated by
// one \r or \n character. A line with other content is left untouched, including
// its leading spaces.
skipEmptyLines = ([ \t]* [\r\n])*
// One `firstLine` followed by zero or more `line`s; POP_INDENTATION runs after
// them, and the first group plus the rest are returned as a single list.
links <IList<LinksGroup<string>>> = fl:firstLine list:line* POP_INDENTATION { new List<LinksGroup<string>> { fl }.Concat(list).ToList() }
// NOTE(review): `firstLine` is also defined twice (next two rule lines) —
// presumably the same before/after situation as `document`; confirm.
// Variant 1: an element with no indentation handling of its own.
firstLine <LinksGroup<string>> = l:element { l }
// Variant 2: runs SET_BASE_INDENTATION before the element.
firstLine <LinksGroup<string>> = SET_BASE_INDENTATION l:element { l }
// A following line: CHECK_INDENTATION must succeed before the element is parsed.
line <LinksGroup<string>> = CHECK_INDENTATION l:element { l }
// An element is a link followed by a nested `links` block when PUSH_INDENTATION
// succeeds; otherwise it is the link on its own.
element <LinksGroup<string>> = e:anyLink PUSH_INDENTATION l:links { new LinksGroup<string>(e, l) } / e:anyLink { new LinksGroup<string>(e) }
referenceOrLink <Link<string>> = l:multiLineAnyLink { l } / i:reference { i }
Expand All @@ -20,13 +21,14 @@ singleLineValueLink <Link<string>> = v:singleLineValues { new Link<string>(v) }
multiLineValueLink <Link<string>> = "(" v:multiLineValues _ ")" { new Link<string>(v) }
indentedIdLink <Link<string>> = id:(reference) __ ":" eol { new Link<string>(id) }

reference <string> = doubleQuotedReference / singleQuotedReference / simpleReference
reference <string> = doubleQuotedReference / singleQuotedReference / simpleReference
simpleReference <string> = "" referenceSymbol+
doubleQuotedReference <string> = '"' r:([^"]+) '"' { string.Join("", r) }
singleQuotedReference <string> = "'" r:([^']+) "'" { string.Join("", r) }
// NOTE(review): PUSH_INDENTATION and CHECK_INDENTATION each appear twice below
// (a raw-count variant and a normalized variant); this looks like a before/after
// pair from a diff view — confirm that only one definition of each exists in the
// real grammar.
// In every rule here only literal space characters (" ") are counted; tabs are
// not matched by `" "*`.
// Raw-count variant: succeeds when the number of consumed spaces is greater than
// the value on top of the indentation stack, then pushes that raw count.
PUSH_INDENTATION = spaces:" "* &{ spaces.Count > state["IndentationStack"].Peek() } #{ state["IndentationStack"].Push(spaces.Count); }
// Consumes leading spaces and stores their count in state["BaseIndentation"],
// but only while that value is still -1; later invocations leave it unchanged.
SET_BASE_INDENTATION = spaces:" "* #{ if ((int)state["BaseIndentation"] == -1) state["BaseIndentation"] = spaces.Count; }
// Normalized variant: state["NormalizedIndent"] = consumed spaces minus
// BaseIndentation (0 is subtracted while BaseIndentation is still -1), clamped
// to a minimum of 0. Succeeds when that value is greater than the top of the
// stack, then pushes the normalized value.
PUSH_INDENTATION = spaces:" "* #{ state["NormalizedIndent"] = spaces.Count - ((int)state["BaseIndentation"] == -1 ? 0 : (int)state["BaseIndentation"]); if ((int)state["NormalizedIndent"] < 0) state["NormalizedIndent"] = 0; } &{ (int)state["NormalizedIndent"] > (int)state["IndentationStack"].Peek() } #{ state["IndentationStack"].Push((int)state["NormalizedIndent"]); }
// Removes the top entry of the indentation stack; consumes no input.
POP_INDENTATION = #{ state["IndentationStack"].Pop(); }
// Raw-count variant: succeeds when the number of consumed spaces is at least the
// value on top of the indentation stack. Nothing is pushed.
CHECK_INDENTATION = spaces:" "* &{ spaces.Count >= state["IndentationStack"].Peek() }
// Normalized variant: computes state["NormalizedIndent"] the same way as the
// normalized PUSH_INDENTATION and succeeds when it is at least the top of the
// stack. Nothing is pushed.
CHECK_INDENTATION = spaces:" "* #{ state["NormalizedIndent"] = spaces.Count - ((int)state["BaseIndentation"] == -1 ? 0 : (int)state["BaseIndentation"]); if ((int)state["NormalizedIndent"] < 0) state["NormalizedIndent"] = 0; } &{ (int)state["NormalizedIndent"] >= (int)state["IndentationStack"].Peek() }
eol = __ ("" [\r\n]+ / eof)
eof = !.
__ = [ \t]*
Expand Down
97 changes: 97 additions & 0 deletions experiments/SOLUTION_SUMMARY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Solution Summary for Issue #135

## Problem Statement

The parser was treating documents with leading spaces differently than documents without leading spaces, even when the relative indentation was the same.

### Example of the Bug:
These two should parse identically, but didn't:

```yaml
  TELEGRAM_BOT_TOKEN: 'value'
  TELEGRAM_ALLOWED_CHATS:
    item1
    item2
```

```yaml
TELEGRAM_BOT_TOKEN: 'value'
TELEGRAM_ALLOWED_CHATS:
  item1
  item2
```

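In the first example, the parser incorrectly treated `TELEGRAM_ALLOWED_CHATS` as a child of `TELEGRAM_BOT_TOKEN`. Both lines have 2 leading spaces, but the first line's leading spaces were skipped before indentation was measured, so the second line, still counted as 2 spaces, appeared to be indented deeper than the first.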

## Root Cause

All parsers were counting **absolute** indentation (number of spaces from the start of the line) instead of **relative** indentation (increase/decrease compared to the parent level).

## Solution

The fix normalizes indentation by:
1. Detecting the first content line's indentation and treating it as the baseline (level 0)
2. Subtracting this baseline from the indentation of every subsequent line
3. Comparing the normalized values, so that indentation is measured **relative** to the first content line

### Implementation Details

#### Rust (`rust/src/parser.rs`)
- Added `base_indentation` field to `ParserState`
- Added `set_base_indentation()`, `get_base_indentation()`, and `normalize_indentation()` methods
- Modified `first_line()` to capture and set the base indentation
- Updated `push_indentation()` and `check_indentation()` to normalize values before comparison

#### JavaScript (`js/src/grammar.pegjs`)
- Added `baseIndentation` variable to track the first line's indentation
- Added `setBaseIndentation()` and `normalizeIndentation()` functions
- Updated `document` rule to skip only empty lines (not leading spaces on content lines)
- Added `SET_BASE_INDENTATION` rule called from `firstLine`
- Modified `PUSH_INDENTATION` and `CHECK_INDENTATION` to use normalized values

#### C# (`csharp/Link.Foundation.Links.Notation/Parser.peg`)
- Added `BaseIndentation` to parser state
- Added `skipEmptyLines` rule to preserve leading spaces on first content line
- Added `SET_BASE_INDENTATION` rule
- Updated `PUSH_INDENTATION` and `CHECK_INDENTATION` to normalize indentation

#### Python (TODO)
- Needs similar changes to `python/links_notation/parser.py`
- Should track `base_indentation` in the Parser class
- Update `_parse_element()` to normalize indentation values

## Test Coverage

Added test cases for the three implementations fixed so far (Python is still pending):
- `rust/tests/indentation_consistency_tests.rs`
- `js/tests/IndentationConsistency.test.js`
- `csharp/Link.Foundation.Links.Notation.Tests/IndentationConsistencyTests.cs`

Each test suite verifies:
1. Documents with leading spaces vs no leading spaces produce identical results
2. Different indentation sizes (2 vs 4 spaces) work correctly
3. Multi-level nesting preserves structure regardless of indentation style

## Results

✅ **Rust**: All tests passing (106 tests)
✅ **JavaScript**: All tests passing (106 tests)
🔧 **C#**: Fixed, tests pending build verification
⏳ **Python**: Implementation pending

## Verification

Run tests:
```bash
# Rust
cd rust && cargo test

# JavaScript
cd js && npm test

# C#
cd csharp && dotnet test

# Python
cd python && python -m pytest
```
62 changes: 62 additions & 0 deletions experiments/test_indentation_consistency.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Indentation Consistency Test

This document contains test cases for issue #135: any indentation, as long as it is consistent within a single level, should not change parser semantics.

## Test Case 1: Two spaces vs Four spaces

Both of these examples should parse to exactly the same result:

### Example with 2 spaces per level:
```
TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
TELEGRAM_ALLOWED_CHATS:
  -1002975819706
  -1002861722681
TELEGRAM_HIVE_OVERRIDES:
  --all-issues
  --once
  --auto-fork
  --skip-issues-with-prs
  --attach-logs
  --verbose
  --no-tool-check
TELEGRAM_SOLVE_OVERRIDES:
  --auto-fork
  --auto-continue
  --attach-logs
  --verbose
  --no-tool-check
TELEGRAM_BOT_VERBOSE: true
```

### Example with 4 spaces per level:
```
TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
TELEGRAM_ALLOWED_CHATS:
    -1002975819706
    -1002861722681
TELEGRAM_HIVE_OVERRIDES:
    --all-issues
    --once
    --auto-fork
    --skip-issues-with-prs
    --attach-logs
    --verbose
    --no-tool-check
TELEGRAM_SOLVE_OVERRIDES:
    --auto-fork
    --auto-continue
    --attach-logs
    --verbose
    --no-tool-check
TELEGRAM_BOT_VERBOSE: true
```

## Expected Behavior

The parser should only care about:
1. **Relative indentation** - what matters is whether a line is indented more or less than its parent
2. **Consistency** - all children at the same level should have the same indentation

The parser should NOT care about:
1. **Absolute indentation amount** - whether it's 2 spaces, 4 spaces, 8 spaces, or even tabs
64 changes: 64 additions & 0 deletions experiments/test_rust_indentation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
use links_notation::parse_lino_to_links;

fn main() {
// Example with 2 spaces
let two_spaces = "parent:\n child1\n child2";

// Example with 4 spaces
let four_spaces = "parent:\n child1\n child2";

println!("=== Two Spaces ===");
match parse_lino_to_links(two_spaces) {
Ok(links) => {
println!("Parsed {} links:", links.len());
for (i, link) in links.iter().enumerate() {
println!(" Link {}: {}", i, link);
}
}
Err(e) => println!("Error: {}", e),
}

println!("\n=== Four Spaces ===");
match parse_lino_to_links(four_spaces) {
Ok(links) => {
println!("Parsed {} links:", links.len());
for (i, link) in links.iter().enumerate() {
println!(" Link {}: {}", i, link);
}
}
Err(e) => println!("Error: {}", e),
}

// Test the issue example
println!("\n=== Issue Example (leading 2 spaces) ===");
let issue_two_spaces = " TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
TELEGRAM_ALLOWED_CHATS:
-1002975819706
-1002861722681";

match parse_lino_to_links(issue_two_spaces) {
Ok(links) => {
println!("Parsed {} links:", links.len());
for (i, link) in links.iter().enumerate() {
println!(" Link {}: {}", i, link);
}
}
Err(e) => println!("Error: {}", e),
}

println!("\n=== Issue Example (no leading spaces) ===");
let issue_no_leading = "TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
TELEGRAM_ALLOWED_CHATS:
-1002975819706
-1002861722681";

match parse_lino_to_links(issue_no_leading) {
Ok(links) => {
println!("Parsed {} links:", links.len());
for (i, link) in links.iter().enumerate() {
println!(" Link {}: {}", i, link);
}
}
Err(e) => println!("Error: {}", e),
}
}
Loading
Loading