From 49b42a55a181f7b68663edcd2e99fc21001f967b Mon Sep 17 00:00:00 2001
From: Henrik Lindberg <563066+hlindberg@users.noreply.github.com>
Date: Mon, 24 Sep 2018 16:12:49 +0200
Subject: [PATCH] (PUP-9163) Fix lexer problem with heredoc and sublocator

Before this, heredoc set up a locator intended to define a subspace
for the heredoc string (for lexing its content to support
interpolation). What it ended up doing was to create a subspace offset
not from the total source string, but from within the heredoc string.
This resulted in offsets being calculated to point to char positions in
an undefined manner (depending of length of heredoc, its position and
what follows). This would then result in error messages pointing to the
wrong place in the source code as well as wrongly computing if a `[` is
an array start or not by checking if the preceding char is a space since
it would end up looking at some undefined position.

This was made worse (or the same) because the lexer would use the
returned char_offset as an offset in its subspace string instead of in
the total space.
---
 lib/puppet/pops/parser/heredoc_support.rb   |  3 +--
 lib/puppet/pops/parser/lexer2.rb            |  2 +-
 spec/unit/pops/parser/parse_heredoc_spec.rb | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/lib/puppet/pops/parser/heredoc_support.rb b/lib/puppet/pops/parser/heredoc_support.rb
index d0bd1073edd..53bd2b0bc68 100644
--- a/lib/puppet/pops/parser/heredoc_support.rb
+++ b/lib/puppet/pops/parser/heredoc_support.rb
@@ -98,8 +98,7 @@ def heredoc
 
         # Use a new lexer instance configured with a sub-locator to enable correct positioning
         sublexer = self.class.new()
-        locator = Locator::SubLocator.sub_locator(str,
-          locator.file, heredoc_line, heredoc_offset, leading.length())
+        locator = Locator::SubLocator.new(locator, heredoc_line, heredoc_offset, leading.length())
 
         # Emit a token that provides the grammar with location information about the lines on which the heredoc
         # content is based.
diff --git a/lib/puppet/pops/parser/lexer2.rb b/lib/puppet/pops/parser/lexer2.rb
index d095f69a956..c31bfd6aef8 100644
--- a/lib/puppet/pops/parser/lexer2.rb
+++ b/lib/puppet/pops/parser/lexer2.rb
@@ -189,7 +189,7 @@ def initialize()
       ',' => lambda {  emit(TOKEN_COMMA, @scanner.pos) },
       '[' => lambda do
         before = @scanner.pos
-        if (before == 0 || @scanner.string[locator.char_offset(before)-1,1] =~ /[[:blank:]\r\n]+/)
+        if (before == 0 || locator.string[locator.char_offset(before)-1,1] =~ /[[:blank:]\r\n]+/)
           emit(TOKEN_LISTSTART, before)
         else
           emit(TOKEN_LBRACK, before)
diff --git a/spec/unit/pops/parser/parse_heredoc_spec.rb b/spec/unit/pops/parser/parse_heredoc_spec.rb
index 5a7b60aa50d..3b7006517f4 100644
--- a/spec/unit/pops/parser/parse_heredoc_spec.rb
+++ b/spec/unit/pops/parser/parse_heredoc_spec.rb
@@ -140,6 +140,22 @@
     ].join("\n"))
   end
 
+  it "parses interpolated [] expression by looking at the correct preceding char for space" do
+    # NOTE: Important not to use the left margin feature here
+    src = <<-CODE
+$xxxxxxx = @("END")
+${facts['os']['family']}
+XXXXXXX XXX
+END
+CODE
+    expect(dump(parse(src))).to eq([
+      "(= $xxxxxxx (@()",
+      "  (sublocated (cat (str (slice (slice $facts 'os') 'family')) '",
+      "XXXXXXX XXX",
+      "'))",
+      "))"].join("\n"))
+  end
+
   it 'parses multiple heredocs on the same line' do
     src = <<-CODE
     notice({ @(foo) => @(bar) })