From 49b42a55a181f7b68663edcd2e99fc21001f967b Mon Sep 17 00:00:00 2001 From: Henrik Lindberg <563066+hlindberg@users.noreply.github.com> Date: Mon, 24 Sep 2018 16:12:49 +0200 Subject: [PATCH] (PUP-9163) Fix lexer problem with heredoc and sublocator Before this, heredoc set up a locator intended to define a subspace for the heredoc string (for lexing its content to support interpolation). What it ended up doing was to create a subspace offset not from the total source string, but from within the heredoc string. This resulted in offsets being calculated to point to char positions in an undefined manner (depending of length of heredoc, its position and what follows). This would then result in error messages pointing to the wrong place in the source code as well as wrongly computing if a `[` is an array start or not by checking if the preceding char is a space since it would end up looking at some undefined position. This was made worse (or the same) because the lexer would use the returned char_offset as an offset in its subspace string instead of in the total space. --- lib/puppet/pops/parser/heredoc_support.rb | 3 +-- lib/puppet/pops/parser/lexer2.rb | 2 +- spec/unit/pops/parser/parse_heredoc_spec.rb | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/lib/puppet/pops/parser/heredoc_support.rb b/lib/puppet/pops/parser/heredoc_support.rb index d0bd1073edd..53bd2b0bc68 100644 --- a/lib/puppet/pops/parser/heredoc_support.rb +++ b/lib/puppet/pops/parser/heredoc_support.rb @@ -98,8 +98,7 @@ def heredoc # Use a new lexer instance configured with a sub-locator to enable correct positioning sublexer = self.class.new() - locator = Locator::SubLocator.sub_locator(str, - locator.file, heredoc_line, heredoc_offset, leading.length()) + locator = Locator::SubLocator.new(locator, heredoc_line, heredoc_offset, leading.length()) # Emit a token that provides the grammar with location information about the lines on which the heredoc # content is based. diff --git a/lib/puppet/pops/parser/lexer2.rb b/lib/puppet/pops/parser/lexer2.rb index d095f69a956..c31bfd6aef8 100644 --- a/lib/puppet/pops/parser/lexer2.rb +++ b/lib/puppet/pops/parser/lexer2.rb @@ -189,7 +189,7 @@ def initialize() ',' => lambda { emit(TOKEN_COMMA, @scanner.pos) }, '[' => lambda do before = @scanner.pos - if (before == 0 || @scanner.string[locator.char_offset(before)-1,1] =~ /[[:blank:]\r\n]+/) + if (before == 0 || locator.string[locator.char_offset(before)-1,1] =~ /[[:blank:]\r\n]+/) emit(TOKEN_LISTSTART, before) else emit(TOKEN_LBRACK, before) diff --git a/spec/unit/pops/parser/parse_heredoc_spec.rb b/spec/unit/pops/parser/parse_heredoc_spec.rb index 5a7b60aa50d..3b7006517f4 100644 --- a/spec/unit/pops/parser/parse_heredoc_spec.rb +++ b/spec/unit/pops/parser/parse_heredoc_spec.rb @@ -140,6 +140,22 @@ ].join("\n")) end + it "parses interpolated [] expression by looking at the correct preceding char for space" do + # NOTE: Important not to use the left margin feature here + src = <<-CODE +$xxxxxxx = @("END") +${facts['os']['family']} +XXXXXXX XXX +END +CODE + expect(dump(parse(src))).to eq([ + "(= $xxxxxxx (@()", + " (sublocated (cat (str (slice (slice $facts 'os') 'family')) '", + "XXXXXXX XXX", + "'))", + "))"].join("\n")) + end + it 'parses multiple heredocs on the same line' do src = <<-CODE notice({ @(foo) => @(bar) })