From 6c90d469cff917d2b2acbc7eec523f9a71f32fd3 Mon Sep 17 00:00:00 2001 From: Tom Johnson Date: Fri, 8 Feb 2019 12:03:56 -0800 Subject: [PATCH] Escape all instances of `"` in `"""` delimited literals Fixes two bugs associated with `"""` delimited literals. First: such literals cannot end with a `"` (see `STRING_LITERAL_LONG_QUOTE` at https://www.w3.org/TR/turtle/#sec-grammar-grammar). Second: they cannot contain a sequence of three `"`. The prior `.gsub('"""', '\"""')` approach addresses this for cases where `"` appears in multiples of three, but fails for other cases (e.g. `""""`). Both bugs are fixed by escaping all `"` characters. An alternative addressing only the second bug might be: `.gsub('"""', '\""\"')`, which would ensure three quotes aren't ever left in a row. Closes #16. --- lib/rdf/turtle/writer.rb | 3 ++- spec/writer_spec.rb | 33 ++++++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/lib/rdf/turtle/writer.rb b/lib/rdf/turtle/writer.rb index 5e5a937..17c3c96 100644 --- a/lib/rdf/turtle/writer.rb +++ b/lib/rdf/turtle/writer.rb @@ -447,7 +447,8 @@ def reset # @return [String] def quoted(string) if string.to_s.match(/[\t\n\r]/) - string = string.gsub('\\', '\\\\\\\\').gsub('"""', '\\"""') + string = string.gsub('\\', '\\\\\\\\').gsub('"', '\\"') + %("""#{string}""") else "\"#{escaped(string)}\"" diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index 74cf481..b09fae6 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -147,7 +147,7 @@ end end end - + describe "lists" do { "bare list": { @@ -301,7 +301,7 @@ describe "literals" do describe "plain" do { - "embedded \"\"\"": { + "\"\"\" delimited": { input: %( """testing string parsing in Turtle.\n""" .), regexp: [/testing string parsing in Turtle.\n/] }, @@ -318,20 +318,35 @@ regexp: [/string with \\\\ escaped quote mark/], prefixes: {nil => ""} }, + "embedded \"\"\" multi-line": { + input: %(:a :b """string with \\""" escaped triple-quote marks\n""" .), + regexp: [/string with \\"\\"\\" escaped triple-quote marks/], + prefixes: {nil => ""} + }, + "embedded \"\"\"\"\" multi-line": { + input: %(:a :b """string with many \\"""\\"" escaped quote marks\n""" .), + regexp: [/string with many \\"\\"\\"\\"\\" escaped quote marks/], + prefixes: {nil => ""} + }, + "ending \" multi-line": { + input: %(:a :b """multi-line \nstring with ending \\"quote marks\\\"""" .), + regexp: [/multi-line \nstring with ending \\"quote marks\\"/], + prefixes: {nil => ""} + }, }.each do |name, params| it name do serialize(params[:input], params[:regexp], params) end end end - + describe "with language" do it "specifies language for literal with language" do ttl = %q( "string"@en .) serialize(ttl, [%r("string"@en)]) end end - + describe "xsd:anyURI" do it "uses xsd namespace for datatype" do ttl = %q(@prefix xsd: . "http://foo/"^^xsd:anyURI .) @@ -341,7 +356,7 @@ ]) end end - + describe "xsd:boolean" do [ [%q("true"^^xsd:boolean), /true ./], @@ -397,7 +412,7 @@ end end end - + describe "xsd:integer" do [ [%q("1"^^xsd:integer), /1 ./], @@ -520,7 +535,7 @@ end end end - + describe "xsd:double" do [ [%q("1.0e1"^^xsd:double), /1.0e1 ./], @@ -639,7 +654,7 @@ def serialize(ntstr, regexps = [], base_uri: nil, **options) logger.info "match: #{re.inspect}" expect(result).to match_re(re, about: base_uri, logger: logger, input: ntstr), logger.to_s end - + result end -end \ No newline at end of file +end