Skip to content

Commit

Permalink
Regular expression update, parsing to UCHARS.
Browse files: browse the repository at this point in the history
  • Loading branch information
gkellogg committed Mar 16, 2017
1 parent 0a8ecd3 commit 19e42d3
Show file tree
Hide file tree
Showing 13 changed files with 549 additions and 924 deletions.
2 changes: 1 addition & 1 deletion Rakefile
Expand Up @@ -65,7 +65,7 @@ task context: "lib/shex/shex_context.rb"
# Rake file task that (re)generates lib/shex/shex_context.rb: it parses the
# ShEx JSON-LD context identified by URL and writes the result of `to_rb`
# into that file.
file "lib/shex/shex_context.rb" do
# json/ld is required inside the task body rather than at the top of the file.
require 'json/ld'
File.open("lib/shex/shex_context.rb", "w") do |f|
# NOTE(review): `c` is assigned twice in a row. This looks like the removed and
# added lines of a diff rendered together; as written, both URLs are parsed but
# only the second (shex.io) context is the one written out below.
c = JSON::LD::Context.new().parse("https://shexspec.github.io/context.jsonld")
c = JSON::LD::Context.new().parse("http://shex.io/context.jsonld")
f.write c.to_rb
end
end
5 changes: 2 additions & 3 deletions etc/shex.ebnf
Expand Up @@ -129,8 +129,7 @@
[26] nonLiteralKind ::= "IRI" | "BNODE" | "NONLITERAL"
[27] xsFacet ::= stringFacet | numericFacet
[28] stringFacet ::= stringLength INTEGER
| "PATTERN" string
| '~' PATTERN # shortcut for "PATTERN"
| REGEXP
[29] stringLength ::= "LENGTH" | "MINLENGTH" | "MAXLENGTH"
[30] numericFacet ::= numericRange numericLiteral
| numericLength INTEGER
Expand Down Expand Up @@ -213,7 +212,7 @@
[84] STRING_LITERAL2 ::= '"' ([^#x22#x5C#xA#xD] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */
[85] STRING_LITERAL_LONG1 ::= "'''" (("'" | "''")? ([^\'\\] | ECHAR | UCHAR))* "'''"
[86] STRING_LITERAL_LONG2 ::= '"""' (('"' | '""')? ([^\"\\] | ECHAR | UCHAR))* '"""'
[XX] PATTERN ::= "/" ([^#x2f#x5C#xA#xD] | '\\' [tbnrf\\/] | UCHAR)+ "/" [smixq]*
[XX] REGEXP ::= '/' ([^/\\\n\r] | '\\' [nrt\\|.?*+(){}$-\[\]^/] | UCHAR)+ '/' [smix]*
[87] UCHAR ::= '\\u' HEX HEX HEX HEX
| '\\U' HEX HEX HEX HEX HEX HEX HEX HEX
[88] ECHAR ::= '\\' [tbnrf\\\"\']
Expand Down
9 changes: 4 additions & 5 deletions etc/shex.html
Expand Up @@ -281,8 +281,7 @@
<td>::=</td>
<td>
<a href="#grammar-production-stringLength">stringLength</a> <a href="#grammar-production-INTEGER">INTEGER</a>
<code>|</code> "<code class="grammar-literal">PATTERN</code>" <a href="#grammar-production-string">string</a>
<code>|</code> "<code class="grammar-literal">~</code>" <a href="#grammar-production-PATTERN">PATTERN</a>
<code>|</code> <a href="#grammar-production-REGEXP">REGEXP</a>
</td>
</tr>
<tr id='grammar-production-stringLength'>
Expand Down Expand Up @@ -803,13 +802,13 @@
<code>(</code> ('<code class="grammar-literal">"</code>' <code>|</code> '<code class="grammar-literal">""</code>')<code>?</code> <code>[</code> <code class="grammar-literal">^"\]</code><code class="grammar-char-escape">#x20</code><code class="grammar-literal">|</code><code class="grammar-char-escape">#x20</code><code class="grammar-literal">ECHAR</code><code class="grammar-char-escape">#x20</code><code class="grammar-literal">|</code><code class="grammar-char-escape">#x20</code><code class="grammar-literal">UCHAR))*</code><code class="grammar-char-escape">#x20</code><code class="grammar-literal">'"""'</code><code>]</code> <code>)</code>
</td>
</tr>
<tr id='grammar-production-PATTERN'>
<tr id='grammar-production-REGEXP'>
<td>[XX]</td>
<td><code>PATTERN</code></td>
<td><code>REGEXP</code></td>
<td>::=</td>
<td>
"<code class="grammar-literal">/</code>"
<code>(</code> <code>[</code> <code class="grammar-literal">^</code><code class="grammar-char-escape">#x2f</code><code class="grammar-char-escape">#x5C</code><code class="grammar-char-escape">#xA</code><code class="grammar-char-escape">#xD</code><code>]</code> <code>|</code> "<code class="grammar-literal">\</code>" <code>[</code> <code class="grammar-literal">tbnrf\/</code><code>]</code> <code>|</code> <a href="#grammar-production-UCHAR">UCHAR</a><code>)</code>
(<code>[</code> <code class="grammar-literal">^</code><code class="grammar-char-escape">#x2f</code><code>]</code> <code>|</code> "<code class="grammar-literal">\</code>" <code>[</code> <code class="grammar-literal">.?*+{}()|^$</code><code class="grammar-char-escape">#x5B</code><code class="grammar-char-escape">#x5D</code><code>]</code> <code>|</code> <a href="#grammar-production-UCHAR">UCHAR</a>)<code>+</code>
"<code class="grammar-literal">/</code>"
(<code>[</code> <code class="grammar-literal">smixq</code><code>]</code> )<code>*</code>
</td>
Expand Down
513 changes: 237 additions & 276 deletions etc/shex.ll1.sxp

Large diffs are not rendered by default.

15 changes: 7 additions & 8 deletions etc/shex.sxp
@@ -1,10 +1,11 @@
(
(rule _empty "0" (seq))
(pass (alt (plus (range " \t\r\n")) (seq "#" (star (range "^\r\n")))))
(terminal PATTERN "XX"
(terminal REGEXP "XX"
(seq "/"
(alt (range "^#x2f#x5C#xA#xD") (seq "\\" (range "tbnrf\\/")) UCHAR) "/"
(plus (alt (range "^#x2f") (seq "\\" (range ".?*+{}()|^$#x5B#x5D")) UCHAR))
"/"
(star (range "smixq"))) )
(rule _empty "0" (seq))
(pass (alt (plus (range " \t\r\n")) (seq "#" (star (range "^\r\n")))))
(rule shexDoc "1" (seq _shexDoc_1 _shexDoc_2))
(rule _shexDoc_1 "1.1" (cleanup star) (alt _empty _shexDoc_3))
(rule _shexDoc_2 "1.2" (cleanup opt) (alt _empty _shexDoc_4))
Expand Down Expand Up @@ -36,14 +37,14 @@
(rule _shapeExpression_5 "10.5" (alt shapeAtomNoRef shapeRef))
(rule _shapeExpression_6 "10.6" (cleanup opt) (alt _empty shapeOr))
(rule inlineShapeExpression "11" (seq inlineShapeOr))
(rule shapeOr "12" (alt shapeOrA _shapeOr_1))
(rule shapeOrA "12a" (cleanup plus) (seq _shapeOrA_1 _shapeOrA_2))
(rule _shapeOrA_2 "12a.2" (cleanup star) (alt _empty _shapeOrA_3))
(rule _shapeOrA_3 "12a.3" (cleanup merge) (seq _shapeOrA_1 _shapeOrA_2))
(rule _shapeOrA_1 "12a.1" (seq "OR" shapeAnd))
(rule shapeOrB "12b" (cleanup plus) (seq _shapeOrB_1 _shapeOrB_2))
(rule _shapeOrB_2 "12b.2" (cleanup star) (alt _empty _shapeOrB_3))
(rule _shapeOrB_3 "12b.3" (cleanup merge) (seq _shapeOrB_1 _shapeOrB_2))
(rule shapeOr "12" (alt shapeOrA _shapeOr_1))
(rule _shapeOrB_1 "12b.1" (seq "AND" shapeNot))
(rule _shapeOr_1 "12.1" (seq shapeOrB _shapeOr_2))
(rule _shapeOr_2 "12.2" (cleanup opt) (alt _empty shapeOrA))
Expand Down Expand Up @@ -132,10 +133,8 @@
(seq stringFacet _nonLitNodeConstraint_5))
(rule nonLiteralKind "26" (alt "IRI" "BNODE" "NONLITERAL"))
(rule xsFacet "27" (alt stringFacet numericFacet))
(rule stringFacet "28" (alt _stringFacet_1 _stringFacet_2 _stringFacet_3))
(rule stringFacet "28" (alt _stringFacet_1 REGEXP))
(rule _stringFacet_1 "28.1" (seq stringLength INTEGER))
(rule _stringFacet_2 "28.2" (seq "PATTERN" string))
(rule _stringFacet_3 "28.3" (seq "~" PATTERN))
(rule stringLength "29" (alt "LENGTH" "MINLENGTH" "MAXLENGTH"))
(rule numericFacet "30" (alt _numericFacet_1 _numericFacet_2))
(rule _numericFacet_1 "30.1" (seq numericRange numericLiteral))
Expand Down
2 changes: 1 addition & 1 deletion lib/shex.rb
Expand Up @@ -11,7 +11,7 @@ module ShEx
autoload :VERSION, 'shex/version'

# Location of the ShEx JSON-LD context
CONTEXT = "https://shexspec.github.io/context.jsonld"
CONTEXT = "http://shex.io/context.jsonld"

# Extensions defined in this gem
EXTENSIONS = %w{test}
Expand Down
20 changes: 12 additions & 8 deletions lib/shex/algebra/node_constraint.rb
@@ -1,3 +1,4 @@
# -*- encoding: utf-8 -*-
module ShEx::Algebra
##
class NodeConstraint < Operator
Expand Down Expand Up @@ -74,29 +75,32 @@ def satisfies_string_facet?(value, depth: 0)
minlength = op_fetch(:minlength)
maxlength = op_fetch(:maxlength)
pat = (operands.detect {|op| op.is_a?(Array) && op[0] == :pattern} || [])
pattern = pat[1]
flags = if pat[2]
f = 0
f |= Regexp::EXTENDED if pat[2].include?("x")
f |= Regexp::IGNORECASE if pat[2].include?("i")
f |= Regexp::MULTILINE if pat[2].include?("m")
f
pattern = if pat[1]
pat[1].to_s.gsub(ShEx::Terminals::UCHAR) do
[($1 || $2).hex].pack('U*')
end
end

flags = 0
flags |= Regexp::EXTENDED if pat[2].to_s.include?("x")
flags |= Regexp::IGNORECASE if pat[2].to_s.include?("i")
flags |= Regexp::MULTILINE if pat[2].to_s.include?("m")

return true if (length || minlength || maxlength || pattern).nil?

v_s = case value
when RDF::Node then value.id
else value.to_s
end

not_satisfied "Node #{v_s.inspect} length not #{length}", depth: depth if
length && v_s.length != length.to_i
not_satisfied"Node #{v_s.inspect} length < #{minlength}", depth: depth if
minlength && v_s.length < minlength.to_i
not_satisfied "Node #{v_s.inspect} length > #{maxlength}", depth: depth if
maxlength && v_s.length > maxlength.to_i
not_satisfied "Node #{v_s.inspect} does not match #{pattern}", depth: depth if
pat && !Regexp.new(pattern, flags).match(v_s)
pattern && !Regexp.new(pattern, flags).match(v_s)
status "right string facet: #{value}", depth: depth
true
end
Expand Down

0 comments on commit 19e42d3

Please sign in to comment.