/
decoder.ex
228 lines (181 loc) · 7.69 KB
/
decoder.ex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
defmodule RDF.Turtle.Decoder do
@moduledoc """
A decoder for Turtle serializations to `RDF.Graph`s.
As for all decoders of `RDF.Serialization.Format`s, you normally won't use these
functions directly, but via one of the `read_` functions on the `RDF.Turtle` format
module or the generic `RDF.Serialization` module.
## Options
- `:base`: allows to specify the base URI to be used against relative URIs
when no base URI is defined with a `@base` directive within the document
"""
use RDF.Serialization.Decoder
import RDF.Serialization.ParseHelper, only: [error_description: 1]
alias RDF.{Graph, IRI}
defmodule State do
@moduledoc !"The internal state of the `RDF.Turtle.Encoder`."
defstruct base_iri: nil, namespaces: %{}, bnode_counter: 0
def add_namespace(%State{namespaces: namespaces} = state, ns, iri) do
%State{state | namespaces: Map.put(namespaces, ns, iri)}
end
def ns(%State{namespaces: namespaces}, prefix) do
namespaces[prefix]
end
def next_bnode(%State{bnode_counter: bnode_counter} = state) do
{RDF.bnode("b#{bnode_counter}"), %State{state | bnode_counter: bnode_counter + 1}}
end
end
@impl RDF.Serialization.Decoder
@spec decode(String.t(), keyword) :: {:ok, Graph.t()} | {:error, any}
def decode(content, opts \\ []) do
base_iri =
Keyword.get_lazy(
opts,
:base_iri,
fn -> Keyword.get_lazy(opts, :base, fn -> RDF.default_base_iri() end) end
)
with {:ok, tokens, _} <- tokenize(content),
{:ok, ast} <- parse(tokens) do
build_graph(ast, base_iri && RDF.iri(base_iri))
else
{:error, {error_line, :turtle_lexer, error_descriptor}, _error_line_again} ->
{:error,
"Turtle scanner error on line #{error_line}: #{error_description(error_descriptor)}"}
{:error, {error_line, :turtle_parser, error_descriptor}} ->
{:error,
"Turtle parser error on line #{error_line}: #{error_description(error_descriptor)}"}
end
end
def tokenize(content), do: content |> to_charlist |> :turtle_lexer.string()
def parse([]), do: {:ok, []}
def parse(tokens), do: tokens |> :turtle_parser.parse()
defp build_graph(ast, base_iri) do
{graph, %State{namespaces: namespaces, base_iri: base_iri}} =
Enum.reduce(ast, {RDF.Graph.new(), %State{base_iri: base_iri}}, fn
{:triples, triples_ast}, {graph, state} ->
{statements, state} = triples(triples_ast, state)
{RDF.Graph.add(graph, statements), state}
{:directive, directive_ast}, {graph, state} ->
{graph, directive(directive_ast, state)}
end)
{:ok,
if Enum.empty?(namespaces) do
graph
else
RDF.Graph.add_prefixes(graph, namespaces)
end
|> RDF.Graph.set_base_iri(base_iri)}
rescue
error -> {:error, Exception.message(error)}
end
defp directive({:prefix, {:prefix_ns, _, ns}, iri}, state) do
absolute_iri =
if IRI.absolute?(iri) do
iri
else
iri |> IRI.absolute(state.base_iri) |> to_string()
end
State.add_namespace(state, ns, absolute_iri)
end
defp directive({:base, iri}, %State{base_iri: base_iri} = state) do
cond do
IRI.absolute?(iri) -> %State{state | base_iri: RDF.iri(iri)}
not is_nil(base_iri) -> %State{state | base_iri: IRI.absolute(iri, base_iri)}
true -> raise "Could not resolve relative IRI '#{iri}', no base iri provided"
end
end
defp triples({:blankNodePropertyList, _} = ast, state) do
{_, statements, state} = resolve_node(ast, [], state)
{statements, state}
end
defp triples({subject, predications}, state) do
{subject, statements, state} = resolve_node(subject, [], state)
predications(subject, predications, statements, state)
end
defp predications(subject, predications, statements, state) do
Enum.reduce(predications, {statements, state}, fn
{predicate, objects}, {statements, state} ->
{predicate, statements, state} = resolve_node(predicate, statements, state)
Enum.reduce(objects, {statements, state}, fn
{:annotation, annotation}, {[last_statement | _] = statements, state} ->
predications(last_statement, annotation, statements, state)
object, {statements, state} ->
{object, statements, state} = resolve_node(object, statements, state)
{[{subject, predicate, object} | statements], state}
end)
end)
end
defp resolve_node({:prefix_ln, line_number, {prefix, name}}, statements, state) do
if ns = State.ns(state, prefix) do
{RDF.iri(ns <> local_name_unescape(name)), statements, state}
else
raise "line #{line_number}: undefined prefix #{inspect(prefix)}"
end
end
defp resolve_node({:prefix_ns, line_number, prefix}, statements, state) do
if ns = State.ns(state, prefix) do
{RDF.iri(ns), statements, state}
else
raise "line #{line_number}: undefined prefix #{inspect(prefix)}"
end
end
defp resolve_node({:relative_iri, relative_iri}, _, %State{base_iri: nil}) do
raise "Could not resolve relative IRI '#{relative_iri}', no base iri provided"
end
defp resolve_node({:relative_iri, relative_iri}, statements, state) do
{IRI.absolute(relative_iri, state.base_iri), statements, state}
end
defp resolve_node({:anon}, statements, state) do
{node, state} = State.next_bnode(state)
{node, statements, state}
end
defp resolve_node({:blankNodePropertyList, property_list}, statements, state) do
{subject, state} = State.next_bnode(state)
{new_statements, state} = triples({subject, property_list}, state)
{subject, statements ++ new_statements, state}
end
defp resolve_node(
{{:string_literal_quote, _line, value}, {:datatype, datatype}},
statements,
state
) do
{datatype, statements, state} = resolve_node(datatype, statements, state)
{RDF.literal(value, datatype: datatype), statements, state}
end
defp resolve_node({:collection, []}, statements, state) do
{RDF.nil(), statements, state}
end
defp resolve_node({:collection, elements}, statements, state) do
{first_list_node, state} = State.next_bnode(state)
[first_element | rest_elements] = elements
{first_element_node, statements, state} = resolve_node(first_element, statements, state)
first_statement = [{first_list_node, RDF.first(), first_element_node}]
{last_list_node, statements, state} =
Enum.reduce(
rest_elements,
{first_list_node, statements ++ first_statement, state},
fn element, {list_node, statements, state} ->
{element_node, statements, state} = resolve_node(element, statements, state)
{next_list_node, state} = State.next_bnode(state)
{next_list_node,
statements ++
[
{list_node, RDF.rest(), next_list_node},
{next_list_node, RDF.first(), element_node}
], state}
end
)
{first_list_node, statements ++ [{last_list_node, RDF.rest(), RDF.nil()}], state}
end
defp resolve_node({:quoted_triple, s_node, p_node, o_node}, statements, state) do
{subject, statements, state} = resolve_node(s_node, statements, state)
{predicate, statements, state} = resolve_node(p_node, statements, state)
{object, statements, state} = resolve_node(o_node, statements, state)
{{subject, predicate, object}, statements, state}
end
defp resolve_node(node, statements, state), do: {node, statements, state}
defp local_name_unescape(string),
do: Macro.unescape_string(string, &local_name_unescape_map(&1))
@reserved_characters ~c[~.-!$&'()*+,;=/?#@%_]
defp local_name_unescape_map(e) when e in @reserved_characters, do: e
defp local_name_unescape_map(_), do: false
end