/
result.ex
213 lines (171 loc) · 6.56 KB
/
result.ex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
defmodule Meeseeks.Result do
  @moduledoc """
  Results are the product of running selections on a document, and package
  together a node id and the `Meeseeks.Document` for which that id is
  valid.

  Results are generally used in one of two ways: either data, such as an
  element's tag, is extracted from a result, or further selections are run
  using the result as a source.

  When a result is used as a source for further selection, the original
  document the result came from is used for context, meaning that questions
  about the result's ancestors may be asked, but also that queries involving
  ancestors need to account for the whole document, not just the contents of
  the result.

  ## Examples

      iex> import Meeseeks.CSS
      iex> document = Meeseeks.parse("<div><ul><li>1</li><li>2</li></ul></div>")
      #Meeseeks.Document<{...}>
      iex> ul = Meeseeks.one(document, css("ul"))
      #Meeseeks.Result<{ <ul><li>1</li><li>2</li></ul> }>
      iex> Meeseeks.tag(ul)
      "ul"
      iex> Meeseeks.all(ul, css("li")) |> List.last()
      #Meeseeks.Result<{ <li>2</li> }>
  """

  alias Meeseeks.{Document, Result, TupleTree}

  @enforce_keys [:document, :id]
  defstruct document: nil, id: nil

  @type t :: %Result{document: Document.t(), id: Document.node_id()}

  @doc """
  Returns the value for attribute in result, or nil if there isn't one.
  """
  @spec attr(Result.t(), String.t()) :: String.t() | nil
  def attr(result, attribute)

  def attr(%Result{id: id, document: document}, attribute) do
    document
    |> Document.get_node(id)
    |> Document.Node.attr(attribute)
  end

  @doc """
  Returns the result's attributes list, which may be empty, or nil if
  result represents a node without attributes.
  """
  @spec attrs(Result.t()) :: [{String.t(), String.t()}] | nil
  def attrs(result)

  def attrs(%Result{id: id, document: document}) do
    document
    |> Document.get_node(id)
    |> Document.Node.attrs()
  end

  @doc """
  Returns the combined data of result or result's children, which may be an
  empty string.

  Once the data has been combined the whitespace is compacted by replacing
  all instances of more than one whitespace character with a single space
  and then trimmed.

  Data is the content of `<script>` or `<style>` tags, or the content of
  comments starting with "[CDATA[" and ending with "]]". The latter behavior
  is to support the extraction of CDATA from HTML, since HTML5 parsers parse
  CDATA as comments.

  ## Options

    * `:collapse_whitespace` - Boolean determining whether or not to replace
      blocks of whitespace with a single space character. Defaults to `true`.
    * `:trim` - Boolean determining whether or not to trim the resulting
      text. Defaults to `true`.
  """
  @spec data(Result.t(), Keyword.t()) :: String.t()
  def data(result, opts \\ [])

  def data(%Result{id: id, document: document}, opts) do
    document
    |> Document.get_node(id)
    |> Document.Node.data(document, opts)
  end

  @doc """
  Returns a map of result's data attributes, or nil if result represents a
  node without attributes.

  Behaves like HTMLElement.dataset; only valid data attributes are included,
  and attribute names have "data-" removed and are converted to camelCase.
  For example, the attribute `data-user-id` is returned under the key
  `"userId"`.

  See: https://developer.mozilla.org/en-US/docs/Web/API/HTMLElement/dataset
  """
  @spec dataset(Result.t()) :: %{optional(String.t()) => String.t()} | nil
  def dataset(result) do
    # An empty attribute list needs no special case: the helper maps `[]`
    # to `%{}` on its own.
    case attrs(result) do
      nil -> nil
      attributes -> attributes_to_dataset(attributes)
    end
  end

  # Builds the dataset map from an attribute list, keeping only attributes
  # whose whole name is "data-" followed by one or more of: lowercase ASCII
  # letters, digits, "-", ".", ":" or "_". When two attributes produce the
  # same dataset key, the later one in the list wins.
  defp attributes_to_dataset(attributes) do
    attributes
    |> Enum.flat_map(fn {attribute, value} ->
      # `\A`/`\z` anchor to the absolute start and end of the name. `$` would
      # also match just before a trailing newline, letting a malformed name
      # such as "data-foo\n" be treated as a valid data attribute.
      case Regex.run(~r/\Adata-([a-z0-9\-\.\:\_]+)\z/, attribute) do
        [_, raw_name] -> [{dataset_name(raw_name), value}]
        _ -> []
      end
    end)
    |> Map.new()
  end

  # Converts the part of a data attribute name after "data-" to camelCase by
  # replacing each "-" that is followed by a lowercase ASCII letter with that
  # letter uppercased. Hyphens followed by anything else are left in place.
  defp dataset_name(raw_name) do
    Regex.replace(~r/\-([a-z])/, raw_name, fn _match, letter ->
      String.upcase(letter)
    end)
  end

  @doc """
  Returns the combined HTML of result and its descendants.
  """
  @spec html(Result.t()) :: String.t()
  def html(result)

  def html(%Result{id: id, document: document}) do
    document
    |> Document.get_node(id)
    |> Document.Node.html(document)
  end

  @doc """
  Returns the combined text of result or result's children, which may be an
  empty string.

  Once the text has been combined the whitespace is compacted by replacing
  all instances of more than one whitespace character with a single space
  and then trimmed.

  ## Options

    * `:collapse_whitespace` - Boolean determining whether or not to replace
      blocks of whitespace with a single space character. Defaults to `true`.
    * `:trim` - Boolean determining whether or not to trim the resulting
      text. Defaults to `true`.
  """
  @spec own_text(Result.t(), Keyword.t()) :: String.t()
  def own_text(result, opts \\ [])

  def own_text(%Result{id: id, document: document}, opts) do
    document
    |> Document.get_node(id)
    |> Document.Node.own_text(document, opts)
  end

  @doc """
  Returns result's tag, or nil if result represents a node without a tag.
  """
  @spec tag(Result.t()) :: String.t() | nil
  def tag(result)

  def tag(%Result{id: id, document: document}) do
    document
    |> Document.get_node(id)
    |> Document.Node.tag()
  end

  @doc """
  Returns the combined text of result or result's descendants, which may be
  an empty string.

  Once the text has been combined the whitespace is compacted by replacing
  all instances of more than one whitespace character with a single space
  and then trimmed.

  ## Options

    * `:collapse_whitespace` - Boolean determining whether or not to replace
      blocks of whitespace with a single space character. Defaults to `true`.
    * `:trim` - Boolean determining whether or not to trim the resulting
      text. Defaults to `true`.
  """
  @spec text(Result.t(), Keyword.t()) :: String.t()
  def text(result, opts \\ [])

  def text(%Result{id: id, document: document}, opts) do
    document
    |> Document.get_node(id)
    |> Document.Node.text(document, opts)
  end

  @doc """
  Returns a `Meeseeks.TupleTree` of result and its descendants.
  """
  @spec tree(Result.t()) :: TupleTree.node_t()
  def tree(result)

  def tree(%Result{id: id, document: document}) do
    document
    |> Document.get_node(id)
    |> Document.Node.tree(document)
  end
end
defimpl Inspect, for: Meeseeks.Result do
  @moduledoc false

  alias Meeseeks.Result

  # Renders a result as its HTML, with every run of whitespace replaced by a
  # single space, wrapped in "#Meeseeks.Result<{ ... }>". The inspect options
  # are not consulted.
  def inspect(result, _opts) do
    compact_html =
      result
      |> Result.html()
      |> String.replace(~r/[\s]+/, " ")

    "#Meeseeks.Result<{ #{compact_html} }>"
  end
end