-
Notifications
You must be signed in to change notification settings - Fork 0
/
Html.php
407 lines (369 loc) · 13.6 KB
/
Html.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
<?php
/**
* @link https://github.com/ryanve/phat
* @license MIT
*/
namespace phat;
use \Closure;
use \DOMDocument;
/**
 * Static helpers for generating, escaping and parsing HTML.
 *
 * Calls to undefined static methods are routed through __callStatic():
 * registered mixins are invoked first, and any method name suffixed with
 * "_e" echoes the result of the un-suffixed method instead of returning it.
 */
class Html {
    /**
     * Callables reachable through __callStatic(), keyed by method name.
     * Ships with two aliases: attrs => atts and parseAttrs => parseAtts.
     * @var array
     */
    protected static $mixins = array(
        'attrs' => array(__CLASS__, 'atts'),
        'parseAttrs' => array(__CLASS__, 'parseAtts'),
    );

    /**
     * Register one mixin, or a map of mixins.
     * @param string|array $name method name, or a map of names to callables
     * @param callable|null $fn callable for $name (falsy values are ignored)
     */
    public static function mixin($name, $fn = null) {
        if (is_scalar($name)) {
            $fn and static::$mixins[$name] = $fn;
            return;
        }
        foreach ($name as $k => $v) self::mixin($k, $v);
    }

    /**
     * Get the fully-qualified name of a method
     * @param string $name
     * @return string "CalledClass::name"
     */
    public static function method($name) {
        return get_called_class() . "::$name";
    }

    /**
     * Dispatch undefined static calls: mixins first, then "_e" echoers.
     * Triggers a user-level error when neither applies.
     * @param string $name
     * @param array $params
     * @return mixed the mixin's return value, otherwise null
     */
    public static function __callStatic($name, $params) {
        if (isset(static::$mixins[$name]))
            return call_user_func_array(static::$mixins[$name], $params);
        if ('_e' === substr($name, -2))
            echo call_user_func_array(static::method(substr($name, 0, -2)), $params);
        else trigger_error(get_called_class() . "::$name is not callable.");
    }

    /**
     * Invoke anonymous funcs.
     * @param mixed $value
     * @return mixed the Closure's return value, or $value itself when it is not a Closure
     */
    protected static function result($value) {
        return $value instanceof Closure ? $value() : $value;
    }

    /**
     * Generate an HTML tag.
     * @param Closure|string $tagname
     * @param Closure|array|string|null $atts
     * @param Closure|string|null $inner when null only the opening tag is returned
     * @return string empty string when the sanitized tag name is empty
     */
    public static function tag($tagname, $atts = null, $inner = null) {
        $tagname = static::tagname(static::result($tagname));
        if (!$tagname) return '';
        $atts and $atts = static::atts($atts);
        $tag = $atts ? "<$tagname $atts>" : "<$tagname>";
        $inner and $inner = static::result($inner);
        return null === $inner ? $tag : $tag . $inner . "</$tagname>";
    }

    /**
     * Escape a string for use in HTML or HTML attributes.
     * @param string $value
     * @param int $flag quote-handling flag passed to htmlentities()
     * @return string
     */
    public static function esc($value, $flag = ENT_QUOTES) {
        # The final `false` prevents double-encoding existing entities.
        return ($value = (string) $value) ? htmlentities($value, $flag, null, false) : $value;
    }

    /**
     * @param int $timestamp unix timestamp (defaults to now)
     * @return string date formatted as DATE_W3C
     */
    public static function datetime($timestamp = null) {
        return null === $timestamp ? date(DATE_W3C) : date(DATE_W3C, $timestamp);
    }

    /**
     * Deep implode
     * @param array|mixed $tokens scalars are trimmed; anything else is flattened recursively
     * @param string $glue defaults to a space
     * @return string
     */
    public static function implode($tokens, $glue = ' ') {
        if (is_scalar($tokens)) return trim($tokens);
        if (!$tokens) return '';
        $ret = array();
        foreach ($tokens as $v) $ret[] = self::implode($v, $glue); # flatten
        return implode($glue, $ret);
    }

    /**
     * Split tokens into an array. Nested arrays are flattened first.
     * @param string|mixed $tokens
     * @param string|array $glue one or more delimiters (the first is canonical)
     * @return array
     */
    public static function explode($tokens, $glue = ' ') {
        $primary = is_array($glue) ? $glue[0] : $glue;
        if (is_string($tokens)) $tokens = trim($tokens);
        elseif (!$tokens || is_scalar($tokens)) return (array) $tokens;
        # Flatten using the canonical delimiter. Argument order is (tokens, glue).
        else $tokens = self::implode($tokens, $primary);
        if ('' === $tokens) return array(); # Applies to first or last condition above.
        # Normalize alternate delimiters into the canonical one.
        is_array($glue) and $tokens = str_replace($glue, $primary, $tokens);
        return ctype_space($primary) ? preg_split('#\s+#', $tokens) : explode($primary, $tokens);
    }

    /**
     * Convert a value to markup: scalars are cast, anything else goes through ::dom().
     * @param mixed $html
     * @return string
     */
    public static function express($html) {
        if (!$html) return is_numeric($html) ? (string) $html : '';
        return is_scalar($html) ? (string) $html : static::dom($html)->saveHTML();
    }

    /**
     * A hash for token types (csv, etc.) Get or set $delimiter for the specified $name.
     * An existing entry is never overwritten.
     * @param string $name
     * @param string= $delimiter
     * @return string|null null when no delimiter is known for $name
     */
    protected static function delimiter($name, $delimiter = null) {
        # dev.w3.org/html5/spec-author-view/index.html#attributes-1
        # whatwg.org/specs/web-apps/current-work/multipage/microdata.html#names:-the-itemprop-attribute
        static $hash;
        $hash or $hash = array_merge(
            array_fill_keys(explode('|', 'accept|media'), ','),
            array_fill_keys(explode('|',
                'class|rel|itemprop|accesskey|dropzone|headers|sizes|sandbox|accept-charset'
            ), ' ')
        );
        $name = mb_strtolower($name);
        if (!isset($hash[$name]) && is_string($delimiter)) $hash[$name] = $delimiter;
        # Unknown names yield null rather than an undefined-index notice.
        return isset($hash[$name]) ? $hash[$name] : null;
    }

    /**
     * Encode a value for use inside a single-quoted attribute.
     * @param mixed $value value to encode
     * @param string|null $name optional attribute name
     * @param bool $retain whether to keep null|bool values as is
     * @return string|bool|null non-strings are only returned when $retain is true
     */
    public static function encode($value, $name = null, $retain = null) {
        if (is_string($value)) {
            # Double quotes stay literal; single quotes become entities because
            # ::atts() wraps values in single quotes.
            $value and $value = static::esc($value, ENT_NOQUOTES);
            return str_replace("'", '&#39;', $value);
        }
        $retain = true === $retain;
        if (!is_scalar($value)) {
            if (!$value) {
                if (null !== $value) return ''; # empty array
                return $retain ? null : 'null';
            }
            if ($value instanceof Closure) return self::encode($value(), $name, $retain);
            # Token lists (class, rel, etc.) join on the attribute's delimiter.
            if ($name && is_string($d = static::delimiter($name))) return self::encode(self::implode($value, $d));
        }
        # Strictly booleans here (null was handled above).
        if ($retain && is_bool($value)) return $value;
        return str_replace("'", '&#39;', json_encode($value)); # bool|number|array|object
    }

    /**
     * Reverse of ::encode(): token lists become arrays, JSON is decoded,
     * and anything else has its entities decoded.
     * @param string $value
     * @param string|null $name
     * @return mixed
     */
    public static function decode($value, $name = null) {
        if (!$value || !is_string($value)) return $value;
        if ($name && is_string($d = static::delimiter($name)))
            return self::explode(html_entity_decode($value, ENT_QUOTES), $d);
        $result = json_decode($value, true); # null if not json
        if (null !== $result || 'null' === $value) return $result;
        return html_entity_decode($value, ENT_QUOTES);
    }

    /**
     * Sanitize an HTML or XML tag name. Or read the tagname of a tag.
     * @param mixed $name
     * @return string|array
     */
    public static function tagname($name) {
        # w3.org/TR/html-markup/syntax.html#tag-name
        # w3.org/TR/REC-xml/#NT-Name
        # allow: alphanumeric|underscore|colon|period|hyphen
        return preg_replace('#\s*<*([\w:.-]*).*#', '$1', $name);
    }

    /**
     * Sanitize an HTML or XML attribute name.
     * @param mixed $name
     * @return string|array
     */
    public static function attname($name) {
        # stackoverflow.com/q/13283699/770127
        # w3.org/TR/html-markup/syntax.html#syntax-attributes
        # w3.org/TR/REC-xml/#NT-Attribute
        # php.net/manual/en/regexp.reference.unicode.php
        # should start with letter (or underscore|colon in xml) but not enforced here
        # allow: unicode letters|digits|underscore|colon|period|hyphen
        return preg_replace(array('#[=>].*#', '#[^\pL\d_:.-]*#'), '', $name);
    }

    /**
     * Replace or normalize whitespace.
     * @param string|array $text
     * @param string $replacement substituted for each run of whitespace
     * @return string|array
     */
    public static function respace($text, $replacement = ' ') {
        return preg_replace('#\s+#', $replacement, $text);
    }

    /**
     * Replace or normalize linebreaks.
     * @param string|array $text
     * @param string $replacement substituted for each run of blank lines
     * @return string|array
     */
    public static function rebreak($text, $replacement = "\n\n") {
        return preg_replace('#\n+\s*\n+#', $replacement, $text);
    }

    /**
     * Sanitize a slug.
     * @param string $text
     * @param string $delim delimiter substituted for whitespace
     * @return string
     */
    public static function slug($text, $delim = '-') {
        $text and $text = static::respace(strip_tags(trim($text)), $delim);
        # Strip entities and octets
        $text and $text = preg_replace('#&.+?;|%([a-fA-F0-9][a-fA-F0-9])#', '', $text);
        # Permit lowercase alphanumeric|underscore|dash
        $text and $text = mb_strtolower(preg_replace('#[^\w-]#', '', $text));
        # Normalize repeat delimiters into one
        return implode($delim, array_diff(explode($delim, (string) $text), array('')));
    }

    /**
     * Produce an attributes string. Null values are skipped. Booleans
     * convert properly to boolean atts. Other values encode via ::encode().
     * @param mixed $name associative array or ssv or stack of att names
     * @param mixed $value Value for context when $name is an attribute name.
     * @return string
     */
    public static function atts($name, $value = '') {
        # non-assoc recursion: an integer key means $value is really the name
        if (is_int($name)) {
            $name = $value;
            $value = '';
        }
        # func args
        $name and $name = static::result($name);
        # false boolean atts | null names/values
        # dev.w3.org/html5/spec/common-microsyntaxes.html#boolean-attributes
        if (false === $value || null === $value || null === $name || is_bool($name))
            return '';
        # Name may need parsing or sanitizing (purely alphabetic names need neither):
        if (is_scalar($name) && ($name = trim($name)) && !ctype_alpha($name)) {
            # parse if it looks already stringified like `title=""` or `async defer`
            if (preg_match('#[=\s]#', $name)) {
                $pairs = array();
                foreach (static::parseAtts($name) as $k => $v) {
                    # Sanitize parsed names directly (never re-parse them) so
                    # that recursion always terminates.
                    $k = static::attname((string) $k);
                    if ('' !== $k && strlen($pair = self::atts($k, $v))) $pairs[] = $pair;
                }
                return implode(' ', $pairs);
            }
            $name = static::attname($name);
        }
        # key/value map a.k.a. "array to attr"
        if (!is_scalar($name)) {
            $pairs = array();
            foreach ($name as $k => $v) strlen($pair = self::atts($k, $v)) and $pairs[] = $pair;
            return implode(' ', $pairs);
        }
        # <p contenteditable> === <p contenteditable="">
        if ('' === $value || true === $value || '' === $name) return $name;
        $value = static::encode($value, $name);
        if (true === $value || '' === $value) return $name;
        # Use single quotes for compatibility with JSON
        return (false === $value || null === $value) ? '' : "$name='$value'";
    }

    /**
     * Parse a string of attributes into an array. If the string
     * starts with a tag, then atts on the first tag are parsed.
     * Parsing stops at the first ">" outside of a quoted value, and a "/"
     * outside of a value is treated as self-closing syntax.
     * @param string|mixed $atts
     * @return array map of attribute names to raw (undecoded) values
     * @example parseAtts('src="example.jpg" alt="example"')
     * @example parseAtts('<img src="example.jpg" alt="example">')
     * @example parseAtts('<a href="example"></a>')
     * @example parseAtts('<a href="example">')
     */
    public static function parseAtts($atts) {
        if (!is_scalar($atts)) return (array) $atts;
        # Trim, then strip the tagname (if present). The strip halts at ">" so
        # that text following an attribute-less tag is not read as attributes.
        $atts = (string) preg_replace('#^<+[^\s>]*#', '', trim($atts));
        $arr = array(); # output
        $name = ''; # for the current attr being parsed
        $value = ''; # for the current attr being parsed
        $mode = 0; # whether current char is part of the name (-), value (+), or neither (0)
        $stop = false; # delimiter for the current $value being parsed
        $space = ' '; # marks an unquoted value (ended by any whitespace)
        for ($j = 0, $length = strlen($atts); $j < $length; $j++) {
            $curr = $atts[$j];
            if ($mode < 0) { # name
                if ('=' === $curr) {
                    $mode = 1;
                    $stop = false;
                } elseif ('>' === $curr) {
                    break; # the pending name is stored after the loop
                } elseif ('/' === $curr) {
                    # self-closing syntax ends the current name
                    '' === $name or $arr[$name] = '';
                    $name = '';
                    $mode = 0;
                } elseif (!ctype_space($curr)) {
                    if (ctype_space($atts[$j - 1])) { # previous char
                        '' === $name or $arr[$name] = ''; # previous name was valueless
                        $name = $curr; # initiate new
                    } else {
                        $name .= $curr;
                    }
                }
            } elseif ($mode > 0) { # value
                if (false === $stop) {
                    # value has not started yet
                    if ('>' === $curr) break; # `name=>` : empty value
                    if (!ctype_space($curr)) {
                        if ('"' === $curr || "'" === $curr) {
                            $value = '';
                            $stop = $curr;
                        } else {
                            $value = $curr;
                            $stop = $space;
                        }
                    }
                } elseif ($stop === $space) {
                    # unquoted values end at whitespace or at the end of the tag
                    if ('>' === $curr) break; # the pending pair is stored after the loop
                    if (ctype_space($curr)) {
                        $arr[$name] = $value;
                        $mode = 0;
                        $name = $value = '';
                    } else {
                        $value .= $curr;
                    }
                } elseif ($curr === $stop) {
                    # closing quote
                    $arr[$name] = $value;
                    $mode = 0;
                    $name = $value = '';
                } else {
                    $value .= $curr;
                }
            } else { # neither
                if ('>' === $curr) break;
                if ('/' !== $curr && !ctype_space($curr)) {
                    # initiate
                    $name = $curr;
                    $mode = -1;
                }
            }
        }
        # incl the final pair if it was still pending
        '' === $name or $arr[$name] = $value;
        return $arr;
    }

    /**
     * Parse markup (or an array of nodes) into a DOMDocument object
     * @param DOMDocument|Closure|string|array $html
     * @return DOMDocument
     */
    public static function dom($html = false) {
        $source = null;
        $html and $html = static::result($html);
        if (!is_scalar($html)) {
            # Documents are serialized and re-parsed below. Anything else is
            # treated as a stack of nodes to import.
            if (is_callable(array($html, 'saveHtml'))) $html = $html->saveHtml();
            else $source = $html;
        } elseif (!is_string($html)) {
            return new DOMDocument;
        }
        if (null === $source) {
            $source = new DOMDocument;
            $html = trim((string) $html);
            if ('' === $html) return $source;
            $type = strtolower(substr($html, 0, 5));
            $type === '<html' and $html = '<!DOCTYPE html>' . "\n" . $html;
            # Silence parser warnings for this load only, then restore the
            # caller's libxml error-handling setting.
            $previous = libxml_use_internal_errors(true);
            $source->loadHtml($html);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
            if ('<!doc' === $type || '<html' === $type) return $source;
            $save = $source->saveHtml();
            if ($save === $html) return $source;
            $save = strtolower(substr($save, 0, 5));
            # Unwrap the elements that the parser added around the fragment.
            $root = $source->getElementsByTagName('*')->item(0);
            if (!$root) return $source; # no elements were parsed
            $source = $root->childNodes;
            if ($save !== $type && '<body' !== $type && '<head' !== $type) {
                $first = $source->item(0);
                $source = $first ? $first->childNodes : array();
            }
        }
        $html = new DOMDocument; # repurpose
        foreach ($source as $i => $node) $html->appendChild($html->importNode($node, true));
        return $html;
    }

    /**
     * Strip all tags except those whitelisted.
     * @param mixed $html
     * @param string|array $tags whitelist
     * @return string
     */
    public static function keep($html, $tags = null) {
        $allowed = array_reduce(self::explode($tags), function($kept, $tag) {
            return strlen($tag = trim($tag, '</>')) ? "$kept<$tag>" : $kept;
        }, '');
        return strip_tags(static::express($html), $allowed);
    }

    /**
     * Remove blacklisted elements (and their contents).
     * @param mixed $html
     * @param string|array $tags blacklist
     * @return string
     */
    public static function ban($html, $tags = null) {
        $tags = self::explode($tags);
        $html = static::dom($html);
        foreach ($tags as $tag) {
            $tag = trim($tag, '</>');
            if ('' === $tag) continue;
            # Iterate a snapshot: the node list is live, so removing nodes
            # while iterating it directly would skip siblings.
            foreach (iterator_to_array($html->getElementsByTagName($tag)) as $node)
                $node->parentNode and $node->parentNode->removeChild($node);
        }
        return $html->saveHTML();
    }

    /**
     * Wrap content in a CDATA section.
     * @param string $html
     * @return string
     */
    public static function cdata($html) {
        # A literal "]]>" would end the section early, so split it across two sections.
        return '<![CDATA[' . str_replace(']]>', ']]]]><![CDATA[>', $html) . ']]>';
    }
}