/
Sanitizer.php
5116 lines (4620 loc) · 181 KB
/
Sanitizer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<?php namespace ProcessWire;
/**
* ProcessWire Sanitizer
*
* Sanitizer provides shared sanitization functions as commonly used throughout ProcessWire core and modules
*
* #pw-summary Provides methods for sanitizing and validating user input, preparing data for output, and more.
* #pw-use-constants
* #pw-body =
* Sanitizer is useful for sanitizing input or any other kind of data that you need to match a particular type or format.
* The Sanitizer methods are accessed from the `$sanitizer` API variable and/or `sanitizer()` API variable/function.
* For example:
* ~~~~~~
* $cleanValue = $sanitizer->text($dirtyValue);
* ~~~~~~
* You can replace the `text()` call above with any other sanitizer method. Many sanitizer methods also accept additional
* arguments—see each individual method for details.
*
* ### Sanitizer and input
*
* Sanitizer methods are most commonly used with user input. As a result, the methods in this class are also accessible
* from the `$input->get`, `$input->post` and `$input->cookie` API variables, in the same manner that they are here.
* This is a useful shortcut for instances where you don’t need to provide additional arguments to the sanitizer method.
* Below are a few examples of this usage:
* ~~~~~
* // get GET variable 'id' as integer
* $id = $input->get->int('id');
*
* // get POST variable 'name' as 1-line plain text
* $name = $input->post->text('name');
*
* // get POST variable 'comments' as multi-line plain text
* $comments = $input->post->textarea('comments');
* ~~~~~
* In ProcessWire 3.0.125 and newer you can also perform the same task as the above with one less `->` level like the
* example below:
* ~~~~~
* $comments = $input->post('comments','textarea');
* ~~~~~
* This is more convenient in some IDEs because it’ll never be flagged as an unrecognized function call. Though outside
* of that it makes little difference how you call it, as they both do the same thing.
*
* See the `$input` API variable for more details on how to call sanitizers directly from $input.
*
* ### Adding your own sanitizers
*
* You can easily add your own new sanitizers via ProcessWire hooks. Hooks are commonly added in a /site/ready.php file,
* or from a Module, though you may add them wherever you want. The following example adds a sanitizer method called
* `zip()` which enforces a 5 digit zip code:
* ~~~~~
* $sanitizer->addHook('zip', function(HookEvent $event) {
* $sanitizer = $event->object;
* $value = $event->arguments(0); // get first argument given to method
* $value = $sanitizer->digits($value, 5); // allow only digits, max-length 5
* if(strlen($value) < 5) $value = ''; // if fewer than 5 digits, it is not a zip
* $event->return = $value;
* });
*
* // now you can use your zip sanitizer
* $dirtyValue = 'Decatur GA 30030';
* $cleanValue = $sanitizer->zip($dirtyValue);
* echo $cleanValue; // outputs: 30030
* ~~~~~
*
* ### Additional options (3.0.125 or newer)
*
* In ProcessWire 3.0.125+ you can also combine sanitizer methods in a single call. These are defined by separating each
* sanitizer method with an underscore. The example below runs the value through the text sanitizer and then through the
* entities sanitizer:
* ~~~~~
* $cleanValue = $sanitizer->text_entities($dirtyValue);
* ~~~~~
* If you append a number to any sanitizer call that returns a string, it is assumed to be maximum allowed length. For
* example, the following would sanitize the value to be text of no more than 20 characters:
* ~~~~~
* $cleanValue = $sanitizer->text20($dirtyValue);
* ~~~~~
* The above technique also works for any user-defined sanitizers you’ve added via hooks. We like this strategy for
* storage of sanitizer calls that are executed at some later point, like those you might store in a module config. It
* essentially enables you to define loose data types for sanitization. In addition, if there are other cases where you
* need multiple sanitizers to clean a particular value, this strategy can do it with a lot less code than you would
* with multiple sanitizer calls.
*
* Most methods in the Sanitizer class focus on sanitization rather than validation, with a few exceptions. You can
* convert a sanitizer call to validation call by calling the `validate()` method with the name of the sanitizer and the
* value. A validation call simply implies that if the value is modified by sanitization then it is considered invalid
* and thus it’ll return a non-value rather than a sanitized value. See the `Sanitizer::validate()` and
* `Sanitizer::valid()` methods for usage details.
*
* #pw-body
*
* ProcessWire 3.x, Copyright 2019 by Ryan Cramer
* https://processwire.com
*
* @link https://processwire.com/api/variables/sanitizer/ Official $sanitizer API variable Documentation
*
* @method array($value, $sanitizer = null, array $options = array())
* @method array testAll($value)
*
*/
class Sanitizer extends Wire {
/**
* Constant used for the $beautify argument of name sanitizer methods to indicate transliteration may be used.
*
* nameFilter() compares its $beautify argument strictly against this value to decide whether
* the configured character replacements are applied before the value is converted to ASCII.
*
*/
const translate = 2;
/**
* Beautify argument for pageName() to IDN (punycode) encode UTF-8 to ASCII
*
* pageName() ignores this mode when the page name charset is not 'UTF8'.
* #pw-internal
*
*/
const toAscii = 4;
/**
* Beautify argument for pageName() to allow decoding IDN (punycode) ASCII to UTF-8
*
* pageName() ignores this mode when the page name charset is not 'UTF8'.
* #pw-internal
*
*/
const toUTF8 = 8;
/**
* Beautify argument for pageName() to indicate that UTF8 (in whitelist) is allowed
*
* Unlike the toUTF8 option, no ascii to UTF8 conversion is allowed.
* #pw-internal
*
*/
const okUTF8 = 16;
/**
* Caches the status of multibyte support.
*
* Set by the constructor to whether the mb_internal_encoding() function exists.
*
* @var bool
*
*/
protected $multibyteSupport = false;
/**
* Array of allowed ascii characters for name filters
*
* Populated by the constructor with one entry per character of $alphaASCII and $digitASCII.
*
* @var array
*
*/
protected $allowedASCII = array();
/**
* ASCII alpha chars (lowercase a-z followed by uppercase A-Z)
*
* @var string
*
*/
protected $alphaASCII = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
/**
* ASCII digits chars (0-9)
*
* @var string
*
*/
protected $digitASCII = '0123456789';
/**
* Text tools instance, or null when none has been assigned yet
*
* NOTE(review): presumably populated on demand by getTextTools(), which is not visible in this section — confirm.
*
* @var null|WireTextTools
*
*/
protected $textTools = null;
/**
* Whitespace characters, listed as 4-digit hexadecimal Unicode code points
*
* Each entry is the code point of one whitespace (or zero-width) character, as named in the
* comment beside it. These are code points rather than encoded UTF-8 byte sequences.
*
* @var array
*
*/
protected $whitespaceUTF8 = array(
'0000', // null byte
'0009', // character tab
'000A', // line feed
'000B', // line tab (vertical tab)
'000C', // form feed
'000D', // carriage return
'0020', // space
'0085', // next line
'00A0', // non-breaking space
'1680', // ogham space mark
'180E', // mongolian vowel separator
'2000', // en quad
'2001', // em quad
'2002', // en space
'2003', // em space
'2004', // three per em space
'2005', // four per em space
'2006', // six per em space
'2007', // figure space
'2008', // punctuation space
'2009', // thin space
'200A', // hair space
'200B', // zero width space
'200C', // zero width non-join
'200D', // zero width join
'2028', // line separator
'2029', // paragraph separator
'202F', // narrow non-breaking space
'205F', // medium mathematical space
'2060', // word join
'3000', // ideographic space
'FEFF', // zero width non-breaking space
);
/**
* HTML entities representing whitespace
*
* Note that this array is populated with all decimal/hex entities after a call to
* getWhitespaceArray() method with the $html option as true.
*
* @var array
*
*/
protected $whitespaceHTML = array(
' ', // non-breaking space
' ', // en space
' ', // em space
' ', // thin space
'‌', // zero width non-join
'‍', // zero width join
);
/**
* Sanitizer method names (A-Z) and type(s) they return
*
* Keys are sanitizer method names. Each value is a string of one-letter type codes, one letter
* per type the method may return.
*
* NOTE(review): the codes appear to mean a=array, b=bool, f=float, i=int, m=mixed, n=null and
* s=string — confirm against the code that reads this array (not visible in this section).
*
* @var array
*
*/
protected $sanitizers = array(
'alpha' => 's',
'alphanumeric' => 's',
'array' => 'a',
'arrayVal' => 'a',
'attrName' => 's',
'bit' => 'i',
'bool' => 'b',
'camelCase' => 's',
'chars' => 's',
'checkbox' => 'b',
'date' => 'ins',
'digits' => 's',
'email' => 's',
'emailHeader' => 's',
'entities' => 's',
'entities1' => 's',
'entitiesMarkdown' => 's',
'fieldName' => 's',
'fieldSubfield' => 's',
'filename' => 's',
'flatArray' => 'a',
'float' => 'f',
'httpUrl' => 's',
'hyphenCase' => 's',
'int' => 'i',
'intArray' => 'a',
'intArrayVal' => 'a',
'intSigned' => 'i',
'intUnsigned' => 'i',
'kebabCase' => 's',
'markupToLine' => 's',
'markupToText' => 's',
'max' => 'fi',
'maxBytes' => 's',
'maxLength' => 'afis',
'minLength' => 's',
'min' => 'fi',
'minArray' => 'a',
'name' => 's',
'names' => 'as',
'normalizeWhitespace' => 's',
'pageName' => 's',
'pageNameTranslate' => 's',
'pageNameUTF8' => 's',
'pagePathName' => 's',
'pagePathNameUTF8' => 's',
'pascalCase' => 's',
'path' => 'bs',
'purify' => 's',
'range' => 'fi',
'reduceWhitespace' => 's',
'removeMB4' => 'ams',
'removeNewlines' => 's',
'removeWhitespace' => 's',
'sanitize' => 'm',
'selectorField' => 's',
'selectorValue' => 's',
'snakeCase' => 's',
'string' => 's',
'templateName' => 's',
'text' => 's',
'textarea' => 's',
'trim' => 's',
'truncate' => 's',
'unentities' => 's',
'url' => 's',
'valid' => 'b',
'validate' => 'm',
'varName' => 's',
'wordsArray' => 'a',
);
/**
 * Construct the sanitizer
 *
 * Records whether the multibyte string functions are available (setting their internal
 * encoding to UTF-8 when they are) and builds the list of ASCII letters and digits that
 * the name filters allow.
 *
 */
public function __construct() {
	$hasMultibyte = function_exists("mb_internal_encoding");
	if($hasMultibyte) mb_internal_encoding("UTF-8");
	$this->multibyteSupport = $hasMultibyte;
	$asciiChars = $this->alphaASCII . $this->digitASCII;
	$this->allowedASCII = str_split($asciiChars);
}
/*************************************************************************************************************
* STRING SANITIZERS
*
*/
/**
 * Internal filter used by other name filtering methods in this class
 *
 * Reduces the value to ASCII letters, digits and the characters given in $allowedExtras.
 * Every other character is replaced with $replacementChar. When $beautify is enabled the value
 * is first converted towards ASCII (via iconv when available, and via the InputfieldPageName
 * replacements when `Sanitizer::translate` is specified), and any leading/trailing extras or
 * replacement characters are trimmed from the result.
 *
 * #pw-internal
 *
 * @param string $value Value to filter
 * @param array $allowedExtras Additional characters that are allowed in the value
 * @param string $replacementChar 1 character replacement value for invalid characters
 * @param bool|int $beautify Whether to beautify the string, specify `Sanitizer::translate` to perform transliteration.
 * @param int $maxLength Maximum length (in bytes) of the returned value
 * @return string
 *
 */
public function nameFilter($value, array $allowedExtras, $replacementChar, $beautify = false, $maxLength = 128) {

	// replacements are loaded once and shared by all later calls
	static $replacements = array();

	if(!is_string($value)) $value = $this->string($value);

	$allowed = array_merge($this->allowedASCII, $allowedExtras);
	// non-zero when the value contains anything other than allowed characters
	$needsWork = strlen(str_replace($allowed, '', $value));
	$extras = implode('', $allowedExtras);

	if($beautify && $needsWork) {
		if($beautify === self::translate && $this->multibyteSupport) {
			$value = mb_strtolower($value);
			if(empty($replacements)) {
				$configData = $this->wire('modules')->getModuleConfigData('InputfieldPageName');
				$replacements = empty($configData['replacements']) ? InputfieldPageName::$defaultReplacements : $configData['replacements'];
			}
			foreach($replacements as $from => $to) {
				if(mb_strpos($value, $from) !== false) {
					// note: mb_eregi_replace() interprets $from as a regular expression
					$value = mb_eregi_replace($from, $to, $value);
				}
			}
		}
		if(function_exists("\\iconv")) {
			$v = iconv("UTF-8", "ASCII//TRANSLIT//IGNORE", $value);
			if($v) $value = $v;
		}
		// conversion may have resolved everything, so re-check
		$needsWork = strlen(str_replace($allowed, '', $value));
	}

	if(strlen($value) > $maxLength) $value = substr($value, 0, $maxLength);

	if($needsWork) {
		$value = str_replace(array("'", '"'), '', $value); // blank out any quotes
		$_value = $value;
		// NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1. It is left in place
		// because substituting another filter changes how markup in $value is stripped.
		$filters = FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_NO_ENCODE_QUOTES;
		$value = filter_var($value, FILTER_SANITIZE_STRING, $filters);
		if(!strlen($value)) {
			// if above filter blanked out the string, try with brackets already replaced
			$value = str_replace(array('<', '>', '«', '»', '‹', '›'), $replacementChar, $_value);
			$value = filter_var($value, FILTER_SANITIZE_STRING, $filters);
		}
		// escape the extras so that none of them (hyphen, "]", "\", "^", ...) can alter or
		// break the character class, whatever order they were given in
		$chars = preg_quote($extras, '/') . 'a-zA-Z0-9';
		$value = preg_replace('/[^' . $chars . ']/', $replacementChar, $value);
	}

	// remove leading or trailing dashes, underscores, dots
	if($beautify) {
		if(strpos($extras, $replacementChar) === false) $extras .= $replacementChar;
		$value = trim($value, $extras);
	}

	return $value;
}
/**
 * Sanitize in "name" format (ASCII alphanumeric letters/digits, hyphens, underscores, periods)
 *
 * Default behavior:
 *
 * - Allows both upper and lowercase ASCII letters.
 * - Limits maximum length to 128 characters.
 * - Replaces non-name format characters with underscore "_".
 *
 * ~~~~~
 * $test = "Foo+Bar Baz-123"
 * echo $sanitizer->name($test); // outputs: Foo_Bar_Baz-123
 * ~~~~~
 *
 * #pw-group-strings
 *
 * @param string $value Value that you want to convert to name format.
 * @param bool|int $beautify Beautify the returned name?
 *  - Beautify makes returned name prettier by getting rid of doubled punctuation, leading/trailing punctuation and such.
 *  - Should be TRUE when creating a resource using the name for the first time (default is FALSE).
 *  - You may also specify the constant `Sanitizer::translate` (or integer 2) for this argument, which will make it
 *    translate letters based on name format settings in ProcessWire.
 * @param int $maxLength Maximum number of characters allowed in the name (default=128).
 * @param string $replacement Replacement character for invalid characters. Should be either "_", "-" or "." (default="_").
 * @param array $options Extra options to replace default 'beautify' behaviors
 *  - `allowAdjacentExtras` (bool): Whether to allow [-_.] characters next to each other (default=false).
 *  - `allowDoubledReplacement` (bool): Whether to allow two of the same replacement chars [-_] next to each other (default=false).
 *  - `allowedExtras` (array): Specify extra allowed characters (default=`['-', '_', '.']`).
 * @return string Sanitized value in name format
 * @see Sanitizer::pageName()
 *
 */
public function name($value, $beautify = false, $maxLength = 128, $replacement = '_', $options = array()) {

	if(!empty($options['allowedExtras']) && is_array($options['allowedExtras'])) {
		$allowedExtras = $options['allowedExtras'];
	} else {
		$allowedExtras = array('-', '_', '.');
	}

	$value = $this->nameFilter($value, $allowedExtras, $replacement, $beautify, $maxLength);

	// everything below is beautification only
	if(!$beautify) return $value;

	$hasExtras = false;
	foreach($allowedExtras as $c) {
		$hasExtras = strpos($value, $c) !== false;
		if($hasExtras) break;
	}

	if($hasExtras) {
		if(empty($options['allowAdjacentExtras'])) {
			// replace any run of extras next to each other with a single $replacement;
			// the extras are escaped so that their order (e.g. "_-.") or any regex
			// metacharacter among them cannot form an invalid or unintended character class
			$extrasClass = preg_quote(implode('', $allowedExtras), '/');
			if($extrasClass !== '') {
				$value = preg_replace('/[' . $extrasClass . ']{2,}/', $replacement, $value);
			}
		}
		if(empty($options['allowDoubledReplacement'])) {
			// replace double'd replacements; the replacement is escaped because "." is a
			// supported replacement and would otherwise match any character
			$r = "$replacement$replacement";
			if($r !== '' && strpos($value, $r) !== false) {
				$value = preg_replace('/(?:' . preg_quote($replacement, '/') . '){2,}/', $replacement, $value);
			}
		}
		// replace double dots
		if(strpos($value, '..') !== false) $value = preg_replace('/\.\.+/', '.', $value);
	}

	if(strlen($value) > $maxLength) $value = substr($value, 0, $maxLength);

	return $value;
}
/**
 * Sanitize a string or array containing multiple names
 *
 * - Default behavior is to sanitize to ASCII alphanumeric and hyphen, underscore, and period.
 * - If given a string, multiple names may be separated by a delimeter (which is a space by default).
 *   Commas and pipes "|" in the value are also treated as separators.
 * - Return value will be of the same type as the given value (i.e. string or array).
 * - When given an array and no names remain after sanitization, an empty array is returned.
 *
 * #pw-group-strings
 *
 * @param string|array $value Value(s) to sanitize to name format.
 * @param string $delimeter Character that delimits values, if $value is a string (default=" ").
 *   An empty string is treated the same as the default.
 * @param array $allowedExtras Additional characters that are allowed in the value (default=['-', '_', '.']).
 * @param string $replacementChar Single character replacement value for invalid characters (default='_').
 * @param bool $beautify Whether or not to beautify returned values (default=false). See Sanitizer::name() for beautify options.
 * @return string|array Returns string if given a string for $value, returns array if given an array for $value.
 *
 */
public function names($value, $delimeter = ' ', $allowedExtras = array('-', '_', '.'), $replacementChar = '_', $beautify = false) {

	// an empty delimiter cannot separate anything: on PHP 8+ strpos() matches an empty needle,
	// so the de-duplication loop below would never end, and explode() would throw
	$delimeter = (string) $delimeter;
	if($delimeter === '') $delimeter = ' ';

	$isArray = is_array($value);
	if($isArray) $value = implode(' ', $value);

	// normalize all recognized separators to a single space while filtering
	$replace = array(',', '|', ' ');
	if($delimeter !== ' ' && !in_array($delimeter, $replace, true)) $replace[] = $delimeter;
	$value = str_replace($replace, ' ', $value);

	$allowedExtras[] = ' ';
	$value = $this->nameFilter($value, $allowedExtras, $replacementChar, $beautify, 8192);

	if($delimeter !== ' ') $value = str_replace(' ', $delimeter, $value);

	// collapse repeated delimiters
	$doubled = $delimeter . $delimeter;
	while(strpos($value, $doubled) !== false) {
		$value = str_replace($doubled, $delimeter, $value);
	}

	$value = trim($value, $delimeter);

	if($isArray) {
		// explode() on an empty string would yield an array holding one empty name
		$value = $value === '' ? array() : explode($delimeter, $value);
	}

	return $value;
}
/**
 * Sanitizes a string to be consistent with PHP variable names (not including '$').
 *
 * Allows upper and lowercase ASCII letters, digits and underscore. Any leading digits are
 * removed, since a variable name cannot begin with a number.
 *
 * #pw-internal
 *
 * @param string $value String you want to sanitize
 * @return string Sanitized string
 *
 */
public function varName($value) {
	$name = $this->nameFilter($value, array('_'), '_');
	// purely alphabetic names have no leading digits to remove
	if(ctype_alpha($name)) return $name;
	return ltrim($name, $this->digitASCII);
}
/**
 * Sanitize to an ASCII-only HTML attribute name
 *
 * - Leading characters other than ASCII letters, colon or underscore are removed.
 * - Whitespace and other characters outside of letters, digits and `-_.:` are replaced with hyphens.
 * - A value that cannot be reduced to visible ASCII characters results in a blank string.
 * - The name "data-" (a data attribute with no name) is never returned, including when it
 *   is the result of truncating a longer name to $maxLength.
 *
 * @param string $value
 * @param int $maxLength
 * @return string
 * @since 3.0.133
 *
 */
public function attrName($value, $maxLength = 255) {

	$value = trim($this->string($value)); // force as trimmed string

	if(ctype_alpha($value) && strlen($value) <= $maxLength) return $value; // simple 1-word attributes

	// remove any non ":_a-zA-Z" characters from beginning of attribute name
	while(strlen($value) && strpos(":_$this->alphaASCII", substr($value, 0, 1)) === false) {
		$value = substr($value, 1);
	}

	// names made up of only ASCII letters/digits and HTML valid separators need no repair
	$isPlain = ctype_alnum(str_replace(array('-', '_', ':', '.'), '', $value));

	if(!$isPlain) {
		// at this point attribute name contains something unusual
		if(!ctype_graph($value)) {
			// contains non-visible characters
			$value = preg_replace('/[\s\r\n\t]+/', '-', $value);
			if(!ctype_graph($value)) $value = ''; // fail
		}
		if($value !== '') {
			// replace non-word, non-digit, non-punct characters
			$value = preg_replace('/[^-_.:\w\d]+/', '-', $value);
			$value = htmlspecialchars($value, ENT_QUOTES, 'UTF-8');
		}
	}

	// data attribute with no name is disallowed; this check applies to plain names too
	if($value === 'data-') $value = '';

	if(strlen($value) > $maxLength) {
		$value = substr($value, 0, $maxLength);
		// truncation can cut a longer name (i.e. "data-foo") down to a bare "data-"
		if($value === 'data-') $value = '';
	}

	return $value;
}
/**
 * Sanitize consistent with names used by ProcessWire fields and/or PHP variables
 *
 * - Allows upper and lowercase ASCII letters, digits and underscore.
 * - Any other character is replaced with an underscore.
 * - Unlike the varName() sanitizer, leading digits are kept, and beautification and a max length are supported.
 * - Unlike other name formats, hyphen and period are excluded because they aren't allowed characters in PHP variables.
 *
 * ~~~~~
 * $test = "Hello world";
 * echo $sanitizer->fieldName($test); // outputs: Hello_world
 * ~~~~~
 *
 * #pw-group-strings
 *
 * @param string $value Value you want to sanitize
 * @param bool|int $beautify Should be true when using the name for a new field (default=false).
 *  You may also specify constant `Sanitizer::translate` (or number 2) for the $beautify param, which will make it translate letters
 *  based on the system page name translation settings.
 * @param int $maxLength Maximum number of characters allowed in the name (default=128).
 * @return string Sanitized string
 *
 */
public function fieldName($value, $beautify = false, $maxLength = 128) {
	$allowedExtras = array('_');
	$replacementChar = '_';
	return $this->nameFilter($value, $allowedExtras, $replacementChar, $beautify, $maxLength);
}
/**
 * Sanitize as a field name but with optional subfield(s) like “field.subfield”
 *
 * - Periods must be present to indicate subfield(s), otherwise behaves same as fieldName() sanitizer.
 * - A value whose only period-related feature is a leading period is also handed to fieldName() as-is.
 * - By default allows just one subfield. To allow more, increase the $limit argument.
 * - To allow any quantity of subfields, specify -1.
 * - To reduce a `field.subfield...` combo to just `field` specify 0 for limit argument.
 * - Processing stops at the first part that sanitizes to a blank string.
 * - Maximum length of returned string is (128 + ($limit * 128)).
 *
 * ~~~~~~
 * echo $sanitizer->fieldSubfield('a.b.c'); // outputs: a.b (default behavior)
 * echo $sanitizer->fieldSubfield('a.b.c', 2); // outputs: a.b.c
 * echo $sanitizer->fieldSubfield('a.b.c', 0); // outputs: a
 * echo $sanitizer->fieldSubfield('a.b.c', -1); // outputs: a.b.c (any quantity)
 * echo $sanitizer->fieldSubfield('foo bar.baz'); // outputs: foo_bar.baz
 * echo $sanitizer->fieldSubfield('foo bar baz'); // outputs: foo_bar_baz
 * ~~~~~~
 *
 * @param string $value Value to sanitize
 * @param int $limit Max allowed quantity of subfields, or use -1 for any quantity (default=1).
 * @return string
 * @since 3.0.126
 *
 */
public function fieldSubfield($value, $limit = 1) {

	$value = $this->string($value);
	if(!strlen($value)) return '';

	// no period at all, or a period found only at position 0: plain field name
	$dotPos = strpos($value, '.');
	if($dotPos === false || $dotPos === 0) return $this->fieldName($value);

	$names = array();
	$segments = explode('.', trim($value, '.'));

	foreach($segments as $segment) {
		$segment = $this->fieldName($segment);
		if(!strlen($segment)) break;
		$names[] = $segment;
		// first entry is the field, the rest are subfields that count against the limit
		if($limit > -1 && count($names) - 1 >= $limit) break;
	}

	return implode('.', $names);
}
/**
 * Name filter as used by ProcessWire Templates
 *
 * Allows ASCII letters, digits, underscore and hyphen. Any other character is replaced with a hyphen.
 *
 * #pw-internal
 *
 * @param string $value
 * @param bool|int $beautify Should be true when creating a name for the first time. Default is false.
 *  You may also specify Sanitizer::translate (or number 2) for the $beautify param, which will make it translate letters
 *  based on the InputfieldPageName custom config settings.
 * @param int $maxLength Maximum number of characters allowed in the name
 * @return string
 *
 */
public function templateName($value, $beautify = false, $maxLength = 128) {
	$allowedExtras = array('_', '-');
	$replacementChar = '-';
	return $this->nameFilter($value, $allowedExtras, $replacementChar, $beautify, $maxLength);
}
/**
 * Sanitize as a ProcessWire page name
 *
 * - Page names by default support lowercase ASCII letters, digits, underscore, hyphen and period.
 *
 * - Because page names are often generated from a UTF-8 title, UTF-8 to ASCII conversion will take place when `$beautify` is enabled.
 *
 * - You may optionally omit the `$beautify` and/or `$maxLength` arguments and substitute the `$options` array instead.
 *
 * - When substituted, the beautify and maxLength options can be specified in $options as well.
 *
 * - The `Sanitizer::toAscii`, `Sanitizer::toUTF8` and `Sanitizer::okUTF8` modes only apply when the charset
 *   (the `charset` option, which defaults to `$config->pageNameCharset`) is "UTF8". Otherwise they are disabled
 *   and the regular ASCII page name rules are used.
 *
 * - With `Sanitizer::toAscii`, a value containing characters outside of the ASCII name format is encoded to a
 *   punycode ("xn-") ASCII page name, shortening the source value as needed to fit within the max length.
 *
 * ~~~~~
 * $test = "Hello world!";
 * echo $sanitizer->pageName($test, true); // outputs: hello-world
 * ~~~~~
 *
 * #pw-group-strings
 * #pw-group-pages
 *
 * @param string $value Value to sanitize as a page name
 * @param bool|int|array $beautify This argument accepts a few different possible values (default=false):
 *  - `true` (boolean): Make it pretty. Use this when using a pageName for the first time.
 *  - `$options` (array): You can optionally specify the $options array for this argument instead.
 *  - `Sanitizer::translate` (constant): This will make it translate non-ASCII letters based on *InputfieldPageName* module config settings.
 *  - `Sanitizer::toAscii` (constant): Convert UTF-8 characters to punycode ASCII.
 *  - `Sanitizer::toUTF8` (constant): Convert punycode ASCII to UTF-8.
 *  - `Sanitizer::okUTF8` (constant): Allow UTF-8 characters to appear in path (implied if $config->pageNameCharset is 'UTF8').
 * @param int|array $maxLength Maximum number of characters allowed in the name.
 *  You may also specify the $options array for this argument instead.
 * @param array $options Array of options to modify default behavior. See Sanitizer::name() method for available options.
 * @return string
 * @see Sanitizer::name()
 *
 */
public function pageName($value, $beautify = false, $maxLength = 128, array $options = array()) {

	$value = $this->string($value);
	if(!strlen($value)) return '';

	$defaults = array(
		'charset' => $this->wire('config')->pageNameCharset
	);

	// $options may be given in place of $beautify or $maxLength. Defaults are merged in for
	// every form, so that $options['charset'] is always defined for the checks below.
	if(is_array($beautify)) {
		$options = array_merge($defaults, $beautify, $options);
		$beautify = isset($options['beautify']) ? $options['beautify'] : false;
		$maxLength = isset($options['maxLength']) ? $options['maxLength'] : 128;
	} else if(is_array($maxLength)) {
		$options = array_merge($defaults, $maxLength, $options);
		$maxLength = isset($options['maxLength']) ? $options['maxLength'] : 128;
	} else {
		$options = array_merge($defaults, $options);
	}

	if($options['charset'] !== 'UTF8' && is_int($beautify) && $beautify > self::translate) {
		// UTF8 beautify modes aren't available if $config->pageNameCharset is not UTF8
		if(in_array($beautify, array(self::toAscii, self::toUTF8, self::okUTF8))) {
			// if modes aren't supported, disable
			$beautify = false;
		}
	}

	if($beautify === self::toAscii) {
		// convert UTF8 to ascii (IDN/punycode)
		$beautify = false;
		if(strlen($value) > $maxLength) $value = substr($value, 0, $maxLength);
		$_value = $value;

		if(!ctype_alnum($value)
			&& !ctype_alnum(str_replace(array('-', '_', '.'), '', $value))
			&& strpos($value, 'xn-') !== 0) {

			do {
				// encode value
				$value = $this->punyEncodeName($_value);
				// if result stayed within our allowed character limit, then good, we're done
				if(strlen($value) <= $maxLength) break;
				// continue loop until encoded value is equal or less than allowed max length
				$_value = substr($_value, 0, strlen($_value) - 1);
			} while(true);

			// if encode was necessary and successful, return with no further processing
			if(strpos($value, 'xn-') === 0) {
				return $value;
			} else {
				// can't be encoded, send to regular name sanitizer
				$value = $_value;
			}
		}

	} else if($beautify === self::toUTF8) {
		// convert ascii IDN/punycode to UTF8
		$beautify = self::okUTF8;
		if(strpos($value, 'xn-') === 0) {
			// found something to convert
			$value = $this->punyDecodeName($value);
			// now it will run through okUTF8
		}
	}

	if($beautify === self::okUTF8) {
		// forward the requested max length rather than falling back to pageNameUTF8()'s default
		return $this->pageNameUTF8($value, $maxLength);
	}

	return strtolower($this->name($value, $beautify, $maxLength, '-', $options));
}
/**
 * Name filter for ProcessWire Page names with transliteration
 *
 * Shortcut for calling pageName() with `Sanitizer::translate` as the `$beautify` argument.
 *
 * #pw-group-strings
 * #pw-group-pages
 *
 * @param string $value Value to sanitize
 * @param int $maxLength Maximum number of characters allowed in the name
 * @return string Sanitized value
 *
 */
public function pageNameTranslate($value, $maxLength = 128) {
	$beautify = self::translate;
	return $this->pageName($value, $beautify, $maxLength);
}
/**
* Sanitize and allow for UTF-8 characters in page name
*
* - If `$config->pageNameCharset` is not `UTF8` then this function just passes control to the regular page name sanitizer.
* - Allowed UTF-8 characters are determined from `$config->pageNameWhitelist`.
* - This method does not convert to or from UTF-8, it only sanitizes it against the whitelist.
* - If given a value that has only ASCII characters, this will pass control to the regular page name sanitizer.
* - A leading "xn-" prefix is removed from the value.
* - Characters that are not allowed are replaced with a hyphen, doubled separators (period, hyphen, underscore)
*   are collapsed, and leading/trailing separators are trimmed.
*
* #pw-group-strings
* #pw-group-pages
*
* @param string $value Value to sanitize
* @param int $maxLength Maximum number of characters allowed
* @return string Sanitized value
*
*/
public function pageNameUTF8($value, $maxLength = 128) {
$value = $this->string($value);
if(!strlen($value)) return '';
// if UTF8 module is not enabled then delegate this call to regular pageName sanitizer
if($this->wire('config')->pageNameCharset != 'UTF8') return $this->pageName($value, false, $maxLength);
// NOTE(review): presumably a WireTextTools instance providing multibyte-aware string functions — confirm
$tt = $this->getTextTools();
// we don't allow UTF8 page names to be prefixed with "xn-"
if(strpos($value, 'xn-') === 0) $value = substr($value, 3);
// word separators that we always allow
$separators = array('.', '-', '_');
// whitelist of allowed characters and blacklist of disallowed characters
$whitelist = $this->wire('config')->pageNameWhitelist;
// a blank whitelist is represented as false from here on
if(!strlen($whitelist)) $whitelist = false;
$blacklist = '/\\%"\'<>?#@:;,+=*^$()[]{}|&';
// we let regular pageName handle chars like these, if they appear without other UTF-8
$extras = array('.', '-', '_', ',', ';', ':', '(', ')', '!', '?', '&', '%', '$', '#', '@');
// a space is also left to the regular sanitizer, unless the whitelist explicitly contains one
if($whitelist === false || strpos($whitelist, ' ') === false) $extras[] = ' ';
// proceed only if value has some non-ascii characters
if(ctype_alnum(str_replace($extras, '', $value))) {
// let regular pageName sanitizer handle this
return $this->pageName($value, false, $maxLength);
}
// validate that all characters are in our whitelist
$replacements = array();
// walk the value one character at a time; $value may be modified in place (case
// conversion below), which is why its length is re-read on every iteration
for($n = 0; $n < $tt->strlen($value); $n++) {
$c = $tt->substr($value, $n, 1);
// the blacklist is checked with both the text tools strpos and the byte-based strpos
$inBlacklist = $tt->strpos($blacklist, $c) !== false || strpos($blacklist, $c) !== false;
// the blacklist takes precedence over the whitelist
$inWhitelist = !$inBlacklist && $whitelist !== false && $tt->strpos($whitelist, $c) !== false;
if($inWhitelist && !$inBlacklist) {
// in whitelist
} else if($inBlacklist || !strlen(trim($c)) || ctype_cntrl($c)) {
// character does not resolve to something visible or is in blacklist
$replacements[] = $c;
} else {
// character that is not in whitelist, double check case variants
$cLower = $tt->strtolower($c);
$cUpper = $tt->strtoupper($c);
if($cLower !== $c && $tt->strpos($whitelist, $cLower) !== false) {
// allow character and convert to lowercase variant
$value = $tt->substr($value, 0, $n) . $cLower . $tt->substr($value, $n+1);
} else if($cUpper !== $c && $tt->strpos($whitelist, $cUpper) !== false) {
// allow character and convert to uppercase variant
$value = $tt->substr($value, 0, $n) . $cUpper . $tt->substr($value, $n+1);
} else {
// queue character to be replaced
$replacements[] = $c;
}
}
}
// replace disallowed characters with "-"
if(count($replacements)) $value = str_replace($replacements, '-', $value);
// replace doubled word separators
foreach($separators as $c) {
while(strpos($value, "$c$c") !== false) {
$value = str_replace("$c$c", $c, $value);
}
}
// trim off any remaining separators/extras
$value = trim($value, '-_.');
// enforce the max length, measured with the text tools strlen rather than in bytes
if($tt->strlen($value) > $maxLength) $value = $tt->substr($value, 0, $maxLength);
return $value;
}
/**
* Decode a PW-punycode'd name value
*
* @param string $value
* @return string
*
*/
protected function punyDecodeName($value) {

	// only values of at least 4 bytes that begin with "xn-" are candidates for decoding
	$isCandidate = strlen($value) >= 4 && strpos($value, 'xn-') === 0;
	if(!$isCandidate) return $value;

	// a "__" somewhere after the start marks a multi-part value:
	// decode every part on its own and glue the results together without a separator
	if(strpos($value, '__')) {
		$decoded = '';
		foreach(explode('__', $value) as $segment) {
			$decoded .= $this->punyDecodeName($segment);
		}
		return $decoded;
	}

	// the decoders expect the "xn--" double hyphen prefix, so expand a single hyphen "xn-" prefix
	$puny = strpos($value, 'xn--') === 0 ? $value : 'xn--' . substr($value, 3);

	if(function_exists('idn_to_utf8')) {
		// native php function, when available
		$result = @idn_to_utf8($puny);
	} else {
		// Punycode class otherwise
		$pc = new Punycode();
		$result = $pc->decode($puny);
	}

	// a false or empty result means the conversion failed: hand back the value as it was given
	if($result === false || !strlen($result)) return $value;

	return $result;
}
/**
* Encode a name value to PW-punycode
*
* @param string $value
* @return string
*
*/
protected function punyEncodeName($value) {

	// exclude values that don't need to be converted: already prefixed with "xn-",
	// or alphanumeric once periods, hyphens and underscores are removed
	if(strpos($value, 'xn-') === 0) return $value;
	if(ctype_alnum(str_replace(array('.', '-', '_'), '', $value))) return $value;

	$tt = $this->getTextTools();

	// "__" is used below as the delimiter between separately encoded parts,
	// so collapse any run of underscores in the value to a single one first
	while(strpos($value, '__') !== false) {
		$value = str_replace('__', '_', $value);
	}

	if(strlen($value) >= 50) {
		// values of 50+ bytes are split into 12-character parts,
		// each part is encoded on its own and the results are joined with "__"
		$_value = $value;
		$parts = array();
		while(strlen($_value)) {
			$part = $tt->substr($_value, 0, 12);
			$_value = $tt->substr($_value, 12);
			$parts[] = $this->punyEncodeName($part);
		}
		return implode('__', $parts);
	}

	if(function_exists("idn_to_ascii")) {
		// use native php function if available
		$encoded = @idn_to_ascii($value);
	} else {
		// otherwise use Punycode class
		$pc = new Punycode();
		$encoded = $pc->encode($value);
	}

	// Only a result that begins with the native "xn--" prefix and has content after it is usable.
	// IDNA encoders work per dot-separated label, so a value like "abc.é" encodes to "abc.xn--9ca";
	// blindly dropping the first three characters of such a result would discard part of the name.
	if(is_string($encoded) && strlen($encoded) > 4 && strpos($encoded, 'xn--') === 0) {
		// re-attach the "xn-" prefix to everything after the first three characters of the result
		// NOTE(review): the original comment says the PW prefix has one fewer hyphen than native
		// Punycode, but dropping only three characters keeps the native second hyphen - confirm intent
		return 'xn-' . substr($encoded, 3);
	}

	// fallback to regular 'name' sanitization on failure, ensuring that
	// return value is always ascii
	return $this->name($value);
}
/**
* Format required by ProcessWire user names
*
* #pw-internal
*
* @deprecated, use pageName instead.
* @param string $value
* @return string
*
*/
public function username($value) {
	// Deprecated alias: hands the value to pageName() using that method's default arguments
	$name = $this->pageName($value);
	return $name;
}
/**
* Name filter for ProcessWire filenames (basenames only, not paths)
*
* This sanitizes a filename to be consistent with the name format in ProcessWire,
* ASCII-alphanumeric, hyphens, underscores and periods.
*
* #pw-group-strings
* #pw-group-files
*
* @param string $value Filename to sanitize
* @param bool|int $beautify Should be true when creating a file's name for the first time. Default is false.
* You may also specify Sanitizer::translate (or number 2) for the $beautify param, which will make it translate letters
* based on the InputfieldPageName custom config settings.
* @param int $maxLength Maximum number of characters allowed in the filename
* @return string Sanitized filename
*
*/
public function filename($value, $beautify = false, $maxLength = 128) {
if(!is_string($value)) return '';
$value = basename($value);