/
Convert.php
executable file
·342 lines (313 loc) · 10.2 KB
/
Convert.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
<?php
/**
* Library of conversion functions, implemented as static methods.
*
* The methods are all of the form (format)2(format), where the format is one of
*
* raw: A UTF8 string
* attr: A UTF8 string suitable for inclusion in an HTML attribute
* js: A UTF8 string suitable for inclusion in a double-quoted javascript string.
*
* array: A PHP associative array
* json: JavaScript object notation
*
* html: HTML source suitable for use in a page or email
* text: Plain-text content, suitable for display to a user as-is, or insertion in a plaintext email.
*
* Objects of type {@link ViewableData} can have an "escaping type",
* which determines if they are automatically escaped before output by {@link SSViewer}.
*
* @package sapphire
* @subpackage misc
*/
class Convert extends Object {
/**
* Convert a value to be suitable for an XML attribute.
*
* @param array|string $val String to escape, or array of strings
* @return array|string
*/
static function raw2att($val) {
if(is_array($val)) {
foreach($val as $k => $v) $val[$k] = self::raw2att($v);
return $val;
} else {
return str_replace(array('&','"',"'",'<','>'), array('&','"',''','<','>'), $val);
}
}
/**
* Convert a value to be suitable for an HTML attribute.
*
* This is useful for converting human readable values into
* a value suitable for an ID or NAME attribute.
*
* @see http://www.w3.org/TR/REC-html40/types.html#type-cdata
* @uses Convert::raw2att()
* @param array|string $val String to escape, or array of strings
* @return array|string
*/
static function raw2htmlatt($val) {
if(is_array($val)) {
foreach($val as $k => $v) $val[$k] = self::raw2htmlatt($v);
return $val;
} else {
$val = self::raw2att($val);
$val = preg_replace('/[^a-zA-Z0-9\-_]*/', '', $val);
return $val;
}
}
/**
* Ensure that text is properly escaped for XML.
*
* @see http://www.w3.org/TR/REC-xml/#dt-escape
* @param array|string $val String to escape, or array of strings
* @return array|string
*/
static function raw2xml($val) {
if(is_array($val)) {
foreach($val as $k => $v) $val[$k] = self::raw2xml($v);
return $val;
} else {
return str_replace(array('&','<','>',"\n",'"',"'"), array('&','<','>','<br />','"','''), $val);
}
}
/**
* Ensure that text is properly escaped for Javascript.
*
* @param array|string $val String to escape, or array of strings
* @return array|string
*/
static function raw2js($val) {
if(is_array($val)) {
foreach($val as $k => $v) $val[$k] = self::raw2js($v);
return $val;
} else {
return str_replace(array("\\", '"', "\n", "\r", "'"), array("\\\\", '\"', '\n', '\r', "\\'"), $val);
}
}
/**
* Uses the PHP 5.2 native json_encode function if available,
* otherwise falls back to the Services_JSON class.
*
* @see http://pear.php.net/pepr/pepr-proposal-show.php?id=198
* @uses Director::baseFolder()
* @uses Services_JSON
*
* @param mixed $val
* @return string JSON safe string
*/
static function raw2json($val) {
if(function_exists('json_encode')) {
return json_encode($val);
} else {
require_once(Director::baseFolder() . '/sapphire/thirdparty/json/JSON.php');
$json = new Services_JSON();
return $json->encode($val);
}
}
static function raw2sql($val) {
if(is_array($val)) {
foreach($val as $k => $v) $val[$k] = self::raw2sql($v);
return $val;
} else {
return DB::getConn()->addslashes($val);
}
}
/**
* Convert XML to raw text.
* @uses html2raw()
* @todo Currently &#xxx; entries are stripped; they should be converted
*/
static function xml2raw($val) {
if(is_array($val)) {
foreach($val as $k => $v) $val[$k] = self::xml2raw($v);
return $val;
} else {
// More complex text needs to use html2raw instead
if(strpos($val,'<') !== false) return self::html2raw($val);
$converted = str_replace(array('&','<','>','"',''', '''), array('&','<','>','"',"'", "'"), $val);
$converted = ereg_replace('&#[0-9]+;', '', $converted);
return $converted;
}
}
/**
* Uses the PHP 5.2 native json_encode function if available,
* otherwise falls back to the Services_JSON class.
*
* @param array $val Array to convert
* @return string JSON encoded string
*/
static function array2json($val) {
if(function_exists('json_encode')) {
return json_encode($val);
} else {
require_once(Director::baseFolder() . '/sapphire/thirdparty/json/JSON.php');
$json = new Services_JSON();
return $json->encode($val);
}
}
/**
* Uses the PHP 5.2 native json_decode function if available,
* otherwise falls back to the Services_JSON class.
*
* @see http://pear.php.net/pepr/pepr-proposal-show.php?id=198
*
* @param string $val
* @return mixed JSON safe string
*/
static function json2obj($val) {
if(function_exists('json_decode')) {
return json_decode($val);
} else {
require_once(Director::baseFolder() . '/sapphire/thirdparty/json/JSON.php');
$json = new Services_JSON();
return $json->decode($val);
}
}
/**
* Convert a JSON string into an array.
*
* @uses json2obj
* @param string $val JSON string to convert
* @return array|boolean
*/
static function json2array($val) {
$json = self::json2obj($val);
if(!$json) return false;
$arr = array();
foreach($json as $k => $v) {
$arr[$k] = $v;
}
return $arr;
}
/**
* @uses recursiveXMLToArray()
*/
static function xml2array($val) {
$xml = new SimpleXMLElement($val);
return self::recursiveXMLToArray($xml);
}
/**
* Function recursively run from {@link Convert::xml2array()}
* @uses SimpleXMLElement
*/
protected static function recursiveXMLToArray($xml) {
if(is_object($xml) && get_class($xml) == 'SimpleXMLElement') {
$attributes = $xml->attributes();
foreach($attributes as $k => $v) {
if($v) $a[$k] = (string) $v;
}
$x = $xml;
$xml = get_object_vars($xml);
}
if(is_array($xml)) {
if(count($xml) == 0) return (string) $x; // for CDATA
foreach($xml as $key => $value) {
$r[$key] = self::recursiveXMLToArray($value);
}
if(isset($a)) $r['@'] = $a; // Attributes
return $r;
}
return (string) $xml;
}
/**
* Create a link if the string is a valid URL
* @param string The string to linkify
* @return A link to the URL if string is a URL
*/
static function linkIfMatch($string) {
if( preg_match( '/^[a-z+]+\:\/\/[a-zA-Z0-9$-_.+?&=!*\'()%]+$/', $string ) )
return "<a style=\"white-space: nowrap\" href=\"$string\">$string</a>";
else
return $string;
}
/**
* Simple conversion of HTML to plaintext.
*
* @param $data string
* @param $preserveLinks boolean
* @param $wordwrap array
*/
static function html2raw($data, $preserveLinks = false, $wordWrap = 60, $config = null) {
$defaultConfig = array(
'PreserveLinks' => false,
'ReplaceBoldAsterisk' => true,
'CompressWhitespace' => true,
'ReplaceImagesWithAlt' => true,
);
if(isset($config)) {
$config = array_merge($defaultConfig,$config);
} else {
$config = $defaultConfig;
}
// sTRIp style and script
/* $data = eregi_replace("<style(^A-Za-z0-9>][^>]*)?>.*</style[^>]*>","", $data);*/
/* $data = eregi_replace("<script(^A-Za-z0-9>][^>]*)?>.*</script[^>]*>","", $data);*/
$data = preg_replace("/<style(^A-Za-z0-9>][^>]*)?>.*?<\/style[^>]*>/i","", $data);
$data = preg_replace("/<script(^A-Za-z0-9>][^>]*)?>.*?<\/script[^>]*>/i","", $data);
// TODO Deal with attributes inside tags
if($config['ReplaceBoldAsterisk']) {
$data = str_ireplace(
array('<strong>','</strong>','<b>','</b>'),
'*',
$data
);
}
// Expand hyperlinks
if(!$preserveLinks && !$config['PreserveLinks']) {
$data = preg_replace('/<a[^>]*href\s*=\s*"([^"]*)">(.*?)<\/a>/ie', "Convert::html2raw('\\2').'[\\1]'", $data);
$data = preg_replace('/<a[^>]*href\s*=\s*([^ ]*)>(.*?)<\/a>/ie', "Convert::html2raw('\\2').'[\\1]'", $data);
/* $data = eregi_replace('<a[^>]*href *= *"([^"]*)">([^<>]*)</a>', '\\2 [\\1]', $data); */
/* $data = eregi_replace('<a[^>]*href *= *([^ ]*)>([^<>]*)</a>', '\\2 [\\1]', $data); */
}
// Replace images with their alt tags
if($config['ReplaceImagesWithAlt']) {
$data = eregi_replace('<img[^>]*alt *= *"([^"]*)"[^>]*>', ' \\1 ', $data);
$data = eregi_replace('<img[^>]*alt *= *([^ ]*)[^>]*>', ' \\1 ', $data);
}
// Compress whitespace
if($config['CompressWhitespace']) {
$data = ereg_replace("[\n\r\t ]+", " ", $data);
}
// Parse newline tags
$data = ereg_replace("[ \n\r\t]*<[Hh][1-6]([^A-Za-z0-9>][^>]*)?> *", "\n\n", $data);
$data = ereg_replace("[ \n\r\t]*<[Pp]([^A-Za-z0-9>][^>]*)?> *", "\n\n", $data);
$data = ereg_replace("[ \n\r\t]*<[Dd][Ii][Vv]([^A-Za-z0-9>][^>]*)?> *", "\n\n", $data);
$data = ereg_replace("\n\n\n+","\n\n", $data);
$data = ereg_replace("<[Bb][Rr]([^A-Za-z0-9>][^>]*)?> *", "\n", $data);
$data = ereg_replace("<[Tt][Rr]([^A-Za-z0-9>][^>]*)?> *", "\n", $data);
$data = ereg_replace("</[Tt][Dd]([^A-Za-z0-9>][^>]*)?> *", " ", $data);
$data = preg_replace('/<\/p>/i', "\n\n", $data );
// Replace HTML entities
//$data = preg_replace("/&#([0-9]+);/e", 'chr(\1)', $data);
//$data = str_replace(array("<",">","&"," "), array("<", ">", "&", " "), $data);
$data = html_entity_decode($data, ENT_COMPAT , 'UTF-8');
// Remove all tags (but optionally keep links)
// strip_tags seemed to be restricting the length of the output
// arbitrarily. This essentially does the same thing.
if(!$preserveLinks && !$config['PreserveLinks']) {
$data = preg_replace('/<\/?[^>]*>/','', $data);
} else {
$data = strip_tags($data, '<a>');
}
return trim(wordwrap(trim($data), $wordWrap));
}
/**
* There are no real specifications on correctly encoding mailto-links,
* but this seems to be compatible with most of the user-agents.
* Does nearly the same as rawurlencode().
* Please only encode the values, not the whole url, e.g.
* "mailto:test@test.com?subject=" . Convert::raw2mailto($subject)
*
* @param $data string
* @return string
* @see http://www.ietf.org/rfc/rfc1738.txt
*/
static function raw2mailto($data) {
return str_ireplace(
array("\n",'?','=',' ','(',')','&','@','"','\'',';'),
array('%0A','%3F','%3D','%20','%28','%29','%26','%40','%22','%27','%3B'),
$data
);
}
}