Permalink
Newer
Older
100644 991 lines (830 sloc) 35.2 KB
1
<?php
2
3
/**
4
* Project: MagpieRSS: a simple RSS integration tool
5
* File: rss_parse.inc - parse an RSS or Atom feed
6
* return as a simple object.
7
*
8
* Handles RSS 0.9x, RSS 2.0, RSS 1.0, Atom 0.3, and Atom 1.0
9
*
10
* The latest version of MagpieRSS can be obtained from:
11
* http://magpierss.sourceforge.net
12
*
13
* For questions, help, comments, discussion, etc., please join the
14
* Magpie mailing list:
15
* magpierss-general@lists.sourceforge.net
16
*
17
* @author Kellan Elliott-McCrea <kellan@protest.net>
19
* @license GPL
20
*
21
*/
22
23
// Feed-type identifiers stored in MagpieRSS::$feed_type.
// Guarded (like MAGPIE_DIR below) so that loading this file a second time,
// e.g. through a different include path, does not raise
// "constant already defined" notices.
if (!defined('RSS')) {
    define('RSS', 'RSS');
}
if (!defined('ATOM')) {
    define('ATOM', 'Atom');
}

// Directory the Magpie sources are loaded from; callers may pre-define it.
if (!defined('MAGPIE_DIR')) {
    define('MAGPIE_DIR', dirname(__FILE__) . DIRECTORY_SEPARATOR);
}

require_once (MAGPIE_DIR . 'rss_utils.inc');
31
32
/**
33
* Hybrid parser, and object, takes RSS as a string and returns a simple object.
34
*
35
* see: rss_fetch.inc for a simpler interface with integrated caching support
36
*
37
*/
38
class MagpieRSS {
    var $parser;                    // XML parser handle; freed again at the end of the constructor

    var $current_item = array();    // item currently being parsed
    var $items = array();           // collection of parsed items
    var $channel = array();         // hash of channel fields
    var $textinput = array();
    var $image = array();
    var $feed_type;                 // RSS or ATOM, decided from the root element
    var $feed_version;
    var $encoding = '';             // output encoding of parsed rss

    var $_source_encoding = '';     // only set if we have to parse xml prolog

    var $ERROR = "";                // last message recorded by error() at error/warning level
    var $WARNING = "";              // last message recorded by error() at notice level

    # define some constants
    var $_ATOM_CONTENT_CONSTRUCTS = array(
        'content', 'summary', 'title',      /* common */
        'info', 'tagline', 'copyright',     /* Atom 0.3 */
        'rights', 'subtitle',               /* Atom 1.0 */
    );

    var $_XHTML_CONTENT_CONSTRUCTS = array('body', 'div');
    var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1');

    # parser variables, useless if you're not a parser, treat as private

    var $stack = array();           # parser stack
    var $inchannel = false;
    var $initem = false;
    var $current_category = 0;      # reset by feed_end_element() whenever an item/entry closes

    var $incontent = array();       # non-empty if in namespaced XML content field
    var $exclude_top = false;       # true when Atom 1.0 type="xhtml"

    var $intextinput = false;
    var $inimage = false;
    var $current_namespace = false;
78
79
/**
80
* Set up XML parser, parse source, and return populated RSS object..
81
*
82
* @param string $source string containing the RSS to be parsed
83
*
84
* NOTE: Probably a good idea to leave the encoding options alone unless
85
* you know what you're doing as PHP's character set support is
86
* a little weird.
87
*
88
* NOTE: A lot of this is unnecessary but harmless with PHP5
89
*
90
*
91
* @param string $output_encoding output the parsed RSS in this character
92
* set defaults to ISO-8859-1 as this is PHP's
93
* default.
94
*
95
* NOTE: might be changed to UTF-8 in future
96
* versions.
97
*
98
* @param string $input_encoding the character set of the incoming RSS source.
99
* Leave blank and Magpie will try to figure it
100
* out.
101
*
102
*
103
* @param bool $detect_encoding if false Magpie won't attempt to detect
104
* source encoding. (caveat emptor)
105
*
106
*/
107
/**
 * Set up the XML parser, parse $source and populate this object.
 *
 * Failures are reported through error() (see $ERROR / $WARNING); nothing is
 * returned or thrown from here.
 *
 * @param string $source          string containing the RSS/Atom document
 * @param string $output_encoding character set the parsed fields are delivered in
 * @param string $input_encoding  character set of $source; leave empty to detect
 * @param bool   $detect_encoding if false no attempt is made to detect the
 *                                source encoding
 */
function __construct ($source, $output_encoding='ISO-8859-1',
                      $input_encoding=null, $detect_encoding=true)
{
    #
    # if PHP xml isn't compiled in, give up
    #
    if (!function_exists('xml_parser_create')) {
        $this->error( "Failed to load PHP's XML Extension. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                      E_USER_ERROR );
        return;     # nothing below can work without the extension
    }

    list($parser, $source) = $this->create_parser($source,
            $output_encoding, $input_encoding, $detect_encoding);

    # PHP < 8 hands back a resource, PHP >= 8 an XMLParser object
    if (!is_resource($parser) and !is_object($parser)) {
        $this->error( "Failed to create an instance of PHP's XML parser. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                      E_USER_ERROR );
        return;
    }

    $this->parser = $parser;

    #
    # pass in parser, and a reference to this object
    # setup handlers
    #
    xml_set_object( $this->parser, $this );
    xml_set_element_handler($this->parser,
            'feed_start_element', 'feed_end_element' );

    xml_set_character_data_handler( $this->parser, 'feed_cdata' );

    $status = xml_parse( $this->parser, $source );     # parse the feed

    if (! $status ) {
        $errorcode = xml_get_error_code( $this->parser );
        if ( $errorcode != XML_ERROR_NONE ) {
            $xml_error = xml_error_string( $errorcode );
            $error_line = xml_get_current_line_number($this->parser);
            $error_col = xml_get_current_column_number($this->parser);
            $errormsg = "$xml_error at line $error_line, column $error_col";

            $this->error( $errormsg );
        }
    }

    xml_parser_free( $this->parser );

    # whatever was parsed before an error is still normalized
    $this->normalize();
}

/**
 * PHP 4 style constructor.
 *
 * Methods named after the class are no longer treated as constructors by
 * PHP 8, so the real work lives in __construct(); this wrapper keeps PHP 4
 * and code calling parent::MagpieRSS() working.
 */
function MagpieRSS ($source, $output_encoding='ISO-8859-1',
                    $input_encoding=null, $detect_encoding=true)
{
    $this->__construct($source, $output_encoding, $input_encoding, $detect_encoding);
}
161
162
/**
 * Start-tag handler registered with xml_set_element_handler().
 *
 * The first element seen decides feed type and version. Afterwards a tag is
 * either re-serialised as text (while inside a content construct), flips one
 * of the channel/item/textinput/image state flags, or is pushed on the
 * element stack with its attributes recorded under "<path>@<attr>" keys.
 *
 * $attrs is taken by value: the parser passes a plain value, and a
 * by-reference parameter makes newer PHP versions warn on every element.
 *
 * @param mixed  $p       parser handle (unused)
 * @param string $element tag name as reported by the parser
 * @param array  $attrs   attribute name => value map
 */
function feed_start_element($p, $element, $attrs) {
    $el = $element = strtolower($element);
    $attrs = array_change_key_case($attrs, CASE_LOWER);

    # check for a namespace, and split if found
    # only if we're not inside a content tag
    if ( empty($this->incontent) ) {
        $ns = false;

        if ( strpos( $element, ':' ) ) {
            list($ns, $el) = explode( ':', $element, 2);
        }

        if ( $ns and $ns != 'rdf' ) {
            $this->current_namespace = $ns;
        }
    }

    #
    # if feed type isn't set, then this is first element of feed
    # identify feed from root element
    #
    if (!isset($this->feed_type) ) {
        # a root element without a version attribute leaves the version null
        $version = isset($attrs['version']) ? $attrs['version'] : null;

        if ( $el == 'rdf' ) {
            $this->feed_type = RSS;
            $this->feed_version = '1.0';
        }
        elseif ( $el == 'rss' ) {
            $this->feed_type = RSS;
            $this->feed_version = $version;
        }
        elseif ( $el == 'feed' ) {
            $this->feed_type = ATOM;

            if (isset($attrs['xmlns']) and $attrs['xmlns'] == 'http://www.w3.org/2005/Atom') { // Atom 1.0
                $this->feed_version = '1.0';
            }
            else {
                # Atom 0.3, probably.
                $this->feed_version = $version;
            }

            $this->inchannel = true;
        }
        return;
    }

    #
    # if we're inside a namespaced content construct, treat tags as text
    #
    if ( !empty($this->incontent) ) {
        if ((count($this->incontent) > 1) or !$this->exclude_top) {
            # if tags are inlined, then flatten
            $attrs_str = join(' ',
                    array_map('map_attrs',
                            array_keys($attrs),
                            array_values($attrs))
            );

            if (strlen($attrs_str)) {
                $attrs_str = ' '.$attrs_str;
            }

            $this->append_content( "<{$element}{$attrs_str}>" );
        }

        array_push($this->incontent, $el);  # stack for parsing content XML
    }
    elseif ( $el == 'channel' ) {
        $this->inchannel = true;
    }
    elseif ($el == 'item' or $el == 'entry' )
    {
        $this->initem = true;
        if ( isset($attrs['rdf:about']) ) {
            $this->current_item['about'] = $attrs['rdf:about'];
        }
    }
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'textinput' )
    {
        # else we're in the default namespace of an RSS feed,
        # record textinput or image fields
        $this->intextinput = true;
    }
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'image' )
    {
        $this->inimage = true;
    }
    else {
        // set stack[0] to current element

        # Atom support many links per containing element.
        # Magpie treats link elements of type rel='alternate'
        # as being equivalent to RSS's simple link element.
        $atom_link = false;

        if ($this->feed_type == ATOM and $el == 'link') {
            $atom_link = true;
            if (isset($attrs['rel']) and $attrs['rel'] != 'alternate') {
                $el = $el . "_" . $attrs['rel'];    // pseudo-element names for Atom link elements
            }
        }
        elseif ( $this->feed_type == ATOM and
                 in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) )
        {
            # handle atom content constructs

            # avoid clashing w/ RSS mod_content
            if ($el == 'content' ) {
                $el = 'atom_content';
            }

            #
            # assume that everything accepts namespaced XML
            # (that will pass through some non-validating feeds;
            # but so what? this isn't a validating parser)
            #
            $this->incontent = array();
            array_push($this->incontent, $el);  // start a stack

            # for type="xhtml" the wrapping element itself is not emitted
            $this->exclude_top = ( isset($attrs['type'])
                    and trim(strtolower($attrs['type'])) == 'xhtml' );
        }
        elseif (($this->current_namespace=='xhtml' or
                 (isset($attrs['xmlns']) and $attrs['xmlns'] == 'http://www.w3.org/1999/xhtml'))
                and in_array($el, $this->_XHTML_CONTENT_CONSTRUCTS) )
        {
            # Handle inline XHTML body elements --CWJ
            $this->current_namespace = 'xhtml';
            $this->incontent = array();
            array_push($this->incontent, $el);  // start a stack
            $this->exclude_top = false;
        }

        array_unshift($this->stack, $el);
        $elpath = join('_', array_reverse($this->stack));

        # repeated elements on the same path get a "#<k>" suffix from the
        # second occurrence on
        $n = $this->element_count($elpath);
        $this->element_count($elpath, $n+1);

        if ($n > 0) {
            array_shift($this->stack);
            array_unshift($this->stack, $el.'#'.($n+1));    # TODO: revisit this
            $elpath = join('_', array_reverse($this->stack));
        }

        # this makes the baby Jesus cry, but we can't do it in normalize()
        # because we've made the element name for Atom links unpredictable
        # by tacking on the relation to the end. -CWJ
        if ($atom_link and isset($attrs['href'])) {
            $this->append($elpath, $attrs['href']);
        }

        # add attributes
        if (count($attrs) > 0) {
            $this->append($elpath.'@', join(',', array_keys($attrs)));
            foreach ($attrs as $attr => $value) {
                $this->append($elpath.'@'.$attr, $value);
            }
        }
    }
}
Nov 26, 2010
346
347
348
/**
 * Character-data handler registered with xml_set_character_data_handler().
 *
 * Text inside a content construct is appended to that construct; any other
 * text is appended to the field named by the current element path.
 *
 * @param mixed  $p    parser handle (unused)
 * @param string $text chunk of character data
 */
function feed_cdata ($p, $text) {
    if ( !empty($this->incontent) ) {
        $this->append_content( $text );
    }
    else {
        $current_el = join('_', array_reverse($this->stack));
        $this->append($current_el, $text);
    }
}
358
359
/**
 * End-tag handler registered with xml_set_element_handler().
 *
 * Inside a content construct the closing tag is re-serialised as text (or
 * ends the construct); otherwise it finishes the current item, clears the
 * matching state flag, or pops the element stack.
 *
 * @param mixed  $p  parser handle (unused)
 * @param string $el tag name as reported by the parser
 */
function feed_end_element ($p, $el) {
    $el = strtolower($el);

    if ( !empty($this->incontent) ) {
        $opener = array_pop($this->incontent);

        # Don't get bamboozled by namespace voodoo
        if (strpos($el, ':')) {
            # explode() replaces split(), which no longer exists in PHP 7+
            list($ns, $closer) = explode(':', $el);
        }
        else {
            $ns = false; $closer = $el;
        }

        # Don't get bamboozled by our munging of <atom:content>, either
        if ($this->feed_type == ATOM and $closer == 'content') {
            $closer = 'atom_content';
        }

        # balance tags properly
        # NOTE: i don't think this is actually necessary
        if ($opener != $closer) {
            array_push($this->incontent, $opener);
            $this->append_content("<$el />");
        }
        elseif ($this->incontent) {
            # in the content construct
            if ((count($this->incontent) > 1) or !$this->exclude_top) {
                $this->append_content("</$el>");
            }
        }
        else {
            # shift the opening of the content construct off the normal stack
            array_shift( $this->stack );
        }
    }
    elseif ( $el == 'item' or $el == 'entry' ) {
        $this->items[] = $this->current_item;
        $this->current_item = array();
        $this->initem = false;
        $this->current_category = 0;
    }
    elseif ($this->feed_type == RSS and
            $this->current_namespace == '' and $el == 'textinput' ) {
        $this->intextinput = false;
    }
    elseif ($this->feed_type == RSS and
            $this->current_namespace == '' and $el == 'image' ) {
        $this->inimage = false;
    }
    elseif ($el == 'channel' or $el == 'feed' ) {
        $this->inchannel = false;
    }
    else {
        array_shift( $this->stack );
    }

    if ( !$this->incontent ) {
        # Don't munge the namespace after finishing with elements in namespaced content constructs -CWJ
        $this->current_namespace = false;
    }
}
424
425
/**
 * Append $str2 to $str1 in place, treating an unset/null $str1 as "".
 *
 * @param string $str1 target, passed by reference (may be an unset array slot)
 * @param string $str2 text to add
 */
function concat (&$str1, $str2="") {
    $existing = isset($str1) ? $str1 : "";
    $str1 = $existing . $str2;
}
431
432
/**
 * Append text to the content construct currently being collected.
 *
 * The field name is the bottom entry of the $incontent stack; the text goes
 * to the current item when inside one, else to the channel, filed under the
 * current namespace when one is active. Outside both, the text is dropped.
 *
 * @param string $text text or re-serialised markup
 */
function append_content($text) {
    if ( $this->initem ) {
        if ($this->current_namespace) {
            $this->concat( $this->current_item[$this->current_namespace][ reset($this->incontent) ], $text );
        }
        else {
            $this->concat( $this->current_item[ reset($this->incontent) ], $text );
        }
    }
    elseif ( $this->inchannel ) {
        if ($this->current_namespace) {
            $this->concat( $this->channel[$this->current_namespace][ reset($this->incontent) ], $text );
        }
        else {
            $this->concat( $this->channel[ reset($this->incontent) ], $text );
        }
    }
}
Nov 26, 2010
450
# smart append - field and namespace aware
451
/**
 * Append $text to field $el of whatever is being parsed right now
 * (item, channel, textinput or image), filed under the current namespace
 * when one is active. Does nothing for an empty $el.
 *
 * Note the two branches test the state flags in a different order; that
 * order is kept as found.
 *
 * @param string $el   field name (element path)
 * @param string $text text to add
 */
function append($el, $text) {
    if (!$el) {
        return;
    }

    if ( $this->current_namespace ) {
        if ( $this->initem ) {
            $this->concat(
                $this->current_item[ $this->current_namespace ][ $el ], $text );
        }
        elseif ($this->inchannel) {
            $this->concat(
                $this->channel[ $this->current_namespace][ $el ], $text );
        }
        elseif ($this->intextinput) {
            $this->concat(
                $this->textinput[ $this->current_namespace][ $el ], $text );
        }
        elseif ($this->inimage) {
            $this->concat(
                $this->image[ $this->current_namespace ][ $el ], $text );
        }
    }
    else {
        if ( $this->initem ) {
            $this->concat( $this->current_item[ $el ], $text );
        }
        elseif ($this->intextinput) {
            $this->concat( $this->textinput[ $el ], $text );
        }
        elseif ($this->inimage) {
            $this->concat( $this->image[ $el ], $text );
        }
        elseif ($this->inchannel) {
            $this->concat( $this->channel[ $el ], $text );
        }
    }
}
Nov 26, 2010
491
# smart count - field and namespace aware
492
/**
 * Read, and optionally first set, the occurrence counter of element path
 * $el. Counters are stored next to the data under the key "<el>#", in the
 * current item or channel and under the current namespace when one is active.
 *
 * @param string   $el  element path
 * @param int|null $set new counter value, or NULL to only read
 * @return int|null counter value, 0 when none is stored or when neither an
 *                  item nor the channel is open; NULL for an empty $el
 */
function element_count ($el, $set = NULL) {
    if (!$el) {
        return;
    }

    # default for the case that neither an item nor the channel is open
    $ret = 0;

    if ( $this->current_namespace ) {
        if ( $this->initem ) {
            if (!is_null($set)) {
                $this->current_item[ $this->current_namespace ][ $el.'#' ] = $set;
            }

            $ret = (isset($this->current_item[ $this->current_namespace ][ $el.'#' ]) ?
                    $this->current_item[ $this->current_namespace ][ $el.'#' ] : 0);
        }
        elseif ($this->inchannel) {
            if (!is_null($set)) {
                $this->channel[ $this->current_namespace ][ $el.'#' ] = $set;
            }

            $ret = (isset($this->channel[ $this->current_namespace][ $el.'#' ]) ?
                    $this->channel[ $this->current_namespace][ $el.'#' ] : 0);
        }
    }
    else {
        if ( $this->initem ) {
            if (!is_null($set)) {
                $this->current_item[ $el.'#' ] = $set;
            }

            $ret = (isset($this->current_item[ $el.'#' ]) ?
                    $this->current_item[ $el.'#' ] : 0);
        }
        elseif ($this->inchannel) {
            if (!is_null($set)) {
                $this->channel[ $el.'#' ] = $set;
            }

            $ret = (isset($this->channel[ $el.'#' ]) ?
                    $this->channel[ $el.'#' ] : 0);
        }
    }

    return $ret;
}
536
537
/**
 * normalize_element() callback: copy the attributes of the $i-th enclosure
 * from $source to $dest, mapping Atom's href and RSS's url onto each other
 * and using that address as the value of the destination element.
 *
 * @param array  $source array read from
 * @param string $from   'link_enclosure' (Atom) or 'enclosure' (RSS)
 * @param array  $dest   array written to
 * @param string $to     destination element name
 * @param int    $i      1-based occurrence index
 */
function normalize_enclosure (&$source, $from, &$dest, $to, $i) {
    $src_key = $this->element_id($from, $i);
    $dst_key = $this->element_id($to, $i);

    # "<key>@" holds the comma separated attribute names of the element
    if (!isset($source["{$src_key}@"])) {
        return;
    }

    $attr_names = explode(',', $source["{$src_key}@"]);
    foreach ($attr_names as $name) {
        $value = $source["{$src_key}@{$name}"];

        $atom_href = ($from == 'link_enclosure' and $name == 'href');
        $rss_url   = ($from == 'enclosure' and $name == 'url');

        if ($atom_href) {
            # from Atom
            $dest["{$dst_key}@url"] = $value;
            $dest["{$dst_key}"] = $value;
        }
        elseif ($rss_url) {
            # from RSS
            $dest["{$dst_key}@href"] = $value;
            $dest["{$dst_key}"] = $value;
        }
        else {
            $dest["{$dst_key}@{$name}"] = $value;
        }
    }
}
559
560
/**
 * normalize_element() callback for Atom person constructs (author,
 * contributor): copies the person's uri/url across under the other Atom
 * version's name and builds an RSS style "Name <email>" string.
 *
 * @param array  $source array holding "<person>_name", "<person>_email", ...
 * @param string $person source element name
 * @param array  $dest   array written to
 * @param string $to     destination element name
 * @param int    $i      1-based occurrence index
 */
function normalize_atom_person (&$source, $person, &$dest, $to, $i) {
    $id = $this->element_id($person, $i);
    $id_to = $this->element_id($to, $i);

    # Atom 0.3 <=> Atom 1.0
    if ($this->feed_version >= 1.0) {
        $used = 'uri'; $norm = 'url';
    }
    else {
        $used = 'url'; $norm = 'uri';
    }

    if (isset($source["{$id}_{$used}"])) {
        $dest["{$id_to}_{$norm}"] = $source["{$id}_{$used}"];
    }

    # Atom to RSS 2.0 and Dublin Core
    # RSS 2.0 person strings should be valid e-mail addresses if possible.
    if (isset($source["{$id}_email"])) {
        $rss_author = $source["{$id}_email"];
    }

    if (isset($source["{$id}_name"])) {
        $rss_author = $source["{$id}_name"] . (isset($rss_author) ? " <$rss_author>" : '');
    }

    # a person with neither name nor e-mail yields no author string at all
    if (isset($rss_author)) {
        $source[$id] = $rss_author;     # goes to top-level author or contributor
        $dest[$id_to] = $rss_author;    # goes to dc:creator or dc:contributor
    }
}
Nov 26, 2010
592
# Normalize Atom 1.0 and RSS 2.0 categories to Dublin Core...
593
/**
 * normalize_element() callback: make the $i-th category look the same in
 * its Atom 1.0 form (term/scheme attributes) and RSS 2.0 form (text/domain
 * attribute), then copy the identifier to the destination (dc:subject).
 *
 * A category without text and without a term attribute is left alone
 * instead of being read as an undefined index.
 *
 * @param array  $source array holding the category fields (modified in place)
 * @param string $from   source element name
 * @param array  $dest   array written to
 * @param string $to     destination element name
 * @param int    $i      1-based occurrence index
 */
function normalize_category (&$source, $from, &$dest, $to, $i) {
    $cat_id = $this->element_id($from, $i);
    $dc_id = $this->element_id($to, $i);

    # first normalize category elements: Atom 1.0 <=> RSS 2.0
    if ( isset($source["{$cat_id}@term"]) ) {
        # category identifier
        $source[$cat_id] = $source["{$cat_id}@term"];
    }
    elseif ( $this->feed_type == RSS and isset($source[$cat_id]) ) {
        $source["{$cat_id}@term"] = $source[$cat_id];
    }

    if ( isset($source["{$cat_id}@scheme"]) ) { // URI to taxonomy
        $source["{$cat_id}@domain"] = $source["{$cat_id}@scheme"];
    }
    elseif ( isset($source["{$cat_id}@domain"]) ) {
        $source["{$cat_id}@scheme"] = $source["{$cat_id}@domain"];
    }

    // Now put the identifier into dc:subject
    if ( isset($source[$cat_id]) ) {
        $dest[$dc_id] = $source[$cat_id];
    }
}
Nov 26, 2010
616
# ... or vice versa
617
/**
 * normalize_element() callback: copy the $i-th dc:subject into the category
 * element, both as its text (RSS 2.0) and as its term attribute (Atom 1.0).
 *
 * @param array  $source array holding the subject
 * @param string $from   source element name
 * @param array  $dest   array written to
 * @param string $to     destination element name
 * @param int    $i      1-based occurrence index
 */
function normalize_dc_subject (&$source, $from, &$dest, $to, $i) {
    $subject_key = $this->element_id($from, $i);
    $category_key = $this->element_id($to, $i);

    # plain key = RSS 2.0 text, "@term" = Atom 1.0 attribute
    foreach (array($category_key, "{$category_key}@term") as $target) {
        $dest[$target] = $source[$subject_key];
    }
}
624
Nov 26, 2010
625
# simplify the logic for normalize(). Makes sure that count of elements and
626
# each of multiple elements is normalized properly. If you need to mess
627
# with things like attributes or change formats or the like, pass it a
628
# callback to handle each element.
629
630
/**
 * Copy element $from of $source to element $to of $dest, including the
 * occurrence counter and every numbered occurrence.
 *
 * @param array       $source array read from
 * @param string      $from   source element name
 * @param array       $dest   array written to
 * @param string      $to     destination element name
 * @param string|null $via    name of a method of this object called as
 *                            ($source, $from, $dest, $to, $i) for each
 *                            occurrence instead of the plain copy
 */
function normalize_element (&$source, $from, &$dest, $to, $via = NULL) {
    $counted = isset($source["{$from}#"]);

    # nothing to do when the element does not occur at all
    if (!$counted and !isset($source[$from])) {
        return;
    }

    $total = 1;
    if ($counted) {
        $total = $source["{$from}#"];
        $dest["{$to}#"] = $source["{$from}#"];
    }

    for ($idx = 1; $idx <= $total; $idx++) {
        if (isset($via)) {
            // custom callback for ninja attacks
            $this->{$via}($source, $from, $dest, $to, $idx);
            continue;
        }

        // just make it the same
        $src_key = $this->element_id($from, $idx);
        $dst_key = $this->element_id($to, $idx);
        $dest[$dst_key] = $source[$src_key];
    }
}
652
653
/**
 * Cross-populate the parsed data so that Atom 0.3, Atom 1.0 and RSS feeds
 * can be read through the same field names, and derive a numeric
 * 'date_timestamp' for every item that carries a usable date.
 */
function normalize () {
    // if atom populate rss fields and normalize 0.3 and 1.0 feeds
    if ( $this->is_atom() ) {
        // Atom 1.0 elements <=> Atom 0.3 elements (Thanks, o brilliant wordsmiths of the Atom 1.0 standard!)
        if ($this->feed_version < 1.0) {
            $this->normalize_element($this->channel, 'tagline', $this->channel, 'subtitle');
            $this->normalize_element($this->channel, 'copyright', $this->channel, 'rights');
            $this->normalize_element($this->channel, 'modified', $this->channel, 'updated');
        }
        else {
            $this->normalize_element($this->channel, 'subtitle', $this->channel, 'tagline');
            $this->normalize_element($this->channel, 'rights', $this->channel, 'copyright');
            $this->normalize_element($this->channel, 'updated', $this->channel, 'modified');
        }

        $this->normalize_element($this->channel, 'author', $this->channel['dc'], 'creator', 'normalize_atom_person');
        $this->normalize_element($this->channel, 'contributor', $this->channel['dc'], 'contributor', 'normalize_atom_person');

        // Atom elements to RSS elements
        $this->normalize_element($this->channel, 'subtitle', $this->channel, 'description');

        if ( isset($this->channel['logo']) ) {
            $this->normalize_element($this->channel, 'logo', $this->image, 'url');
            $this->normalize_element($this->channel, 'link', $this->image, 'link');
            $this->normalize_element($this->channel, 'title', $this->image, 'title');
        }

        for ( $i = 0; $i < count($this->items); $i++) {
            $item = $this->items[$i];

            // Atom 1.0 elements <=> Atom 0.3 elements
            if ($this->feed_version < 1.0) {
                $this->normalize_element($item, 'modified', $item, 'updated');
                $this->normalize_element($item, 'issued', $item, 'published');
            }
            else {
                $this->normalize_element($item, 'updated', $item, 'modified');
                $this->normalize_element($item, 'published', $item, 'issued');
            }

            // "If an atom:entry element does not contain
            // atom:author elements, then the atom:author elements
            // of the contained atom:source element are considered
            // to apply. In an Atom Feed Document, the atom:author
            // elements of the containing atom:feed element are
            // considered to apply to the entry if there are no
            // atom:author elements in the locations described
            // above." <http://atompub.org/2005/08/17/draft-ietf-atompub-format-11.html#rfc.section.4.2.1>
            if (!isset($item["author#"])) {
                $source = null;
                $author = null;

                if (isset($item["source_author#"])) { // from aggregation source
                    $source = $item;
                    $author = "source_author";
                }
                elseif (isset($this->channel["author#"])) { // from containing feed
                    $source = $this->channel;
                    $author = "author";
                }

                // only inherit when there actually is an author to inherit
                if (!is_null($source)) {
                    $item["author#"] = $source["{$author}#"];

                    for ($au = 1; $au <= $item["author#"]; $au++) {
                        $id_to = $this->element_id('author', $au);
                        $id_from = $this->element_id($author, $au);

                        if (isset($source[$id_from])) {
                            $item[$id_to] = $source[$id_from];
                        }

                        foreach (array('name', 'email', 'uri', 'url') as $what) {
                            if (isset($source["{$id_from}_{$what}"])) {
                                $item["{$id_to}_{$what}"] = $source["{$id_from}_{$what}"];
                            }
                        }
                    }
                }
            }

            // Atom elements to RSS elements
            $this->normalize_element($item, 'author', $item['dc'], 'creator', 'normalize_atom_person');
            $this->normalize_element($item, 'contributor', $item['dc'], 'contributor', 'normalize_atom_person');
            $this->normalize_element($item, 'summary', $item, 'description');
            $this->normalize_element($item, 'atom_content', $item['content'], 'encoded');
            $this->normalize_element($item, 'link_enclosure', $item, 'enclosure', 'normalize_enclosure');

            // Categories
            if ( isset($item['category#']) ) {
                # Atom 1.0 categories to dc:subject and RSS 2.0 categories
                $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
            }
            elseif ( isset($item['dc']['subject#']) ) {
                # dc:subject to Atom 1.0 and RSS 2.0 categories
                $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
            }

            // Normalized item timestamp: publication date first, else last update
            $atom_date = false;
            if ( isset($item['published']) ) {
                $atom_date = $item['published'];
            }
            elseif ( isset($item['updated']) ) {
                $atom_date = $item['updated'];
            }

            if ( $atom_date ) {
                $epoch = @parse_w3cdtf($atom_date);

                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
    elseif ( $this->is_rss() ) {
        // RSS elements to Atom elements
        $this->normalize_element($this->channel, 'description', $this->channel, 'tagline'); // Atom 0.3
        $this->normalize_element($this->channel, 'description', $this->channel, 'subtitle'); // Atom 1.0 (yay wordsmithing!)
        $this->normalize_element($this->image, 'url', $this->channel, 'logo');

        for ( $i = 0; $i < count($this->items); $i++) {
            $item = $this->items[$i];

            // RSS elements to Atom elements
            $this->normalize_element($item, 'description', $item, 'summary');
            $this->normalize_element($item['content'], 'encoded', $item, 'atom_content');
            $this->normalize_element($item, 'enclosure', $item, 'link_enclosure', 'normalize_enclosure');

            // Categories
            if ( isset($item['category#']) ) {
                # RSS 2.0 categories to dc:subject and Atom 1.0 categories
                $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
            }
            elseif ( isset($item['dc']['subject#']) ) {
                # dc:subject to Atom 1.0 and RSS 2.0 categories
                $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
            }

            // Normalized item timestamp
            if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
                $epoch = @parse_w3cdtf($item['dc']['date']);

                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }
            elseif ( isset($item['pubdate']) ) {
                $epoch = @strtotime($item['pubdate']);
                if ($epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
}
800
801
802
/**
 * @return string|false the feed version when the parsed feed is RSS,
 *                      false otherwise
 */
function is_rss () {
    return ( $this->feed_type == RSS ) ? $this->feed_version : false;
}
810
811
/**
 * @return string|false the feed version when the parsed feed is Atom,
 *                      false otherwise
 */
function is_atom() {
    return ( $this->feed_type == ATOM ) ? $this->feed_version : false;
}
819
Nov 26, 2010
820
#
821
# return XML parser, and possibly re-encoded source
822
#
823
824
/**
 * Create the XML parser for this PHP version and set its output encoding.
 *
 * @param string $source  feed document
 * @param string $out_enc target encoding; empty leaves the parser default
 * @param string $in_enc  source encoding, empty if unknown
 * @param bool   $detect  whether the source encoding may be detected
 * @return array array($parser, $source); $source may have been re-encoded
 *               on the PHP 4 path
 */
function create_parser($source, $out_enc, $in_enc, $detect) {
    # Compare whole version numbers: looking only at the first digit sent
    # every release after PHP 5 down the PHP 4 path.
    if ( version_compare(phpversion(), '5.0.0', '>=') ) {
        $parser = $this->php5_create_parser($in_enc, $detect);
    }
    else {
        list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
    }

    if ($out_enc) {
        $this->encoding = $out_enc;
        # parser creation may have failed; the caller reports that
        if ($parser) {
            xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
        }
    }

    return array($parser, $source);
}
838
Nov 26, 2010
839
#
840
# Instantiate an XML parser under PHP5
841
#
842
# PHP5 will do a fine job of detecting input encoding
843
# if passed an empty string as the encoding.
844
#
845
# All hail libxml2!
846
#
847
848
/**
 * Create an XML parser on PHP 5 and later.
 *
 * @param string $in_enc source encoding, used only when $detect is off
 * @param bool   $detect when true the parser is created with an empty
 *                       encoding so that it picks the encoding itself
 * @return mixed whatever xml_parser_create() returns
 */
function php5_create_parser($in_enc, $detect) {
    # by default php5 does a fine job of detecting input encodings
    $forced = (!$detect && $in_enc);

    return xml_parser_create($forced ? $in_enc : '');
}
858
Nov 26, 2010
859
#
860
# Instantiate an XML parser under PHP4
861
#
862
# Unfortunately PHP4's support for character encodings
863
# and especially XML and character encodings sucks. As
864
# long as the documents you parse only contain characters
865
# from the ISO-8859-1 character set (a superset of ASCII,
866
# and a subset of UTF-8) you're fine. However once you
867
# step out of that comfy little world things get mad, bad,
868
# and dangerous to know.
869
#
870
# The following code is based on SJM's work with FoF
871
# @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
872
#
873
874
/**
 * Create an XML parser on PHP 4, re-encoding the document to UTF-8 first
 * when its encoding is not one of $_KNOWN_ENCODINGS.
 *
 * @param string $source feed document
 * @param string $in_enc source encoding, empty to read it from the XML prolog
 * @param bool   $detect when false $in_enc is handed to the parser untouched
 * @return array array($parser, $source), $source possibly converted to UTF-8
 */
function php4_create_parser($source, $in_enc, $detect) {
    if ( !$detect ) {
        return array(xml_parser_create($in_enc), $source);
    }

    if (!$in_enc) {
        # The "?" after "<" is escaped: unescaped it only made the "<"
        # optional instead of matching the start of the XML declaration.
        if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) {
            $in_enc = strtoupper($m[1]);
            $this->_source_encoding = $in_enc;
            # name this method used to write to; kept for existing readers
            $this->source_encoding = $in_enc;
        }
        else {
            $in_enc = 'UTF-8';
        }
    }

    if ($this->known_encoding($in_enc)) {
        return array(xml_parser_create($in_enc), $source);
    }

    # the detected encoding is not one of the simple encodings PHP knows

    # attempt to use the iconv extension to
    # cast the XML to a known encoding
    # @see http://php.net/iconv
    if (function_exists('iconv')) {
        $encoded_source = iconv($in_enc,'UTF-8', $source);
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    # iconv didn't work, try mb_convert_encoding
    # @see http://php.net/mbstring
    if ( function_exists('mb_convert_encoding')) {
        $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    #
    # else: parse the bytes as they are and warn about it
    #
    $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
                 "You may see strange artifacts, and mangled characters.",
                 E_USER_NOTICE);

    return array(xml_parser_create(), $source);
}
926
927
/**
 * @param string $enc encoding name, any case
 * @return string|false the upper-cased name when it is listed in
 *                      $_KNOWN_ENCODINGS, false otherwise
 */
function known_encoding($enc) {
    $candidate = strtoupper($enc);

    return in_array($candidate, $this->_KNOWN_ENCODINGS) ? $candidate : false;
}
936
937
/**
 * Report a problem and remember it on the object.
 *
 * With MAGPIE_DEBUG defined and true the message is raised through
 * trigger_error(); otherwise it goes to error_log(). Notice-level messages
 * are stored in $WARNING, everything else in $ERROR.
 *
 * @param string $errormsg message text
 * @param int    $lvl      one of the E_USER_* levels
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    // append PHP's error message if track_errors enabled
    if ( isset($php_errormsg) ) {
        $errormsg .= " ($php_errormsg)";
    }

    // MAGPIE_DEBUG is not defined in this file, so check before reading it
    if ( defined('MAGPIE_DEBUG') and MAGPIE_DEBUG ) {
        trigger_error( $errormsg, $lvl);
    }
    else {
        error_log( $errormsg, 0);
    }

    $notices = E_USER_NOTICE|E_NOTICE;
    if ( $lvl&$notices ) {
        $this->WARNING = $errormsg;
    }
    else {
        $this->ERROR = $errormsg;
    }
}
956
957
// magic ID function for multiple elements.
958
// can be called as static MagpieRSS::element_id()
959
/**
 * Key of the $counter-th occurrence of element $el: the bare name for the
 * first occurrence, "<name>#<counter>" from the second on.
 *
 * @param string $el      element name
 * @param int    $counter 1-based occurrence index
 * @return string
 */
function element_id ($el, $counter) {
    if ($counter > 1) {
        return $el . '#' . $counter;
    }

    return $el . '';
}
962
} // end class RSS
963
964
/**
 * Serialise one attribute as name="value" for markup that is flattened
 * back into text.
 *
 * The XML parser hands over attribute values with entities already decoded,
 * so the markup characters are escaped again; otherwise a quote or ampersand
 * in the value would break the attribute or end it early. A plain byte-wise
 * replacement is used so that values in any ASCII-compatible output
 * encoding pass through untouched.
 *
 * @param string $k attribute name
 * @param string $v attribute value
 * @return string
 */
function map_attrs($k, $v) {
    $escaped = str_replace(
        array('&', '<', '>', '"'),
        array('&amp;', '&lt;', '&gt;', '&quot;'),
        $v
    );

    return "$k=\"$escaped\"";
}
967
968
// patch to support medieval versions of PHP4.1.x,
969
// courtesy, Ryan Currie, ryan@digibliss.com
970
971
if (!function_exists('array_change_key_case')) {
    define("CASE_UPPER",1);
    define("CASE_LOWER",0);

    /**
     * Stand-in for the built-in of the same name on PHP versions without it.
     *
     * @param array $array input array
     * @param int   $case  CASE_UPPER for upper-case keys; any other value
     *                     lower-cases them
     * @return array copy of $array with re-cased keys (empty for empty input)
     */
    function array_change_key_case($array,$case=CASE_LOWER) {
        $cmd = ($case == CASE_UPPER) ? 'strtoupper' : 'strtolower';

        # start from an empty array so an empty input does not return an
        # undefined variable
        $output = array();
        foreach ($array as $key => $value) {
            $output[$cmd($key)] = $value;
        }

        return $output;
    }
}