Permalink
Browse files

Update create.php to properly cope with the format of the file (yay for random amounts of whitespace), and regen. Properly fixes #38. It appears that strnatcasecmp behaves differently on this computer, so some of the IBM0* character encodings are reordered.
  • Loading branch information...
1 parent 9a75344 commit 930491cb5e436fcf314d4f2721207d5e43bb3537 Geoffrey Sneddon committed with rmccue Feb 18, 2010
Showing with 97 additions and 106 deletions.
  1. +15 −21 create.php
  2. +82 −85 simplepie.inc
View
@@ -22,43 +22,38 @@ function build_character_set_list()
foreach ($data as $line)
{
// New character set
- if (substr($line, 0, 5) === 'Name:')
+ if (preg_match('/^Name:\s+(\S+)/', $line, $match))
{
// If we already have one, push it on to the array
if (isset($aliases))
{
- for ($i = 0, $count = count($aliases); $i < $count; $i++)
+ foreach ($aliases as &$alias)
{
- $aliases[$i] = normalize_character_set($aliases[$i]);
+ $alias = normalize_character_set($alias);
}
$charsets[$preferred] = array_unique($aliases);
natsort($charsets[$preferred]);
}
- $start = 5 + strspn($line, "\x09\x0A\x0B\xC\x0D\x20", 5);
- $chars = strcspn($line, "\x09\x0A\x0B\xC\x0D\x20", $start);
- $aliases = array(substr($line, $start, $chars));
- $preferred = end($aliases);
+ $aliases = array($match[1]);
+ $preferred = $match[1];
}
// Another alias
- elseif(substr($line, 0, 6) === 'Alias:')
+ elseif (preg_match('/^Alias:\s+(\S+)(\s+\(preferred MIME name\))?\s*$/', $line, $match))
{
- $start = 7 + strspn($line, "\x09\x0A\x0B\xC\x0D\x20", 7);
- $chars = strcspn($line, "\x09\x0A\x0B\xC\x0D\x20", $start);
- $aliases[] = substr($line, $start, $chars);
-
- if (end($aliases) === 'None')
- {
- array_pop($aliases);
- }
- elseif (substr($line, 7 + $chars + 1, 21) === '(preferred MIME name)')
+ if ($match[1] !== 'None')
{
- $preferred = end($aliases);
+ $aliases[] = $match[1];
+ if ($match[2])
+ {
+ $preferred = $match[1];
+ }
}
}
}
// Compatibility replacements
+ // From http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#misinterpreted-for-compatibility
$compat = array(
'EUC-KR' => 'windows-949',
'GB2312' => 'GBK',
@@ -67,10 +62,9 @@ function build_character_set_list()
'ISO-8859-9' => 'windows-1254',
'ISO-8859-11' => 'windows-874',
'KS_C_5601-1987' => 'windows-949',
+ 'Shift_JIS' => 'Windows-31J',
'TIS-620' => 'windows-874',
//'US-ASCII' => 'windows-1252',
- 'x-x-big5' => 'Big5',
- 'Extended_UNIX_Code_Packed_Format_for_Japanese' => 'EUC-JP',
);
foreach ($compat as $real => $replace)
@@ -176,4 +170,4 @@ public static function encoding(\$charset)
echo build_function();
}
-?>
+?>
View
@@ -9477,7 +9477,7 @@ class SimplePie_Misc
}
/**
- * Standardise an encoding name
+ * Normalize an encoding name
*
* This is automatically generated by create.php
*
@@ -9520,7 +9520,6 @@ class SimplePie_Misc
case 'big5':
case 'csbig5':
- case 'xxbig5':
return 'Big5';
case 'big5hkscs':
@@ -9676,15 +9675,15 @@ class SimplePie_Misc
case 'isoir85':
return 'ES2';
- case 'cseucfixwidjapanese':
- case 'extendedunixcodefixedwidthforjapanese':
- return 'Extended_UNIX_Code_Fixed_Width_for_Japanese';
-
case 'cseucpkdfmtjapanese':
case 'eucjp':
case 'extendedunixcodepackedformatforjapanese':
return 'EUC-JP';
+ case 'cseucfixwidjapanese':
+ case 'extendedunixcodefixedwidthforjapanese':
+ return 'Extended_UNIX_Code_Fixed_Width_for_Japanese';
+
case 'gb18030':
return 'GB18030';
@@ -9762,80 +9761,6 @@ class SimplePie_Misc
case 'ibmthai':
return 'IBM-Thai';
- case 'ccsid858':
- case 'cp858':
- case 'ibm858':
- case 'pcmultilingual850euro':
- return 'IBM00858';
-
- case 'ccsid924':
- case 'cp924':
- case 'ebcdiclatin9euro':
- case 'ibm924':
- return 'IBM00924';
-
- case 'ccsid1140':
- case 'cp1140':
- case 'ebcdicus37euro':
- case 'ibm1140':
- return 'IBM01140';
-
- case 'ccsid1141':
- case 'cp1141':
- case 'ebcdicde273euro':
- case 'ibm1141':
- return 'IBM01141';
-
- case 'ccsid1142':
- case 'cp1142':
- case 'ebcdicdk277euro':
- case 'ebcdicno277euro':
- case 'ibm1142':
- return 'IBM01142';
-
- case 'ccsid1143':
- case 'cp1143':
- case 'ebcdicfi278euro':
- case 'ebcdicse278euro':
- case 'ibm1143':
- return 'IBM01143';
-
- case 'ccsid1144':
- case 'cp1144':
- case 'ebcdicit280euro':
- case 'ibm1144':
- return 'IBM01144';
-
- case 'ccsid1145':
- case 'cp1145':
- case 'ebcdices284euro':
- case 'ibm1145':
- return 'IBM01145';
-
- case 'ccsid1146':
- case 'cp1146':
- case 'ebcdicgb285euro':
- case 'ibm1146':
- return 'IBM01146';
-
- case 'ccsid1147':
- case 'cp1147':
- case 'ebcdicfr297euro':
- case 'ibm1147':
- return 'IBM01147';
-
- case 'ccsid1148':
- case 'cp1148':
- case 'ebcdicinternational500euro':
- case 'ibm1148':
- return 'IBM01148';
-
- case 'ccsid1149':
- case 'cp1149':
- case 'ebcdicis871euro':
- case 'ibm1149':
- return 'IBM01149';
-
case 'cp37':
case 'csibm37':
case 'ebcdiccpca':
@@ -9983,6 +9908,12 @@ class SimplePie_Misc
case 'ibm857':
return 'IBM857';
+ case 'ccsid858':
+ case 'cp858':
+ case 'ibm858':
+ case 'pcmultilingual850euro':
+ return 'IBM00858';
+
case '860':
case 'cp860':
case 'csibm860':
@@ -10085,6 +10016,12 @@ class SimplePie_Misc
case 'ibm918':
return 'IBM918';
+ case 'ccsid924':
+ case 'cp924':
+ case 'ebcdiclatin9euro':
+ case 'ibm924':
+ return 'IBM00924';
+
case 'cp1026':
case 'csibm1026':
case 'ibm1026':
@@ -10093,6 +10030,68 @@ class SimplePie_Misc
case 'ibm1047':
return 'IBM1047';
+ case 'ccsid1140':
+ case 'cp1140':
+ case 'ebcdicus37euro':
+ case 'ibm1140':
+ return 'IBM01140';
+
+ case 'ccsid1141':
+ case 'cp1141':
+ case 'ebcdicde273euro':
+ case 'ibm1141':
+ return 'IBM01141';
+
+ case 'ccsid1142':
+ case 'cp1142':
+ case 'ebcdicdk277euro':
+ case 'ebcdicno277euro':
+ case 'ibm1142':
+ return 'IBM01142';
+
+ case 'ccsid1143':
+ case 'cp1143':
+ case 'ebcdicfi278euro':
+ case 'ebcdicse278euro':
+ case 'ibm1143':
+ return 'IBM01143';
+
+ case 'ccsid1144':
+ case 'cp1144':
+ case 'ebcdicit280euro':
+ case 'ibm1144':
+ return 'IBM01144';
+
+ case 'ccsid1145':
+ case 'cp1145':
+ case 'ebcdices284euro':
+ case 'ibm1145':
+ return 'IBM01145';
+
+ case 'ccsid1146':
+ case 'cp1146':
+ case 'ebcdicgb285euro':
+ case 'ibm1146':
+ return 'IBM01146';
+
+ case 'ccsid1147':
+ case 'cp1147':
+ case 'ebcdicfr297euro':
+ case 'ibm1147':
+ return 'IBM01147';
+
+ case 'ccsid1148':
+ case 'cp1148':
+ case 'ebcdicinternational500euro':
+ case 'ibm1148':
+ return 'IBM01148';
+
+ case 'ccsid1149':
+ case 'cp1149':
+ case 'ebcdicis871euro':
+ case 'ibm1149':
+ return 'IBM01149';
+
case 'csiso143iecp271':
case 'iecp271':
case 'isoir143':
@@ -10635,11 +10634,6 @@ class SimplePie_Misc
case 'sen850200c':
return 'SEN_850200_C';
- case 'csshiftjis':
- case 'mskanji':
- case 'shiftjis':
- return 'Shift_JIS';
-
case 'csiso102t617bit':
case 'isoir102':
case 't617bit':
@@ -10738,7 +10732,10 @@ class SimplePie_Misc
case 'viscii':
return 'VISCII';
+ case 'csshiftjis':
case 'cswindows31j':
+ case 'mskanji':
+ case 'shiftjis':
case 'windows31j':
return 'Windows-31J';

0 comments on commit 930491c

Please sign in to comment.