Skip to content

Commit

Permalink
Update create.php to properly cope with the format of the file (yay for random amounts of whitespace), and regen.
Browse files Browse the repository at this point in the history
Properly fixes #38. It appears that strnatcasecmp behaves differently on this computer, so some of the IBM0* character encodings are reordered.
  • Loading branch information
Geoffrey Sneddon authored and rmccue committed Apr 23, 2010
1 parent 9a75344 commit 930491c
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 106 deletions.
36 changes: 15 additions & 21 deletions create.php
Expand Up @@ -22,43 +22,38 @@ function build_character_set_list()
foreach ($data as $line) foreach ($data as $line)
{ {
// New character set // New character set
if (substr($line, 0, 5) === 'Name:') if (preg_match('/^Name:\s+(\S+)/', $line, $match))
{ {
// If we already have one, push it on to the array // If we already have one, push it on to the array
if (isset($aliases)) if (isset($aliases))
{ {
for ($i = 0, $count = count($aliases); $i < $count; $i++) foreach ($aliases as &$alias)
{ {
$aliases[$i] = normalize_character_set($aliases[$i]); $alias = normalize_character_set($alias);
} }
$charsets[$preferred] = array_unique($aliases); $charsets[$preferred] = array_unique($aliases);
natsort($charsets[$preferred]); natsort($charsets[$preferred]);
} }


$start = 5 + strspn($line, "\x09\x0A\x0B\xC\x0D\x20", 5); $aliases = array($match[1]);
$chars = strcspn($line, "\x09\x0A\x0B\xC\x0D\x20", $start); $preferred = $match[1];
$aliases = array(substr($line, $start, $chars));
$preferred = end($aliases);
} }
// Another alias // Another alias
elseif(substr($line, 0, 6) === 'Alias:') elseif (preg_match('/^Alias:\s+(\S+)(\s+\(preferred MIME name\))?\s*$/', $line, $match))
{ {
$start = 7 + strspn($line, "\x09\x0A\x0B\xC\x0D\x20", 7); if ($match[1] !== 'None')
$chars = strcspn($line, "\x09\x0A\x0B\xC\x0D\x20", $start);
$aliases[] = substr($line, $start, $chars);

if (end($aliases) === 'None')
{
array_pop($aliases);
}
elseif (substr($line, 7 + $chars + 1, 21) === '(preferred MIME name)')
{ {
$preferred = end($aliases); $aliases[] = $match[1];
if ($match[2])
{
$preferred = $match[1];
}
} }
} }
} }


// Compatibility replacements // Compatibility replacements
// From http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#misinterpreted-for-compatibility
$compat = array( $compat = array(
'EUC-KR' => 'windows-949', 'EUC-KR' => 'windows-949',
'GB2312' => 'GBK', 'GB2312' => 'GBK',
Expand All @@ -67,10 +62,9 @@ function build_character_set_list()
'ISO-8859-9' => 'windows-1254', 'ISO-8859-9' => 'windows-1254',
'ISO-8859-11' => 'windows-874', 'ISO-8859-11' => 'windows-874',
'KS_C_5601-1987' => 'windows-949', 'KS_C_5601-1987' => 'windows-949',
'Shift_JIS' => 'Windows-31J',
'TIS-620' => 'windows-874', 'TIS-620' => 'windows-874',
//'US-ASCII' => 'windows-1252', //'US-ASCII' => 'windows-1252',
'x-x-big5' => 'Big5',
'Extended_UNIX_Code_Packed_Format_for_Japanese' => 'EUC-JP',
); );


foreach ($compat as $real => $replace) foreach ($compat as $real => $replace)
Expand Down Expand Up @@ -176,4 +170,4 @@ public static function encoding(\$charset)
echo build_function(); echo build_function();
} }


?> ?>
167 changes: 82 additions & 85 deletions simplepie.inc
Expand Up @@ -9477,7 +9477,7 @@ class SimplePie_Misc
} }


/** /**
* Standardise an encoding name * Normalize an encoding name
* *
* This is automatically generated by create.php * This is automatically generated by create.php
* *
Expand Down Expand Up @@ -9520,7 +9520,6 @@ class SimplePie_Misc


case 'big5': case 'big5':
case 'csbig5': case 'csbig5':
case 'xxbig5':
return 'Big5'; return 'Big5';


case 'big5hkscs': case 'big5hkscs':
Expand Down Expand Up @@ -9676,15 +9675,15 @@ class SimplePie_Misc
case 'isoir85': case 'isoir85':
return 'ES2'; return 'ES2';


case 'cseucfixwidjapanese':
case 'extendedunixcodefixedwidthforjapanese':
return 'Extended_UNIX_Code_Fixed_Width_for_Japanese';

case 'cseucpkdfmtjapanese': case 'cseucpkdfmtjapanese':
case 'eucjp': case 'eucjp':
case 'extendedunixcodepackedformatforjapanese': case 'extendedunixcodepackedformatforjapanese':
return 'EUC-JP'; return 'EUC-JP';


case 'cseucfixwidjapanese':
case 'extendedunixcodefixedwidthforjapanese':
return 'Extended_UNIX_Code_Fixed_Width_for_Japanese';

case 'gb18030': case 'gb18030':
return 'GB18030'; return 'GB18030';


Expand Down Expand Up @@ -9762,80 +9761,6 @@ class SimplePie_Misc
case 'ibmthai': case 'ibmthai':
return 'IBM-Thai'; return 'IBM-Thai';


case 'ccsid858':
case 'cp858':
case 'ibm858':
case 'pcmultilingual850euro':
return 'IBM00858';

case 'ccsid924':
case 'cp924':
case 'ebcdiclatin9euro':
case 'ibm924':
return 'IBM00924';

case 'ccsid1140':
case 'cp1140':
case 'ebcdicus37euro':
case 'ibm1140':
return 'IBM01140';

case 'ccsid1141':
case 'cp1141':
case 'ebcdicde273euro':
case 'ibm1141':
return 'IBM01141';

case 'ccsid1142':
case 'cp1142':
case 'ebcdicdk277euro':
case 'ebcdicno277euro':
case 'ibm1142':
return 'IBM01142';

case 'ccsid1143':
case 'cp1143':
case 'ebcdicfi278euro':
case 'ebcdicse278euro':
case 'ibm1143':
return 'IBM01143';

case 'ccsid1144':
case 'cp1144':
case 'ebcdicit280euro':
case 'ibm1144':
return 'IBM01144';

case 'ccsid1145':
case 'cp1145':
case 'ebcdices284euro':
case 'ibm1145':
return 'IBM01145';

case 'ccsid1146':
case 'cp1146':
case 'ebcdicgb285euro':
case 'ibm1146':
return 'IBM01146';

case 'ccsid1147':
case 'cp1147':
case 'ebcdicfr297euro':
case 'ibm1147':
return 'IBM01147';

case 'ccsid1148':
case 'cp1148':
case 'ebcdicinternational500euro':
case 'ibm1148':
return 'IBM01148';

case 'ccsid1149':
case 'cp1149':
case 'ebcdicis871euro':
case 'ibm1149':
return 'IBM01149';

case 'cp37': case 'cp37':
case 'csibm37': case 'csibm37':
case 'ebcdiccpca': case 'ebcdiccpca':
Expand Down Expand Up @@ -9983,6 +9908,12 @@ class SimplePie_Misc
case 'ibm857': case 'ibm857':
return 'IBM857'; return 'IBM857';


case 'ccsid858':
case 'cp858':
case 'ibm858':
case 'pcmultilingual850euro':
return 'IBM00858';

case '860': case '860':
case 'cp860': case 'cp860':
case 'csibm860': case 'csibm860':
Expand Down Expand Up @@ -10085,6 +10016,12 @@ class SimplePie_Misc
case 'ibm918': case 'ibm918':
return 'IBM918'; return 'IBM918';


case 'ccsid924':
case 'cp924':
case 'ebcdiclatin9euro':
case 'ibm924':
return 'IBM00924';

case 'cp1026': case 'cp1026':
case 'csibm1026': case 'csibm1026':
case 'ibm1026': case 'ibm1026':
Expand All @@ -10093,6 +10030,68 @@ class SimplePie_Misc
case 'ibm1047': case 'ibm1047':
return 'IBM1047'; return 'IBM1047';


case 'ccsid1140':
case 'cp1140':
case 'ebcdicus37euro':
case 'ibm1140':
return 'IBM01140';

case 'ccsid1141':
case 'cp1141':
case 'ebcdicde273euro':
case 'ibm1141':
return 'IBM01141';

case 'ccsid1142':
case 'cp1142':
case 'ebcdicdk277euro':
case 'ebcdicno277euro':
case 'ibm1142':
return 'IBM01142';

case 'ccsid1143':
case 'cp1143':
case 'ebcdicfi278euro':
case 'ebcdicse278euro':
case 'ibm1143':
return 'IBM01143';

case 'ccsid1144':
case 'cp1144':
case 'ebcdicit280euro':
case 'ibm1144':
return 'IBM01144';

case 'ccsid1145':
case 'cp1145':
case 'ebcdices284euro':
case 'ibm1145':
return 'IBM01145';

case 'ccsid1146':
case 'cp1146':
case 'ebcdicgb285euro':
case 'ibm1146':
return 'IBM01146';

case 'ccsid1147':
case 'cp1147':
case 'ebcdicfr297euro':
case 'ibm1147':
return 'IBM01147';

case 'ccsid1148':
case 'cp1148':
case 'ebcdicinternational500euro':
case 'ibm1148':
return 'IBM01148';

case 'ccsid1149':
case 'cp1149':
case 'ebcdicis871euro':
case 'ibm1149':
return 'IBM01149';

case 'csiso143iecp271': case 'csiso143iecp271':
case 'iecp271': case 'iecp271':
case 'isoir143': case 'isoir143':
Expand Down Expand Up @@ -10635,11 +10634,6 @@ class SimplePie_Misc
case 'sen850200c': case 'sen850200c':
return 'SEN_850200_C'; return 'SEN_850200_C';


case 'csshiftjis':
case 'mskanji':
case 'shiftjis':
return 'Shift_JIS';

case 'csiso102t617bit': case 'csiso102t617bit':
case 'isoir102': case 'isoir102':
case 't617bit': case 't617bit':
Expand Down Expand Up @@ -10738,7 +10732,10 @@ class SimplePie_Misc
case 'viscii': case 'viscii':
return 'VISCII'; return 'VISCII';


case 'csshiftjis':
case 'cswindows31j': case 'cswindows31j':
case 'mskanji':
case 'shiftjis':
case 'windows31j': case 'windows31j':
return 'Windows-31J'; return 'Windows-31J';


Expand Down

0 comments on commit 930491c

Please sign in to comment.