Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 92 lines (77 sloc) 3.14 KB
#!/usr/bin/perl
#==============================================================================
# h2u
# File ID: e93feb18-5d3a-11df-bda7-90e6ba3022ac
# Converts from numeric entities in HTML/SGML (&#9786; and &#x263A;) to UTF-8.
#
# Options:
# -i allow Invalid character range U+D800 through U+DFFF, U+FFFE and U+FFFF.
# -l also convert Latin-1 characters.
#
# Created by &#xD8;yvind A. Holm <sunny@sunbase.org>.
# License: GNU General Public License version 2 or later.
#==============================================================================
use strict;
use warnings;

# Getopt::Std is the core replacement for the Perl 4 library getopts.pl,
# which is no longer shipped with Perl (removed from core in 5.16).
use Getopt::Std;

# Option flags. They are kept as package globals in main:: because
# widechar() reads $main::opt_i directly, and getopts() sets $main::opt_<x>
# for every switch found on the command line.
($main::opt_i, $main::opt_l) = (0, 0);

$| = 1;    # Unbuffered output, so the script behaves well inside pipelines.

getopts('il');

# Filter loop: read every line from the files named in @ARGV (or STDIN),
# replace the entities with their UTF-8 byte sequences and print the result.
while (my $line = <>) {
    # -l: treat bytes 0x80-0xFF as Latin-1 characters and re-encode them.
    # This runs first, so the bytes produced by the entity substitutions
    # below are never re-encoded.
    $line =~ s/([\x80-\xFF])/widechar(ord($1))/ge if $main::opt_l;

    # Decimal entities, e.g. &#9786;
    $line =~ s/&#(\d{1,10});/widechar($1)/ge;

    # Hexadecimal entities, e.g. &#x263A; (matched case-insensitively).
    $line =~ s/&#x([0-9a-f]{1,8});/widechar(hex($1))/gei;

    print $line;
}
# widechar($code_point)
#
# Returns the UTF-8 byte sequence (as a string of bytes) for the numeric
# code point given as the only argument. The argument may be a number or a
# string of decimal digits (leading zeros allowed).
#
# Unless $main::opt_i is true, the surrogate range U+D800..U+DFFF and the
# values U+FFFE and U+FFFF are replaced by U+FFFD. Values of 0x80000000 and
# above always yield the encoding of U+FFFD.
#
# NOTE: values from 0x200000 up to 0x7FFFFFFF are emitted as the legacy
# 5- and 6-byte forms, which are not part of UTF-8 as defined by RFC 3629.
# That behaviour is retained on purpose so existing output does not change.
sub widechar {
    my ($val) = @_;

    # Force a numeric value, so a captured string such as "065534" behaves
    # exactly like the number 65534 in every comparison and bit operation.
    $val += 0;

    if ($val < 0x80) {
        return sprintf("%c", $val);
    }

    if ($val < 0x800) {
        return sprintf("%c%c",
            0xC0 | ($val >> 6),
            0x80 | ($val & 0x3F));
    }

    if ($val < 0x10000) {
        # Numeric comparison is required here: a string comparison misses
        # decimal entities written with leading zeros (e.g. &#065534;).
        my $is_invalid = ($val >= 0xD800 && $val <= 0xDFFF)
                      || $val == 0xFFFE
                      || $val == 0xFFFF;
        $val = 0xFFFD if $is_invalid && !$main::opt_i;

        return sprintf("%c%c%c",
            0xE0 | ($val >> 12),
            0x80 | (($val >> 6) & 0x3F),
            0x80 | ($val & 0x3F));
    }

    if ($val < 0x200000) {
        return sprintf("%c%c%c%c",
            0xF0 | ($val >> 18),
            0x80 | (($val >> 12) & 0x3F),
            0x80 | (($val >> 6) & 0x3F),
            0x80 | ($val & 0x3F));
    }

    if ($val < 0x4000000) {
        return sprintf("%c%c%c%c%c",
            0xF8 | ($val >> 24),
            0x80 | (($val >> 18) & 0x3F),
            0x80 | (($val >> 12) & 0x3F),
            0x80 | (($val >> 6) & 0x3F),
            0x80 | ($val & 0x3F));
    }

    if ($val < 0x80000000) {
        return sprintf("%c%c%c%c%c%c",
            0xFC | ($val >> 30),
            0x80 | (($val >> 24) & 0x3F),
            0x80 | (($val >> 18) & 0x3F),
            0x80 | (($val >> 12) & 0x3F),
            0x80 | (($val >> 6) & 0x3F),
            0x80 | ($val & 0x3F));
    }

    # Too large to encode at all: substitute the replacement character.
    return widechar(0xFFFD);
} # widechar()
=pod

=head1 LICENCE

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

=cut
__END__
Something went wrong with that request. Please try again.