diff --git a/lib/Text/Autoformat.pm b/lib/Text/Autoformat.pm index b883a0c..385a1b5 100644 --- a/lib/Text/Autoformat.pm +++ b/lib/Text/Autoformat.pm @@ -181,7 +181,7 @@ sub autoformat # ($text, %args) $lines[-1]{hang} = Text::Autoformat::Hang->new($_, $args{lists}); - s/([ \t]*)(.*?)(\s*)$// + s/([ \x{A0}\t]*)(.*?)(\s*)$// or die "Internal Error ($@) on '$_'"; $lines[-1]{hangspace} = defn $1; $lines[-1]{text} = defn $2; diff --git a/lib/Text/Autoformat/Hang.pm b/lib/Text/Autoformat/Hang.pm index ce691b9..099e473 100644 --- a/lib/Text/Autoformat/Hang.pm +++ b/lib/Text/Autoformat/Hang.pm @@ -4,6 +4,8 @@ use 5.006; use strict; use warnings; +use utf8; + # ROMAN NUMERALS sub inv($@) { my ($k, %inv)=shift; for(0..$#_) {$inv{$_[$_]}=$_*$k} %inv } @@ -46,8 +48,8 @@ my %close = ( '[' => ']', '(' => ')', '<' => '>', "" => '' ); my $hangPS = qq{(?i:ps:|(?:p\\.?)+s\\b\\.?(?:[ \\t]*:)?)}; my $hangNB = qq{(?i:n\\.?b\\.?(?:[ \\t]*:)?)}; my $hangword = qq{(?:(?:Note)[ \\t]*:)}; -my $hangbullet = qq{[*.+-]}; -my $hang = qq{(?:(?i)(?:$hangNB|$hangword|$hangbullet)(?=[ \t]))}; +my $hangbullet = qq{[•*.+-]}; +my $hang = qq{(?:(?i)(?:$hangNB|$hangword|$hangbullet)(?=[ \x{A0}\t]))}; # IMPLEMENTATION diff --git a/t/04.non-ascii.t b/t/04.non-ascii.t new file mode 100644 index 0000000..61dbc95 --- /dev/null +++ b/t/04.non-ascii.t @@ -0,0 +1,19 @@ +use utf8; +use strict; +use Test::More tests => 1; +use Text::Autoformat; + +# Possibly I'm breaking this on EBCDIC… -- rjbs, 2020-10-01 +my $NBSP = "\x{A0}"; + +my $str = <<"END"; +•${NBSP}Analyze problem +•${NBSP}Design algorithm +• Code solution +• Test +• Ship +END + +my $after = autoformat $str; + +is($after, $str, 'we treat \N{BULLET} as a bullet and NBSP after it as space');