Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Don't skip # before an address. Don't strip # (or &) from the parsed results.
The number regex now captures the value itself.
  • Loading branch information
timbunce committed May 5, 2011
1 parent 00e4ec3 commit 444292a
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 11 deletions.
28 changes: 18 additions & 10 deletions US.pm
Expand Up @@ -693,7 +693,7 @@ subtle ways between releases.

our %Addr_Match = (
type => join("|", keys %_Street_Type_List),
number => qr/\d+-?\d*/,
number => qr/(\d+-?\d*) (?{ $_{number} = $^N })/x,
fraction => qr{\d+\/\d+},
state => join("|",
# escape spaces in state names (e.g., "new york" --> "new\\ york")
Expand Down Expand Up @@ -796,22 +796,30 @@ our %Addr_Match = (
(?:($Addr_Match{zip}) (?{ $_{zip} = $^N }))?
/ix;

$Addr_Match{address} = qr/^\W*
( $Addr_Match{number})\W* (?{ $_{number} = $^N })
$Addr_Match{address} = qr/
^
[^\w\#]* # skip non-word chars except # (eg unit)
( $Addr_Match{number} )\W*
(?:$Addr_Match{fraction}\W*)?
$Addr_Match{street}\W+
(?:$Addr_Match{sec_unit}\W+)?
$Addr_Match{place}
\W*$/ix;
\W* # require only non-word chars at end
$ # right up to end of string
/ix;

my $sep = qr/(?:\W+|\Z)/;

$Addr_Match{informal_address} = qr/
^\s*
(?:$Addr_Match{sec_unit}\W+)?
( $Addr_Match{number})?\W* (?{ $_{number} = $^N })
^
\s* # skip leading whitespace
(?:$Addr_Match{sec_unit} $sep)?
(?:$Addr_Match{number})?\W*
(?:$Addr_Match{fraction}\W*)?
$Addr_Match{street}\W+
(?:$Addr_Match{sec_unit}\W+)?
$Addr_Match{street} $sep
(?:$Addr_Match{sec_unit} $sep)?
(?:$Addr_Match{place})?
# don't require match to reach end of string
/ix;

$Addr_Match{intersection} = qr/^\W*
Expand Down Expand Up @@ -981,7 +989,7 @@ sub normalize_address {
my ($class, $part) = @_;

# strip off punctuation
defined($_) && s/^\s+|\s+$|[^\w\s\-]//gos for values %$part;
defined($_) && s/^\s+|\s+$|[^\w\s\-\#\&]//gos for values %$part;

if ($Old_Undef_Fields_Behaviour) {
my @undef_fields = (exists $part->{street1})
Expand Down
21 changes: 20 additions & 1 deletion t/01_parser.t
@@ -1,5 +1,5 @@
use blib;
use Test::More tests => 43;
use Test::More;
use strict;
use warnings;
use Data::Dumper;
Expand Down Expand Up @@ -292,6 +292,23 @@ my %address = (
'prefix' => 'S',
'sec_unit_type' => 'lobby',
},
"(233 S Wacker Dr lobby 60606)" => { # surrounding punctuation
'number' => '233',
'street' => 'Wacker',
'zip' => '60606',
'type' => 'Dr',
'prefix' => 'S',
'sec_unit_type' => 'lobby',
},
"#42 233 S Wacker Dr 60606" => { # leading numbered secondary unit type
'sec_unit_num' => '42',
'zip' => '60606',
'number' => '233',
'street' => 'Wacker',
'sec_unit_type' => '#',
'type' => 'Dr',
'prefix' => 'S'
},
);

my @failures = (
Expand All @@ -313,3 +330,5 @@ for my $fail (@failures) {
my $parse = Geo::StreetAddress::US->parse_location( $fail );
ok( !$parse || !defined($parse->{state}), "can't parse $fail" );
}

done_testing();

0 comments on commit 444292a

Please sign in to comment.