From f72c37be2f6edd3958c8368682a3a5cb57e5fe16 Mon Sep 17 00:00:00 2001
From: Graham Knop
Date: Fri, 22 Sep 2017 12:04:31 +0200
Subject: [PATCH] improve changelog date parsing

Make sure we actually got a usable date before using it, and be a bit
more flexible in the accepted formats.
---
 lib/MetaCPAN/Web/Model/API/Changes/Parser.pm | 45 ++++++++++++++++++--
 1 file changed, 42 insertions(+), 3 deletions(-)

diff --git a/lib/MetaCPAN/Web/Model/API/Changes/Parser.pm b/lib/MetaCPAN/Web/Model/API/Changes/Parser.pm
index 43850befbf..eb29c2489d 100644
--- a/lib/MetaCPAN/Web/Model/API/Changes/Parser.pm
+++ b/lib/MetaCPAN/Web/Model/API/Changes/Parser.pm
@@ -3,10 +3,45 @@ package MetaCPAN::Web::Model::API::Changes::Parser;
 use Moose;
 use version qw();
-use CPAN::Changes;
 
 my %months;
 my $m = 0;
 $months{$_} = ++$m for qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
+my $months = join '|', keys %months;
+
+our $W3CDTF_REGEX = qr{
+    (\d\d\d\d) # Year
+    (?:
+        [-/](\d\d|$months) # -Month
+        (?:
+            [-/](\d\d) # -Day
+            (?:
+                [T\s]
+                (\d\d):(\d\d) # Hour:Minute
+                (?:
+                    :(\d\d) # :Second
+                    (\.\d+)? # .Fractional_Second
+                )?
+                (
+                    Z # UTC
+                    | [+-]\d\d:\d\d # Hour:Minute TZ offset
+                    (?::\d\d)? # :Second TZ offset
+                )?
+            )?
+        )?
+    )?
+}x;
+
+our $UNKNOWN_VALS = join(
+    '|',
+    (
+        'Unknown Release Date',
+        'Unknown',
+        'Not Released',
+        'Development Release',
+        'Development',
+        'Developer Release',
+    )
+);
 
 sub load {
     my ( $class, $file ) = @_;
@@ -42,7 +77,7 @@ sub parse {
             if ($note) {
 
                 # unknown dates
-                if ( $note =~ s{^($CPAN::Changes::UNKNOWN_VALS)}{}i ) {
+                if ( $note =~ s{^($UNKNOWN_VALS)}{}i ) {
                     $date = $1;
                 }
 
@@ -94,8 +129,12 @@ sub parse {
                 }
 
                 # start with W3CDTF, ignore rest
-                elsif ( $note =~ s{^($CPAN::Changes::W3CDTF_REGEX)}{} ) {
+                elsif ( $note =~ s{^($W3CDTF_REGEX)}{} && defined $3 ) {
                     $date = $1;
+                    my $month = $3;
+                    if ( $month =~ /\D/ ) {
+                        $date =~ s{$month}{sprintf "%02d", $months{$month}}e;
+                    }
                     $date =~ s{ }{T};
 
                     # Add UTC TZ if date ends at H:M, H:M:S or H:M:S.FS