diff --git a/.gitignore b/.gitignore index 994627b..5523f7b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ *.gz +.*swp +.DS_Store Makefile blib pm_to_blib diff --git a/Changes b/Changes index fe3c75b..3981001 100644 --- a/Changes +++ b/Changes @@ -3,6 +3,9 @@ Revision history for Archive-BagIt {{$NEXT}} +0.053 2014-12-20 + - utf8 fixes + 0.052 2014-11-20 - Fixed dist.ini to include dotfiles (needed for dotbagit test) diff --git a/README b/README index 617cc3b..a7c3622 100644 --- a/README +++ b/README @@ -2,7 +2,7 @@ NAME Archive::BagIt VERSION - version 0.053 + version 0.054 SYNOPSIS This modules will hopefully help with the basic commands needed to @@ -131,7 +131,7 @@ AUTHOR Rob Schmidt COPYRIGHT AND LICENSE - This software is copyright (c) 2014 by Rob Schmidt and William + This software is copyright (c) 2015 by Rob Schmidt and William Wueppelmann. This is free software; you can redistribute it and/or modify it under diff --git a/README.mkdn b/README.mkdn index ae25631..1be61fa 100644 --- a/README.mkdn +++ b/README.mkdn @@ -4,7 +4,7 @@ Archive::BagIt # VERSION -version 0.053 +version 0.054 # SYNOPSIS @@ -149,7 +149,7 @@ Rob Schmidt # COPYRIGHT AND LICENSE -This software is copyright (c) 2014 by Rob Schmidt and William Wueppelmann. +This software is copyright (c) 2015 by Rob Schmidt and William Wueppelmann. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. diff --git a/dist.ini b/dist.ini index 92739a7..87d6e64 100644 --- a/dist.ini +++ b/dist.ini @@ -4,6 +4,7 @@ author = Rob Schmidt license = Perl_5 copyright_holder = Rob Schmidt and William Wueppelmann + [@Filter] -bundle = @Author::DOHERTY -remove = GatherDir diff --git a/lib/Archive/BagIt.pm b/lib/Archive/BagIt.pm index 4382aa2..da4323a 100644 --- a/lib/Archive/BagIt.pm +++ b/lib/Archive/BagIt.pm @@ -4,12 +4,17 @@ use strict; use 5.006; use warnings; + # VERSION +use utf8; +use open ':std', ':utf8'; our @checksum_algos = qw(md5 sha1); our $DEBUG=0; +use Encode qw(decode); use File::Find; use Data::Dumper; +#use Data::Printer; =head1 WARNING This is experimental software for the moment and under active development. I @@ -83,11 +88,11 @@ sub _load_manifests { my @manifests = $self->manifest_files(); foreach my $manifest_file (@manifests) { - die("Cannot open $manifest_file: $!") unless (open (my $MANIFEST,"<", $manifest_file)); + die("Cannot open $manifest_file: $!") unless (open (my $MANIFEST,"<:encoding(utf8)", $manifest_file)); while (my $line = <$MANIFEST>) { chomp($line); my ($digest,$file); - ($digest, $file) = $line =~ /^([a-f0-9]+)\s+([a-zA-Z0-9_\.\/\-]+)/; + ($digest, $file) = $line =~ /^([a-f0-9]+)\s+(.+)$/; if(!$file) { die ("This is not a valid manifest file"); } else { @@ -107,7 +112,7 @@ sub _load_tagmanifests { my @tagmanifests = $self->tagmanifest_files(); foreach my $tagmanifest_file (@tagmanifests) { - die("Cannot open $tagmanifest_file: $!") unless (open(my $TAGMANIFEST,"<", $tagmanifest_file)); + die("Cannot open $tagmanifest_file: $!") unless (open(my $TAGMANIFEST,"<:encoding(utf8)", $tagmanifest_file)); while (my $line = <$TAGMANIFEST>) { chomp($line); my($digest,$file) = split(/\s+/, $line, 2); @@ -172,12 +177,13 @@ sub _manifest_crc32 { my $data_dir = "$bagit/data"; # Generate MD5 digests for all of the files under ./data - open(my $fh, ">",$manifest_file) or die("Cannot create manifest-crc32.txt: $!\n"); + open(my $fh, ">:encoding(utf8)",$manifest_file) or die("Cannot create manifest-crc32.txt: $!\n"); find( sub { - my $file = $File::Find::name; + $_=decode('utf8', $_); + my $file = decode('utf8', $File::Find::name); if (-f $_) { - open(my $DATA, "<", $_) or die("Cannot read $_: $!"); + open(my $DATA, "<:encoding(utf8)", $_) or die("Cannot read $_: $!"); my $digest = sprintf("%010d",crc32($DATA)); close($DATA); my $filename = substr($file, length($bagit) + 1); @@ -197,12 +203,12 @@ sub _manifest_md5 { my $data_dir = "$bagit/data"; print "creating manifest: $data_dir\n"; # Generate MD5 digests for all of the files under ./data - open(my $md5_fh, ">",$manifest_file) or die("Cannot create manifest-md5.txt: $!\n"); + open(my $md5_fh, ">:encoding(utf8)",$manifest_file) or die("Cannot create manifest-md5.txt: $!\n"); find( sub { - my $file = $File::Find::name; + my $file = decode('utf8', $File::Find::name); if (-f $_) { - open(my $DATA, "<", "$_") or die("Cannot read $_: $!"); + open(my $DATA, "<:raw", "$_") or die("Cannot read $_: $!"); my $digest = Digest::MD5->new->addfile($DATA)->hexdigest; close($DATA); my $filename = substr($file, length($bagit) + 1); @@ -222,11 +228,12 @@ sub _tagmanifest_md5 { my $tagmanifest_file= "$bagit/tagmanifest-md5.txt"; - open (my $md5_fh, ">", $tagmanifest_file) or die ("Cannot create tagmanifest-md5.txt: $! \n"); + open (my $md5_fh, ">:encoding(utf8)", $tagmanifest_file) or die ("Cannot create tagmanifest-md5.txt: $! \n"); find ( sub { - my $file = $File::Find::name; + $_ = decode('utf8',$_); + my $file = decode('utf8',$File::Find::name); if ($_=~m/^data$/) { $File::Find::prune=1; } @@ -234,7 +241,7 @@ sub _tagmanifest_md5 { # Ignore, we can't take digest from ourselves } elsif ( -f $_ ) { - open(my $DATA, "<", "$_") or die("Cannot read $_: $!"); + open(my $DATA, "<:raw", "$_") or die("Cannot read $_: $!"); my $digest = Digest::MD5->new->addfile($DATA)->hexdigest; close($DATA); my $filename = substr($file, length($bagit) + 1); @@ -280,7 +287,7 @@ sub verify_bag { } # Compile a list of payload files - find(sub{ push(@payload, $File::Find::name) }, $payload_dir); + find(sub{ push(@payload, decode('utf8',$File::Find::name)) }, $payload_dir); # Evaluate each file against the manifest my $digestobj = new Digest::MD5; @@ -288,11 +295,12 @@ sub verify_bag { next if (-d ($file)); my $local_name = substr($file, length($bagit) + 1); my ($digest); + #p %manifest; unless ($manifest{$local_name}) { die ("file found not in manifest: [$local_name]"); } #my $start_time=time(); - open(my $fh, "<", "$bagit/$local_name") or die ("Cannot open $local_name"); + open(my $fh, "<:raw", "$bagit/$local_name") or die ("Cannot open $local_name"); $digest = $digestobj->addfile($fh)->hexdigest; close($fh); #print "$bagit/$local_name md5 in ".(time()-$start_time)."\n"; @@ -327,8 +335,7 @@ sub verify_bag { sub get_checksum { my($self) =@_; my $bagit = $self->{'bag_path'}; - open(my $SRCFILE, "<", $bagit."/manifest-md5.txt"); - binmode($SRCFILE); + open(my $SRCFILE, "<:raw", $bagit."/manifest-md5.txt"); my $srchex=Digest::MD5->new->addfile($SRCFILE)->hexdigest; close($SRCFILE); return $srchex; @@ -370,7 +377,8 @@ sub _payload_files{ my @payload=(); File::Find::find( sub{ - push(@payload,$File::Find::name); + + push(@payload,decode('utf8',$File::Find::name)); #print "name: ".$File::Find::name."\n"; }, $payload_dir); @@ -397,6 +405,7 @@ sub _non_payload_files { my @payload = (); File::Find::find( sub { + $File::Find::name = decode ('utf8', $File::Find::name); if(-f $File::Find::name) { my ($relpath) = ($File::Find::name=~m!$self->{"bag_path"}/(.*$)!); push(@payload, $relpath); diff --git a/lib/Archive/BagIt/Base.pm b/lib/Archive/BagIt/Base.pm index e760b92..6c8b807 100644 --- a/lib/Archive/BagIt/Base.pm +++ b/lib/Archive/BagIt/Base.pm @@ -6,6 +6,9 @@ package Archive::BagIt::Base; use Moose; +use utf8; +use open ':std', ':encoding(utf8)'; +use Encode qw(decode); use File::Find; use File::Spec; use Digest::MD5; @@ -210,8 +213,7 @@ sub _build_checksum_algos { sub _build_bag_checksum { my($self) =@_; my $bagit = $self->{'bag_path'}; - open(my $SRCFILE, "<", $bagit."/manifest-md5.txt"); - binmode($SRCFILE); + open(my $SRCFILE, "<:raw", $bagit."/manifest-md5.txt"); my $srchex=Digest::MD5->new->addfile($SRCFILE)->hexdigest; close($SRCFILE); return $srchex; @@ -250,7 +252,7 @@ sub _build_tagmanifest_entries { my @tagmanifests = @{$self->tagmanifest_files}; my $tagmanifest_entries = {}; foreach my $tagmanifest_file (@tagmanifests) { - die("Cannot open $tagmanifest_file: $!") unless (open(my $TAGMANIFEST,"<", $tagmanifest_file)); + die("Cannot open $tagmanifest_file: $!") unless (open(my $TAGMANIFEST,"<:encoding(utf8)", $tagmanifest_file)); while (my $line = <$TAGMANIFEST>) { chomp($line); my($digest,$file) = split(/\s+/, $line, 2); @@ -268,11 +270,11 @@ sub _build_manifest_entries { my @manifests = @{$self->manifest_files}; my $manifest_entries = {}; foreach my $manifest_file (@manifests) { - die("Cannot open $manifest_file: $!") unless (open (my $MANIFEST, "<", $manifest_file)); + die("Cannot open $manifest_file: $!") unless (open (my $MANIFEST, "<:encoding(utf8)", $manifest_file)); while (my $line = <$MANIFEST>) { chomp($line); my ($digest,$file); - ($digest, $file) = $line =~ /^([a-f0-9]+)\s+([a-zA-Z0-9_\.\/\-]+)/; + ($digest, $file) = $line =~ /^([a-f0-9]+)\s+(.+)/; if(!$file) { die ("This is not a valid manifest file"); } else { @@ -294,6 +296,8 @@ sub _build_payload_files{ my @payload=(); File::Find::find( sub{ + $File::Find::name = decode ('utf8', $File::Find::name); + $_ = decode ('utf8', $_); if (-f $_) { my $rel_path=File::Spec->catdir($self->rel_payload_path,File::Spec->abs2rel($File::Find::name, $payload_dir)); #print "pushing ".$rel_path." payload_dir: $payload_dir \n"; @@ -333,6 +337,8 @@ sub _build_non_payload_files { my @non_payload = (); File::Find::find( sub{ + $File::Find::name = decode('utf8', $File::Find::name); + $_=decode ('utf8', $_); if (-f $_) { my $rel_path=File::Spec->catdir($self->rel_metadata_path,File::Spec->abs2rel($File::Find::name, $self->metadata_path)); #print "pushing ".$rel_path." payload_dir: $payload_dir \n"; @@ -402,10 +408,11 @@ sub verify_bag { my $digestobj = new Digest::MD5; foreach my $local_name (@payload) { my ($digest); - unless ($manifest{$local_name}) { + #p %manifest; + unless ($manifest{"$local_name"}) { die ("file found not in manifest: [$local_name]"); } - open(my $fh, "<", "$bagit/$local_name") or die ("Cannot open $local_name"); + open(my $fh, "<:raw", "$bagit/$local_name") or die ("Cannot open $local_name"); $digest = $digestobj->addfile($fh)->hexdigest; #print $digest."\n"; close($fh); @@ -470,5 +477,4 @@ sub make_bag { return $self; } - 1; diff --git a/t/base.t b/t/base.t index 1e969bc..4b678c5 100644 --- a/t/base.t +++ b/t/base.t @@ -1,6 +1,8 @@ BEGIN { chdir 't' if -d 't' } +use utf8; +use open ':std', ':encoding(utf8)'; use Test::More 'no_plan'; use strict; @@ -25,6 +27,7 @@ my $SRC_FILES = File::Spec->catdir( @ROOT, 'src_files'); my $DST_BAG = File::Spec->catdir(@ROOT, 'dst_bag'); + #validate tests { @@ -52,6 +55,7 @@ my $DST_BAG = File::Spec->catdir(@ROOT, 'dst_bag'); mkdir($DST_BAG); copy($SRC_FILES."/1", $DST_BAG); copy($SRC_FILES."/2", $DST_BAG); + copy($SRC_FILES."/thréê", $DST_BAG); note "making bag $DST_BAG"; my $bag = $Class->make_bag($DST_BAG);