Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
merging back some changes for utf8 support
  • Loading branch information
rjeschmi committed Jan 3, 2015
2 parents 11f7b10 + aa7b1ce commit dbb9142
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 29 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,4 +1,6 @@
*.gz
.*swp
.DS_Store
Makefile
blib
pm_to_blib
Expand Down
3 changes: 3 additions & 0 deletions Changes
Expand Up @@ -3,6 +3,9 @@ Revision history for Archive-BagIt

{{$NEXT}}

0.053 2014-12-20
- utf8 fixes

0.052 2014-11-20
- Fixed dist.ini to include dotfiles (needed for dotbagit test)

Expand Down
4 changes: 2 additions & 2 deletions README
Expand Up @@ -2,7 +2,7 @@ NAME
Archive::BagIt

VERSION
version 0.053
version 0.054

SYNOPSIS
This module will hopefully help with the basic commands needed to
Expand Down Expand Up @@ -131,7 +131,7 @@ AUTHOR
Rob Schmidt <rjeschmi@gmail.com>

COPYRIGHT AND LICENSE
This software is copyright (c) 2014 by Rob Schmidt and William
This software is copyright (c) 2015 by Rob Schmidt and William
Wueppelmann.

This is free software; you can redistribute it and/or modify it under
Expand Down
4 changes: 2 additions & 2 deletions README.mkdn
Expand Up @@ -4,7 +4,7 @@ Archive::BagIt

# VERSION

version 0.053
version 0.054

# SYNOPSIS

Expand Down Expand Up @@ -149,7 +149,7 @@ Rob Schmidt <rjeschmi@gmail.com>

# COPYRIGHT AND LICENSE

This software is copyright (c) 2014 by Rob Schmidt and William Wueppelmann.
This software is copyright (c) 2015 by Rob Schmidt and William Wueppelmann.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
1 change: 1 addition & 0 deletions dist.ini
Expand Up @@ -4,6 +4,7 @@ author = Rob Schmidt <rjeschmi@gmail.com>
license = Perl_5
copyright_holder = Rob Schmidt and William Wueppelmann


[@Filter]
-bundle = @Author::DOHERTY
-remove = GatherDir
Expand Down
43 changes: 26 additions & 17 deletions lib/Archive/BagIt.pm
Expand Up @@ -4,12 +4,17 @@ use strict;
use 5.006;
use warnings;


# VERSION

use utf8;
use open ':std', ':utf8';
our @checksum_algos = qw(md5 sha1);
our $DEBUG=0;
use Encode qw(decode);
use File::Find;
use Data::Dumper;
#use Data::Printer;
=head1 WARNING
This is experimental software for the moment and under active development. I
Expand Down Expand Up @@ -83,11 +88,11 @@ sub _load_manifests {

my @manifests = $self->manifest_files();
foreach my $manifest_file (@manifests) {
die("Cannot open $manifest_file: $!") unless (open (my $MANIFEST,"<", $manifest_file));
die("Cannot open $manifest_file: $!") unless (open (my $MANIFEST,"<:encoding(utf8)", $manifest_file));
while (my $line = <$MANIFEST>) {
chomp($line);
my ($digest,$file);
($digest, $file) = $line =~ /^([a-f0-9]+)\s+([a-zA-Z0-9_\.\/\-]+)/;
($digest, $file) = $line =~ /^([a-f0-9]+)\s+(.+)$/;
if(!$file) {
die ("This is not a valid manifest file");
} else {
Expand All @@ -107,7 +112,7 @@ sub _load_tagmanifests {

my @tagmanifests = $self->tagmanifest_files();
foreach my $tagmanifest_file (@tagmanifests) {
die("Cannot open $tagmanifest_file: $!") unless (open(my $TAGMANIFEST,"<", $tagmanifest_file));
die("Cannot open $tagmanifest_file: $!") unless (open(my $TAGMANIFEST,"<:encoding(utf8)", $tagmanifest_file));
while (my $line = <$TAGMANIFEST>) {
chomp($line);
my($digest,$file) = split(/\s+/, $line, 2);
Expand Down Expand Up @@ -172,12 +177,13 @@ sub _manifest_crc32 {
my $data_dir = "$bagit/data";

# Generate CRC32 checksums for all of the files under ./data
open(my $fh, ">",$manifest_file) or die("Cannot create manifest-crc32.txt: $!\n");
open(my $fh, ">:encoding(utf8)",$manifest_file) or die("Cannot create manifest-crc32.txt: $!\n");
find(
sub {
my $file = $File::Find::name;
$_=decode('utf8', $_);
my $file = decode('utf8', $File::Find::name);
if (-f $_) {
open(my $DATA, "<", $_) or die("Cannot read $_: $!");
open(my $DATA, "<:encoding(utf8)", $_) or die("Cannot read $_: $!");
my $digest = sprintf("%010d",crc32($DATA));
close($DATA);
my $filename = substr($file, length($bagit) + 1);
Expand All @@ -197,12 +203,12 @@ sub _manifest_md5 {
my $data_dir = "$bagit/data";
print "creating manifest: $data_dir\n";
# Generate MD5 digests for all of the files under ./data
open(my $md5_fh, ">",$manifest_file) or die("Cannot create manifest-md5.txt: $!\n");
open(my $md5_fh, ">:encoding(utf8)",$manifest_file) or die("Cannot create manifest-md5.txt: $!\n");
find(
sub {
my $file = $File::Find::name;
my $file = decode('utf8', $File::Find::name);
if (-f $_) {
open(my $DATA, "<", "$_") or die("Cannot read $_: $!");
open(my $DATA, "<:raw", "$_") or die("Cannot read $_: $!");
my $digest = Digest::MD5->new->addfile($DATA)->hexdigest;
close($DATA);
my $filename = substr($file, length($bagit) + 1);
Expand All @@ -222,19 +228,20 @@ sub _tagmanifest_md5 {

my $tagmanifest_file= "$bagit/tagmanifest-md5.txt";

open (my $md5_fh, ">", $tagmanifest_file) or die ("Cannot create tagmanifest-md5.txt: $! \n");
open (my $md5_fh, ">:encoding(utf8)", $tagmanifest_file) or die ("Cannot create tagmanifest-md5.txt: $! \n");

find (
sub {
my $file = $File::Find::name;
$_ = decode('utf8',$_);
my $file = decode('utf8',$File::Find::name);
if ($_=~m/^data$/) {
$File::Find::prune=1;
}
elsif ($_=~m/^tagmanifest-.*\.txt/) {
# Ignore, we can't take digest from ourselves
}
elsif ( -f $_ ) {
open(my $DATA, "<", "$_") or die("Cannot read $_: $!");
open(my $DATA, "<:raw", "$_") or die("Cannot read $_: $!");
my $digest = Digest::MD5->new->addfile($DATA)->hexdigest;
close($DATA);
my $filename = substr($file, length($bagit) + 1);
Expand Down Expand Up @@ -280,19 +287,20 @@ sub verify_bag {
}

# Compile a list of payload files
find(sub{ push(@payload, $File::Find::name) }, $payload_dir);
find(sub{ push(@payload, decode('utf8',$File::Find::name)) }, $payload_dir);

# Evaluate each file against the manifest
my $digestobj = new Digest::MD5;
foreach my $file (@payload) {
next if (-d ($file));
my $local_name = substr($file, length($bagit) + 1);
my ($digest);
#p %manifest;
unless ($manifest{$local_name}) {
die ("file found not in manifest: [$local_name]");
}
#my $start_time=time();
open(my $fh, "<", "$bagit/$local_name") or die ("Cannot open $local_name");
open(my $fh, "<:raw", "$bagit/$local_name") or die ("Cannot open $local_name");
$digest = $digestobj->addfile($fh)->hexdigest;
close($fh);
#print "$bagit/$local_name md5 in ".(time()-$start_time)."\n";
Expand Down Expand Up @@ -327,8 +335,7 @@ sub verify_bag {
sub get_checksum {
my($self) =@_;
my $bagit = $self->{'bag_path'};
open(my $SRCFILE, "<", $bagit."/manifest-md5.txt");
binmode($SRCFILE);
open(my $SRCFILE, "<:raw", $bagit."/manifest-md5.txt");
my $srchex=Digest::MD5->new->addfile($SRCFILE)->hexdigest;
close($SRCFILE);
return $srchex;
Expand Down Expand Up @@ -370,7 +377,8 @@ sub _payload_files{

my @payload=();
File::Find::find( sub{
push(@payload,$File::Find::name);

push(@payload,decode('utf8',$File::Find::name));
#print "name: ".$File::Find::name."\n";
}, $payload_dir);

Expand All @@ -397,6 +405,7 @@ sub _non_payload_files {

my @payload = ();
File::Find::find( sub {
$File::Find::name = decode ('utf8', $File::Find::name);
if(-f $File::Find::name) {
my ($relpath) = ($File::Find::name=~m!$self->{"bag_path"}/(.*$)!);
push(@payload, $relpath);
Expand Down
22 changes: 14 additions & 8 deletions lib/Archive/BagIt/Base.pm
Expand Up @@ -6,6 +6,9 @@ package Archive::BagIt::Base;
use Moose;


use utf8;
use open ':std', ':encoding(utf8)';
use Encode qw(decode);
use File::Find;
use File::Spec;
use Digest::MD5;
Expand Down Expand Up @@ -210,8 +213,7 @@ sub _build_checksum_algos {
sub _build_bag_checksum {
my($self) =@_;
my $bagit = $self->{'bag_path'};
open(my $SRCFILE, "<", $bagit."/manifest-md5.txt");
binmode($SRCFILE);
open(my $SRCFILE, "<:raw", $bagit."/manifest-md5.txt");
my $srchex=Digest::MD5->new->addfile($SRCFILE)->hexdigest;
close($SRCFILE);
return $srchex;
Expand Down Expand Up @@ -250,7 +252,7 @@ sub _build_tagmanifest_entries {
my @tagmanifests = @{$self->tagmanifest_files};
my $tagmanifest_entries = {};
foreach my $tagmanifest_file (@tagmanifests) {
die("Cannot open $tagmanifest_file: $!") unless (open(my $TAGMANIFEST,"<", $tagmanifest_file));
die("Cannot open $tagmanifest_file: $!") unless (open(my $TAGMANIFEST,"<:encoding(utf8)", $tagmanifest_file));
while (my $line = <$TAGMANIFEST>) {
chomp($line);
my($digest,$file) = split(/\s+/, $line, 2);
Expand All @@ -268,11 +270,11 @@ sub _build_manifest_entries {
my @manifests = @{$self->manifest_files};
my $manifest_entries = {};
foreach my $manifest_file (@manifests) {
die("Cannot open $manifest_file: $!") unless (open (my $MANIFEST, "<", $manifest_file));
die("Cannot open $manifest_file: $!") unless (open (my $MANIFEST, "<:encoding(utf8)", $manifest_file));
while (my $line = <$MANIFEST>) {
chomp($line);
my ($digest,$file);
($digest, $file) = $line =~ /^([a-f0-9]+)\s+([a-zA-Z0-9_\.\/\-]+)/;
($digest, $file) = $line =~ /^([a-f0-9]+)\s+(.+)/;
if(!$file) {
die ("This is not a valid manifest file");
} else {
Expand All @@ -294,6 +296,8 @@ sub _build_payload_files{

my @payload=();
File::Find::find( sub{
$File::Find::name = decode ('utf8', $File::Find::name);
$_ = decode ('utf8', $_);
if (-f $_) {
my $rel_path=File::Spec->catdir($self->rel_payload_path,File::Spec->abs2rel($File::Find::name, $payload_dir));
#print "pushing ".$rel_path." payload_dir: $payload_dir \n";
Expand Down Expand Up @@ -333,6 +337,8 @@ sub _build_non_payload_files {
my @non_payload = ();

File::Find::find( sub{
$File::Find::name = decode('utf8', $File::Find::name);
$_=decode ('utf8', $_);
if (-f $_) {
my $rel_path=File::Spec->catdir($self->rel_metadata_path,File::Spec->abs2rel($File::Find::name, $self->metadata_path));
#print "pushing ".$rel_path." payload_dir: $payload_dir \n";
Expand Down Expand Up @@ -402,10 +408,11 @@ sub verify_bag {
my $digestobj = new Digest::MD5;
foreach my $local_name (@payload) {
my ($digest);
unless ($manifest{$local_name}) {
#p %manifest;
unless ($manifest{"$local_name"}) {
die ("file found not in manifest: [$local_name]");
}
open(my $fh, "<", "$bagit/$local_name") or die ("Cannot open $local_name");
open(my $fh, "<:raw", "$bagit/$local_name") or die ("Cannot open $local_name");
$digest = $digestobj->addfile($fh)->hexdigest;
#print $digest."\n";
close($fh);
Expand Down Expand Up @@ -470,5 +477,4 @@ sub make_bag {
return $self;
}


1;
4 changes: 4 additions & 0 deletions t/base.t
@@ -1,6 +1,8 @@

BEGIN { chdir 't' if -d 't' }

use utf8;
use open ':std', ':encoding(utf8)';
use Test::More 'no_plan';
use strict;

Expand All @@ -25,6 +27,7 @@ my $SRC_FILES = File::Spec->catdir( @ROOT, 'src_files');
my $DST_BAG = File::Spec->catdir(@ROOT, 'dst_bag');



#validate tests

{
Expand Down Expand Up @@ -52,6 +55,7 @@ my $DST_BAG = File::Spec->catdir(@ROOT, 'dst_bag');
mkdir($DST_BAG);
copy($SRC_FILES."/1", $DST_BAG);
copy($SRC_FILES."/2", $DST_BAG);
copy($SRC_FILES."/thréê", $DST_BAG);

note "making bag $DST_BAG";
my $bag = $Class->make_bag($DST_BAG);
Expand Down

0 comments on commit dbb9142

Please sign in to comment.