Skip to content
This repository has been archived by the owner on May 28, 2020. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
Implementation of multipart upload and multiobject delete
  • Loading branch information
robert clarke committed Sep 20, 2012
1 parent 1b68674 commit 868709f
Show file tree
Hide file tree
Showing 10 changed files with 541 additions and 24 deletions.
9 changes: 7 additions & 2 deletions lib/Net/Amazon/S3.pm 100644 → 100755
Expand Up @@ -112,16 +112,21 @@ use Net::Amazon::S3::Client::Bucket;
use Net::Amazon::S3::Client::Object;
use Net::Amazon::S3::HTTPRequest;
use Net::Amazon::S3::Request;
use Net::Amazon::S3::Request::CompleteMultipartUpload;
use Net::Amazon::S3::Request::CreateBucket;
use Net::Amazon::S3::Request::DeleteBucket;
use Net::Amazon::S3::Request::DeleteMultiObject;
use Net::Amazon::S3::Request::DeleteObject;
use Net::Amazon::S3::Request::GetBucketAccessControl;
use Net::Amazon::S3::Request::GetBucketLocationConstraint;
use Net::Amazon::S3::Request::GetObject;
use Net::Amazon::S3::Request::GetObjectAccessControl;
use Net::Amazon::S3::Request::InitiateMultipartUpload;
use Net::Amazon::S3::Request::ListAllMyBuckets;
use Net::Amazon::S3::Request::ListBucket;
use Net::Amazon::S3::Request::ListParts;
use Net::Amazon::S3::Request::PutObject;
use Net::Amazon::S3::Request::PutPart;
use Net::Amazon::S3::Request::SetBucketAccessControl;
use Net::Amazon::S3::Request::SetObjectAccessControl;
use LWP::UserAgent::Determined;
Expand Down Expand Up @@ -196,13 +201,13 @@ sub BUILD {
if ( $self->retry ) {
$ua = LWP::UserAgent::Determined->new(
keep_alive => $KEEP_ALIVE_CACHESIZE,
requests_redirectable => [qw(GET HEAD DELETE PUT)],
requests_redirectable => [qw(GET HEAD DELETE PUT POST)],
);
$ua->timing('1,2,4,8,16,32');
} else {
$ua = LWP::UserAgent->new(
keep_alive => $KEEP_ALIVE_CACHESIZE,
requests_redirectable => [qw(GET HEAD DELETE PUT)],
requests_redirectable => [qw(GET HEAD DELETE PUT POST)],
);
}

Expand Down
20 changes: 20 additions & 0 deletions lib/Net/Amazon/S3/Client/Bucket.pm 100644 → 100755
Expand Up @@ -132,6 +132,18 @@ sub list {
);
}

sub delete_multi_object {
my $self = shift;
my @objects = @_;
return unless( scalar(@objects) );
my $http_request = Net::Amazon::S3::Request::DeleteMultiObject->new(
s3 => $self->client->s3,
bucket => $self->name,
keys => [ map($_->key, @objects) ],
)->http_request;
return $self->client->_send_request($http_request);
}

sub object {
my ( $self, %conf ) = @_;
return Net::Amazon::S3::Client::Object->new(
Expand All @@ -141,6 +153,7 @@ sub object {
);
}


1;

__END__
Expand Down Expand Up @@ -228,3 +241,10 @@ This module represents buckets.
# be used to get or put
my $object = $bucket->object( key => 'this is the key' );
=head2 delete_multi_object
# delete multiple objects using a multi object delete operation
# Accepts a list of L<Net::Amazon::S3::Client::Object> objects.
# Limited to a maximum of 1000 objects in one operation
$bucket->delete_multi_object($object1, $object2)
103 changes: 91 additions & 12 deletions lib/Net/Amazon/S3/Client/Object.pm 100644 → 100755
Expand Up @@ -74,13 +74,9 @@ sub get {
my $content = $http_response->content;

my $md5_hex = md5_hex($content);
my $etag = $self->etag || $self->_etag($http_response);
confess 'Corrupted download' if( !$self->_is_multipart_etag($etag) && $etag ne $md5_hex);

if ( $self->etag ) {
confess 'Corrupted download' if $self->etag ne $md5_hex;
} else {
confess 'Corrupted download'
if $self->_etag($http_response) ne $md5_hex;
}
return $content;
}

Expand All @@ -99,12 +95,8 @@ sub get_filename {

my $md5_hex = file_md5_hex($filename);

if ( $self->etag ) {
confess 'Corrupted download' if $self->etag ne $md5_hex;
} else {
confess 'Corrupted download'
if $self->_etag($http_response) ne $md5_hex;
}
my $etag = $self->etag || $self->_etag($http_response);
confess 'Corrupted download' if( !$self->_is_multipart_etag($etag) && $etag ne $md5_hex);
}

sub put {
Expand Down Expand Up @@ -210,6 +202,57 @@ sub delete {
$self->client->_send_request($http_request);
}

sub initiate_multipart_upload {
my $self = shift;
my $http_request = Net::Amazon::S3::Request::InitiateMultipartUpload->new(
s3 => $self->client->s3,
bucket => $self->bucket->name,
key => $self->key,
)->http_request;
my $res = $self->client->_send_request($http_request);
return unless $res->is_success;

my $doc = $self->client->s3->libxml->parse_string($res->content);
my $xpc = XML::LibXML::XPathContext->new($doc);
$xpc->registerNs( 's3',
'http://s3.amazonaws.com/doc/2006-03-01/' );
my $upload_id = $xpc->findvalue('//s3:UploadId');
return $upload_id;
}

sub complete_multipart_upload {
my $self = shift;

my %args = ref($_[0]) ? {$_[0]} : @_;

#set default args
$args{s3} = $self->client->s3;
$args{key} = $self->key;
$args{bucket} = $self->bucket->name;

my $http_request = Net::Amazon::S3::Request::CompleteMultipartUpload->new(%args)->http_request;
return $self->client->_send_request($http_request);
}

sub put_part {
my $self = shift;

my %args = ref($_[0]) ? {$_[0]} : @_;

#set default args
$args{s3} = $self->client->s3;
$args{key} = $self->key;
$args{bucket} = $self->bucket->name;

my $http_request = Net::Amazon::S3::Request::PutPart->new(%args)->http_request;
return $self->client->_send_request($http_request);
}

sub list_parts {
confess "Not implemented";
#TODO - Net::Amazon::S3::Request:ListParts is implemented, but need to define best interface at this level. Currently returns raw XML
}

sub uri {
my $self = shift;
return Net::Amazon::S3::Request::GetObject->new(
Expand Down Expand Up @@ -283,6 +326,11 @@ sub _etag {
return $etag;
}

sub _is_multipart_etag {
my ( $self, $etag ) = @_;
return 1 if($etag =~ /\-\d+$/);
}

1;

__END__
Expand Down Expand Up @@ -453,3 +501,34 @@ Content-Disposition using content_disposition.
# return the URI of a publically-accessible object
my $uri = $object->uri;
=head2 initiate_multipart_upload
#initiate a new multipart upload for this object
my $object = $bucket->object(
key => 'massive_video.avi'
);
my $upload_id = $object->initiate_multipart_upload;
=head2 put_part
#add a part to a multipart upload
my $put_part_response = $object->put_part(
upload_id => $upload_id,
part_number => 1,
value => $chunk_content,
);
my $part_etag = $put_part_response->header('ETag')
Returns an L<HTTP::Response> object. It is necessary to keep the ETags for each part, as these are required to complete the upload.
=head2 complete_multipart_upload
#complete a multipart upload
$object->complete_multipart_upload(
upload_id => $upload_id,
etags => [$etag_1, $etag_2],
part_numbers => [$part_number_1, $part_number2],
);
The etag and part_numbers parameters are ordered lists specifying the part numbers and ETags for each individual part of the multipart upload.
45 changes: 37 additions & 8 deletions lib/Net/Amazon/S3/HTTPRequest.pm 100644 → 100755
Expand Up @@ -6,13 +6,14 @@ use MIME::Base64 qw( encode_base64 );
use Moose::Util::TypeConstraints;
use URI::Escape qw( uri_escape_utf8 );
use URI::QueryParam;
use URI;

# ABSTRACT: Create a signed HTTP::Request

my $METADATA_PREFIX = 'x-amz-meta-';
my $AMAZON_HEADER_PREFIX = 'x-amz-';

enum 'HTTPMethod' => qw(DELETE GET HEAD PUT);
enum 'HTTPMethod' => qw(DELETE GET HEAD PUT POST);

has 's3' => ( is => 'ro', isa => 'Net::Amazon::S3', required => 1 );
has 'method' => ( is => 'ro', isa => 'HTTPMethod', required => 1 );
Expand Down Expand Up @@ -140,15 +141,43 @@ sub _canonical_string {
$path =~ /^([^?]*)/;
$buf .= "/$1";

# ...unless there is an acl or torrent parameter
if ( $path =~ /[&?]acl($|=|&)/ ) {
$buf .= '?acl';
} elsif ( $path =~ /[&?]torrent($|=|&)/ ) {
$buf .= '?torrent';
} elsif ( $path =~ /[&?]location($|=|&)/ ) {
$buf .= '?location';
# ...unless there any parameters we're interested in...
if ( $path =~ /[&?](acl|torrent|location|uploads|delete)($|=|&)/ ) {
$buf .= "?$1";
} elsif ( my %query_params = URI->new($path)->query_form ){
#see if the remaining parsed query string provides us with any
if($query_params{partNumber} && $query_params{uploadId}){
#re-evaluate query string, the order of the params is important for request signing, so we can't depend on URI to do the right thing
$buf .= sprintf("?partNumber=%s&uploadId=%s", $query_params{partNumber}, $query_params{uploadId});
}
elsif($query_params{uploadId}){
$buf .= sprintf("?uploadId=%s",$query_params{uploadId});
}
}

# action parameters
#check for allowed CGI params
# my $uri = URI->new($path);
# if(my $qs = $uri->query){
# #sometimes the query string might composed only of a single key, which URI won't parse properl
# #if that's the case check that it's a valid one, and just bolt it on the end
# if($qs ~~ [qw/delete torrent uploads location/]){
# $buf .= "?$qs";
# }
# else {
# my %query_params = $uri->query_form;
# #strip out disallowed query params
# if($query_params{partNumber} && $query_params{uploadId}){
# #re-evaluate query string, the order of the params is important, so we can't depend on URI
# $qs = sprintf("partNumber=%s&uploadId=%s", $query_params{partNumber}, $query_params{uploadId});
# }
# elsif($query_params{uploadId}){
# $qs = sprintf("uploadId=%s",$query_params{uploadId});
# }
# $buf .= "?$qs" if($qs);
# }
# }

return $buf;
}

Expand Down
84 changes: 84 additions & 0 deletions lib/Net/Amazon/S3/Request/CompleteMultipartUpload.pm
@@ -0,0 +1,84 @@
package Net::Amazon::S3::Request::CompleteMultipartUpload;
use Moose 0.85;

use Digest::MD5 qw/md5 md5_hex/;
use MIME::Base64;
use Carp qw/croak/;
use XML::LibXML;

extends 'Net::Amazon::S3::Request';

has 'bucket' => ( is => 'ro', isa => 'BucketName', required => 1 );
has 'etags' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'key' => ( is => 'ro', isa => 'Str', required => 1 );
has 'part_numbers' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'upload_id' => ( is => 'ro', isa => 'Str', required => 1 );

__PACKAGE__->meta->make_immutable;

sub http_request {
my $self = shift;

croak "must have an equally sized list of etags and part numbers" unless scalar(@{$self->part_numbers}) eq scalar(@{$self->etags});
#build XML doc
my $xml_doc = XML::LibXML::Document->new('1.0','UTF-8');
my $root_element = $xml_doc->createElement('CompleteMultipartUpload');
$xml_doc->addChild($root_element);

#add content
for(my $i = 0; $i < scalar(@{$self->part_numbers}); $i++ ){
my $part = $xml_doc->createElement('Part');
$part->appendTextChild('PartNumber' => $self->part_numbers->[$i]);
$part->appendTextChild('ETag' => $self->etags->[$i]);
$root_element->addChild($part);
}

my $content = $xml_doc->toString;

my $md5 = md5($content);

my $md5_base64 = encode_base64($md5);
chomp $md5_base64;

my $header_spec = {
'Content-MD5' => $md5_base64,
'Content-Length' => length $content,
'Content-Type' => 'application/xml'
};

#build signed request
return Net::Amazon::S3::HTTPRequest->new( #See patch below
s3 => $self->s3,
method => 'POST',
path => $self->_uri( $self->key ). '?uploadId='.$self->upload_id,
content => $content,
headers => $header_spec,
)->http_request;
}

1;

__END__
=head1 NAME
Net::Amazon::S3::Request::CompleteMultipartUpload - An internal class to complete a multipart upload
=head1 SYNOPSIS
my $http_request = Net::Amazon::S3::Request::CompleteMultipartUpload->new(
s3 => $s3,
bucket => $bucket,
etags => \@etags,
part_numbers => \@part_numbers,
)->http_request;
=head1 DESCRIPTION
This module deletes multiple objects from a bucket.
=head1 METHODS
=head2 http_request
This method returns a HTTP::Request object.

0 comments on commit 868709f

Please sign in to comment.