Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Branch: cpan
Fetching contributors…

Cannot retrieve contributors at this time

executable file 775 lines (638 sloc) 20.326 kB
#!/usr/bin/perl
=head1 NAME
pristine-tar - regenerate pristine tarballs
=head1 VERSION
Version 1.17
=cut
# ALSO update lib/Pristine/Tar.pm
our $VERSION = '1.17';
=head1 SYNOPSIS
B<pristine-tar> [-vdk] gendelta I<tarball> I<delta>
B<pristine-tar> [-vdk] gentar I<delta> I<tarball>
B<pristine-tar> [-vdk] [-m message] commit I<tarball> [I<upstream>]
B<pristine-tar> [-vdk] checkout I<tarball>
=head1 DESCRIPTION
pristine-tar can regenerate an exact copy of a pristine upstream tarball
using only a small binary I<delta> file and the contents of the tarball,
which are typically kept in an I<upstream> branch in version control.
The I<delta> file is designed to be checked into version control along-side
the I<upstream> branch, thus allowing Debian packages to be built entirely
using sources in version control, without the need to keep copies of
upstream tarballs.
pristine-tar supports compressed tarballs, calling out to pristine-gz(1)
and pristine-bz2(1) to produce the pristine gzip and bzip2 files.
=head1 COMMANDS
=over 4
=item pristine-tar gendelta I<tarball> I<delta>
This takes the specified upstream I<tarball>, and generates a small binary
delta file that can later be used by pristine-tar gentar to recreate the
tarball.
If the delta filename is "-", it is written to standard output.
=item pristine-tar gentar I<delta> I<tarball>
This takes the specified I<delta> file, and the files in the current
directory, which must have identical content to those in the upstream
tarball, and uses these to regenerate the pristine upstream I<tarball>.
If the delta filename is "-", it is read from standard input.
=item pristine-tar commit I<tarball> [I<upstream>]
B<pristine-tar commit> generates a pristine-tar delta file for the specified
I<tarball>, and commits it to version control. The B<pristine-tar checkout>
command can later be used to recreate the original tarball based only
on the information stored in version control.
The I<upstream> parameter specifies the tag or branch that contains the
same content that is present in the tarball. This defaults to
"refs/heads/upstream", or if there's no such branch, any
branch matching "upstream". The name of the tree it points to will be
recorded for later use by B<pristine-tar checkout>. Note that the content
does not need to be 100% identical to the content of the tarball, but
if it is not, additional space will be used in the delta file.
The delta files are stored in a branch named "pristine-tar", with filenames
corresponding to the input tarball, with ".delta" appended. This
branch is created or updated as needed to add each new delta.
=item pristine-tar checkout I<tarball>
This regenerates a copy of the specified I<tarball> using information
previously saved in version control by B<pristine-tar commit>.
=back
=head1 OPTIONS
=over 4
=item -v
=item --verbose
Verbose mode, show each command that is run.
=item -d
=item --debug
Debug mode.
=item -k
=item --keep
Don't clean up the temporary directory on exit.
=item -m message
=item --message=message
Use this option to specify a custom commit message to pristine-tar commit.
=back
=head1 EXAMPLES
Suppose you maintain the hello package, in a git repository. You have
just created a tarball of the release, I<hello-1.0.tar.gz>, which you
will upload to a "forge" site.
You want to ensure that, if the "forge" loses the tarball, you can always
recreate exactly that same tarball. And you'd prefer not to keep copies
of tarballs for every release, as that could use a lot of disk space
when hello gets the background mp3s and user-contributed levels you
are planning for version 2.0.
The solution is to use pristine-tar to commit a delta file that efficiently
stores enough information to reproduce the tarball later.

	cd hello
	git tag -s 1.0
	pristine-tar commit ../hello-1.0.tar.gz 1.0

Remember to tell git to push both the pristine-tar branch, and your tag:

	git push --all --tags

Now it is a year later. The worst has come to pass; the "forge" lost
all its data, you deleted the tarballs to make room for bug report emails,
and you want to regenerate them. Happily, the git repository is still
available.

	git clone git://github.com/joeyh/hello.git
	cd hello
	pristine-tar checkout ../hello-1.0.tar.gz

=head1 LIMITATIONS
Only tarballs, gzipped tarballs, and bzip2ed tarballs are currently
supported.
Currently only the git revision control system is supported by the
"checkout" and "commit" commands. It's ok if the working copy
is not clean or has uncommitted changes, or has changes staged in the
index; none of that will be touched by "checkout" or "commit".
=head1 ENVIRONMENT
=over 4
=item B<TMPDIR>
Specifies a location to place temporary files, other than the default.
=back
=head1 AUTHOR
Joey Hess <joeyh@debian.org>
Licensed under the GPL, version 2 or above.
=cut
use warnings;
use strict;
use Pristine::Tar;
use Pristine::Tar::Delta;
use Pristine::Tar::Formats;
use File::Path;
use File::Basename;
use Cwd qw{getcwd abs_path};
# Force locale to C since tar may output utf-8 filenames differently
# depending on the locale.
# NOTE(review): only LANG is set here; LC_ALL / LC_* variables that may be
# present in the environment are left untouched -- confirm that is intended.
$ENV{LANG}='C';
# Don't let environment change tar's behavior.
delete $ENV{TAR_OPTIONS};
delete $ENV{TAPE};
# Custom commit message; bound to the -m/--message option below and read by
# commitdelta().
my $message;
# Hand control to dispatch(), which is not defined in this file (presumably
# imported from Pristine::Tar -- confirm). It receives the table of
# subcommands and the additional command-line options.
dispatch(
commands => {
# Each entry is an array ref: the handler, optionally followed by a number.
# NOTE(review): the number looks like a required argument count -- confirm
# against dispatch(). If so, "ci" (1) cannot take the optional upstream
# argument that "commit" (no number) accepts.
usage => [\&usage],
gentar => [\&gentar, 2],
gendelta => [\&gendelta, 2],
commit => [\&commit],
ci => [\&commit, 1],
checkout => [\&checkout, 1],
co => [\&checkout, 1],
},
options => {
# Getopt::Long-style spec: -m or --message with a string value.
"m|message=s" => \$message,
},
);
# Print the command synopsis to standard error and exit with status 1.
sub usage {
    my @synopsis = (
        "Usage: pristine-tar [-vdk] gendelta tarball delta\n",
        " pristine-tar [-vdk] gentar delta tarball\n",
        " pristine-tar [-vdk] [-m message] commit tarball [upstream]\n",
        " pristine-tar [-vdk] checkout tarball\n",
    );
    foreach my $line (@synopsis) {
        print STDERR $line;
    }
    exit 1;
}
# Temporary directory of the most recent recreatetarball() run. It lives at
# file scope so that recreatetarball_helper() can build the tarball again
# from the same prepared tree.
my $recreatetarball_tempdir;

# Prepare a work tree from $source that matches the names in the manifest,
# normalise its metadata, and build a tarball from it.
#
# Arguments:
#   $manifestfile - file with one tarball member name per line
#   $source       - directory holding the file contents
#   %options      - clobber_source => true: move $source into the work tree
#                   instead of copying it
#
# Returns whatever recreatetarball_helper() returns (the path of the
# generated tarball).
sub recreatetarball {
    my ($manifestfile, $source, %options) = @_;

    my $tempdir = tempdir();
    my $workdir = "$tempdir/workdir";

    my @manifest;
    open(my $manifest_fh, "<", $manifestfile) || die "$manifestfile: $!";
    while (my $entry = <$manifest_fh>) {
        chomp $entry;
        push @manifest, $entry;
    }
    close $manifest_fh;
    link($manifestfile, "$tempdir/manifest") || die "link $tempdir/manifest: $!";

    # The manifest and source should have the same filenames,
    # but the manifest probably has all the files under a common
    # subdirectory. Check if it does.
    my $subdir = "";
    foreach my $file (@manifest) {
        my ($top) = $file =~ m!^(/?[^/]+)(/|$)!;
        if (! defined $top) {
            debug("found file not in subdir: $file");
            $subdir = "";
            last;
        }
        if (! length $subdir) {
            # First entry seen: remember its leading path component.
            $subdir = $top;
            debug("set subdir to $subdir");
        }
        elsif ($subdir ne $top) {
            debug("found file not in subdir $subdir: $file");
            $subdir = "";
            last;
        }
    }

    if (length $subdir) {
        if (-e "$source/$subdir") {
            # The source already contains the common directory, so it can
            # be used as the work tree directly.
            debug("subdir $subdir exists");
            $subdir = '';
        }
        else {
            # The source holds the *contents* of the common directory;
            # place it one level down inside the work tree.
            debug("subdir is $subdir");
            doit("mkdir", $workdir);
            $subdir = "/$subdir";
        }
    }

    my @transfer = $options{clobber_source} ? ("mv") : ("cp", "-a");
    doit(@transfer, $source, "$workdir$subdir");

    # It's important that this create an identical tarball each time
    # for a given set of input files. So don't include file metadata
    # in the tarball, since it can easily vary.
    my $full_sweep = 0;
    foreach my $file (@manifest) {
        my $path = "$workdir/$file";
        if (-l $path) {
            # Can't set timestamp of a symlink, so
            # replace the symlink with an empty file.
            unlink($path) || die "unlink: $!";
            open(my $empty, ">", $path) || die "open: $!";
            close $empty;
        }
        elsif (! -e $path) {
            debug("$file is listed in the manifest but may not be present in the source directory");
            $full_sweep = 1;
            # Avoid tar failing on the nonexistent item by
            # creating a dummy directory.
            debug("creating missing $file");
            mkpath($path);
        }

        # The "_" tests reuse the stat result of the preceding -d.
        if (-d $path && (-u _ || -g _ || -k _)) {
            # tar behaves weirdly for some special modes
            # and ignores --mode, so clear them.
            debug("chmod $file");
            chmod(0755, $path) || die "chmod: $!";
        }

        if (-e $path) {
            utime(0, 0, $path) || die "utime: $file: $!";
        }
    }

    # If some files couldn't be matched up with the manifest,
    # it's possible they do exist, but just with names that make sense
    # to tar, but not to this program. Work around this and make sure
    # such files have their metadata tweaked, by doing a full sweep of
    # the tree.
    if ($full_sweep) {
        debug("doing full tree sweep to catch missing files");
        use File::Find;
        my $normalise = sub {
            if (-l $_) {
                unlink($_) || die "unlink: $!";
                open(my $empty, ">", $_) || die "open: $!";
                close $empty;
            }
            if (-d $_ && (-u _ || -g _ || -k _)) {
                chmod(0755, $_) || die "chmod: $!";
            }
            utime(0, 0, $_) || die "utime: $_: $!";
        };
        find($normalise, $workdir);
    }

    # The first attempt is always made without the long-link workaround.
    delete $ENV{TAR_LONGLINK_100};
    $recreatetarball_tempdir = $tempdir;
    return recreatetarball_helper();
}
# Run tar over the work tree prepared by the last recreatetarball() call
# (found through $recreatetarball_tempdir) and return the path of the
# resulting tarball. Ownership is forced to 0:0 and the mode to 0644, and
# only the names listed in the manifest are archived, without recursion.
sub recreatetarball_helper {
    my $tempdir = $recreatetarball_tempdir;
    my $ret = "$tempdir/recreatetarball";
    my @tar_command = (
        "tar", "cf", $ret,
        "--owner", 0,
        "--group", 0,
        "--numeric-owner",
        "-C", "$tempdir/workdir",
        "--no-recursion",
        "--mode", "0644",
        "--files-from", "$tempdir/manifest",
    );
    doit(@tar_command);
    return $ret;
}
# Rebuild the tarball with TAR_LONGLINK_100 set in the environment, and
# return the path reported by recreatetarball_helper().
sub recreatetarball_longlink_100 {
    # For a long time, Debian's tar had a patch that made it output
    # larger tar files if a filename was exactly 100 bytes. Now that
    # Debian's tar has been fixed, in order to recreate the tarball
    # created by that version of tar, we rely on an environment
    # variable to turn back on the old behavior.
    #
    # This variable is currently only available in Debian's tar,
    # so users of non-debian tar who want to recreate tarballs from
    # deltas created using the old version of Debian's tar are SOL.
    $ENV{TAR_LONGLINK_100} = 1;
    my $tarball = recreatetarball_helper();
    # Remove the variable again so later tar runs are not affected.
    delete $ENV{TAR_LONGLINK_100};
    return $tarball;
}
# Regenerate $tarball from $deltafile and the files in the current
# directory.
#
# Arguments:
#   $deltafile - delta file to read (passed to Pristine::Tar::Delta::read)
#   $tarball   - path of the tarball to produce
#   %opts      - extra options forwarded to recreatetarball()
#
# Calls error() if no candidate tarball can be patched with the stored
# xdelta, or if the delta's wrapper is of an unknown type.
sub gentar {
    my ($deltafile, $tarball, %opts) = @_;

    my $delta = Pristine::Tar::Delta::read(Tarball => $deltafile);
    Pristine::Tar::Delta::assert(
        $delta,
        type       => "tar",
        maxversion => 2,
        minversion => 2,
        fields     => [qw{manifest delta}],
    );

    # With a compression wrapper, the plain tarball is only an
    # intermediate file and goes to a temporary location.
    my $has_wrapper = defined $delta->{wrapper};
    my $out = $has_wrapper
        ? tempdir()."/".basename($tarball).".tmp"
        : $tarball;

    # Candidate generators, tried in order until xdelta accepts one.
    my @try = (
        sub {
            recreatetarball($delta->{manifest}, getcwd(),
                clobber_source => 0, %opts);
        },
        \&recreatetarball_longlink_100,
    );

    my $ok;
    foreach my $variant (@try) {
        my $candidate = $variant->();
        my $status = try_doit("xdelta", "patch", $delta->{delta}, $candidate, $out);
        next if $status != 0;
        $ok = 1;
        last;
    }
    if (! $ok) {
        error("Failed to reproduce original tarball. Please file a bug report.");
    }

    if ($has_wrapper) {
        my $delta_wrapper = Pristine::Tar::Delta::read(Tarball => $delta->{wrapper});

        # wrapper type => [ program, subcommand, suffix of its output ]
        my %regen_for = (
            gz  => ["pristine-gz",  "gengz",  ".gz"],
            bz2 => ["pristine-bz2", "genbz2", ".bz2"],
        );
        my $regen = $regen_for{$delta_wrapper->{type}};

        if ($regen) {
            my ($program, $subcommand, $suffix) = @{$regen};
            doit($program,
                ($verbose ? "-v" : "--no-verbose"),
                ($debug ? "-d" : "--no-debug"),
                ($keep ? "-k" : "--no-keep"),
                $subcommand, $delta->{wrapper}, $out);
            doit("mv", "-f", $out.$suffix, $tarball);
        }
        else {
            error("unknown wrapper file type: ".
                $delta_wrapper->{type});
        }
    }
}
# Write the member names of a tarball to a manifest file, one per line.
#
# Arguments:
#   $tarball  - path of the tarball to list with "tar tf"
#   $manifest - path of the manifest file to create (overwritten)
#
# A leading "./" or any run of leading slashes is stripped from each name,
# and names that become empty are dropped. Dies if tar cannot be started or
# the manifest cannot be written.
sub genmanifest {
    my ($tarball, $manifest) = @_;

    # List-form pipe open: the tarball name reaches tar as one argument and
    # is never parsed by a shell, so spaces and metacharacters are safe.
    open(my $listing, "-|", "tar", "tf", $tarball) || die "tar tf: $!";
    open(my $out, ">", $manifest) || die "$!";
    while (my $name = <$listing>) {
        chomp $name;
        # ./ or / in the manifest just confuses tar
        $name =~ s/^\.?\/+//;
        print {$out} "$name\n" if length $name;
    }
    # tar's exit status is deliberately not treated as fatal here, matching
    # the previous behaviour.
    close $listing;
    # Buffered write errors only surface at close time.
    close($out) || die "$manifest: $!";
}
# Generate a pristine-tar delta for a tarball.
#
# Arguments:
#   $tarball   - the upstream tarball (plain, gzip or bzip2 compressed, as
#                judged by is_gz()/is_bz2())
#   $deltafile - where to write the delta (passed to
#                Pristine::Tar::Delta::write)
#   %opts      - recreatetarball => path of an already recreated tarball to
#                diff against; when absent the tarball is unpacked and
#                recreated here
#
# Dies or calls error() when a helper program fails.
sub gendelta {
    my ($tarball, $deltafile, %opts) = @_;
    my $tempdir = tempdir();
    my %delta;

    # Check to see if it's compressed, and pick the matching decompressor.
    my ($compression, $decompressor);
    if (is_gz($tarball)) {
        ($compression, $decompressor) = ('gz', 'zcat');
    }
    elsif (is_bz2($tarball)) {
        ($compression, $decompressor) = ('bz2', 'bzcat');
    }

    if (defined $compression) {
        # Get the uncompressed tarball. The data is binary, so copy it in
        # fixed-size blocks rather than "lines".
        my $uncompressed = "$tempdir/origtarball";
        open(my $in, "-|", $decompressor, $tarball) || die "$decompressor: $!";
        open(my $out, ">", $uncompressed) || die "$uncompressed: $!";
        binmode $in;
        binmode $out;
        my ($buffer, $got);
        while ($got = read($in, $buffer, 65536)) {
            print {$out} $buffer or die "$uncompressed: $!";
        }
        die "$decompressor: $!" unless defined $got;
        close($in) || die "$decompressor: $!";
        close($out) || die "$uncompressed: $!";

        # Generate a wrapper file to recreate the compressed file.
        $delta{wrapper} = "$tempdir/wrapper";
        doit("pristine-$compression",
            ($verbose ? "-v" : "--no-verbose"),
            ($debug ? "-d" : "--no-debug"),
            ($keep ? "-k" : "--no-keep"),
            "gendelta", $tarball, $delta{wrapper});
        $tarball = $uncompressed;
    }

    $delta{manifest} = "$tempdir/manifest";
    genmanifest($tarball, $delta{manifest});

    my $recreatetarball;
    if (! exists $opts{recreatetarball}) {
        require File::Spec;
        my $sourcedir = "$tempdir/tmp";
        doit("mkdir", $sourcedir);
        doit("tar", "xf", File::Spec->rel2abs($tarball), "-C", $sourcedir);
        # if all files were in a subdir, use the subdir as the sourcedir
        # (readdir instead of glob, so odd characters in the temporary
        # path cannot change the result)
        opendir(my $dh, $sourcedir) || die "$sourcedir: $!";
        my @out = map { "$sourcedir/$_" }
            grep { $_ ne "." && $_ ne ".." } readdir($dh);
        closedir $dh;
        if (@out == 1 && -d $out[0]) {
            $sourcedir = $out[0];
        }
        $recreatetarball = recreatetarball("$tempdir/manifest", $sourcedir,
            clobber_source => 1);
    }
    else {
        $recreatetarball = $opts{recreatetarball};
    }

    $delta{delta} = "$tempdir/delta";
    # List form: no shell is involved, so the paths need no quoting.
    my $status = system("xdelta", "delta", "-0", "--pristine",
        $recreatetarball, $tarball, $delta{delta});
    if ($status == -1) {
        error("failed to run xdelta: $!");
    }
    elsif ($status & 127) {
        # Killed by a signal; the exit code bits alone would read as 0.
        error("xdelta died with signal ".($status & 127));
    }
    my $ret = $status >> 8;
    # xdelta exits 1 on success if there were differences
    if ($ret != 1 && $ret != 0) {
        error("xdelta failed with return code $ret");
    }

    Pristine::Tar::Delta::write(Tarball => $deltafile, {
        version => 2,
        type => 'tar',
        %delta,
    });
}
# Work out which version control system is in use.
#
# Returns 'git' when the current directory contains a .git directory or
# when GIT_DIR is set to a non-empty value; otherwise calls error().
sub vcstype {
    return 'git' if -d ".git";
    return 'git' if exists $ENV{GIT_DIR} && length $ENV{GIT_DIR};
    error("cannot determine type of vcs used for the current directory");
}
# Export the content of an upstream ref into a fresh temporary directory.
#
# Arguments:
#   $upstream - optional; an object id (anything containing 40
#               alphanumeric characters is used as-is) or a ref name to
#               look up with "git show-ref". Defaults to 'upstream'.
#
# Returns ($dest, $id): the directory the tree was unpacked into and the
# id that was exported. Calls error() when no ref, or more than one
# ambiguous ref, matches.
sub export {
    my $upstream = shift;

    my $dest = tempdir();
    my $id;

    my $vcs = vcstype();
    if ($vcs eq "git") {
        if (defined $upstream && $upstream =~ /[A-Za-z0-9]{40}/) {
            $id = $upstream;
        }
        else {
            if (! defined $upstream) {
                $upstream = 'upstream';
            }

            my @reflines = map { chomp; $_ } `git show-ref \Q$upstream\E`;
            if (! @reflines) {
                error("failed to find ref using: git show-ref $upstream");
            }

            # if one line's ref matches exactly, use it
            foreach my $line (@reflines) {
                my ($sha, $ref) = $line =~ /^([A-Za-z0-9]+)\s(.*)/;
                # Skip lines that do not look like "<sha> <ref>" instead
                # of comparing against an undefined value.
                next unless defined $ref;
                if ($ref eq $upstream || $ref eq "refs/heads/$upstream") {
                    $id = $sha;
                    last;
                }
            }

            if (! defined $id) {
                if (@reflines == 1) {
                    ($id) = $reflines[0] =~ /^([A-Za-z0-9]+)\s/;
                }
                else {
                    error("more than one ref matches \"$upstream\":\n".
                        join("\n", @reflines));
                }
            }
        }

        # Both interpolated values are escaped for the shell; the
        # destination used to be wrapped in hand-written single quotes,
        # which broke for temporary paths containing a quote.
        doit("git archive --format=tar \Q$id\E | (cd \Q$dest\E && tar x)");
    }
    else {
        die "unsupported vcs $vcs";
    }

    return ($dest, $id);
}
# Looks for a branch with the given name.
#
#   - a local branch exists:                 returns the name passed in
#   - no ref matches at all:                 returns undef
#   - exactly one non-local ref matches:     returns that ref's full name
#   - anything else:                         calls error() with the list of
#                                            matching remote refs
sub git_findbranch {
    my $branch = shift;

    my @reflines = split(/\n/, `git show-ref \Q$branch\E`);
    # Everything that is not refs/heads/<branch> counts as a remote.
    my @remotes = grep { ! m/ refs\/heads\/\Q$branch\E$/ } @reflines;

    # A line was filtered out, so the local branch exists.
    return $branch if $#reflines != $#remotes;

    return undef if @reflines == 0;

    if (@remotes == 1) {
        my ($remote_branch) = $remotes[0] =~ /^[A-Za-z0-9]+\s(.*)/;
        return $remote_branch;
    }

    error("There's no local $branch branch. Several remote $branch branches exist.\n".
        "Run \"git branch --track $branch <remote>\" to create a local $branch branch\n".
        join("\n", @remotes));
}
# Fetch the stored delta and tree id for a tarball from the pristine-tar
# branch.
#
# Arguments:
#   $tarball - tarball name; only its basename is used to build the
#              "<name>.delta" and "<name>.id" file names
#
# Returns ($delta, $id): the delta file content and the chomped id.
# Calls error() when the branch or either file cannot be read, and dies
# for an unsupported vcs.
sub checkoutdelta {
    my $tarball = shift;

    my $branch = "pristine-tar";
    my $name = basename($tarball);
    my $deltafile = $name.".delta";
    my $idfile = basename($tarball).".id";

    my $vcs = vcstype();
    die "unsupported vcs $vcs" if $vcs ne "git";

    my $found = git_findbranch($branch);
    if (! defined $found) {
        error("no $branch branch found, use \"pristine-tar commit\" first");
    }
    elsif ($found eq $branch) {
        $branch = "refs/heads/$branch";
    }
    else {
        # use remote branch
        $branch = $found;
    }

    # Read one file from the branch, reporting a failed git invocation.
    my $git_show = sub {
        my $file = shift;
        my $content = `git show $branch:\Q$file\E`;
        if ($?) {
            error("git show $branch:$file failed");
        }
        return $content;
    };

    my $delta = $git_show->($deltafile);
    if (! length $delta) {
        error("git show $branch:$deltafile returned no content");
    }

    my $id = $git_show->($idfile);
    chomp $id;
    if (! length $id) {
        error("git show $branch:$idfile returned no id");
    }

    return ($delta, $id);
}
# Commit a delta and its tree id to the "pristine-tar" branch.
#
# Arguments:
#   $delta   - content of the delta file
#   $id      - id to record in the "<name>.id" file
#   $tarball - tarball name; its basename names the committed files
#
# The commit message is the file-scoped $message when defined, otherwise
# "pristine-tar data for <basename>". The function sets GIT_INDEX_FILE and
# GIT_DIR in the environment and changes into a temporary directory.
# Dies or calls error() on failure.
sub commitdelta {
    my ($delta, $id, $tarball) = @_;

    my $branch = "pristine-tar";
    my $deltafile = basename($tarball).".delta";
    my $idfile = basename($tarball).".id";
    my $commit_message = defined $message ? $message :
        "pristine-tar data for ".basename($tarball);

    my $vcs = vcstype();
    if ($vcs eq "git") {
        my $tempdir = tempdir();

        # Three-argument open: the file names come from the tarball name
        # and must not be interpreted as part of the open mode.
        open(my $delta_fh, ">", "$tempdir/$deltafile") || die "$tempdir/$deltafile: $!";
        binmode $delta_fh;
        print {$delta_fh} $delta;
        close($delta_fh) || die "$tempdir/$deltafile: $!";

        open(my $id_fh, ">", "$tempdir/$idfile") || die "$tempdir/$idfile: $!";
        print {$id_fh} "$id\n";
        close($id_fh) || die "$tempdir/$idfile: $!";

        # Commit the delta to a branch in git without affecting the
        # index, and without touching the working tree. Aka deep
        # git magick.
        $ENV{GIT_INDEX_FILE} = "$tempdir/index";
        if (! exists $ENV{GIT_DIR} || ! length $ENV{GIT_DIR}) {
            $ENV{GIT_DIR} = getcwd()."/.git";
        }
        else {
            $ENV{GIT_DIR} = abs_path($ENV{GIT_DIR});
        }
        chdir($tempdir) || die "chdir: $!";

        # If there's no local branch, branch from a remote branch
        # if one exists. If there's no remote branch either, the
        # code below will create the local branch.
        my $found = git_findbranch($branch);
        if (defined $found && $found ne $branch) {
            doit("git branch --track \Q$branch\E \Q$found\E");
        }

        my $branch_exists = (system("git show-ref --quiet --verify refs/heads/$branch") == 0);
        if ($branch_exists) {
            # Seed the temporary index with the branch's current files.
            doit("git ls-tree -r --full-name $branch | git update-index --index-info");
        }
        doit("git", "update-index", "--add", $deltafile, $idfile);

        my $sha = `git write-tree`;
        if ($?) {
            error("git write-tree failed");
        }
        $sha =~ s/\n//sg;
        if (! length $sha) {
            error("git write-tree did not return a sha");
        }

        # Fork a child whose standard input is fed by this process; the
        # child's git commit-tree reads the commit message from it.
        my $pid = open(my $commit_pipe, "|-");
        # undef means the fork failed; without this check the parent
        # would fall into the child branch below and exit 0.
        die "cannot fork: $!" unless defined $pid;
        if (! $pid) {
            # child
            my $commitopts = $branch_exists ? "-p $branch" : "";
            my $commitsha = `git commit-tree $sha $commitopts`;
            if ($?) {
                error("git commit-tree failed");
            }
            $commitsha =~ s/\n//sg;
            if (! length $commitsha) {
                error("git commit-tree did not return a sha");
            }
            doit("git", "update-ref", "refs/heads/$branch", $commitsha);
            exit 0;
        }
        else {
            # parent
            print {$commit_pipe} $commit_message."\n";
            close($commit_pipe) || error("git commit-tree failed");
        }

        message("committed $deltafile to branch $branch");
    }
    else {
        die "unsupported vcs $vcs";
    }
}
# Handler for the "commit" command: generate a delta for a tarball and
# commit it to version control.
#
# Arguments:
#   $tarball  - the tarball to generate a delta for (required)
#   $upstream - optional tag/branch/id holding the tarball's content,
#               passed on to export()
#
# Calls usage() when the tarball is missing or extra arguments are given.
sub commit {
    my $tarball = shift;
    my $upstream = shift; # optional

    if (! defined $tarball || @_) {
        usage();
    }

    my $tempdir = tempdir();
    my ($sourcedir, $id) = export($upstream);
    genmanifest($tarball, "$tempdir/manifest");
    my $recreatetarball = recreatetarball("$tempdir/manifest", $sourcedir,
        clobber_source => 1);

    # Fork a child that writes the delta to its standard output, which
    # this process reads through the pipe.
    my $pid = open(my $gendelta_pipe, "-|");
    # undef means the fork failed; without this check the parent would
    # run the child branch below and exit 0 without committing anything.
    die "cannot fork: $!" unless defined $pid;
    if (! $pid) {
        # child
        gendelta($tarball, "-", recreatetarball => $recreatetarball);
        exit 0;
    }

    # Slurp the whole delta; $/ is only changed for this one read.
    my $delta = do { local $/ = undef; <$gendelta_pipe> };
    close($gendelta_pipe) || error("failed to generate delta");

    commitdelta($delta, $id, $tarball);
}
# Handler for the "checkout" command: regenerate a tarball from the delta
# and id stored in version control.
#
# Arguments:
#   $tarball - path of the tarball to produce; its basename selects the
#              stored delta
sub checkout {
    my $tarball = shift;

    my ($delta, $id) = checkoutdelta($tarball);
    my ($sourcedir, undef) = export($id);

    # Fork a child that reads the delta from its standard input, which
    # this process feeds through the pipe.
    my $pid = open(my $gentar_pipe, "|-");
    # undef means the fork failed; without this check the parent would
    # run the child branch below with no delta on its standard input.
    die "cannot fork: $!" unless defined $pid;
    if (! $pid) {
        # child
        # Resolve the output path before leaving the current directory.
        $tarball = abs_path($tarball);
        chdir($sourcedir) || die "chdir $sourcedir: $!";
        gentar("-", $tarball, clobber_source => 1);
        exit 0;
    }

    print {$gentar_pipe} $delta or error("failed to generate tarball");
    close($gentar_pipe) || error("failed to generate tarball");

    message("successfully generated $tarball");
}
Jump to Line
Something went wrong with that request. Please try again.