Permalink
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 4908 lines (4370 sloc) 119 KB
#!/usr/bin/perl
use strict vars, subs;
use warnings;
use Scriptalicious;
use Maptastic;
use Maptastic::DBI qw(row_iter);
use FindBin qw($Bin);
use List::Util qw(sum max min);
use Scalar::Util qw(looks_like_number);
use Data::Dumper;
use DBI;
use Cwd;
use IO::Handle;
use File::Path qw(rmtree);
use Fatal qw(:void open);
use Digest::SHA1;
use Digest::MD5 qw(md5_hex);
use Text::Wrap;
$Text::Wrap::columns = 72;
use JSON 1.15;
use YAML;
use Encode;
BEGIN { eval "use Term::ProgressBar" };
use vars qw($SHARE $SCRIPT_MODE $SOURCE $FULLY_AUTOMATIC);
$SHARE = $Bin;
# do_init($dbh) - the "init" command.
#
# Create a git repository in the current directory, record where the
# Perforce data came from (so do_drop can later refuse to delete a
# repository it did not create), and create the p4raw tables from
# $SHARE/tables.sql inside a single transaction.  barf()s if the
# schema cannot be installed (e.g. it is already set up).
sub do_init {
    my $dbh = shift;
    run("git", "init");
    run("git", "config", "p4raw.source", getcwd);
    $dbh->begin_work;
    say "Setting up tables on DB ".$dbh->{pg_db}." (in a transaction)";
    # three-arg open with a lexical handle; include the filename in
    # the error so a missing share dir is easy to diagnose
    open my $sql_fh, "<", "$SHARE/tables.sql"
	or die "open $SHARE/tables.sql: $!";
    my $statements = join "", <$sql_fh>;
    close $sql_fh;
    mutter "Running: $statements";
    eval {
	# suppress DBI's own error chatter; we report via barf below
	local($dbh->{PrintError});
	$dbh->do($statements);
	$dbh->commit;
    };
    if ( $@ ) {
	my $msg = $@;
	chomp($msg);
	$dbh->rollback;
	barf "couldn't set up tables - already setup? ($msg)";
    }
    say "Set up DB OK";
}
# do_drop($dbh) - the "drop" command.
#
# Reverse of do_init: derive "drop table/sequence" statements from the
# create statements in $SHARE/tables.sql (in reverse order, cascading
# table drops), run them, and remove the git directory - but only if
# the recorded p4raw.source matches the current directory.
sub do_drop {
    my $dbh = shift;
    open my $sql_fh, "<", "$SHARE/tables.sql"
	or die "open $SHARE/tables.sql: $!";
    my $statements = join ";\n", reverse
	map { (m{^create (table|sequence) (\S+)}
	       ? ("drop $1 $2"
		  . ($1 eq "table" ? " cascade" : ""))
	       : ()) } <$sql_fh>;
    close $sql_fh;
    mutter "Running: $statements";
    $dbh->begin_work;
    $dbh->do("set constraints all deferred");
    $dbh->do($statements);
    $dbh->commit;
    say "Dropped DB OK";
    # capture() returns the command output including its trailing
    # newline (do_find_change chomps the same call); strip it here,
    # otherwise the getcwd comparison below could never match and the
    # rmtree target would carry a newline.
    my $git_dir = capture("git", "rev-parse", "--git-dir");
    chomp($git_dir);
    my $source = capture("git", "config", "p4raw.source");
    chomp($source);
    if ( $source ) {
	if ( $source eq getcwd ) {
	    rmtree($git_dir);
	}
	else {
	    moan("source is '$source', not ".getcwd
		 ."; rm -rf $git_dir yourself");
	}
    }
    else {
	moan "no p4raw.source; not removing no git dir";
    }
}
# when loading data, this gives the part of the query that yields the
# rows that are new for the "change_parents" table, which can't use
# the normal magic because it really doesn't have a primary key.
# %prebuilt_keys: comma-separated pseudo-key columns used by the
# "select distinct on (...)" merge query in do_load.
my %prebuilt_keys =
    ('change_parents' => "branchpath, change, ref, parent_branchpath, parent_change");
# %prebuilt_cond: hand-written join condition used in place of the
# primary-key join that do_load normally builds.
# NOTE(review): SQL 'and' binds tighter than 'or', so this groups as
# ((branchpath and change and ref) or (parent_branchpath and
# parent_change)) - confirm that is the intended match condition.
my %prebuilt_cond =
    ('change_parents' => <<SQL);
(t1.branchpath = t2.branchpath and
t1.change = t2.change and
(t1.ref = t2.ref) or
(t1.parent_branchpath = t2.parent_branchpath and
t1.parent_change = t2.parent_change)
)
SQL
# this is the set of tables that we load from the Perforce repository.
# everything else - views, client maps, etc, I didn't see to be
# interesting historically.
#
# Keys are the table names as they appear in journal/checkpoint
# lines, values are our SQL table names.  db.user maps to "p4user",
# presumably to avoid the SQL reserved word - TODO confirm.  The
# db.change_* / db.*marks entries are p4raw's own state, written and
# re-read via the p4raw-extra-*.asv files in journal format.
my %wanted =
    ( "db.desc" => "change_desc",
      "db.integed" => "integed",
      "db.change" => "change",
      "db.depot" => "depot",
      "db.revcx" => "revcx",
      "db.user" => "p4user",
      "db.rev" => "rev",
      "db.label" => "label",
      "db.change_branches" => "change_branches",
      "db.change_parents" => "change_parents",
      "db.marks" => "marks",
      "db.rev_marks" => "rev_marks",
      "db.change_marks" => "change_marks",
    );
# open_journal($filename) returns IO::Handle
#
# utility function to open a journal or checkpoint file, transparently
# decompressing files whose name ends in .gz.
sub open_journal {
    my $filename = shift;
    my $fh;
    if ( $filename =~ m{\.gz$} ) {
	# list-form pipe open: no shell is involved, so shell
	# metacharacters in $filename cannot be interpreted (the old
	# two-arg open of "zcat $filename|" went through the shell).
	open $fh, "-|", "zcat", $filename
	    or die "zcat $filename: $!";
    }
    else {
	open $fh, "<", $filename
	    or die "open $filename: $!";
    }
    return $fh;
}
# journal_iter($filename)
# returns sub()
# returns ($action, $x, $table, @rowdata);
#
# return an iterator for journal file $filename. This decodes the
# Perforce checkpoint format, which is relatively simple. It assumes
# that the input is iso-8859-1 encoding, and converts to utf-8 along
# the way.  Each call to the returned closure yields the columns of
# the next row belonging to a table in %wanted, or the empty list at
# end of file (the handle is closed then).
sub journal_iter {
    my $filename = shift;
    my $fh = open_journal($filename);
    # pre-built filter: only journal lines for tables in %wanted get
    # tokenized at all
    my $wanted = qr{^\@.v\@\s+\d+\s\@(?:${\(join"|",keys%wanted)})\@};
    sub {
	my $L;
	# skip forward to the next interesting line
	do {
	    $L = $fh->getline;
	    if ( ! defined($L) ) {
		$fh->close;
		return;
	    }
	} until ($L =~ m{$wanted});
	my @columns;
	# walk the line token by token with \G; a token is either an
	# @-quoted string (@@ is an escaped @), a bare word/number,
	# or \N for NULL
	while ( (pos($L)||0)+1 < length $L ) {
	    my $pre = pos $L;
	    my $ok = $L =~ m{\G(?:\@((?:[^@]+|\@\@)*)\@
			     |(-?\w+)
			     |(\\N))\s}gx;
	    if ( !$ok ) {
		# a failed //g match resets pos(); restore it so the
		# continuation branch below knows where we were
		pos($L) = $pre;
	    }
	    # NOTE(review): "my ... if $ok" is the discouraged
	    # conditional-my idiom; it appears to rely on the
	    # captures simply being undef when !$ok - confirm.
	    my ($string, $token, $null)
		= ($1, $2, $3) if $ok;
	    if ( defined $string ) {
		$string =~ s{\@\@}{\@}g;
		utf8::upgrade($string);
		push @columns, $string;
	    }
	    elsif ( defined $token ) {
		push @columns, $token;
	    }
	    elsif ( $null ) {
		push @columns, undef;
	    }
	    else {
		# no token matched: the current @-quoted string must
		# continue on following lines.  Read until a line that
		# closes the quote, append, and re-scan from where we
		# left off.
		die "end of file; $L" if $fh->eof;
		my $p = pos $L;
		pos($L)=0;
		my $line;
		my @extra;
		do {
		    $line = $fh->getline;
		    push @extra, $line;
		} until ($line =~ m{^(?:[^@]|\@\@)*@ ?$});
		$L=join("",$L,@extra);
		pos($L) = $p;
		redo;
	    }
	}
	return @columns;
    }
}
# Q($dbh, @binds, $sql) - run a query via row_iter and return just the
# first row: as a list of its values (ordered by column name) in list
# context, or the first such value in scalar context.  Returns the
# empty list / undef when the query yields no rows.
sub Q {
    my $iter = row_iter(@_);
    my $row = $iter->();
    return () unless $row;
    my @values = map { $row->{$_} } sort keys %$row;
    return wantarray ? @values : $values[0];
}
# do_load($dbh, @args) - the "load" command.
#
# Load Perforce checkpoint/journal files (and p4raw-extra-*.asv state
# files) into the database.  Checkpoint data is inserted directly into
# the real tables; journal updates go into per-table temporary
# "${table}_new" tables and are then merged in (delete-then-insert,
# since Postgres has no MERGE here).  Options: -f/--force skips the
# "already loaded" filtering, -i/--insist keeps files the filter would
# otherwise call uninteresting.
sub do_load {
    my $dbh = shift;
    my %load_opts;
    {
	# getopt() works on @ARGV, so swap our args in and back out
	@ARGV = @_;
	getopt("f|force" => \$load_opts{force},
	       "i|insist" => \$load_opts{all_interesting},
	      );
	@_ = @ARGV;
    }
    my @files;
    if ( @_ ) {
	@files = @_;
    }
    else {
	# no files named: glob the conventional filenames
	@files = grep { -f }
	    (<journal checkpoint journal.*.gz checkpoint.*.gz>,
	     <p4raw-extra-*.asv p4raw-journal-*.asv.gz>);
    }
    goto no_filter if $load_opts{force};
    my $already_got = row_iter($dbh, <<'SQL');
select source_file from source_filename
SQL
    # record what was already loaded, and the highest-numbered
    # checkpoint/journal among them
    my %got;
    my $max_checkpoint;
    my $max_journal;
    while (my $got = $already_got->()) {
	my $file = $got->{source_file};
	$got{$file}++;
	if ($file =~ m{^(checkpoint|journal).(\d+).gz}) {
	    $max_journal = $2 if $1 eq "journal";
	    $max_checkpoint = $2 if $1 eq "checkpoint";
	}
    }
    # we shouldn't load more than the latest checkpoint - find the top one(s)
    my $top_checkpoint;
    my $top_journal;
    for my $file (@files) {
	if ($file =~ m{^(checkpoint|journal).(\d+).gz}) {
	    $top_journal = $2 if $1 eq "journal";
	    $top_checkpoint = $2 if $1 eq "checkpoint";
	}
    }
    if ( $max_journal or $max_checkpoint or $top_checkpoint ) {
	# we might need to exclude some files. check.
	@files = grep {
	    # NOTE(review): the ($max_journal or $max_checkpoint)
	    # disjunct makes the "$max_checkpoint > $1" test
	    # unreachable - possibly an 'or' that was meant to be an
	    # 'and'; confirm intent before changing.
	    if (m{^checkpoint.(\d+).gz} and
		(($max_journal or $max_checkpoint) or
		 ($max_checkpoint and $max_checkpoint > $1))
		and ! $load_opts{all_interesting}
	       ) {
		mutter "excluding $_ - not interesting";
		();
	    }
	    elsif ( m{^journal.(\d+).gz} and
		    ($max_checkpoint && $max_checkpoint > $1
		     or $max_journal && $max_journal >= $1
		     or $top_checkpoint && $top_checkpoint > $1)
		    and !$load_opts{all_interesting}
		  ) {
		mutter "excluding $_ - not interesting";
		0;
	    }
	    elsif ( $_ ne "journal" and
		    $got{$_} ) {
		moan "excluding $_ - already got";
		0;
	    }
	    else {
		1;
	    }
	} @files;
	# order: checkpoints first, then extra state files, then
	# numbered journals, then everything else.
	# NOTE(review): "my $a_r = $a =~ m{(\d+)}" assigns the match
	# *success* (boolean), not the captured number - numeric
	# ordering by embedded number looks unimplemented; confirm.
	@files = sort { ($b=~m{^checkpoint.\d+.gz}) <=> ($a=~m{^checkpoint.\d+.gz}) or
			($b=~m{^p4raw-extra-\w+.asv$}) <=> ($a=~m{^p4raw-extra-\w+.asv$}) or
			($b=~m{^journal.\d+.gz}) <=> ($a=~m{^journal.\d+.gz}) or do {
			    my $a_r = $a =~ m{(\d+)};
			    my $b_r = $b =~ m{(\d+)};
			    $a_r && $b_r && ($a_r <=> $b_r);
			} or $a cmp $b } @files;
    }
    say "Remaining files: @files";
  no_filter:
    my $marks_dirty;
    my (%sth, %count);
    my $loaded_checkpoint_data;
    my $loaded_updates;
    while ( my $filename = shift @files ) {
	my $is_checkpoint = $filename =~ m{^checkpoint.\d+.gz|^p4raw-extra-\w+.asv$};
	if ( $is_checkpoint ) {
	    $loaded_checkpoint_data++;
	}
	elsif ( $loaded_checkpoint_data ) {
	    say "We're moving from checkpoint to data now - better apply constraints";
	    do_check($dbh);
	    %sth=();
	}
	elsif ( !$loaded_updates ) {
	    $dbh->begin_work;
	}
	$loaded_updates = !$is_checkpoint;
	say "loading data from $filename";
	# remember the file so future runs can skip it; its id tags
	# every row we insert below
	$dbh->do
	    ('insert into source_filename (source_file) '
	     .'values (?)', undef, $filename);
	my $source_file_id = $dbh->last_insert_id
	    (undef,undef,'source_filename', undef);
	start_timer;
	my $ji = journal_iter($filename);
	# lazily prepare one INSERT per (table, column count); journal
	# rows target the "${table}_new" temp table instead
	my $get_sth = sub {
	    my ($table, $size)=@_;
	    $sth{$table."\0".$size} ||= do {
		unless ( $is_checkpoint ) {
		    unless (scalar(grep m{^\Q$table\E\0},
				   keys %sth) > 1) {
			$dbh->do(<<SQL)
create temporary table "${table}_new" (like "${table}", incident serial)
SQL
			    or die $dbh->errstr;
		    }
		    $table .= "_new";
		    mutter "loading into temp table $table";
		}
		my $sql = "INSERT INTO $table "
		    ."VALUES (".join(",",("?")x$size).")";
		whisper "Preparing: $sql";
		$dbh->prepare($sql);
	    };
	};
	#binmode JOURNAL, ":encoding(iso-8859-1)";
	my ($rows, $dirty, $dirty_rows);
	$dbh->{AutoCommit} = 0;
	$dbh->{PrintError} = 0;
	while ( my @columns = $ji->() ) {
	    my $action = shift @columns; # "rv", "dv", etc
	    shift @columns; # some number. maybe a row version.
	    my $db = $wanted{shift @columns};
	    # Get rid of an extra unknown column
	    # (always set to 0 in my checkpoint files).
	    if ($db eq "rev" && @columns > 12) {
		splice(@columns,9,1);
	    }
	    # +2 for the leading source_file / source_file_max columns
	    my $sth = $get_sth->($db,scalar(@columns)+2);
	    eval {
		if ( $action eq "dv" ) {
		    # "dv" = delete: mark with source_file_max set
		    $sth->execute($source_file_id, $source_file_id, @columns);
		}
		else {
		    $sth->execute($source_file_id, undef, @columns);
		}
		$rows++;
		$dirty_rows++;
		$dirty+=sum map {
		    defined($_) ? length($_) : 0
		} @columns;
		if ( $VERBOSE > 1 ) {
		    print substr("Saved: @columns", 0, 78), "\n";
		}
	    };
	    if ( $@ ) {
		barf "DBI error ($@); Data: ".Dumper(\@columns);
	    }
	    $count{$db}++;
	    if ( $db eq "marks" ) {
		$marks_dirty++;
	    }
	    # you know, it's entirely possible that this
	    # commit every 5000 rows is pointless with Pg.
	    if ( $is_checkpoint and ($dirty > (1<<18) or $dirty_rows >= 5000) ) {
		say "commit after $rows rows: ".
		    join("; ", map{"$count{$_} x $_"}
			 keys %count);
		%count=();
		$dirty = 0;
		$dirty_rows = 0;
		$dbh->commit;
		$dbh->{AutoCommit} = 0;
	    }
	}
	if ( !$loaded_updates ) {
	    $dbh->commit;
	    $dbh->{AutoCommit} = 1;
	    say "Loaded $rows rows from $filename in ".show_elapsed
		if $rows;
	}
    }
    if ( $loaded_updates ) {
	# merge phase: fold each populated "${table}_new" temp table
	# into its real table, in foreign-key-friendly order
	my %tables = map { $_ => 1 } map { m{(.*)\0} }
	    keys %sth;
	my @tables;
	# NOTE(review): %wanted maps db.user to "p4user", so the
	# "user" entry below can never match a loaded table - confirm
	# whether p4user updates are merged at all.
	for my $table ( qw( depot user
			    change change_desc
			    rev revcx
			    integed
			    marks
			    rev_marks
			    change_branches
			    change_parents
			    change_marks
			  ) ) {
	    if ( $tables{$table} ) {
		push @tables, $table;
	    }
	}
	say "Loading tables in this order: @tables";
	$dbh->do("set constraints all deferred");
	my (@deletes, @inserts);
	# Oh, whither MERGE INTO :)
	for my $table ( @tables ) {
	    my $sth = $dbh->primary_key_info
		('', '', $table, {pg_onerow => 1});
	    my $pki = $sth && $sth->fetchall_arrayref;
	    my $join_cond;
	    my $keys;
	    if ( !$pki ) {
		# no primary key: use the hand-written condition from
		# %prebuilt_cond / %prebuilt_keys at the top of file
		$join_cond = $prebuilt_cond{$table}
		    or do {
			moan "don't know how to merge $table";
			next;
		    };
		$keys = $prebuilt_keys{$table};
	    }
	    else {
		$keys = $pki->[0][3]
		    or do {
			moan "$table has no primary key!";
			moan "did NOT load ${table}_new in";
			next;
		    };
		$join_cond = "(".join(" AND ", map { "\"$table\".$_ = t2.$_" }
				      split /\s*,\s*/, $keys).")";
	    }
	    # print statistics on temp table
	    if ( $VERBOSE > 0 ) {
		my $num_rows = Q($dbh, "select count(*) from ${table}_new");
		my $num_x = Q($dbh, "select count(*) from ${table}_new where source_file_max is not null");
		mutter "temp table ${table}_new has $num_rows rows ($num_x deletes)";
	    }
	    # delete all matching rows
	    my $sql = <<SQL;
delete from
"$table"
using
"${table}_new" t2
where
$join_cond
SQL
	    push @deletes, $sql;
	    #whisper "Deleting old data using: $sql";
	    #my $rows = 0+$dbh->do($sql);
	    #mutter "deleted $rows rows from $table";
	    my $cols = $dbh->column_info("", "", "${table}_new", '%');
	    my @cols = map { $_->[3] } @{ $cols->fetchall_arrayref };
	    # drop the trailing "incident" serial column that only
	    # exists on the temp table
	    pop @cols;
	    my $cols_txt = join ", ", @cols;
	    # insert all the new ones...
	    $sql = <<SQL;
insert into
"$table"
select * from
(
select distinct on ($keys)
$cols_txt
from
"${table}_new"
order by
$keys, incident desc
) x
where
x.source_file_max is null
SQL
	    #whisper "inserting new data with: ", $sql;
	    #$rows = 0+$dbh->do($sql);
	    #mutter "inserted $rows rows into $table";
	    push @inserts, $sql;
	    if ( $table eq "marks" ) {
		$marks_dirty++;
	    }
	}
	# deletes run child-tables-first (reverse of load order),
	# inserts in load order; constraints were deferred above
	for my $sql ( reverse(@deletes), @inserts ) {
	    mutter "running query: $sql";
	    my $rows = $dbh->do($sql);
	    mutter "$rows rows affected";
	}
	$dbh->commit;
	$dbh->{AutoCommit} = 1;
    }
    if ( $marks_dirty ) {
	# keep the git-fast-import mark sequence ahead of every mark
	# we have ever recorded
	my ($top) = values%{row_iter($dbh,<<SQL)->()};
select max(mark)+1 from marks
SQL
	say "resetting gfi_mark to $top";
	$dbh->do("alter sequence gfi_mark "
		 ."restart with $top");
    }
}
# _to_p4_journal_format($db_name, @fields) - encode one row as a
# Perforce journal "pv" line for table db.$db_name.  Numbers pass
# through bare, strings are @-quoted with embedded @ doubled, undef
# becomes \N; the line ends with a newline.
sub _to_p4_journal_format {
    my $db_name = shift;
    my @encoded = map {
	my $field = $_;
	if ( looks_like_number $field ) {
	    $field;
	}
	elsif ( defined $field ) {
	    (my $quoted = $field) =~ s{\@}{\@\@}g;
	    "\@$quoted\@";
	}
	else {
	    '\N';
	}
    } ("pv", 3, "db.$db_name", @_);
    return join(" ", @encoded)."\n";
}
# _journal_p4_row($statefile, $table, @row) - append one row to a
# p4raw state file in Perforce journal format (see
# _to_p4_journal_format), creating the file on first use.
sub _journal_p4_row {
    my $statefile = shift;
    my $table = shift;
    # ">>" would also create the file, but keep the original's
    # explicit create-vs-append distinction
    my $mode = ( -e $statefile ? ">>" : ">" );
    # lexical handle, three-arg open, and error checks (the original
    # bareword open was unchecked)
    open my $journal_fh, $mode, $statefile
	or die "open $statefile: $!";
    print {$journal_fh} _to_p4_journal_format $table, @_;
    close $journal_fh
	or die "close $statefile: $!";
}
# add_p4users($dbh, \@rows) - "FOUND" handler called from do_check
# with [usercode, change_count] pairs for committers missing from the
# p4user table.  Interactively collects a real name and e-mail for
# each, journals a synthetic db.user row to the state file, and
# returns the state file name so the caller can load it.
sub add_p4users {
    my $dbh = shift;
    my $data = shift;
    my $statefile = "p4raw-extra-users.asv";
    for my $entry ( @$data ) {
	my ($who, $count) = @$entry;
	say "$who made $count changes";
	# previous answers become the prompt defaults on re-ask
	my ($realname, $email);
	do {
	    $realname = prompt_string
		"Who was using this '$who' moniker?", $realname;
	    say "'$realname' huh. Ok.";
	    $email = prompt_string
		("And what was (or is) their e-mail address?",
		 $email);
	    say "Right, so I'll attribute commits from that usercode "
		."to $realname <$email>";
	} until ( prompt_Yn("Sound good?") );
	_journal_p4_row $statefile, "user",
	    ($who, $email, "", time, time,
	     $realname, "", 0, "", 0);
    }
    return $statefile;
}
# do_check($dbh) - the "check" command.
#
# Run $SHARE/constraints.sql statement by statement.  Comment blocks
# in that file are echoed to the user and may carry directives:
#   -- FOUND: name   call &name($dbh, \@rows) with the rows returned
#                    by the query that follows (e.g. add_p4users);
#                    the handler may return a state file to load
#   -- ONEROW        always show the query result as CSV
# Non-select statements (constraints, indexes) are executed with
# "already exists" errors silently skipped so re-runs are safe.
sub do_check {
    my $dbh = shift;
    require Text::CSV_XS;
    open SQL, "<$SHARE/constraints.sql" or die $!;
    my $constraints = join "", <SQL>;
    close SQL;
    my $do_this;
    my $one_row;
    # alternate between "--" comment blocks and ;-terminated statements
    while ( $constraints =~ m{\G(?: ( \s* --(?-s:.*)
				     (?: \n\s*--(?-s:.*) )* )
			      | \s* (.*?) (?:;|\Z) ) }sgx ) {
	my ($comment, $sql) = ($1, $2);
	if ( $comment ) {
	    $comment =~ s{^\s*--\s*}{}mg;
	    # directives apply to the statement that follows the
	    # comment block, and reset on every block
	    if ( $comment =~ s{^FOUND: ((?-s:.*))\n?}{}ms ) {
		$do_this = $1;
	    }
	    else {
		undef($do_this);
	    }
	    if ( $comment =~ s{^ONEROW(?-s:.*)\n?}{}ms ) {
		$one_row = 1;
	    }
	    else {
		undef($one_row);
	    }
	    say $comment;
	}
	elsif ( $sql ) {
	    $sql =~ s{^\s*}{}s;
	    if ( $sql =~ m{^select}i ) {
		mutter "query: $sql";
		my $sth = $dbh->prepare($sql);
		$sth->execute;
		my $csv;
		if ( $VERBOSE > 0 or $one_row ) {
		    $csv = Text::CSV_XS->new
			({binary => 1,
			  eol => "\n"});
		    my @N = @{ $sth->{NAME} };
		    $csv->print(\*STDOUT, \@N);
		}
		my $rows = 0;
		my @data;
		while ( my @row = $sth->fetchrow_array ) {
		    if ( $csv ) {
			$csv->print(\*STDOUT, \@row)
			    or barf $csv->error;
		    }
		    if ( $do_this ) {
			push @data, \@row;
		    }
		    $rows++;
		}
		if ( !$one_row ) {
		    say "($rows rows".
			($rows&&$VERBOSE==0
			 ?"; use -v to see them"
			 :"").")";
		}
		if ( $do_this and $rows ) {
		    # symbolic dispatch to the FOUND: handler
		    no strict 'refs';
		    my $statefile =
			&{"$do_this"}($dbh, \@data);
		    if ( $statefile ) {
			eval {
			    do_load($dbh, $statefile);
			};
			moan("load of new data failed; $@")
			    if $@;
		    }
		}
	    }
	    else {
		mutter "running: $sql";
		eval { local($dbh->{PrintError});
		       $dbh->do($sql) };
		if ( $@ ) {
		    my $x = $@;
		    chomp($x);
		    say "error from DB ($x), continuing"
			unless $x =~ m{already exists};
		}
	    }
	}
    }
}
# do_find_change($dbh, $rev) - the "find-change" command.
#
# A small (up to 6 digit) number is treated as a Perforce change and
# mapped to git commits; a 40-hex string is treated as a commit id and
# mapped to its change; anything else is resolved through git
# rev-parse first.
sub do_find_change {
    my $dbh = shift;
    my $rev = shift;
    abort "no revision passed to find-change" unless $rev;
    if ( $rev =~ m{^\d{1,6}$} ) {
	# Perforce change number
	show_git_paths($dbh, $rev);
	return;
    }
    if ( $rev =~ m{^[a-f0-9]{40}$} ) {
	# full git SHA1
	show_p4_change($dbh, $rev);
	return;
    }
    # symbolic revision: let git resolve it (stderr discarded)
    my ($rc, $revision) = capture_err
	(-out2 => "/dev/null",
	 qw(git rev-parse --verify), $rev);
    barf "'$rev' is not a valid revision" if $?;
    chomp($revision);
    show_p4_change($dbh, $revision);
}
# show_p4_change($dbh, $git_rev) - print the Perforce change and
# branch recorded for git commit $git_rev.  In $SCRIPT_MODE the output
# is "change,branchpath" CSV and a missing commit exits with status 1;
# otherwise a human-readable line is printed and a missing commit is
# fatal.
sub show_p4_change {
    my $dbh = shift;
    my $git_rev = shift;
    my $query = $dbh->prepare(<<SQL);
select
cm.change,
cm.branchpath
from
change_marks cm
inner join marks m using (mark)
where
m.commitid = ?
SQL
    $query->execute($git_rev);
    my $found = $query->fetchrow_hashref;
    $query->finish;
    if ( $SCRIPT_MODE ) {
	exit 1 unless $found;
	print "$found->{change},$found->{branchpath}\n";
    }
    else {
	barf "commit $git_rev not found in DB" unless $found;
	say "commit ".substr($git_rev, 0, 12)
	    .(" is Change $found->{change} on branch "
	      .$found->{branchpath});
    }
}
# show_git_paths($dbh, $change) - print every (branch, git commit)
# pair recorded for Perforce change $change.  In $SCRIPT_MODE the
# output is "commitid,branchpath" CSV lines and no match exits with
# status 1; otherwise the output is human-readable and no match is
# fatal.
sub show_git_paths {
    my $dbh = shift;
    my $change = shift;
    my $query = $dbh->prepare(<<SQL);
select
m.commitid,
cm.branchpath
from
change_marks cm
inner join marks m using (mark)
where
cm.change = ?
SQL
    $query->execute($change);
    my @found;
    while ( my $row = $query->fetchrow_hashref ) {
	push @found, $row;
    }
    $query->finish;
    if ( $SCRIPT_MODE ) {
	exit 1 unless @found;
	print "$_->{commitid},$_->{branchpath}\n" for @found;
	return;
    }
    if ( @found > 1 ) {
	# one change may land on several branches
	say "change $change affected multiple branches:";
	print "branch $_->{branchpath}, see "
	    ."commit $_->{commitid}\n" for @found;
    }
    elsif ( @found ) {
	say "change $change was on branch "
	    .("$found[0]{branchpath}, see commit "
	      .$found[0]{commitid}."\n");
    }
    else {
	barf "change $change not found in DB. perhaps it was cancelled?";
    }
}
# do_filelog($dbh, @args) - the "filelog" command, a partial "p4
# filelog" emulation.  Options: -i follow branches, -t show times,
# -l full descriptions, -L truncated descriptions, -m N limit
# revision count.  The remaining argument is a depot path spec,
# optionally suffixed "#rev".
sub do_filelog {
    my $dbh = shift;
    my %filelog_opts;
    {
	# getopt() works on @ARGV, so swap our args in and back out
	@ARGV = @_;
	getopt("i" => \$filelog_opts{follow_branches},
	       "t" => \$filelog_opts{show_time},
	       "l" => \$filelog_opts{show_desc},
	       "L" => \$filelog_opts{show_some_desc},
	       "m=i" => \$filelog_opts{maxRevs},
	      );
	@_ = @ARGV;
    }
    # (removed a handful of locals that were declared but never used)
    # split "//depot/path#42" into path and optional revision
    my $pathspec = shift;
    my ($depotpath, $rev) = ($pathspec =~ m{^(.*?)(?:#(\d+))?$});
    show_filelogs($dbh, $depotpath, $rev, \%filelog_opts);
}
# _p4_disp_rev($low, $high) - format a revision range the way p4
# does: integ records store an exclusive lower bound, so a range of
# exactly one revision prints "#N" and a wider range "#N,#M".
sub _p4_disp_rev {
    my ($low, $high) = @_;
    my $first = $low + 1;
    return $first == $high ? "#$high" : "#$first,#$high";
}
# this function converts a perforce type bitmap to a text type. I
# didn't try very hard to understand the layout or make this function
# very clever, I just cared about the types I saw in my own test
# repository.
use constant P4_TYPE_EXEC => 0b10_0000_0000;
use constant P4_TYPE_KORRUPT => 0b10_0000; # korrupt on checkout
sub _p4_type {
    my $bits = shift;
    my @suffix;
    # bit 12 is the "+w" modifier; strip it before the lookup
    if ( $bits & 4096 ) {
	$bits ^= 4096;
	push @suffix, "+w";
    }
    # the handful of bit patterns observed in practice
    my %name_of = (
	0                 => "text",
	0b1_0000_0011     => "binary",
	0b1_0000_0001     => "ubinary",
	P4_TYPE_EXEC()    => "xtext",
	P4_TYPE_KORRUPT() => "ktext",
	0b10_0010_0000    => "kxtext",
	0b01_0000_0000    => "binary+D",
	0b1101_0000_0011  => "apple",
	0b100_0000_0000   => "symlink",
    );
    # unknown bit patterns are shown in binary so they can be added
    my $base = exists $name_of{$bits}
	? $name_of{$bits}
	: "xxx-".sprintf("%b", $bits);
    return join "", $base, @suffix;
}
# _p4_changelog($row, \%opts) - build the list of strings describing
# one change row (as printed after "... #N change ").  With
# show_desc/show_some_desc the (possibly truncated) description is
# appended tab-indented; otherwise short_desc is squashed onto one
# line and quoted.
sub _p4_changelog {
    my $row = shift;
    my $opts = shift || {};
    my @out;
    push @out, "$row->{change} ";
    push @out, $row->{change_type}, " " if defined $row->{change_type};
    push @out, "on $row->{when} by $row->{who_user}";
    push @out, "\@$row->{who_host}";
    # file type only shows up for per-revision rows
    push @out, " (", _p4_type($row->{file_type}), ")" if $row->{file_type};
    if ( $opts->{show_desc} || $opts->{show_some_desc} ) {
	my $text = $opts->{show_some_desc}
	    ? substr $row->{description}, 0, 250
	    : $row->{description};
	push @out, "\n\n", (map { "\t$_\n" } split /\n/, $text), "\n";
    }
    else {
	# collapse all whitespace (incl. newlines) to single spaces
	(my $oneline = $row->{short_desc}) =~ s{\s}{ }g;
	push @out, " '$oneline'\n";
    }
    return @out;
}
# show_filelogs($dbh, $depotpath, $rev, \%opts)
#
# Emulate "p4 filelog" for $depotpath from the database, optionally
# limited to revisions <= $rev.  Options (see do_filelog): show_time,
# show_desc / show_some_desc, maxRevs; callers may also supply
# select_extra, output_func and output_header to reuse the query with
# different rendering.
sub show_filelogs {
    my $dbh = shift;
    my $depotpath = shift;
    my $rev = shift;
    my $o = shift;
    # build the query.
    my @placeholders;
    my $time_fmt = 'YYYY/MM/DD';
    if ( $o->{show_time} ) {
	$time_fmt .= ' HH:MI:SS';
    }
    push @placeholders, $time_fmt, $depotpath;
    my $revision_clause = '';
    if ( $rev ) {
	$revision_clause = 'and revision <= ?';
	push @placeholders, $rev;
    }
    my $limit_clause = '';
    if ( $o->{maxRevs}) {
	$limit_clause = "limit ?";
	push @placeholders, $o->{maxRevs};
    }
    my $x = $o->{select_extra}||"";
    my $output = $o->{output_func};
    if ( !$output ) {
	# default renderer: p4 filelog-style lines, with one
	# "... ..." line per integration record
	$output = sub {
	    my $row = shift;
	    my $ii = shift;
	    print "... #$row->{revision} change ",
		_p4_changelog($row, $o);
	    while ( my $i = $ii->() ) {
		my $other;
		my ($low, $high);
		# the integ row may have us as subject or object;
		# report the opposite end and its revision range
		if ( $i->{subject} eq $row->{depotpath} ) {
		    $other = $i->{object};
		    ($low, $high) = @{$i}{
			qw(object_minrev
			   object_maxrev)};
		}
		else {
		    $other = $i->{subject};
		    ($low, $high) = @{$i}{
			qw(subject_minrev
			   subject_maxrev)};
		}
		my $disp_rev = _p4_disp_rev($low, $high);
		print "... ... $i->{int_title} $other",
		    "$disp_rev\n";
	    }
	};
    }
    my $oh = $o->{output_header} || sub {
	print "$depotpath\n";
    };
    my $long_desc = "";
    my $desc_join = "";
    if ( $o->{show_desc} or $o->{show_some_desc} ) {
	# only join the (large) change_desc table when needed
	$long_desc = "\tchange_desc.description,";
	$desc_join = "\tleft join change_desc\n"
	    ."\t\tusing (change_desc_id)";
    }
    my $sql = <<SQL;
select
$long_desc
$x
to_char(to_timestamp(change_time), ?) as when,
*
from
revcx_path
$desc_join
where
depotpath = ? $revision_clause
order by
revision desc
$limit_clause
SQL
    whisper "running: $sql";
    my $query = $dbh->prepare($sql);
    $query->execute(@placeholders);
    my $integed_fetch = $dbh->prepare(<<SQL);
select
integed.*,
int_type.title as int_title
from
integed
inner join int_type
using (int_type)
where
(subject = ? and subject_maxrev = ?)
-- or (object = ? and object_maxrev = ?)
order by
object, object_maxrev desc
SQL
    $oh->();
    while ( my $row = $query->fetchrow_hashref ) {
	my $executed;
	# lazy iterator: only queries integed if the output callback
	# actually asks for integration rows
	my $int_rows_iter = sub {
	    $integed_fetch->execute
		($depotpath, $row->{revision})
		unless $executed++;
	    $integed_fetch->fetchrow_hashref;
	};
	$output->($row, $int_rows_iter);
    }
}
# do_integrated($dbh, @args) - the "integrated" command: list the
# integration records for a depot path.  The only option, -r, reverses
# the direction (show integs *into* rather than *from* the path).
sub do_integrated {
    my $dbh = shift;
    my %integed_opts;
    {
	# getopt() works on @ARGV, so swap our args in and back out
	@ARGV = @_;
	getopt("r" => \$integed_opts{reverse});
	@_ = @ARGV;
    }
    my $depotpath = shift;
    show_integes($dbh, $depotpath, \%integed_opts);
}
# show_integes($dbh, $depotpath, \%opts) - print every integed row
# where $depotpath is the subject (or, with $opts->{reverse}, the
# object), one per line.  A caller-supplied $opts->{output} callback
# replaces the default printer.
sub show_integes {
    my $dbh = shift;
    my $depotpath = shift;
    my $o = shift;
    # which side of the integration record to match on
    my $match_column = $o->{reverse} ? "object" : "subject";
    my $output = $o->{output} || sub {
	my $row = shift;
	my $subj_dr = _p4_disp_rev
	    ($row->{subject_minrev}, $row->{subject_maxrev});
	my $obj_dr = _p4_disp_rev
	    ($row->{object_minrev}, $row->{object_maxrev});
	print "$row->{subject}$subj_dr - ",
	    ("$row->{int_title} $row->{object}$obj_dr",
	     "\n");
    };
    my $sth = $dbh->prepare(<<SQL);
select
integed.*,
int_type.title as int_title
from
integed
inner join int_type
using (int_type)
where
$match_column = ?
order by
object,
object_minrev,
subject_maxrev
SQL
    $sth->execute($depotpath);
    while ( my $row = $sth->fetchrow_hashref ) {
	$output->($row);
    }
}
# do_describe($dbh, @args) - the "describe" command.
#
# Run the branch/parent analysis (change_stats) for one change.
# Options: -w actually record the derived rows; without it the
# write callbacks are replaced with printers so this is a dry run.
# -l and -f are passed through as the "long" and "derive_again"
# options - semantics live in change_stats.
sub do_describe {
    my $dbh = shift;
    my $write;
    my %desc_opts;
    {
	# getopt() works on @ARGV, so swap our args in and back out
	@ARGV = @_;
	getopt( "l" => \$desc_opts{long},
		"w" => \$write,
		"f" => \$desc_opts{derive_again},
	      );
	@_ = @ARGV;
    }
    abort "no change passed to describe" if !@_;
    my $change = shift;
    if ( $change !~ m{^\d+$} ) {
	abort "'$change' is not a valid change number";
    }
    if ( !$write ) {
	# dry run: print the actions instead of writing them
	$desc_opts{add_change_branch} = sub {
	    my $branch = shift;
	    my $change = shift;
	    print "action: add change_branch for $branch\@$change\n";
	};
	$desc_opts{add_parent} = sub {
	    my $row = shift;
	    my $bad;
	    # none_unseen false means some changed paths had no
	    # integration record - flagged as a bad merge
	    if ( defined $row->{none_unseen} and
		 ! $row->{none_unseen}) {
		$bad = 1;
	    }
	    print "action: add $row->{parent_branchpath}\@"
		.("$row->{parent_change} as a parent of"
		  ." $row->{branchpath}\@$change"
		  .($bad ? " (BAD MERGE)" : "")
		  ."\n");
	};
    }
    $dbh->begin_work;
    change_stats($dbh, $change, \%desc_opts);
    $dbh->commit;
}
# this function is verging on ridiculous, implementing a tiny portion
# of a DBIx::Class::ResultSet, badly.
#
# change_branches_iter($dbh, $change, $what, \%opts)
#
# Return a row_iter over change_branches rows for $change, where
# $what is "eq" (exactly that change), "max" or "min" (largest /
# smallest change <= $change per branch).  With $opts->{commits},
# also join change_marks/marks to yield the git commitid, using
# "distinct on (branchpath)" plus ordering instead of grouping.
# NOTE(review): both "min" variants filter with "change <= ?" just
# like "max" - confirm whether "min" was meant to use a different
# bound before relying on it.
sub change_branches_iter {
    my $dbh = shift;
    my $change = shift;
    my $what = shift;
    my $o = shift;
    my ($filter, $select, $group, $group_by, $order, $order_by);
    my $join = "";
    $select = "branchpath";
    $group_by = "branchpath";
    if ( $o->{commits} ) {
	$join = "\tjoin change_marks using (branchpath,change)\n"
	    ."\tjoin marks using (mark)\n";
	$select .= ",\n\tchange,\n\tcommitid";
	# one row per branch, picked by the order-by direction below
	$select = "distinct on (branchpath) ".$select;
	$order = 1;
	$order_by = "branchpath";
	if ( $what eq "eq" ) {
	    $filter = "change = ?";
	}
	elsif ( $what eq "max" ) {
	    $filter = "change <= ?";
	    $order_by .= ",\n\tchange desc";
	}
	elsif ( $what eq "min" ) {
	    $filter = "change <= ?";
	    $order = 1;
	    $order_by .= ",\n\tchange asc";
	}
	$group = 0;
    }
    else {
	if ( $what eq "eq" ) {
	    $filter = "change = ?";
	    $select .= ",\n\tchange";
	}
	elsif ( $what eq "max" ) {
	    $filter = "change <= ?";
	    $select .= ",\n\tmax(change) as change";
	    $group = 1;
	}
	elsif ( $what eq "min" ) {
	    $filter = "change <= ?";
	    $select .= ",\n\tmin(change) as change";
	    $group = 1;
	}
    }
    # stacked heredocs: group-by and order-by clauses are appended
    # only when requested above
    my $sql = (<<SQL.($group?<<SQL:"").($order?<<SQL:""));
select
$select
from
change_branches
$join
where
$filter
SQL
group by
$group_by
SQL
order by
$order_by
SQL
    whisper "new change_branches iter:\n$sql" if $VERBOSE > 1;
    # (removed a leftover "$DB::single = 1" debugger breakpoint here)
    row_iter($dbh, $change, $sql);
}
# derive_branch_src($path, $PcO, $callback) - for a change that
# creates branch $path, report each integration source recorded in
# $PcO->{branch} to $callback as a change_parents-shaped hashref.
# More than one source gets a warning printed (a branch creation is
# expected to have a single parent).
sub derive_branch_src {
    my ($path, $PcO, $callback) = @_;
    my $sources = $PcO->{branch};
    if ( keys %$sources > 1 ) {
	print "multiple branch sources for $path, erp!";
	print "Too many parents: @{[ map{ $_||$path } keys %$sources ]}\n";
    }
    for my $source_path ( keys %$sources ) {
	my $info = $sources->{$source_path};
	$callback->({
	    branchpath        => $path,
	    parent_branchpath => $source_path,
	    parent_change     => $info->{int_change},
	    manual            => 0,
	});
    }
}
# derive_extra_parents($dbh, $path, $change, $PcO, $POc, $prev, $callback)
#
# Decide which integration sources of $path@$change should be
# recorded as git merge parents.  $PcO/$POc are nested hashes of
# integ info keyed by source branch path; $prev maps branchpath to
# its previous change.  For each candidate source a merge base is
# computed and a 3-way index comparison decides whether every change
# on the source side was integrated; ambiguous cases fall back to
# heuristics (or the user, unless $FULLY_AUTOMATIC).  Each chosen
# parent is handed to $callback as a change_parents-shaped hashref.
sub derive_extra_parents {
    my $dbh = shift;
    my $path = shift;
    my $change = shift;
    my $PcO = shift;
    my $POc = {%{+shift}};   # shallow copy - keys are deleted below
    my $prev = shift;
    my $callback = shift;
    # integs recorded against the root (or against ourselves) mean
    # content changed beyond the merge itself - an "evil" merge
    my $root_changes = delete $POc->{""};
    my $confused_changes = delete $POc->{$path};
    my $has_changes = !!($root_changes || $confused_changes);
    my @extra;
    push @extra, evil => 1 if $has_changes;
    # warn for octopus and evil merges
    if ( keys %$POc > 1 ) {
	print( ($has_changes ? "Shoggoth" : "Octopus")
	       ." merge of: @{[map { $_||$path} keys %$POc]}\n" );
	push @extra, octopus => 1;
    }
    elsif ( !keys %$POc ) {
	print "Intra-branch integrate. Sick.\n";
    }
    elsif ( $has_changes ) {
	print "This is an EVIL merge\n";
    }
    my $manual;
    # the first parent is always the previous change on our branch
    my @parents =
	{ branchpath => $path,
	  parent_branchpath => $path,
	  parent_change => $prev->{$path},
	  manual => 0 };
    my @ignored;
    # add all parents
    while ( my ($source_path, $chg_info) = each %$POc ) {
	my @extra_per_src;
	my ($all_headrev, $none_unseen, $o_change );
	# candidate parent change on the source branch: the highest
	# change mentioned by the integ records...
	$o_change = max map { $_->{int_subj_max_change}||-1 }
	    map { @$_ } map { values %$_ }
	    values %$chg_info;
	if ( !grep { !$_->{int_subj_headrev} }
	     map { @$_ } map { values %$_ }
	     values %$chg_info ) {
	    # ...unless every integ was from the head revision, in
	    # which case the source branch's latest change is it
	    $all_headrev = 1;
	    $o_change = $prev->{$source_path};
	}
	# prepare a list of relative paths that are integrated
	# by this change.
	my %integrated;
	my $integed_c;
	my %revcx_by_relpath;
	while ( my ($type, $c_d) = each %{$POc->{$source_path}} ) {
	    for my $revcx ( @{ $c_d->{revcxs} } ) {
		my $relpath = $revcx->{int_obj};
		$relpath =~ s{^\Q$source_path\E/}{} &&
		    ($integrated{$relpath}++);
		$integed_c++;
		$revcx_by_relpath{$relpath}=$revcx;
	    }
	}
	# now here's the fancy-pants stuff. Find the merge base...
	my (@mb) = find_merge_base
	    ($dbh,
	     [ $path, $prev->{$path} ],
	     [ $source_path, $o_change ]);
	print "merge base of $path\@$prev->{$path} and "
	    .("$source_path\@$o_change is "
	      ."@{[ map { join '@', @$_ } @mb ]}\n");
	# ... then try a 3-way merge with each of the merge bases ...
	my $left = index_for_version($dbh, $path, $change);
	my $right = index_for_version($dbh, $source_path, $o_change);
	$none_unseen = 1;
	my $count = 0;
	my $moan_count = 0;
	my %unseen;
	my (%curious, %omitted);
	my %obvious;
	for my $mb ( @mb ) {
	    # ok, so, it's actually *here* that we need to
	    # pull down the integed rows. This is
	    # somewhat approximate; the assumption is made
	    # that any changes that appear on a branch are
	    # always on the ancestry tree back to the
	    # merge base. If you delete branches,
	    # re-branch them from a point prior to that,
	    # and then merge them this precondition may
	    # break. I think.
	    my %old_integed;
	    my $row_iter = row_iter
		($dbh, $path, $change, $source_path,
		 $o_change, $mb->[1], <<SQL);
select
*
from
integed
join int_type using (int_type)
where
(subject like (?||'/%') and change < ?)
and (object like (?||'/%') and change < ?)
and change > ?
SQL
	    my $base = index_for_version($dbh, @$mb);
	    # paths changed on the source side since the merge base,
	    # excluding ones already identical on our side (those are
	    # counted as "obvious" merges)
	    my @changed = grep {
		(!index_eq($right->{$_}, $base->{$_}) and
		 ((($right->{$_}&&$right->{$_}[4])||$change) > $mb->[1]) and
		 !( (index_eq($right->{$_}, $left->{$_}))
		    && do {
			$obvious{$_}++;
			$curious{$_}=0;
			$omitted{$_}=0;
		    }))
	    } keys %{{ %$right, %$base }};
	    print "there were ".@changed." path(s) changed "
		.("between $mb->[0]\@$mb->[1] and "
		  ."$source_path\@$o_change\n");
	    my $bad = 0;
	    for my $relpath ( @changed ) {
		if ( !exists $integrated{$relpath} and
		     !$obvious{$relpath}
		   ) {
		    # changed on the source but no integ record for
		    # it - report (at most 10 are printed)
		    $bad++;
		    print("path $relpath ",
			  index_diff
			  ($base->{$relpath},
			   $right->{$relpath}),
			  " in $source_path".
			  ($right->{$relpath}?"\@$right->{$relpath}[4]":"")
			  ." but ",
			  index_diff
			  ($base->{$relpath},
			   $left->{$relpath}),
			  " in ", $path,
			  ($left->{$relpath}?" since change $left->{$relpath}[4]":""))
			unless ++$moan_count > 10;
		    my $hmm;
		    # an unspecific integration against the root may
		    # still cover this path
		    if ( my $x = $root_changes ) {
			if ( my $i = $x->{integrate} ) {
			    for my $rcx ( @{$i->{revcxs}} ) {
				if ( $rcx->{depotpath} eq "$path/$relpath" ) {
				    print " but unspec. integration found, so OK" unless $moan_count > 10;
				    $hmm = 1;
				}
			    }
			}
		    }
		    print "\n" unless $moan_count > 10;
		    unless ($hmm) {
			# ok, so if we get here then call that row_iter
			if ( !keys %old_integed ) {
			    print "Just checking for integs between changes "
				.(($mb->[1]+1)." and ".(max($change, $o_change)-1)
				  ." between $source_path and $path\n");
			    while ( my $row = $row_iter->() ) {
				(my $relpath2 = $row->{subject}) =~ s{$path/}{};
				$old_integed{$relpath2} = $row;
			    }
			    # sentinel key so this block runs once
			    $old_integed{\0}++;
			}
			if ( my $oi = $old_integed{$relpath} ) {
			    print "BUT HOLD IT! In change $oi->{change}, there was an $source_path $oi->{title} $path ($oi->{description}). That'll do us.\n"
				unless $moan_count > 10;
			}
			else {
			    # genuinely unexplained difference
			    $none_unseen = 0;
			    $unseen{$relpath}++;
			    if ( index_eq($base->{$relpath},
					  $left->{$relpath})) {
				$omitted{$relpath}++
				    unless exists $omitted{$relpath};
			    }
			    else {
				$curious{$relpath}++
				    unless exists $curious{$relpath};
			    }
			    if ( $VERBOSE > 1 ) {
				whisper "index info:",
				    Dump({ left => $left->{$relpath},
					   right => $right->{$relpath},
					   base => $base->{$relpath} });
			    }
			}
		    }
		}
		else {
		    $count++;
		}
	    }
	    if ( $moan_count > 10 ) {
		print "(total of $bad files not listed)\n"
	    }
	}
	if ( $none_unseen ) {
	    print "Confirmed $count differences all integrated - it's a merge!\n";
	}
	else {
	    # ambiguous: score how convincing the integ records are
	    # and either decide automatically or ask the user
	    my $seen = keys %integrated;
	    my $unseen = keys %unseen;
	    my $total = $seen + $unseen;
	    my $bo = keys %obvious;
	    my $s_b_o = keys %{{ %obvious, %integrated }};
	    delete @omitted{grep {!$omitted{$_}} keys %omitted};
	    delete @curious{grep {!$curious{$_}} keys %curious};
	    my $dis = keys %{{ %omitted, %curious }};
	    my @bad = sort keys %{{ %omitted, %curious }};
	    if ( @bad > 10 ) {
		$#bad = 9;
		push @bad, "...";
	    }
	    my $pc = sprintf "%d", (($s_b_o)/($s_b_o+$unseen))*100;
	    # pick a suitably dramatic adjective for a percentage
	    my $adj = sub {
		\($_[0] > 95 ? "an overwhelming" :
		  $_[0] > 80 ? "a massive" :
		  $_[0] > 50 ? "about" :
		  $_[0] > 20 ? "only" :
		  $_[0] > 5 ? "a meagre" : "a pathetic");
	    };
	    my $o_pc = sprintf "%d", $seen/$total * 100;
	    whisper "seen: $seen, unseen: $unseen, bo: $bo, pc:$pc opc:$o_pc, integed: $integed_c";
	    my $func = ($o_pc > 75 ? \&prompt_Yn : \&prompt_yN);
	    # dirty hack - less than 1 in 25 => no
	    my $auto = 1;
	    if ( $o_pc > 95 or
		 ( $o_pc > 75 and @bad < 10 ) ) {
		$func = sub { 1 };
	    }
	    elsif ( $o_pc < 5 or
		    ( $o_pc < 25 and @bad < 10 ) ) {
		$func = sub { 0 };
	    }
	    elsif ( $FULLY_AUTOMATIC ) {
		$func = sub { ($o_pc > 66 and @bad < 10) or
			      $o_pc > 90 };
	    }
	    else {
		$auto = 0;
	    }
	    print map { my $x = $_; $x =~ s{\n}{ }g;
			wrap("","",$x)."\n\n" } split /\n\n/, <<THIS unless $auto;
Was this change was a merge in the git sense?
Integration records mention ${$adj->($o_pc)} $o_pc% of $total
path(s) that would support the hypothesis.
If it were, then that means ${$adj->($pc)} ${pc}% of possible
merges, were either mentioned in integration records or obvious.
There were $bo obvious merges and $seen paths mentioned in the
integration records. $dis changes remain ambiguous (@bad).
Recording this as a merge is not necessarily the right thing to do; it
might be better to just record this as a separate change.
Alternatively you might be seeing a complicated merge unfold over
several commits, which is usually entirely disinteresting and should
be grafted away.
If it looks like the intent of this change is to merge ALL changes
from $source_path\@$o_change into $path,
then say "Yes". Otherwise, say "No". No information will be lost
either way; but getting this wrong will affect later merge-base
calculations, potentially making it harder to figure out what later
merges are.
THIS
	    unless ( $func->("Mark as a parent?") ) {
		push @ignored,
		    $source_path => \%revcx_by_relpath;
		next;
	    }
	    #FIXME - these often aren't 'manual'
	    $manual = 1;
	    push @extra_per_src,
		(manual => 1,
		 (keys %unseen
		  ? (json_info => to_json
		     ({omitted=>[grep { $omitted{$_} }
				 keys %omitted],
		       curious=>[grep { $curious{$_} }
				 keys %curious ]}))
		  : ()));
	}
	push @parents,
	    { branchpath => $path,
	      parent_branchpath => $source_path,
	      parent_change => $o_change,
	      all_headrev => $all_headrev,
	      none_unseen => $none_unseen,
	      manual => 0,
	      @extra, @extra_per_src };
    }
    for my $p ( @parents ) {
	if ( @ignored and !$p->{json_info} ) {
	    $p->{json_info}=to_json{integrated=>{@ignored}};
	}
	$callback->( ($manual and !$p->{manual})
		     ? {%$p} : $p );
    }
    # if any parent was flagged manual, re-send the automatic ones
    # with manual set so all parents are recorded at the same level
    if ( $manual ) {
	for my $p ( @parents ) {
	    if ( ! $p->{manual} ) {
		$p->{manual} = 1;
		$callback->($p);
	    }
	}
    }
}
# do_lose_branches($dbh, @args) - the "lose-branches" command: throw
# away the derived change_parents and change_branches rows for the
# change range selected by parse_export_opts, so the branch detection
# can be re-run from scratch.
sub do_lose_branches {
    my $dbh = shift;
    my %export_opts = parse_export_opts($dbh, "extant_branches", @_);
    say "forgetting branches for $export_opts{chunk_size} changes";
    $dbh->begin_work;
    # change_parents first: it references change_branches
    my @purge_statements = (<<SQL, <<SQL);
delete from
change_parents
where
change between ? and ?
SQL
delete from
change_branches
where
change between ? and ?
SQL
    for my $statement ( @purge_statements ) {
	$dbh->do($statement, undef, $export_opts{min}, $export_opts{max});
    }
    $dbh->commit;
}
# do_show_branches($dbh, $change) - the "show-branches" command: for
# every known branch, print the latest change at or below $change
# (default: the highest change) and the git commit recorded for it,
# in columns aligned on the longest branch name and change number.
sub do_show_branches {
    my $dbh = shift;
    my $change = shift || max_change($dbh);
    abort "bad change # '$change'" unless $change =~ m{^\d+$};
    my $branches = row_iter($dbh, $change, <<SQL);
select distinct on (cb.branchpath)
cb.branchpath,
cb.change,
cm.commitid
from
change_branches cb
left join
(select
change,
branchpath,
commitid
from
change_marks
join marks using (mark)) cm
using (change, branchpath)
where
change <= ?
order by
cb.branchpath,
change desc
SQL
    my %latest;
    while ( my $row = $branches->() ) {
	$latest{$row->{branchpath}} = [$row->{change}, $row->{commitid}];
    }
    # column widths for aligned output
    my $name_width = max map { length $_ } keys %latest;
    my $change_width = max map { length $_->[0] } values %latest;
    for my $branch ( sort keys %latest ) {
	my ($chg, $commit) = @{ $latest{$branch} };
	print $branch,
	    (" " x ($name_width - length($branch)
		    + $change_width - length($chg) + 1)),
	    ($chg, " ",
	     ($commit ? substr($commit, 0, 12) : "-"),
	     "\n");
    }
}
use POSIX qw(strftime);
sub change_stats {
my $dbh = shift;
my $change = shift;
my $o = shift;
# Analyse one change: print its header, group its file revisions
# (revcx_integed rows) by branch path / change type / integration
# source, and - unless change_parents rows already exist - derive
# change_branches and change_parents rows and save them.  The $o
# hash supplies optional callbacks so callers can observe or
# override each stage.
my $csv;
# ok, so there are a few internal callbacks in this function.
# the first gets the description of the change - roughly
# change_desc
my $show_header = $o->{show_header} || sub {
my $d = shift;
print "Change $d->{change} by $d->{who_user}\@",
("$d->{who_host} on ",
strftime("%Y/%m/%d %H:%M:%S",
localtime($d->{change_time})));
if ( $o->{compact} ) {
print " '".substr($d->{description},0,40)."'\n";
}
else {
print "\n\n";
print map { "\t$_\n" } split "\n", $d->{description};
print "\n";
}
};
# this one gets called with the list of branches
my %prev_paths;
my %curr_paths;
my $prepare_paths = sub {
my $row = shift;
if ( $row->{change} == $change ) {
$curr_paths{$row->{branchpath}}++;
}
else {
$prev_paths{$row->{branchpath}} = $row->{change};
}
$o->{prepare_path}->($row) if $o->{prepare_path};
};
# this one receives any pre-existing change_parents rows
my %parents;
my $change_parents = sub {
my $row = shift;
push @{ $parents{$row->{branchpath}} ||= [] }, $row;
$o->{change_parents}->($row) if $o->{change_parents};
};
# this one gets change_detail 'rows'
my $show_change_detail = $o->{show_change_detail} = sub {
my $path = shift;
my $details = shift;
print "On path $path,\n";
while (my ($change_title, $by_int_path)
= each %$details) {
for my $int_path (sort keys %$by_int_path) {
my $c_d = $by_int_path->{$int_path};
print "\t", $change_title, " ";
if ( $int_path ) {
print "from $int_path ("
.($c_d->{int_headrev}
?"head":
$c_d->{int_change})."), ";
}
print scalar(@{$c_d->{revcxs}})," file(s)";
print "\n";
if ( $o->{long} ) {
for my $c ( @{$c_d->{revcxs}} ) {
show_chg($path, $c);
}
}
}
}
print "\n";
};
# a non-ref show_header means "skip the header query entirely"
goto no_header if !ref $show_header;
my $sth = $dbh->prepare(<<SQL);
select
*
from
change_desc
inner join change
using (change_desc_id)
where
change.change = ?
SQL
$sth->execute($change);
my $row = $sth->fetchrow_hashref;
if ( !$row ) {
say "no such change $change";
return;
}
if ( !$row->{closed} ) {
moan "change $change was not closed";
}
$show_header->($row);
$sth->finish;
no_header:
# collect the branch paths known before this change, and any already
# recorded for this change itself
my $iter = change_branches_iter($dbh, $change-1, "max");
while ( my $row = $iter->() ) {
$prepare_paths->($row);
}
$iter = change_branches_iter($dbh, $change, "eq");
while ( my $row = $iter->() ) {
$prepare_paths->($row);
}
undef($iter);
my $know_parents;
$sth = $dbh->prepare(<<SQL);
select
*
from
change_parents
where
change = ?
order by
manual desc
SQL
$sth->execute($change);
while ( my $row = $sth->fetchrow_hashref ) {
$know_parents++;
$change_parents->($row);
}
$sth->finish;
$sth = $dbh->prepare(<<SQL);
select
*,
change_type.title as change_title
from
revcx_integed
inner join change_type
using (change_type)
where
change = ?
order by
depotpath
SQL
$sth->execute($change);
my (@revcxs);
my $path_re;
my %paths = (%prev_paths, %curr_paths);
# $add_path maintains %paths and $path_re, an alternation of all known
# branch roots sorted longest-first so the deepest root wins a match;
# returns undef (refusing the add) when the new path would shadow an
# existing deeper path
my $add_path = sub {
if ( @_ ) {
if ( grep { (!$_[1] or defined $paths{$_[0]})
and m{^\Q$_[0]\E/} } keys %paths ) {
return undef;
}
$paths{(shift)} ||= undef;
}
if ( keys %paths ) {
my $re = join("|",sort { length($b)<=>length($a) }
map{"\Q$_\E"} keys %paths);
$path_re = qr{$re};
}
else {
# a regex that can never match
$path_re = qr{(?!.)};
}
};
$add_path->();
# now, set about grouping the revcx_integed stuff by branch,
# change type and integrated branch
while ( my $row = $sth->fetchrow_hashref ) {
# try to figure out the branch root by looking for top
# level changes, or integrates from other paths. Hope
# no integrations are happening between files with
# different names!
my $path = $row->{depotpath};
unless ( $path =~ m{^($path_re)/} ) {
# new path, add to paths
$path =~ s{/[^/]*$}{};
if ( $row->{int_obj} and
$row->{int_obj} =~ m{^($path_re)/} ) {
$path = substr $row->{depotpath}, 0,
length($row->{depotpath}) -
(length($row->{int_obj}) -
length $1);
}
elsif ( $row->{int_obj} ) {
$path = diff_right($row->{int_obj},
$row->{depotpath});
moan "using diff_right($row->{int_obj}, $row->{depotpath}) for change branch path (=$path)";
}
else {
mutter "adding new branchpath $path because I saw $row->{depotpath}";
};
$add_path->($path, 1);
}
push @revcxs, $row;
$o->{revcxs}->($row, $path) if $o->{revcxs};
};
# remove the paths that shadow each other.
my @gonners = grep { m{^($path_re)/} } keys %paths;
if ( @gonners ) {
for my $gonner ( @gonners ) {
if ( $prev_paths{$gonner} ) {
# this new change would appear to shadow others.
# so, nuke it.
if ( $gonner =~ m{^($path_re)/} ) {
my $culprit = $1;
moan "$culprit shadows branches: "
.join(" ", grep m{^\Q$culprit\E/},
@gonners);
delete $paths{$1};
$add_path->();
}
}
else {
moan "ignoring changes on path $gonner\@$change";
delete $paths{$gonner};
}
}
$add_path->();
}
# ok, now group them by path and integration branch, and change type
my (%by_path_chg_obj, %by_path_obj_chg);
my @change_details;
for my $row ( @revcxs ) {
{ no warnings;
#whisper "row: $row->{depotpath} $row->{change_title} $row->{int_obj}";
}
my ($path) = ($row->{depotpath} =~ m{^($path_re)/})
or do {
moan "$row->{depotpath} will NOT be imported";
next;
};
my $op = "";
if ( $row->{int_obj} &&
$row->{int_obj} =~ m{^($path_re)/} ) {
$op = $1;
}
elsif ( $row->{int_obj} ) {
$op = diff_right($row->{depotpath},
$row->{int_obj});
moan "using diff_right($row->{depotpath}, $row->{int_obj}) for integration branchpath (=$op)";
while ( !$add_path->($op) ) {
($op) = $row->{int_obj} =~ m{^(\Q$op\E/[^/]+)};
moan "no, that'll shadow something, use $op instead";
};
}
my $c_d = $by_path_chg_obj{$path}{$row->{change_title}}{$op}
||= do {
my $change_detail =
{ branchpath => $path,
change_title => $row->{change_title},
int_branch => $op,
revcxs => []
};
push @change_details, $change_detail;
$change_detail;
};
$by_path_obj_chg{$path}{$op}{$row->{change_title}} = $c_d;
push @{$c_d->{revcxs}}, $row;
}
# within those groups, see if the changes are all headrev or
# their max_change
for my $c_d ( @change_details ) {
$c_d->{int_change} =
max map { $_->{int_subj_max_change}||-1 }
@{$c_d->{revcxs}};
$c_d->{int_headrev} =
!(grep {
!$_->{int_subj_headrev}
} @{$c_d->{revcxs}});
}
my $show_parent = sub {
my $row = shift;
print "Parent: ",
(($row->{ref} ? "$row->{ref}"
: ("$row->{parent_branchpath}\@"
."$row->{parent_change}")),
((!$row->{ref}&&$row->{manual})
? " (manual)" : ""),
((defined($row->{none_unseen})&&!$row->{none_unseen})
? " (tenuous)" : ""),
($row->{evil} ? " (EVIL)" : ""),
"\n");
if ( $row->{json_info} ) {
print map { $_, "\n" }
show_juice($row, from_json($row->{json_info}));
}
};
my $add_parent_sth;
# insert one change_parents row (or hand it to the caller's callback);
# the statement is rebuilt each time because the column set varies
my $add_parent = sub {
my $row = shift;
$show_parent->($row);
if ( $o->{add_parent} ) {
$o->{add_parent}->($row);
return;
}
my @cols = keys %$row;
$add_parent_sth = $dbh->prepare(<<SQL);
insert into change_parents
(${\(join ", ", @cols)})
values
(${\(join ", ", ("?") x @cols)})
SQL
my @row;
for my $col ( @cols ) {
push @row, $row->{$col};
}
mutter "saving new parent:\n"
.join("\n", map { "\t$_: ".(defined($row->{$_})?$row->{$_}:"NULL") }
sort keys %$row)
if $VERBOSE >= 1;
$add_parent_sth->execute(@row);
};
my $add_change_branch_sth;
my $add_change_branch = $o->{add_change_branch} ||= sub {
$add_change_branch_sth ||= $dbh->prepare(<<SQL);
insert into change_branches
(branchpath, change)
values (?, ?)
SQL
mutter "saving new branch path: @_";
$add_change_branch_sth->execute(@_);
};
my @ncp;
# queue up newly derived parent rows; flushed per-path below
my $new_change_parents = sub {
my $row = shift;
$row->{change} = $change;
push @ncp, $row;
$change_parents->($row);
};
# derive extra parents if necessary
for my $path ( sort keys %by_path_chg_obj ) {
$add_change_branch->($path, $change)
unless $curr_paths{$path};
my $PcO = $by_path_chg_obj{$path};
$show_change_detail->($path, $PcO);
my $POc = $by_path_obj_chg{$path};
my $parents = $parents{$path}||[];
if ( grep { $_->{manual} } @$parents ) {
$parents = [ grep { $_->{manual} } @$parents ];
}
if ( $know_parents ) {
for ( @$parents ) {
$show_parent->($_);
}
}
$DB::single = 1;
my $has_other_paths = grep { length && $_ ne $path }
keys %$POc;
if ( !exists $prev_paths{$path} ) {
if ( exists $PcO->{integrate} or
exists $PcO->{delete} or
exists $PcO->{edit}
) {
moan "new path $path\@$change has non-new changes, odd";
}
if ( !$know_parents and $PcO->{branch} and
$has_other_paths
) {
# derive the branch's parent
derive_branch_src $path, $PcO
=> $new_change_parents;
}
}
elsif ( grep { length && $_ ne $path } keys %$POc ) {
# some inter-branch action.
if ( !$know_parents or $o->{derive_again} ) {
derive_extra_parents $dbh, $path, $change,
$PcO, $POc,
\%prev_paths,
=> $new_change_parents;
}
}
else {
# plain intra-branch change: the parent is simply the
# previous change on this path
if ( !$know_parents ) {
$new_change_parents->
({ branchpath => $path,
parent_branchpath => $path,
manual => 0,
parent_change => $prev_paths{$path},
});
}
}
if ( !$know_parents ) {
for my $row (@ncp) {
$add_parent->($row);
}
@ncp=();
}
}
}
sub show_chg {
    # Print one revcx line of a change detail: the path-relative
    # filename, plus integration details when the revision came from an
    # integrate.
    my ($path, $chg) = @_;
    print "\t\t";
    my $relname = substr $chg->{depotpath}, length $path;
    unless ( $chg->{int_obj_title} ) {
        print "...$relname\n";
        return;
    }
    my $obj_disp  = _p4_disp_rev($chg->{int_obj_min},  $chg->{int_obj_max});
    my $subj_disp = _p4_disp_rev($chg->{int_subj_min}, $chg->{int_subj_max});
    # a revision range gets both bounding changes; a single revision
    # just the one
    $subj_disp .= ($subj_disp =~ m{,}
                   ? " ($chg->{int_subj_min_change},$chg->{int_subj_max_change})"
                   : " ($chg->{int_subj_max_change})");
    $subj_disp .= "(HEAD)" if $chg->{int_subj_headrev};
    my $obj_part = ($obj_disp ne "#$chg->{revision}") ? $obj_disp : "us";
    print "...$relname: $subj_disp $chg->{int_obj_title} $obj_part\n";
}
sub make_cc { # only CC's eez tasting like theez
    # Build a "change candidate" record from positional arguments.
    my ($path, $change_title, $changes,
        $obj_path, $obj_change, $obj_headrev) = @_;
    return {
        path         => $path,
        change_title => $change_title,
        changes      => $changes,
        obj_path     => $obj_path,
        obj_change   => $obj_change,
        obj_headrev  => $obj_headrev,
    };
}
sub diff_right {
    # Strip matching trailing "/component" pieces off both paths, one at
    # a time; what remains of $right (the unshared prefix) is returned.
    my ($left, $right) = @_;
    while (1) {
        my ($tail) = $left =~ m{(/[^/]+)$};
        last unless defined $tail;
        last unless $right =~ s{\Q$tail\E$}{};
        $left =~ s{\Q$tail\E$}{};
    }
    return $right;
}
sub do_changes {
    # `changes` subcommand: parse -l/-L/-m options and an optional
    # "path#min,#max" revision spec, then delegate to show_changes.
    my $dbh = shift;
    my %changes_opts;
    {
        @ARGV = @_;
        getopt("l" => \$changes_opts{show_desc},
               "L" => \$changes_opts{show_some_desc},
               "m=i" => \$changes_opts{maxRevs},
              );
        @_ = @ARGV;
    }
    my $pathspec = shift;
    # NB: the old "my (...) = (...) if $pathspec;" form relied on
    # undefined behaviour (perlsyn: `my` with a statement modifier);
    # declare unconditionally, assign conditionally.
    my ($depotpath, $minRev, $maxRev);
    if ( $pathspec ) {
        ($depotpath, $minRev, $maxRev)
            = ($pathspec =~ m{^(.*?)(?:#(\d+)(?:,#?(\d+))?)?$});
    }
    $maxRev = $minRev if !$maxRev;
    show_changes($dbh, $depotpath, $minRev, $maxRev, \%changes_opts);
}
sub show_changes {
my $dbh = shift;
my $depotpath = shift;
my $minRev = shift;
my $maxRev = shift;
my $o = shift;
# Query the change table (newest first), optionally restricted to
# revisions of a single depot path, and hand each row to the output
# callback (default: a p4-style one-line/long change log entry).
my $output = $o->{output} ||= sub {
my $row = shift;
print "Change ", _p4_changelog($row, $o);
};
# build the query. some code duplicated from show_filelogs,
# CBA fixing for now...
my @placeholders;
my $time_fmt = 'YYYY/MM/DD';
if ( $o->{show_time} ) {
$time_fmt .= ' HH:MI:SS';
}
push @placeholders, $time_fmt;
my @filters;
my $long_desc = "";
my $desc_join = "";
if ( $o->{show_desc} or $o->{show_some_desc} ) {
$long_desc = "\tchange_desc.description,";
$desc_join = "\tleft join change_desc\n"
."\t\tusing (change_desc_id)";
}
my $limit_clause = '';
if ( $o->{maxRevs}) {
$limit_clause = "limit ?";
push @placeholders, $o->{maxRevs};
}
my $revcx_join = "";
if ( $depotpath ) {
$revcx_join = "\tinner join revcx\n"
.("\t\ton (revcx.change = change.change and\n"
."\t\t\trevcx.depotpath = ?)");
push @placeholders, $depotpath;
}
# revision filters only make sense with a depotpath/revcx join
if ( $maxRev ) {
if ( $minRev ) {
push @filters, 'revision between ? and ?';
push @placeholders, $minRev, $maxRev;
}
else {
push @filters, 'revision <= ?';
push @placeholders, $maxRev;
}
}
my $where_clause = "";
if ( @filters ) {
$where_clause = "where\n\t".join("\nand\t", @filters);
}
my $sql = <<SQL;
select
$long_desc
to_char(to_timestamp(change_time), ?) as when,
change.*
from
change
$revcx_join
$desc_join
$where_clause
order by
change.change desc
$limit_clause
SQL
if ( $VERBOSE>1) {
say "querying changes with: $sql";
say "placeholders: (".join(", ", @placeholders).")";
}
my $sth = $dbh->prepare($sql);
$sth->execute(@placeholders);
while ( my $row = $sth->fetchrow_hashref ) {
$output->($row);
}
}
sub max_change {
my $dbh = shift;
my $o = shift;
# Highest change number known.  Options narrow the population:
# {extant_branches} => changes with change_branches rows;
# {exported_commits} => changes with change_marks rows.
my $sql = <<SQL;
select
max(change)
from
change
SQL
my $t = "any";
if ( $o and $o->{extant_branches} ) {
$t = "branch";
$sql .= <<SQL;
join change_branches
using (change)
SQL
}
if ( $o and $o->{exported_commits} ) {
$t = "commit";
$sql .= <<SQL;
join change_marks
using (change)
SQL
}
print $PROGNAME.": max_change($t) => " if $VERBOSE>0;
print "\n$sql => " if $VERBOSE>1;
my ($change) = map { @$_ } @{ $dbh->selectall_arrayref($sql) };
print "$change\n" if $VERBOSE>0;
return $change;
}
sub get_rcs {
    # Return the full text of one RCS revision of a depot file, or undef
    # when no RCS file is recorded for the revision.  Looks first for a
    # pre-extracted revision image (plain, then gzipped) under
    # "<file>,d/", falling back to `co` on the RCS file itself.
    my $rcs_file = shift;
    my $rcs_revision = shift;
    return undef if !$rcs_file;
    $rcs_file =~ s{^//}{};
    my $cmd;
    $SOURCE ||= (capture("git", "config", "p4raw.source")
                 || ".")."/";
    if ( -f "$SOURCE$rcs_file,d/$rcs_revision" ) {
        $cmd = "cat '$SOURCE$rcs_file,d/$rcs_revision'";
    }
    elsif ( -f "$SOURCE$rcs_file,d/$rcs_revision.gz" ) {
        # BUGFIX: the ".gz" suffix was missing from the zcat argument,
        # so this branch always tried to read a file that doesn't exist
        # even though the compressed image was found above.
        $cmd = "zcat '$SOURCE$rcs_file,d/$rcs_revision.gz'";
    }
    else {
        # In fact I'd be highly tempted to parse the RCS file
        # directly, as it might be significantly faster to hold
        # the latest version of each RCS file in memory, as we work
        # backwards through the revisions and construct new
        # versions of it for feeding to fast-import. Consider an
        # RCS file with many revisions; getting all revisions out
        # with `co -p` will be O(N^2) but directly will be O(N)
        $cmd = "co -q -p$rcs_revision -kb '$SOURCE$rcs_file'";
    }
    open CMD, "-|", "$cmd" or die $!;
    binmode CMD;
    # read in filesystem-blocksize chunks (falls back to 4096)
    local($/) = \((stat CMD)[11]||4096);
    my @data = <CMD>;
    close CMD;
    join "", @data;
}
sub do_print {
    # `print` subcommand: parse -Q/-c/-a options and an optional
    # "path#rev" spec, then delegate to show_file.
    my $dbh = shift;
    my %print_opts;
    {
        @ARGV = @_;
        getopt("Q" => \$print_opts{quiet},
               "c=i" => \$print_opts{change},
               "a" => \$print_opts{all},
              );
        @_ = @ARGV;
    }
    my $pathspec = shift;
    # NB: the old "my (...) = (...) if $pathspec;" form relied on
    # undefined behaviour (perlsyn: `my` with a statement modifier);
    # declare unconditionally, assign conditionally.
    my ($depotpath, $rev);
    if ( $pathspec ) {
        ($depotpath, $rev) = ($pathspec =~ m{^(.*?)(?:#(\d+))?$});
    }
    show_file($dbh, $depotpath, $rev, \%print_opts);
}
sub show_file {
my $dbh = shift;
my $depotpath = shift;
my $rev = shift;
my $o = shift;
# `p4 print` equivalent: fetch revision metadata (plus the git blob id
# when the revision has been exported) and print the file contents -
# via `git cat-file` when a blob exists, otherwise straight from RCS.
my $change = $o->{change};
if ( !$rev ) {
$change ||= max_change($dbh);
}
my @where;
my @pl;
push @where, "depotpath = ?";
push @pl, $depotpath;
# either select an explicit revision, or the latest as of $change
if ( !$rev ) {
push @where, "change <= ?";
push @pl, $change;
}
else {
push @where, "revision = ?";
push @pl, $rev;
}
my $where = join " and ", @where;
my $limit = ($o->{all} ? "" : "limit 1");
my $sth = $dbh->prepare(<<SQL);
select
revision,
rev_change_type,
change,
ct.title as change_title,
file_type,
rcs_file,
rcs_revision,
m.blobid
from
rev
inner join change_type ct
on (ct.change_type = rev.rev_change_type)
left join rev_marks rm
using(depotpath,revision)
left join marks m
using (mark)
where
$where
order by
revision desc
$limit
SQL
$sth->execute(@pl);
my $output = $o->{output} || sub {
my $row = shift;
unless ( $o->{quiet} ) {
print "$depotpath#$row->{revision} - "
.("$row->{change_title} change "
.$row->{change}." ("._p4_type($row->{file_type})
.")".($row->{blobid}
?" blob ".substr($row->{blobid},0,7)
:"")."\n");
}
# NOTE: exec replaces this process, so with an exported blob
# only the first row's contents are ever printed
if ( $row->{blobid} ) {
exec("git", "cat-file", "blob", $row->{blobid});
}
my $file_data = get_rcs($row->{rcs_file},
$row->{rcs_revision});
binmode STDOUT, ":raw";
print $file_data;
};
while ( my $row = $sth->fetchrow_hashref ) {
$output->($row);
}
$sth->finish;
}
sub do_ls_tree {
    # `ls-tree` subcommand: parse options and a "path@change" spec, then
    # delegate to show_files.
    my $dbh = shift;
    my %ls_tree_opts;
    {
        @ARGV = @_;
        getopt("l" => \$ls_tree_opts{length},
               "b" => \$ls_tree_opts{blob},
               "r" => \$ls_tree_opts{recurse},
               "name-only" => \$ls_tree_opts{name_only},
               "abbrev=i" => \$ls_tree_opts{abbrev},
              );
        @_ = @ARGV;
    }
    my $pathspec = shift;
    # NB: the old "my (...) = (...) if $pathspec;" form relied on
    # undefined behaviour (perlsyn: `my` with a statement modifier);
    # declare unconditionally, assign conditionally.
    my ($depotpath, $change);
    if ( $pathspec ) {
        ($depotpath, $change) = ($pathspec =~ m{^(.*?)(?:@(\d+))?$});
    }
    if ( @_ or !$depotpath ) {
        abort "expected depot path/change (only)";
    }
    $change ||= max_change($dbh);
    show_files($dbh, $depotpath, $change, \%ls_tree_opts);
}
sub sha1_blob_ref {
    # Compute the git blob object id for the referenced data: the SHA-1
    # of the header "blob <len>\0" followed by the content.
    my $data_ref = shift;
    # Digest::SHA has been core perl since 5.9.3; Digest::SHA1 is
    # unmaintained and non-core, so prefer the core module (identical
    # output for algorithm 1 == SHA-1).
    require Digest::SHA;
    my $sha1 = Digest::SHA->new(1);
    my $l = length($$data_ref);
    $sha1->add("blob $l\0");
    $sha1->add($$data_ref);
    return lc($sha1->hexdigest);
}
sub _p4_type_to_mode {
    # Map a p4 file type bitfield to a git tree entry mode: executable
    # types get 100755, everything else 100644.
    my $p4_type = shift;
    return 0100755 if $p4_type & P4_TYPE_EXEC;
    return 0100644;
}
sub show_files {
my $dbh = shift;
my $path = shift;
my $change = shift;
my $o = shift;
# `git ls-tree` equivalent over the change_state view: list the files
# under $path as of $change, invoking $o->{output} per row (default
# prints mode/blob/id[/length] and the path-relative name).
my $output = $o->{output} ||= sub {
my $row = shift;
(my $relative = $row->{depotpath})
=~ s{^\Q$path\E/?}{};
if ( $o->{name_only} ) {
print "$relative\n";
}
else {
my ($l, $b);
$b = $row->{blobid};
# length and blob-id options may require extracting the
# file body from RCS
if ( $o->{length} or
($o->{blob} and !$row->{blobid}) ) {
my $blob_data =
get_rcs($row->{rcs_file},
$row->{rcs_revision});
if ( $o->{length} ) {
$l = length $blob_data;
}
if ( $o->{blob} ) {
# pass by ref only needed on
# perl <5.8.1
$b = sha1_blob_ref(\$blob_data);
}
}
my $a = $o->{abbrev} || 40;
# don't show trees yet
printf( "%6o %s %s".($o->{length}?" %7d":"")
."\t%s\n",
_p4_type_to_mode($row->{file_type}),
"blob",
($o->{blob}
? substr($b,0,$a)
: substr($row->{revision_md5},0,$a)),
($o->{length} ? ($l) : ()),
$relative);
}
};
my (@where, @pl);
push @where, "change = ?";
push @pl, $change;
if ( $o->{recurse} ) {
push @where, 'depotpath like ?';
push @pl, "$path/%";
}
else {
# non-recursive: the path itself or its direct children only
push @where, 'depotpath ~ ?';
push @pl, "$path(/[^/]*)?\$";
}
my $join_clause = "";
if ($o->{blob} || $o->{marks}) {
$join_clause = <<SQL;
left join rev_marks
using (depotpath, revision)
SQL
if ($o->{blob}) {
$join_clause .= <<SQL;
left join marks
using (mark)
SQL
}
}
my $where = join " and ", @where;
my $sth = $dbh->prepare(<<SQL);
select
*
from
change_state cs
$join_clause
where
$where
order by
depotpath asc
SQL
$sth->execute(@pl);
while ( my $row = $sth->fetchrow_hashref ) {
$output->($row);
}
}
sub index_for_version {
    # Build a relpath-keyed index of the tree state of $path at $change;
    # each entry is [ md5, mark, blobid, relpath, last_change ] (the
    # layout index_eq/index_diff expect).
    my ($dbh, $path, $change) = @_;
    my %index;
    my $record = sub {
        my $row = shift;
        (my $rel = $row->{depotpath}) =~ s{^\Q$path\E/?}{};
        $index{$rel} = [ @{$row}{qw(revision_md5 mark blobid)},
                         $rel, $row->{last_change} ];
    };
    show_files($dbh, $path, $change,
               { blob    => 1,
                 recurse => 1,
                 output  => $record,
               });
    return \%index;
}
sub index_eq {
my $idx_a = shift;
my $idx_b = shift;
# Compare two index_for_version entries ([md5, mark, blobid, relpath,
# last_change]), using the strongest identity both sides have: blob id
# first, then mark number, then md5.  Two missing entries are equal;
# one missing entry is not.
if ( !$idx_a and !$idx_b ) {
return 1;
}
if ( !$idx_a or !$idx_b ) {
if ( $VERBOSE > 1 ) {
whisper "$idx_a->[3] in A, not B => not equal"
if $idx_a;
whisper "$idx_b->[3] in B, not A => not equal"
if $idx_b;
}
return 0;
}
# both have git blob ids: definitive comparison
if ( $idx_a->[2] and $idx_b->[2] ) {
if ( $VERBOSE > 1 ) {
if ($idx_a->[2] ne $idx_b->[2]) {
whisper "$idx_a->[3]: ".substr($idx_a->[2],0,12)
." in A vs ".substr($idx_b->[2],0,12)
." in B";
} else {
whisper "$idx_a->[3]: blob ID match";
}
}
$idx_a->[2] eq $idx_b->[2];
}
# same fast-import mark implies same blob
elsif ( $idx_a->[1] and $idx_b->[1] and
$idx_a->[1] == $idx_b->[1] ) {
whisper "$idx_a->[3]: marknum match" if $VERBOSE > 1;
return 1;
}
# fall back to the p4-recorded md5 of the content
elsif ( $idx_a->[0] and $idx_b->[0] ) {
if ( $VERBOSE > 1 ) {
if ($idx_a->[0] ne $idx_b->[0]) {
whisper "$idx_a->[3]: ".substr($idx_a->[0],0,7)
." in A vs ".substr($idx_b->[0],0,7)
." in B";
} else {
whisper "$idx_a->[3]: md5sum match";
}
}
$idx_a->[0] eq $idx_b->[0];
}
else {
no warnings 'uninitialized';
moan "Can't compare index entries; (@$idx_a) vs (@$idx_b)";
0;
}
}
sub index_diff {
    # Classify the transition of a single index entry between two trees:
    # "added", "deleted", "unchanged" or "changed".
    my ($base, $newer) = @_;
    return "added"     if !$base and $newer;
    return "deleted"   if $base and !$newer;
    return "unchanged" if index_eq($base, $newer);
    return "changed";
}
sub min_change {
my $dbh = shift;
my $type = shift;
# Run the SQL returned by the matching min_change_<type> generator
# below (dispatched via symbolic reference) and return the change
# number it finds, or nothing when the query matches no rows.
no strict 'refs';
print $PROGNAME.": min_change($type) => " if $VERBOSE>0;
my $sql = &{"min_change_$type"};
print "\n$sql => " if $VERBOSE>1;
my $min_row = row_iter($dbh, $sql)->();
if ( ! $min_row ) {
print "(undef)\n" if $VERBOSE>0;
return;
}
print $min_row->{change}, "\n" if $VERBOSE>0;
$min_row->{change};
}
sub min_change_blobs {
# lowest closed change with a non-delete file revision (change_type 2)
# that has no rev_marks row yet, i.e. whose blob is still unexported
<<SQL;
select distinct on (r.change)
r.change
from
change c
join revcx r
on (r.change = c.change and c.closed != 0)
left join rev_marks rm
using (depotpath, revision)
where
r.change_type != 2 and
rm.depotpath is null
order by
r.change asc
limit 1
SQL
}
sub min_change_commits {
# lowest closed change on a detected branch that has no change_marks
# row yet, i.e. not yet exported as a commit
<<SQL;
select distinct on (cb.change)
cb.change
from
change c
join change_branches cb
using (change)
left join change_marks cm
using (branchpath, change)
where
cm.branchpath is null and
c.closed != 0
order by
cb.change asc
limit 1
SQL
}
sub min_change_branches {
# lowest closed change with no change_branches rows at all, i.e.
# branch detection has not yet run over it
<<SQL;
select distinct on (c.change)
c.change
from
change c
left join change_branches cb
using (change)
where
cb.change is null and
c.closed != 0
order by
c.change asc
limit 1
SQL
}
sub min_change_extant_branches {
# lowest change that DOES have change_branches rows; used by
# lose-branches to pick its default starting point
<<SQL;
select distinct on (c.change)
c.change
from
change c
join change_branches cb
using (change)
order by
c.change asc
limit 1
SQL
}
sub min_change_exported_commits {
# lowest closed change that already has a change_marks row, i.e. has
# been exported as a commit
<<SQL;
select distinct on (c.change)
c.change
from
change c
join change_marks cm
using (change)
where
c.closed != 0
order by
c.change asc
limit 1
SQL
}
sub parse_export_opts {
my $dbh = shift;
my $type = shift;
# Parse the options shared by the chunked export-style subcommands:
# -n chunk size, --depth, -l/--long, --recalc, -a/--automatic and an
# optional "min..max" change spec.  Missing bounds are filled in from
# min_change($type) / max_change, and chunk_size is recomputed from
# the final bounds.  Exits early when there is nothing to do.
my %export_opts;
{
@ARGV = @_;
my @gfi_options;
getopt("n=i" => \$export_opts{chunk_size},
"depth=i" => \$export_opts{depth},
"l|long" => \$export_opts{long},
"recalc" => \$export_opts{derive_again},
"automatic|a" => \$FULLY_AUTOMATIC,
);
@_ = @ARGV;
}
if ( @_ ) {
my $revspec = shift;
if ( $revspec !~ m{^(\d+)(?:\.\.(\d+))?$} ) {
abort "bad change spec '$revspec'";
}
@export_opts{qw(min max)} = ($1, $2);
}
if ( !$export_opts{min} ) {
$export_opts{min} = min_change($dbh, $type)
or do {
say "min_change($type) returns undef - apparently nothing to do!";
exit 0;
};
}
# an explicit chunk size beats an explicit max
if ( $export_opts{min} and $export_opts{chunk_size} ) {
$export_opts{max} = $export_opts{min}
+ $export_opts{chunk_size} - 1;
}
if ( !$export_opts{max} ) {
$export_opts{max} = max_change($dbh, { $type => 1 });
}
if ( $export_opts{min} and $export_opts{max} ) {
$export_opts{chunk_size} = $export_opts{max}
- $export_opts{min} + 1;
}
$export_opts{verbose} = $VERBOSE;
%export_opts;
}
sub do_export_blobs {
    # `export-blobs` subcommand: parse the common export options, open
    # the fast-import stream and export one chunk of blobs.
    my $dbh = shift;
    my %opts = parse_export_opts($dbh, "blobs", @_);
    gfi_open(\%opts);
    export_blob_chunk($dbh, \%opts);
}
# path of the --export-marks file the current fast-import run writes;
# undef when the stream is not going to a real fast-import process
our $MARKS_FILE;
sub gfi_open {
my $eo = shift;
# Open the GFI output handle.  When stdout is a terminal, pipe to
# `git fast-import` with an --export-marks file; otherwise dump the
# raw stream to stdout (re-routing our own messages to stderr) and
# drop $MARKS_FILE, which disables checkpointing to the DB.
$MARKS_FILE ||= "p4raw.$$.marks";
if ( -t STDOUT ) {
open GFI, "|git fast-import --quiet "
.($eo->{gfi_options}||"")
.($eo->{depth}?"--depth=$eo->{depth} ":"")
."--export-marks=$MARKS_FILE"
or barf "popen gfi failed; $!";
}
else {
open GFI, ">&STDOUT";
open STDOUT, ">&STDERR";
moan "won't be able to commit!";
say "drop constraints on rev_marks / change_marks or "
."this run will not be restartable";
undef($MARKS_FILE);
}
binmode GFI;
}
use constant CHUNK_SIZE => 4096;
# mark allocation state: a pre-reserved range [$MARK_MIN, $MARK_MAX]
# taken from the gfi_mark sequence, and $MARK, the last mark handed out
our $MARK_MIN;
our $MARK_MAX;
our $MARK;
sub gfi_get_marks {
my $dbh = shift;
my $count = shift;
# Reserve $count marks from the gfi_mark sequence in a couple of
# round-trips by temporarily bumping the sequence increment.  As the
# comment at the call site says: this is not transaction guarded, so
# concurrent exporters would clash.
my ($dummy) = map { @$_ } @{ $dbh->selectall_arrayref(<<SQL) };
select nextval('gfi_mark')
SQL
$dbh->do(<<SQL) or die $dbh->errstr;
alter sequence gfi_mark increment by $count
SQL
($MARK_MAX) = map { @$_ } @{ $dbh->selectall_arrayref(<<SQL) };
select nextval('gfi_mark')
SQL
$dbh->do(<<SQL) or die $dbh->errstr;
alter sequence gfi_mark increment by 1
SQL
$MARK_MIN = $MARK_MAX - $count + 1;
die "read nextval of $MARK_MAX" unless $MARK_MIN > 0;
# positioned so the next gfi_get_mark call hands out $MARK_MIN
$MARK = $MARK_MIN - 1;
}
# marks issued this run, mapped to their object type (per %MARK_TYPES);
# gfi_checkpoint drains %MARKS as the marks appear in the marks file
our %MARKS;
our %MARK_TYPES = qw(blob 1 commit 2 tag 3);
sub gfi_get_mark {
my $dbh = shift;
my $type = shift;
# Hand out the next mark from the reserved range, falling back to a
# sequence round-trip when the range is exhausted; record the mark's
# type so gfi_checkpoint knows which sha1 column it belongs in.
unless ( $MARK_MAX and ++$MARK <= $MARK_MAX ) {
($MARK) = map { @$_ } @{ $dbh->selectall_arrayref(<<SQL) };
select nextval('gfi_mark')
SQL
}
$MARKS{$MARK}=$MARK_TYPES{$type};
$MARK;
}
sub gfi_send_blob {
    # Emit one blob record on the fast-import stream and return the
    # mark that was allocated for it.
    my ($dbh, $buf) = @_;
    my $mark = gfi_get_mark($dbh, "blob");
    print GFI "blob\nmark :$mark\n";
    gfi_data($buf);
    return $mark;
}
sub gfi_data {
    # Emit a fast-import inline data block: byte count header, then the
    # payload (passed by reference) and a trailing newline.
    my $buf = shift;
    print GFI "data ", length($$buf), "\n", $$buf, "\n";
}
sub gfi_utf8_data {
my $buf = shift;
# utf8::upgrade returns the number of octets in the string's UTF-8
# representation - exactly the length fast-import needs; the handle
# is switched to :utf8 just for the payload so those octets are what
# actually hit the stream
my $l = utf8::upgrade($$buf);
print GFI "data $l\n";
binmode GFI, ":utf8";
print GFI $$buf, "\n";
binmode GFI;
}
sub gfi_start_commit {
my $dbh = shift;
# Start a fast-import commit record; allocates and returns the
# commit's mark.  Remaining args: ref name, optional author and the
# committer, each person a [name, email, date] triple.
my $mark = gfi_get_mark($dbh, "commit");
my $ref = shift;
my $author = shift;
my $committer = shift;
# names/emails may contain non-ASCII, so emit the header as UTF-8
binmode GFI, ":utf8";
print GFI "commit ", $ref, "\n";
print GFI "mark :", $mark, "\n";
print GFI "author ", $author->[0], " <", $author->[1], "> ",
$author->[2], "\n" if $author;
print GFI "committer ", $committer->[0], " <", $committer->[1], "> ",
$committer->[2], "\n";
binmode GFI;
return $mark;
}
sub gfi_from {
    # Set the first parent of the commit being emitted; with no parent,
    # start from an empty tree instead.
    my $parent = shift;
    if ( defined $parent ) {
        print GFI "from $parent\n";
    }
    else {
        print GFI "deleteall\n";
    }
}
sub gfi_merge {
    # Emit a "merge" line for each extra parent; stops at the first
    # false value, as the original shift-loop did.
    for my $parent (@_) {
        last unless $parent;
        print GFI "merge $parent\n";
    }
}
sub gfi_filedelete {
    # Emit a fast-import file-delete directive for a tree-relative path.
    my $path = shift;
    print GFI "D $path\n";
}
sub gfi_filemodify {
    # Emit a fast-import file-modify directive: octal mode, blob
    # reference (mark or sha1), tree-relative path.
    my ($mode, $blobref, $relname) = @_;
    printf GFI "M %o %s %s\n", $mode, $blobref, $relname;
}
# terminate the current commit record with the blank line fast-import
# expects
sub gfi_finish_commit {
print GFI "\n";
}
sub gfi_checkpoint {
my $dbh = shift;
# Ask fast-import to flush its marks file, wait for the file to
# (re)appear with a new mtime, record the sha1 of every pending mark
# in the marks table, then commit the surrounding DB transaction.
if ( !defined $MARKS_FILE ) {
moan "don't know where the marks are going! hope you "
.("dropped those rev_marks / change_marks "
."constraints");
goto commit;
}
if ( !keys %MARKS ) {
moan "nothing to checkpoint";
goto commit;
}
# send a dummy blob to make sure gfi writes marks out
gfi_send_blob $dbh,
\(join("", map { chr(rand(64)+33) } (1..3))
x rand(32) );
my $last_mtime = (stat $MARKS_FILE)[9];
say "Now checkpointing.";
print GFI "checkpoint\n\n";
GFI->flush();
my $time;
# poll until the marks file exists and its mtime has moved on
while ( ! -e $MARKS_FILE or
$last_mtime and
((stat _)[9] == $last_mtime) ) {
sleep 1;
$time++;
say "waited ${time}s for $MARKS_FILE to be created";
}
my $insert_mark_sth = $dbh->prepare(<<SQL);
insert into marks
(mark, commitid, blobid)
values
(?, ?, ?)
SQL
my $b_t = $MARK_TYPES{blob};
my $c_t = $MARK_TYPES{commit};
# re-read the marks file until every issued mark has appeared; commit
# marks fill commitid, blob marks fill blobid
while ( keys %MARKS ) {
my @marks = `cat $MARKS_FILE`;
my ($t, $found, $mark, $sha1);
for ( @marks ) {
if (($mark, $sha1) =
m{^:(\d+) ([0-9a-f]{40})} and
($t = delete $MARKS{$mark})
) {
$found++;
$insert_mark_sth->execute
($mark,
($t == $c_t ? $sha1 : undef),
($t == $b_t ? $sha1 : undef),
);
}
}
if ( $found ) {
mutter "wrote $found marks to DB";
}
else {
moan "still waiting for ".(scalar keys %MARKS)
." mark(s) to appear";
sleep 1;
}
}
commit:
$dbh->commit
or moan "commit failed; ".$dbh->errstr;
}
sub export_blob_chunk {
my $dbh = shift;
my $o = shift;
local($VERBOSE) = $o->{verbose};
my $min_change = $o->{min};
my $chunk_size = $o->{chunk_size};
my $max_change = $o->{max};
# Send every not-yet-exported file revision in the change range to
# fast-import as a blob, recording a rev_marks row per revision, and
# finish with a checkpoint so the marks land in the DB.
# psuedocode for blob import.
# 1. find the lowest change which is yet to be marked as
# imported
say "exporting blobs for changes $min_change .. "
."$max_change";
# 2. get all of the files in it
$dbh->begin_work;
my ($count_em_sth) = $dbh->prepare(<<SQL);
select
count(distinct rev.depotpath),
count(rev.revision)
from
rev
left join rev_marks
using (depotpath, revision)
where
rev.rev_change_type != 2 and
rev_marks.mark is null and
change between ? and ?
SQL
$count_em_sth->execute($min_change, $min_change+$chunk_size-1);
my ($paths, $revisions);
$count_em_sth->bind_columns(\$paths, \$revisions);
$count_em_sth->fetch;
$count_em_sth->finish;
mutter "this chunk touches $paths depot paths and $revisions"
." distinct revisions";
if ( !$revisions ) {
say "no un-exported file revisions in this range";
goto checkpoint;
}
# don't want to round-trip to the DB just to get sequence numbers
# so do a special increment. This is not transaction guarded!
gfi_get_marks($dbh, $revisions);
# 3. for each of those files, find all the ones without
# rev_blob rows
my @files;
# the sort method here could be improved further, but it is a
# reasonable starting point and should mean that many
# ancestrally related files are sent to fast-import in
# sequence
my $file_list = $dbh->prepare(<<SQL);
select
rev.depotpath,
rev.file_type,
count(rev.revision) as num,
min(rev.revision) as min,
max(rev.revision) as max,
integed.object,
integed.object_minrev,
integed.object_maxrev
from
rev
left join rev_marks
using (depotpath, revision)
left join integed
on (int_type = 2 and subject = depotpath)
where
rev.rev_change_type != 2 and
rev_marks.mark is null and
rev.change between ? and ?
group by
depotpath,
rev.file_type,
integed.object,
integed.object_minrev,
integed.object_maxrev
order by
(case when integed.object is null
then depotpath
else
integed.object
end),
(case when integed.object is null
then 0
else
1
end);
SQL
mutter "getting file list";
$file_list->execute($min_change, $max_change);
# 4. for each of those files, find *all* of the branched
# versions of it, and send them all at once to
# git-fast-import, entering rev_marks rows for them as we
# send marks to git-fast-import
# It's quite important to do all the revisions of a file at
# once, otherwise fast-import will not be able to make
# on-the-fly deltas and the repo will become gigabytes.
#
# note "p4 print" is not required; can just use rcs
# directly by looking at the "rev" table; it has a rcs
# filename and revision that quite adequately refers to an
# rcs version. So you can just collect
# `co -p1.2 -kb depot/mg.c` (eg), get its length, confirm
# the MD5, and then feed to git-fast-import using the
# "mark" functionality, perhaps marking it with the MD5
# or depotpath/revision. Then when the fast-import
# "checkpoint" command is issued we will get back the
# GIT-SHA1 values.
my $list_rcs_revs = $dbh->prepare(<<SQL);
select
revision,
rcs_file,
rcs_revision,
rev_change_type,
revision_md5
from
rev
left join rev_marks rm
using (depotpath, revision)
where
rev_change_type != 2 and
depotpath = ? and
rev.revision between ? and ? and
rm.mark is null
SQL
my $insert_rev_mark_row = $dbh->prepare(<<SQL);
insert into
rev_marks (depotpath, revision, mark)
values
(?, ?, ?)
SQL
mutter "now exporting file images";
my $tpb;
# progress bar only if Term::ProgressBar loaded and not quietened
if ( $o->{verbose} >= 0 ) {
$tpb = eval { Term::ProgressBar->new
({ count => $revisions,
ETA => "linear",
}) };
}
my $done_revisions = 0;
my $next_update = 1;
my $delete_md5 = '00000000000000000000000000000000';
my %seen;
my %md5_mark;
while ( my $row = $file_list->fetchrow_hashref ) {
my $depotpath = $row->{depotpath};
$list_rcs_revs->execute(@{$row}{qw/depotpath min max/});
$list_rcs_revs->bind_columns
(\(my ($rev, $rcs_file, $rcs_revision,
$change_type, $md5)));
while ( $list_rcs_revs->fetch ) {
next if $seen{md5_hex("$depotpath#$rev")}++;
my $has_md5 = $md5 ne $delete_md5;
my $lc_md5 = lc($md5);
my $mark;
# identical content already sent this run? re-use its mark
if ( $has_md5 and $md5_mark{$lc_md5} ) {
$mark = $md5_mark{$lc_md5};
goto record;
}
my $contents = get_rcs($rcs_file, $rcs_revision);
my $found_md5;
# don't do the md5 check on +k files
if ( $has_md5 and
!($row->{file_type}&P4_TYPE_KORRUPT)
) {
$found_md5 = lc(md5_hex($contents));
if ( $found_md5 ne $lc_md5 ) {
# we fall over in a screaming heap
die("MD5 mismatch on $depotpath"
."#$rev ($rcs_file "
."$rcs_revision); "
."$found_md5 from co -p, "
."$md5 in DB");
}
}
$mark = gfi_send_blob($dbh, \$contents);
$md5_mark{$lc_md5||$found_md5}
= $mark if $has_md5 or $found_md5;
record:
eval {
local($dbh->{PrintError}) = 0;
$insert_rev_mark_row->execute
($depotpath, $rev, $mark);
};
die "error inserting ($depotpath#$rev => $mark); $@"
if $@;
$done_revisions++;
}
if ( $tpb and $done_revisions >= $next_update ) {
$next_update = $tpb->update($done_revisions);
}
}
$tpb->update($done_revisions) if $tpb;
$file_list->finish;
checkpoint:
unless ( $o->{no_checkpoint} ) {
gfi_checkpoint($dbh);
}
}
sub do_find_branches {
    # `find-branches` subcommand: run change_stats over each change in
    # the selected range, one DB transaction per change.
    my $dbh = shift;
    # "close enough" :)
    my %opts = parse_export_opts($dbh, "branches", @_);
    say "finding branches for $opts{chunk_size} changes";
    for my $change ( $opts{min} .. $opts{max} ) {
        $dbh->begin_work;
        change_stats($dbh, $change,
                     { compact      => !$opts{long},
                       derive_again => $opts{derive_again},
                     },
                    );
        print "\n";
        $dbh->commit;
    }
}
sub check_branchpath {
my $dbh = shift;
my $branchpath = shift;
my $change = shift;
# With $change given, confirm that exact (branchpath, change) row
# exists (returns it, or undef); without one, return the latest
# change recorded for the branchpath.  Note the second heredoc is
# only appended - and its placeholder only bound - when $change was
# supplied.
my $sth = $dbh->prepare(<<SQL.($change?<<SQL:""));
select
max(change)
from
change_branches
where
branchpath = ?
SQL
and change = ?
SQL
$sth->execute($branchpath, ($change?($change):()));
($change) = $sth->fetchrow_array;
$change;
}
use Set::Object qw(set);
sub find_merge_base {
# Find the merge base(s) of the given places - each a
# [ branchpath, change ] pair - by walking the change_parents
# ancestry DAG backwards from all starting points at once, newest
# change first. Returns a list of [ branchpath, change ] pairs
# (possibly empty when no common ancestor is found).
my $dbh = shift;
my @places = @_;
# no ancestor can be newer than the newest starting point
my $max_change = max map { $_->[1] } @places;
# slurp in change_parents
# keep manual parent links; otherwise keep derived links only for
# changes with no manual link and none_unseen not false
my $sth = $dbh->prepare(<<SQL);
select
*
from
change_parents c
where
change <= ? and
(manual or (not manual and
not exists (select true from change_parents c2
where c2.change = c.change and c2.manual) and
(c.none_unseen is null or c.none_unseen = true)))
SQL
$sth->execute($max_change);
my @cp;
while ( my $row = $sth->fetchrow_hashref ) {
push @cp, $row;
}
# group the parent rows by their child node, keyed "branchpath@change"
my %cp;
for ( @cp ) {
my $where = $_->{branchpath}."@".$_->{change};
my $list = $cp{$where} ||= [];
push @$list, $_;
}
# the merge base is the latest plane across the ancestry DAG
# which is mergable from all the starting points.
my @start = map { join '@', @$_ } @places;
my %start = map { $_ => 1 } @start;
whisper "finding merge base of: @start" if $VERBOSE > 1;
# %parent_of: for each starting tip, the set of nodes known to be
# reachable (ancestor-or-self) from that tip
my %parent_of = map { $_ => {$_=>1} } @start;
# %stale: nodes already dominated by a discovered merge base
my %stale;
my %seen = map { $_ => 1 } @start;
# order work newest-change-first, so a node is visited only after
# its descendants
my $sortfunc = sub {
my ($c_a) = ($_[0] =~ m{@(\d+)$});
my ($c_b) = ($_[1] =~ m{@(\d+)$});
($c_b||0) <=> ($c_a||0)
};
my @interesting = sort { $sortfunc->($a, $b) } keys %seen;
my %bases;
# stop early once everything left on the queue is stale
while ( grep { !exists $stale{$_} } @interesting and
(my $item = shift @interesting) ) {
my @parents;
# '@0' is the synthetic root node: it has no parents
unless ( $item eq '@0' ) {
@parents = map {
$_->{ref} ||
$_->{parent_branchpath}."@".$_->{parent_change}
} @{$cp{$item}};
# a node with no recorded parents descends from the root
if ( !@parents ) {
@parents = '@0';
}
}
whisper "parents of $item are: @parents" if $VERBOSE > 1;
# NOTE(review): dead code - deliberately disabled with "0 and";
# looks like an earlier merge-base test kept for reference
if ( 0 and (grep { $start{$_} } @parents) >= (@start - 1) ) {
# *THIS* is a merge base
$bases{$item}++;
for ( @parents ) {
$stale{$_}++;
}
}
# propagate reachability: every tip that reaches $item also
# reaches all of $item's parents
my @parent_of;
for my $tip ( @start ) {
if ( $parent_of{$tip}{$item} ) {
push @parent_of, $tip;
for ( @parents ) {
$parent_of{$tip}{$_}++;
}
}
}
# staleness is inherited by ancestors
if ( $stale{$item} ) {
whisper "$item is stale" if $VERBOSE > 1;
for ( @parents ) {
$stale{$_}++;
}
}
whisper "seen by branches: @parent_of" if $VERBOSE > 1;
if ( @parent_of == @start ) {
# merge base
# reachable from every starting point; record it unless it
# lies below an already-found base, and mark its ancestry
# stale either way
if ( !$stale{$item} ) {
$bases{$item}++;
whisper "making $item a merge base." if $VERBOSE > 1;
for my $parent ( @parents ) {
$stale{$parent}++;
}
}
else {
whisper "$item is a stale merge base."
if $VERBOSE > 1;
}
}
# queue parents not yet seen, keeping newest-first order
push @interesting,
(grep { !$seen{$_}++ }
@parents);
@interesting = sort { $sortfunc->($a, $b) }
@interesting;
$seen{$item}++;
}
# unpack the "branchpath@change" keys back into pairs
return map { [ m{(.*)@(\d+)$} ] } keys %bases;
}
sub do_merge_base {
    # Command handler: each argument is "//branchpath[@change]".
    # Resolve every argument to a concrete [ branchpath, change ]
    # pair, then print the merge base(s); exits with status 1 when
    # no valid merge base exists.
    my $dbh = shift;

    my @places;
    while ( my $item = shift ) {
        $item =~ m{^(//.*?)(?:\@(\d+))?$}
            or abort "expecting branch paths, not '$item'";
        my ($branchpath, $change) = ($1, $2);
        # an omitted change means "tip of branch"; this also checks
        # the branch is known at all
        $change = check_branchpath($dbh, $branchpath, $change)
            or barf "'$item' is not a valid branchpath";
        push @places, [ $branchpath, $change ];
    }

    my @bases = find_merge_base($dbh, @places);
    unless ( @bases ) {
        say "no valid merge base";
        exit 1;
    }
    say "merge base: ".join(", ", map { join '@', @$_ } @bases);
}
sub do_is_parent {
# TODO: unimplemented stub - takes the database handle but performs
# no work yet; presumably meant to answer whether one place is an
# ancestor of another.
my $dbh = shift;
}
# 5. finally, we have rev_blob rows for all of the files
# in this version, so send a tree and/or commit object
# (though it might be easier to use
# `git update-index --index-info`). Information from the
# integed table should probably go into the commit message,
# where it is not redundant (which is a difficult criterion
# to nail down for sure!)
sub export_commit_chunk {
# Export the changes in $o->{min}..$o->{max} as git-fast-import
# commits: one commit per (branchpath, change) pair, complete with
# parents, file modifications/deletions and the change description.
# Each new commit's fast-import mark is recorded in change_marks.
# Works by walking three change-ordered row iterators in parallel.
my $dbh = shift;
my $o = shift; # options: min, max, chunk_size, long, verbose, no_checkpoint
# NOTE(review): 3 looks like DBD::Pg's "idle, in transaction" ping
# state, i.e. only start a transaction when one is not already
# open - confirm against DBD::Pg docs
$dbh->begin_work() unless $dbh->ping == 3;
# first, get a plan of branches to export
# one row per (branchpath, change[, parent]); "already" is non-null
# when the commit was marked by a previous run. Parent rows obey
# the manual-link-wins rules (cf. find_merge_base), and same-branch
# parents sort first so they become the 'from' parent below.
my $commit_dag = row_iter($dbh, $o->{min}, $o->{max}, <<SQL);
select
cb.branchpath,
cb.change,
cm2.mark as already,
cp.parent_branchpath,
cp.parent_change,
cm.mark,
m.commitid,
cp.ref,
cp.manual,
cp.json_info
from
change_branches cb
left join change_marks cm2
using (branchpath, change)
left join change_parents cp
on ( (cb.branchpath = cp.branchpath and
cb.change = cp.change) and
(cp.manual or (not cp.manual and
not exists (select true from change_parents cp2
where cp2.change = cp.change
and cp2.manual) and
(cp.none_unseen is null or cp.none_unseen = true)))
)
left join change_marks cm
on (cp.parent_branchpath = cm.branchpath and
cp.parent_change = cm.change)
left join marks m
on (m.mark = cm.mark)
where
cb.change between ? and ?
order by
cb.change,
cb.branchpath,
(cp.parent_branchpath = cb.branchpath) desc
SQL
# a big query yes, but remember this is git-*fast*-import ;)
# every file revision touched by each change in the chunk, with the
# blob id or fast-import mark produced by the blob export pass
my $files = row_iter($dbh, $o->{min}, $o->{max}, <<SQL);
select
cb.change,
cb.branchpath,
substr(rev.depotpath, length(cb.branchpath)+2) as relpath,
ct.title as change_type,
file_type,
mark,
blobid
from
change_branches cb
join rev on (cb.change = rev.change
and rev.depotpath like (cb.branchpath||'/%') )
join change_type ct on (rev_change_type = change_type)
left join rev_marks using (depotpath,revision)
left join marks using (mark)
where
cb.change between ? and ?
order by
cb.change,
cb.branchpath
SQL
# remember which branches we have reset
# NOTE(review): %branches is read below but never assigned anywhere
# in this sub, so the 'from' test always passes - confirm intended
my %branches;
# marks issued during *this* run: $cb_marks{$branchpath}{$change}
my %cb_marks;
# turn a parent row into a fast-import committish: an explicit ref
# or known commit id wins; otherwise ":mark", taken from the DB or
# from a commit made earlier this run; undef means "no parent"
my $to_ref = sub {
my $x = shift;
my $ref = $x->{ref} || $x->{commitid};
if ( !$ref ) {
my ($pb, $pc) =
($x->{parent_branchpath},
$x->{parent_change});
if ( $pb ) {
my $mark = $x->{mark} || $cb_marks{$pb}{$pc};
$ref = ":$mark";
}
else {
$ref = undef;
}
}
whisper "ref of parent: ".$ref
if defined $ref;
$ref;
};
# author and description details for each closed change
my $desc_i = row_iter($dbh, $o->{min}, $o->{max}, <<SQL);
select
change,
realname,
email,
who_user,
change_time,
description
from
change
join p4user using (who_user)
join change_desc using (change_desc_id)
where
change between ? and ?
and closed = 1
SQL
my $insert_commit_mark_sth = $dbh->prepare(<<SQL);
insert into change_marks
(branchpath, change, mark)
values
(?, ?, ?)
SQL
say "gathering export plan";
# fetch the first row and push it straight back - presumably this
# forces the big queries to execute before the progress bar starts
$commit_dag->($commit_dag->());
$files->($files->());
say "exporting commits between $o->{min} and $o->{max}";
my ($tpb, $next_update);
if ( $o->{verbose} == 0 ) {
$tpb = eval { Term::ProgressBar->new
({ count => $o->{chunk_size},
ETA => "linear",
}) };
}
change:
while ( my $change_br = $commit_dag->() ) {
my $change = $change_br->{change};
my $bp = $change_br->{branchpath};
my $ref = _path2ref($bp);
if ( $change_br->{already} ) {
mutter "skipping $bp\@$change - already marked";
next;
}
mutter "exporting $bp\@$change to $ref";
# collect every DAG row for this (branchpath, change): each
# additional row is another parent of the same commit
my @parents = $change_br;
while ( 1 ) {
my $x = $commit_dag->() or last;
if ( $x->{branchpath} ne $bp or
$x->{change} != $change
) {
# put it back
$commit_dag->($x);
last;
}
push @parents, $x;
}
# fetch a change off
# (skipping over descriptions of changes with no branch rows)
my $desc;
while ( !$desc or
$desc->{change} < $change_br->{change} ) {
$desc = $desc_i->() or do {
moan "out of changes! looking for"
." $change_br->{change}";
last change;
};
}
# and put the change back!
$desc_i->($desc);
die "wrong change!"
if $desc->{change} != $change_br->{change};
my ($author, $committer, $description)
= make_commit($desc, @parents);
if ( $VERBOSE > 0 and $o->{long} ) {
print "author: @$author\n" if $author;
print "committer: @$committer\n";
print "$description\n";
}
# resolve YYY...YYY change references in the message
add_soft_refs($dbh, \$description);
# write a description
my $mark = gfi_start_commit
($dbh, $ref, $author, $committer);
$cb_marks{$bp}{$change} = $mark;
gfi_utf8_data(\$description);
$insert_commit_mark_sth->execute($bp, $change, $mark);
# write out the parents
my $n = 0;
# the first parent becomes the fast-import 'from'
my $from = shift @parents;
if ( $from->{branchpath} ne $bp or !$branches{$bp}
or $branches{$bp} != $from->{parent_change}
) {
# NOTE(review): gfi_from is called even when $where is
# undef (the whisper is guarded but the call is not) -
# presumably it copes; confirm
my $where = $to_ref->($from);
whisper "sending 'from' of $where"
if $where;
gfi_from($where);
}
if (@parents) {
mutter @parents." extra merge parents";
gfi_merge(map { $to_ref->($_) } @parents);
}
# now, get all the files in that revision and send
# them to gfi
while ( my $rev = $files->() ) {
if ( $rev->{change} != $change_br->{change} or
$rev->{branchpath} ne $change_br->{branchpath}
) {
# belongs to a later commit; put it back
$files->($rev);
last;
}
if ( $rev->{change_type} eq "delete" ) {
if ( $VERBOSE > 0 and $o->{long} ) {
print "... D $rev->{relpath}\n";
}
gfi_filedelete($rev->{relpath});
}
else {
my $ft = $rev->{file_type};
my $mode = _p4_type_to_mode($ft);
if ( !$rev->{blobid} and
!$rev->{mark} ) {
say "error: no mark/blob for $rev->{branchpath}\@$rev->{change}:$rev->{relpath}";
say "Aborting change export.";
last change;
}
# prefer a known blob id over a mark reference
my $dataref = $rev->{blobid} ||
":".$rev->{mark};
my $relpath =$rev->{relpath};
if ( $VERBOSE > 0 and $o->{long} ) {
print "... M $rev->{relpath}\n";
}
gfi_filemodify ($mode, $dataref, $relpath);
}
}
gfi_finish_commit;
# progress is measured in changes, despite the variable name
my $done_revisions =
($o->{chunk_size} - ($o->{max} - $change));
if ( $tpb and
( not $next_update or
$done_revisions >= $next_update ) ) {
$next_update = $tpb->update($done_revisions);
}
}
$tpb->update($o->{chunk_size}) if $tpb;
unless ( $o->{no_checkpoint} ) {
gfi_checkpoint($dbh);
}
}
sub add_soft_refs {
    # Resolve "YYY<link>YYY" placeholders in a commit description
    # (passed by scalar reference, edited in place). A link of the
    # form "[//branchpath]@<change>" is replaced with, for each
    # branch the change was exported on, the commit id - or, when
    # only a mark exists so far, a 40-character "X...:(<mark>)...X"
    # token for later fix-up by a patched git-fast-import.
    #
    # $dbh  - database handle (only used when a link matches)
    # $desc - scalar ref to the description text; modified in place
    my $dbh = shift;
    my $desc = shift;
    while ( $$desc =~ m{YYY(\S+)YYY} ) {
        my $link = $1;
        if ( $link =~ m{^(//.*)?@(\d+)$} ) {
            my ($branchpath, $change) = ($1, $2);
            # narrow the lookup when the link names a branch
            my $where = "";
            $where = "and\n\tbranchpath = ?" if $branchpath;
            my $possible = row_iter
                ($dbh, ($branchpath?($branchpath):()),
                 $change, <<SQL);
select
	cb.branchpath,
	cb.change,
	cm.mark,
	m.commitid
from
	change_branches cb
	join change_marks cm using (branchpath, change)
	left join marks m using (mark)
where
	change = ?$where
SQL
            ; # go, cperl-mode :)
            my @possible;
            while ( my $x = $possible->() ) {
                push @possible, $x;
            }
            my $replacement = "";
            if ( not @possible ) {
                $replacement = "(not found)";
            }
            else {
                for my $who ( @possible ) {
                    $replacement .=
                        ("on $who->{branchpath}: "
                         . ( $who->{commitid}
                             ? $who->{commitid}
                             : do {
                                 # no commit id yet: embed the mark
                                 # in a fixed-width token.
                                 # this requires the patch
                                 # 0001-git-fast-import-s-X-N-X-substr-mark-2.patch
                                 my $m = $who->{mark};
                                 my $l = int(19-length($m)/2);
                                 ("X"x$l).":($m)".("X"x(40-$l-length($m)-3));
                             } )
                        );
                }
            }
            $$desc =~ s{YYY\Q$link\EYYY}{$replacement};
        }
        else {
            # Bug fix: a marker that does not parse as a change link
            # used to be left wrapped in YYY...YYY, so the outer
            # while loop matched it again and spun forever. Strip
            # the wrapper so the scan always terminates.
            $$desc =~ s{YYY\Q$link\EYYY}{$link};
        }
    }
}
use Date::Parse;
use MooseX::TimestampTZ qw(:all);
use Time::Local;
use utf8;
sub make_commit {
my $desc = shift;
my @parents = @_;
my $author = undef;
my $email = $desc->{email};
my $committer = [ $desc->{realname}, $email,