From fe40684ef34a3f30d8832eb8be27fd7d3c3d9226 Mon Sep 17 00:00:00 2001 From: Michael Wiencek Date: Sun, 25 Feb 2024 01:41:03 -0600 Subject: [PATCH 1/2] Add `is_table_empty` to `Script::Utils` --- admin/MBImport.pl | 18 ++++-------------- admin/replication/ImportReplicationChanges | 18 ++++-------------- lib/MusicBrainz/Script/Utils.pm | 21 ++++++++++++++++++++- 3 files changed, 28 insertions(+), 29 deletions(-) diff --git a/admin/MBImport.pl b/admin/MBImport.pl index 8104204ba7e..7351117a44d 100755 --- a/admin/MBImport.pl +++ b/admin/MBImport.pl @@ -10,6 +10,7 @@ use Getopt::Long; use DBDefs; use Sql; +use MusicBrainz::Script::Utils qw( is_table_empty ); use MusicBrainz::Server::Replication qw( :replication_type ); use MusicBrainz::Server::Constants qw( @FULL_TABLE_LIST ); @@ -298,7 +299,7 @@ sub ImportTable $sql->commit; die 'Error loading data' - if -f $file and empty($table); + if -f $file and is_table_empty($sql, $table); ++$tables; $totalrows += $rows; @@ -314,17 +315,6 @@ sub ImportTable exit 1; } -sub empty -{ - my $table = shift; - - my $any = $sql->select_single_value( - "SELECT 1 FROM $table LIMIT 1", - ); - - not defined $any; -} - sub ImportAllTables { for my $table (@$import_tables) { @@ -342,7 +332,7 @@ sub ImportAllTables { my $basetable = $1; - if (not empty($basetable) and not $delete_first) + if (not is_table_empty($sql, $basetable) and not $delete_first) { warn "$basetable table already contains data; skipping $table\n"; next; @@ -352,7 +342,7 @@ sub ImportAllTables ImportTable($basetable, $file) or next; } else { - if (not empty($table) and not $delete_first) + if (not is_table_empty($sql, $table) and not $delete_first) { warn "$table already contains data; skipping\n"; next; diff --git a/admin/replication/ImportReplicationChanges b/admin/replication/ImportReplicationChanges index 6eb20718382..e1996d8f242 100755 --- a/admin/replication/ImportReplicationChanges +++ b/admin/replication/ImportReplicationChanges @@ -9,6 +9,7 @@ use FindBin; use lib 
"$FindBin::Bin/../../lib"; use Getopt::Long; +use MusicBrainz::Script::Utils qw( is_table_empty ); use MusicBrainz::Server::Context; use DBDefs; use Sql; @@ -156,7 +157,7 @@ sub ImportTable $sql->commit; die 'Error loading data' - if -f $file and empty($table); + if -f $file and is_table_empty($sql, $table); ++$tables; $totalrows += $rows; @@ -172,17 +173,6 @@ sub ImportTable exit 1; } -sub empty -{ - my $table = shift; - - my $any = $sql->select_single_value( - "SELECT 1 FROM $table LIMIT 1", - ); - - not defined $any; -} - sub ImportReplicationTables { $sql->auto_commit; @@ -197,7 +187,7 @@ sub ImportReplicationTables my $file = find_file($table); $file or print("No data file found for '$table', skipping\n"), die; - if (not empty($table)) + if (not is_table_empty($sql, $table)) { die "$table already contains data; skipping\n"; next; @@ -226,7 +216,7 @@ sub ImportDBMirror2ReplicationTables { } my $qualified_table = "dbmirror2.$table"; - if (!empty($qualified_table)) { + if (!is_table_empty($sql, $qualified_table)) { die "$qualified_table already contains data"; } diff --git a/lib/MusicBrainz/Script/Utils.pm b/lib/MusicBrainz/Script/Utils.pm index 2cb3cc8e115..42c3d0196c4 100644 --- a/lib/MusicBrainz/Script/Utils.pm +++ b/lib/MusicBrainz/Script/Utils.pm @@ -8,7 +8,12 @@ use feature 'state'; use base 'Exporter'; -our @EXPORT_OK = qw( get_primary_keys log retry ); +our @EXPORT_OK = qw( + get_primary_keys + is_table_empty + log + retry +); =sub get_primary_keys @@ -39,6 +44,20 @@ sub get_primary_keys($$$) { return @keys; } +=sub is_table_empty + +Returns whether C<$table> is empty. 
+ +=cut + +sub is_table_empty { + my ($sql, $table) = @_; + + not defined $sql->select_single_value(<<~"SQL"); + SELECT 1 FROM $table LIMIT 1; + SQL +} + =sub log Log a message to stdout, prefixed with the local time and ending with a From 2b06d5dc83ac728f901939b40d5455bed284115a Mon Sep 17 00:00:00 2001 From: Michael Wiencek Date: Sun, 25 Feb 2024 10:22:46 -0600 Subject: [PATCH 2/2] Add `copy_table_from_file` to `Script::Utils` admin/MBImport.pl and admin/replication/ImportReplicationChanges contained very similar implementations of `ImportTable`, so it would be ideal to share them. I'd also like to use the same functionality in a future commit (to load dbmirror2 packets into temporary tables). The implementations in these two files did diverge slightly. For one, MBImport.pl's allowed fixing broken UTF-8 byte sequences. I'm not sure how necessary that is in 2024, or what the historical reasons for adding it were, but I kept the functionality behind a flag in `copy_table_from_file`. MBImport.pl's also supported the flags `$delete_first` (to empty the table before importing) and `$fProgress` (to control whether progress is shown). I've basically kept all of MBImport.pl's code, with these features behind `%opts` flags. I kept the definitions of `ImportTable`, but they now call `copy_table_from_file` internally. I couldn't replace all of the `ImportTable` calls with direct calls to `copy_table_from_file`, because `ImportTable` also updates statistics local to each file and has a different return value. 
--- admin/MBImport.pl | 109 +++----------------- admin/replication/ImportReplicationChanges | 78 +++----------- lib/MusicBrainz/Script/Utils.pm | 113 +++++++++++++++++++++ 3 files changed, 143 insertions(+), 157 deletions(-) diff --git a/admin/MBImport.pl b/admin/MBImport.pl index 7351117a44d..c5ecec5dd16 100755 --- a/admin/MBImport.pl +++ b/admin/MBImport.pl @@ -10,7 +10,10 @@ use Getopt::Long; use DBDefs; use Sql; -use MusicBrainz::Script::Utils qw( is_table_empty ); +use MusicBrainz::Script::Utils qw( + copy_table_from_file + is_table_empty +); use MusicBrainz::Server::Replication qw( :replication_type ); use MusicBrainz::Server::Constants qw( @FULL_TABLE_LIST ); @@ -18,7 +21,6 @@ my ($fHelp, $fIgnoreErrors); my $tmpdir = '/tmp'; -my $fProgress = -t STDOUT; my $fFixUTF8 = 0; my $skip_ensure_editor = 0; my $update_replication_control = 1; @@ -215,104 +217,25 @@ sub usage exit($errors ? 1 : 0); - - sub ImportTable { my ($table, $file) = @_; - print localtime() . " : load $table\n"; - - my $rows = 0; - - my $t1 = [gettimeofday]; - my $interval; - - my $size = -s($file) - or return 1; - - my $p = sub { - my ($pre, $post) = @_; - no integer; - printf $pre.'%-30.30s %9d %3d%% %9d'.$post, - $table, $rows, int(100 * tell(LOAD) / $size), - $rows / ($interval||1); - }; - - $OUTPUT_AUTOFLUSH = 1; - - eval - { - # open in :bytes mode (always keep byte octets), to allow fixing of invalid - # UTF-8 byte sequences in --fix-broken-utf8 mode. - # in default mode, the Pg driver will take care of the UTF-8 transformation - # and croak on any invalid UTF-8 character - open(LOAD, '<:bytes', $file) or die "open $file: $OS_ERROR"; - - # If you're looking at this code because your import failed, maybe - # with an error like this: - # ERROR: copy: line 1, Missing data for column "automodsaccepted" - # then the chances are it's because the data you're trying to load - # doesn't match the structure of the database you're trying to load it - # into. 
Please make sure you've got the right copy of the server - # code, as described in the INSTALL file. - - $sql->begin; - $sql->do("DELETE FROM $table") if $delete_first; - my $dbh = $sql->dbh; # issues a ping, must be done before COPY - $sql->do("COPY $table FROM stdin"); - - $p->('', '') if $fProgress; - my $t; - - use Encode; - while (<LOAD>) - { - $t = $_; - if ($fFixUTF8) { - # replaces any invalid UTF-8 character with special 0xFFFD codepoint - # and warn on any such occurence - $t = Encode::decode('UTF-8', $t, Encode::FB_DEFAULT | Encode::WARN_ON_ERR); - } else { - $t = Encode::decode('UTF-8', $t, Encode::FB_CROAK); - } - if (!$dbh->pg_putcopydata($t)) - { - print 'ERROR while processing: ', $t; - die; - } - - ++$rows; - unless ($rows & 0xFFF) - { - $interval = tv_interval($t1); - $p->("\r", '') if $fProgress; - } - } - $dbh->pg_putcopyend() or die; - $interval = tv_interval($t1); - $p->(($fProgress ? "\r" : ''), sprintf(" %.2f sec\n", $interval)); - - close LOAD - or die $OS_ERROR; - - $sql->commit; - - die 'Error loading data' - if -f $file and is_table_empty($sql, $table); + my $rows = copy_table_from_file( + $sql, $table, $file, + delete_first => $delete_first, + fix_utf8 => $fFixUTF8, + ignore_errors => $fIgnoreErrors, + ); + if ($rows) { ++$tables; $totalrows += $rows; - - 1; - }; - - return 1 unless $EVAL_ERROR; - warn "Error loading $file: $EVAL_ERROR"; - $sql->rollback; - - ++$errors, return 0 if $fIgnoreErrors; - exit 1; + return 1; + } else { + ++$errors; + return 0; + } } sub ImportAllTables diff --git a/admin/replication/ImportReplicationChanges b/admin/replication/ImportReplicationChanges index e1996d8f242..6608d772f4f 100755 --- a/admin/replication/ImportReplicationChanges +++ b/admin/replication/ImportReplicationChanges @@ -9,7 +9,10 @@ use FindBin; use lib "$FindBin::Bin/../../lib"; use Getopt::Long; -use MusicBrainz::Script::Utils qw( is_table_empty ); +use MusicBrainz::Script::Utils qw( + copy_table_from_file + is_table_empty +); use
MusicBrainz::Server::Context; use DBDefs; use Sql; @@ -105,72 +108,19 @@ sub ImportTable { my ($table, $file) = @_; - print localtime() . " : load $table\n"; - - my $rows = 0; - - my $t1 = [gettimeofday]; - my $interval; - - my $size = -s($file) || 1; - - my $p = sub { - my ($pre, $post) = @_; - no integer; - printf $pre.'%-30.30s %9d %3d%% %9d'.$post, - $table, $rows, int(100 * tell(LOAD) / $size), - $rows / ($interval||1); - }; - - $OUTPUT_AUTOFLUSH = 1; - - eval - { - open(LOAD, '<:encoding(utf8)', $file) or die "open $file: $OS_ERROR"; - - $sql->begin; - my $dbh = $sql->dbh; # issues a ping, must be done before COPY - $sql->do("COPY $table FROM stdin"); - - $p->('', ''); - - while (<LOAD>) - { - $dbh->pg_putcopydata($_) or die; - - ++$rows; - unless ($rows & 0xFFF) - { - $interval = tv_interval($t1); - $p->("\r", ''); - } - } - - $dbh->pg_putcopyend() or die; - - $interval = tv_interval($t1); - $p->("\r", sprintf(" %.2f sec\n", $interval)); - - close LOAD - or die $OS_ERROR; - - $sql->commit; - - die 'Error loading data' - if -f $file and is_table_empty($sql, $table); + my $rows = copy_table_from_file( + $sql, $table, $file, + ignore_errors => $fIgnoreErrors, + ); + if ($rows) { ++$tables; $totalrows += $rows; - - 1; - }; - - return 1 unless $EVAL_ERROR; - warn "Error loading $file: $EVAL_ERROR"; - $sql->rollback; - - ++$errors, return 0 if $fIgnoreErrors; - exit 1; + return 1; + } else { + ++$errors; + return 0; + } } sub ImportReplicationTables diff --git a/lib/MusicBrainz/Script/Utils.pm b/lib/MusicBrainz/Script/Utils.pm index 42c3d0196c4..5f616a3ffa9 100644 --- a/lib/MusicBrainz/Script/Utils.pm +++ b/lib/MusicBrainz/Script/Utils.pm @@ -2,19 +2,132 @@ package MusicBrainz::Script::Utils; use strict; use warnings; +use Encode; use English; +use Time::HiRes qw( gettimeofday tv_interval ); use feature 'state'; use base 'Exporter'; our @EXPORT_OK = qw( + copy_table_from_file get_primary_keys is_table_empty log retry ); +=sub copy_table_from_file + +Imports C<$file>
into C<$table> via PostgreSQL's C<COPY> statement. + +Returns the number of rows imported (1 if C<$file> is empty or missing; 0 if the import failed and C<ignore_errors> is set). + +=cut + +sub copy_table_from_file { + my ($sql, $table, $file, %opts) = @_; + + my $delete_first = $opts{delete_first}; + my $fix_utf8 = $opts{fix_utf8}; + my $ignore_errors = $opts{ignore_errors}; + my $quiet = $opts{quiet}; + my $show_progress = !$quiet && ($opts{show_progress} // (-t STDOUT)); + + print localtime() . " : load $table\n" + unless $quiet; + + my $rows = 0; + my $t1 = [gettimeofday]; + my $interval; + + my $size = -s($file) + or return 1; + + my $p = sub { + my ($pre, $post) = @_; + no integer; + printf $pre.'%-30.30s %9d %3d%% %9d'.$post, + $table, $rows, int(100 * tell(LOAD) / $size), + $rows / ($interval || 1); + }; + + $OUTPUT_AUTOFLUSH = 1; + + eval { + # Open in :bytes mode (always keep byte octets), to allow fixing of + # invalid UTF-8 byte sequences in --fix-broken-utf8 mode. + # In default mode, the Pg driver will take care of the UTF-8 + # transformation and croak on any invalid UTF-8 character. + open(LOAD, '<:bytes', $file) or die "open $file: $OS_ERROR"; + + # If you're looking at this code because your import failed, maybe + # with an error like this: + # ERROR: copy: line 1, Missing data for column "automodsaccepted" + # then the chances are it's because the data you're trying to load + # doesn't match the structure of the database you're trying to load + # it into. Please make sure you've got the right copy of the server + # code, as described in the INSTALL file. + + $sql->begin; + $sql->do("DELETE FROM $table") if $delete_first; + + my $dbh = $sql->dbh; # issues a ping, must be done before COPY + $sql->do("COPY $table FROM stdin"); + + $p->('', '') if $show_progress; + + my $t; + while (<LOAD>) { + $t = $_; + if ($fix_utf8) { + # Replaces any invalid UTF-8 character with special 0xFFFD + # codepoint and warn on any such occurrence.
+ $t = Encode::decode('UTF-8', $t, + Encode::FB_DEFAULT | + Encode::WARN_ON_ERR); + } else { + $t = Encode::decode('UTF-8', $t, Encode::FB_CROAK); + } + if (!$dbh->pg_putcopydata($t)) { + print 'ERROR while processing: ', $t; + die; + } + + ++$rows; + unless ($rows & 0xFFF) { + $interval = tv_interval($t1); + $p->("\r", '') if $show_progress; + } + } + + $dbh->pg_putcopyend or die; + + $interval = tv_interval($t1); + $p->(($show_progress ? "\r" : ''), + sprintf(" %.2f sec\n", $interval)) + unless $quiet; + + close LOAD + or die $OS_ERROR; + + $sql->commit; + + die 'Error loading data' + if -f $file and is_table_empty($sql, $table); + + 1; + }; + + return $rows unless $EVAL_ERROR; + warn "Error loading $file: $EVAL_ERROR"; + $sql->rollback; + + return 0 if $ignore_errors; + exit 1; +} + =sub get_primary_keys Get a list of primary key column names for $schema.$table.