Skip to content

Commit

Permalink
Handle more broken input
Browse files: browse the repository at this point in the history
  • Loading branch information
nigelhorne committed Nov 21, 2020
1 parent 39937c7 commit 27b770f
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 22 deletions.
2 changes: 1 addition & 1 deletion bin/brewall
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash

typeset -r WITHOUT_TIMEOUT="david"
typeset -r WITH_TIMEOUT="amelia,pi2,pi2b,pi2c,pi4,netgate1,netgate3,smoker01.teada.net,vultr1.bandsman.co.uk,vultr2.bandsman.co.uk,oracle1.bandsman.co.uk,opc@oracle2.bandsman.co.uk,localhost"
typeset -r WITH_TIMEOUT="amelia,philips,pi2,pi2b,pi2c,pi4,netgate1,netgate3,smoker01.teada.net,vultr1.bandsman.co.uk,vultr2.bandsman.co.uk,oracle1.bandsman.co.uk,opc@oracle2.bandsman.co.uk,localhost"

for i in $WITHOUT_TIMEOUT
do
Expand Down
58 changes: 37 additions & 21 deletions bin/obit
Original file line number Diff line number Diff line change
Expand Up @@ -64,23 +64,12 @@ $ua->conn_cache()->total_capacity(undef);
my $page = 1;
my @queue;
while(1) {
$| = 1;
printf "%-70s\r", $page;
$| = 0;
print "\n";

last if(!mlarchives($ua, $page));
$page++;
}

print "\n";

my $issue = 9;
while(1) {
$| = 1;
printf "%-70s\r", $issue;
$| = 0;

last if(!freelists($ua, 'v25no' . sprintf('%03d', $issue)));
$issue++;
}
Expand All @@ -96,14 +85,18 @@ sub mlarchives($$) {
my $page = shift;
my $url = "https://mlarchives.rootsweb.com/listindexes/emails?listname=gen-obit&page=$page";

$| = 1;
printf "%-70s\r", $url;
$| = 0;

my $response = $ua->get($url);

my $data;
if($response->is_success) {
$data = $response->decoded_content();
} else {
if($response->status_line() ne '404 Not Found') {
warn "$url: ", $response->status_line();
warn "\n$url: ", $response->status_line();
}
return 0;
}
Expand Down Expand Up @@ -190,14 +183,18 @@ sub freelists($$) {
my $issue = shift;
my $url = "https://www.freelists.org/post/obitdailytimes/Obituary-Daily-Times-$issue";

$| = 1;
printf "%-70s\r", $url;
$| = 0;

my $response = $ua->get($url);

my $data;
if($response->is_success) {
$data = $response->decoded_content();
} else {
if($response->status_line() ne '404 Not Found') {
warn "$url: ", $response->status_line();
warn "\n$url: ", $response->status_line();
}
return 0;
}
Expand All @@ -218,7 +215,7 @@ sub freelists($$) {
last if($line =~ /^\-\-\-\-\-\-\-\-/);
$line =~ s/ / /g;
$line = decode_entities($line);
# $line =~ s/'/''/g;
$line =~ s/'/''/g;
my ($name, $age, $place, $newspaper, $date, $tag) = split(/;\s?/, $line);
if((!defined($tag)) || ($tag eq '')) {
my $cont = shift(@lines);
Expand All @@ -235,6 +232,12 @@ sub freelists($$) {
# print __LINE__, ": $issue: ", Data::Dumper->new([split(/;\s?/, $line)])->Dump();
next unless(defined($name));
my ($last, $first) = split(',\s', $name);
next unless(defined($last));

# Parsing is completely broken for some issues, so give up on the rest of this one,
# e.g. https://www.freelists.org/post/obitdailytimes/Obituary-Daily-Times-v25no010
last unless(defined($newspaper));

my $maiden = '';
if($first) {
if($first =~ /(.+)\s\((.+)\)$/) {
Expand Down Expand Up @@ -267,12 +270,16 @@ sub queue($$$$$$$)
if($maiden) {
$columns{'maiden'} = $maiden;
}
if($age ne '') {
if(defined($age) && ($age ne '')) {
$columns{'age'} = $age;
}
$columns{'place'} = $place;
return unless($date =~ /^\d/);
$columns{'date'} = $date;
if($date) {
return unless($date =~ /^\d/);
$columns{'date'} = $date;
} else {
return; # If we don't know the date of publication, it's not easy to find
}
$columns{'newspaper'} = $newspaper;

push @queue, \%columns;
Expand All @@ -297,7 +304,7 @@ sub flush($)
} else {
$query .= "NULL,'" . $columns{'last'} . "',";
}
if($columns{'maiden'}) {
if($columns{'maiden'} && ($columns{'maiden'} ne ' ')) {
$query .= "'" . $columns{'maiden'} . "',";
} else {
$query .= 'NULL,';
Expand All @@ -307,8 +314,16 @@ sub flush($)
} else {
$query .= 'NULL,';
}
$query .= "'" . $columns{'place'} . "',";
$query .= "'" . $columns{'date'} . "',";
if($columns{'place'} && ($columns{'place'} ne '')) {
$query .= "'" . $columns{'place'} . "',";
} else {
$query .= 'NULL,';
}
if($columns{'date'}) {
$query .= "'" . $columns{'date'} . "',";
} else {
$query .= 'NULL,';
}
$query .= "'" . $columns{'newspaper'} . "')";
}

Expand All @@ -320,6 +335,7 @@ sub flush($)
$dbh->do($query);
} catch {
my @call_details = caller(0);
die "Error in insert ($query) called from line ", $call_details[2], "\n";
die "\nError in insert ($query) called from line ",
$call_details[2], ': ', $dbh->errstr(), "\n";
};
}

0 comments on commit 27b770f

Please sign in to comment.