-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added Parser and Writer for MAB2 Diskette data
- Loading branch information
Showing
9 changed files
with
1,324 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
package MAB2::Parser::Disk; | ||
|
||
# ABSTRACT: MAB2 RAW format parser | ||
# VERSION | ||
|
||
use strict; | ||
use warnings; | ||
use charnames qw< :full >; | ||
use Carp qw(croak); | ||
use Readonly; | ||
|
||
Readonly my $SUBFIELD_INDICATOR => qq{\N{INFORMATION SEPARATOR ONE}}; | ||
Readonly my $END_OF_FIELD => qq{\n}; | ||
Readonly my $END_OF_RECORD => q{}; | ||
|
||
=head1 SYNOPSIS | ||
L<MAB2::Parser::Disk> is a parser for MAB2-Diskette records. | ||
L<MAB2::Parser::Disk> expects UTF-8 encoded files as input. Otherwise provide a | ||
filehande with a specified I/O layer. | ||
Catmandu... | ||
use MAB2::Parser::Disk; | ||
my $parser = MAB2::Parser::Disk->new( $filename ); | ||
while ( my $record_hash = $parser->next() ) { | ||
# do something | ||
} | ||
=head1 SUBROUTINES/METHODS | ||
=head2 new | ||
=cut | ||
|
||
# ToDo: use Moo | ||
|
||
sub new { | ||
my $class = shift; | ||
my $file = shift; | ||
|
||
my $self = { | ||
filename => undef, | ||
rec_number => 0, | ||
reader => undef, | ||
}; | ||
|
||
# check for file or filehandle | ||
my $ishandle = eval { fileno($file); }; | ||
if ( !$@ && defined $ishandle ) { | ||
$self->{filename} = scalar $file; | ||
$self->{reader} = $file; | ||
} | ||
elsif ( -e $file ) { | ||
open $self->{reader}, '<:encoding(UTF-8)', $file | ||
or croak "cannot read from file $file\n"; | ||
$self->{filename} = $file; | ||
} | ||
else { | ||
croak "file or filehande $file does not exists"; | ||
} | ||
return ( bless $self, $class ); | ||
} | ||
|
||
=head2 next() | ||
Reads the next record from MAB2 input stream. Returns a Perl hash. | ||
=cut | ||
|
||
sub next { | ||
my $self = shift; | ||
local $/ = $END_OF_RECORD; | ||
if ( my $data = $self->{reader}->getline() ) { | ||
$self->{rec_number}++; | ||
my $record = _decode($data); | ||
|
||
# get last subfield from 001 as id | ||
my ($id) = map { $_->[-1] } grep { $_->[0] =~ '001' } @{$record}; | ||
return { _id => $id, record => $record }; | ||
} | ||
return; | ||
} | ||
|
||
=head2 _decode() | ||
Deserialize a raw MAB2 record to an array of field arrays. | ||
=cut | ||
|
||
sub _decode { | ||
my $reader = shift; | ||
chomp($reader); | ||
|
||
my @record; | ||
|
||
my @fields = split($END_OF_FIELD, $reader); | ||
|
||
my $leader = shift @fields; | ||
if( $leader =~ m/^\N{NUMBER SIGN}{3}\s(\d{5}[cdnpu]M2.0\d{7}\s{6}\w)/xms ){ | ||
push( @record, [ 'LDR', '', '_', $1 ] ); | ||
} | ||
else{ | ||
croak "record leader not valid: $leader"; | ||
} | ||
|
||
# ToDo: skip faulty fields | ||
foreach my $field (@fields) { | ||
croak "incomplete field: \"$field\"" if length($field) <= 4; | ||
my $tag = substr( $field, 0, 3 ); | ||
my $ind = substr( $field, 3, 1 ); | ||
my $data = substr( $field, 4 ); | ||
|
||
# check for a 3-digit numeric tag | ||
( $tag =~ m/^[0-9]{3}$/xms ) or croak "Invalid tag: \"$tag\""; | ||
|
||
# check if indicator is an single alphabetic character | ||
( $ind =~ m/^[a-z\s]$/xms ) or croak "Invalid indicator: \"$ind\""; | ||
|
||
# check if data contains subfields | ||
if ( $data =~ $SUBFIELD_INDICATOR ) { | ||
|
||
# check if data starts with a SUBFIELD_INDICATOR | ||
( substr( $data, 0, 1 ) eq $SUBFIELD_INDICATOR ) or croak "Invalid subfield structure at: \"$tag$ind\""; | ||
my @subfields = split( $SUBFIELD_INDICATOR, substr( $data, 1 ) ); | ||
( @subfields ) or croak "no subfield data found: \"$tag$ind$data\""; | ||
push( | ||
@record, | ||
[ $tag, | ||
$ind, | ||
map { substr( $_, 0, 1 ), substr( $_, 1 ) } @subfields | ||
] | ||
); | ||
} | ||
else { | ||
push( @record, [ $tag, $ind, '_', $data ] ); | ||
} | ||
} | ||
return \@record; | ||
} | ||
|
||
=cut | ||
|
||
1; # End of MAB2::Parser::Disk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
package MAB2::Writer::Disk; | ||
|
||
#ABSTRACT: MAB2 Diskette format serializer | ||
#VERSION | ||
|
||
use strict; | ||
use Moo; | ||
with 'MAB2::Writer::Handle'; | ||
|
||
use charnames ':full'; | ||
use Readonly; | ||
|
||
Readonly my $SUBFIELD_INDICATOR => qq{\N{INFORMATION SEPARATOR ONE}}; | ||
Readonly my $END_OF_FIELD => qq{\n}; | ||
Readonly my $END_OF_RECORD => qq{\n}; | ||
|
||
=head1 SYNOPSIS | ||
L<MAB2::Writer::Disk> is a MAB2 Diskette serializer. | ||
use MAB2::Writer::Disk; | ||
my @mab_records = ( | ||
[ | ||
['001', ' ', '_', '2415107-5'], | ||
['331', ' ', '_', 'Code4Lib journal'], | ||
['655', 'e', 'u', 'http://journal.code4lib.org/', 'z', 'kostenfrei'], | ||
... | ||
], | ||
{ | ||
record => [ | ||
['001', ' ', '_', '2415107-5'], | ||
['331', ' ', '_', 'Code4Lib journal'], | ||
['655', 'e', 'u', 'http://journal.code4lib.org/', 'z', 'kostenfrei'], | ||
... | ||
] | ||
} | ||
); | ||
$writer = MAB2::Writer::Disk->new( fh => $fh ); | ||
foreach my $record (@mab_records) { | ||
$writer->write($record); | ||
} | ||
=head1 SUBROUTINES/METHODS | ||
=head2 new() | ||
=cut | ||
|
||
sub BUILD { | ||
my ($self) = @_; | ||
} | ||
|
||
=head2 _write_record() | ||
=cut | ||
|
||
sub _write_record { | ||
my ( $self, $record ) = @_; | ||
my $fh = $self->fh; | ||
|
||
if ( $record->[0][0] eq 'LDR' ) { | ||
my $leader = shift( @{$record} ); | ||
print $fh "### ", $leader->[3], $END_OF_FIELD; | ||
} | ||
else { | ||
# set default record leader | ||
print $fh "### 99999nM2.01200024 h", $END_OF_FIELD; | ||
} | ||
|
||
foreach my $field (@$record) { | ||
|
||
if ( $field->[2] eq '_' ) { | ||
print $fh $field->[0], $field->[1], $field->[3], $END_OF_FIELD; | ||
} | ||
else { | ||
print $fh $field->[0], $field->[1]; | ||
for ( my $i = 2; $i < scalar @$field; $i += 2 ) { | ||
my $subfield_code = $field->[ $i ]; | ||
my $value = $field->[ $i + 1 ]; | ||
print $fh $SUBFIELD_INDICATOR, $subfield_code, $value; | ||
} | ||
print $fh $END_OF_FIELD; | ||
} | ||
} | ||
print $fh $END_OF_RECORD; | ||
} | ||
|
||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.