-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
1,121 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
package MAB2::Parser::Sisis; | ||
|
||
#ABSTRACT: MAB2 Sisis format parser | ||
#VERSION | ||
|
||
use strict; | ||
use warnings; | ||
use charnames qw< :full >; | ||
use Carp qw(croak); | ||
use Readonly; | ||
|
||
Readonly my $SUBFIELD_INDICATOR => qq{\N{INFORMATION SEPARATOR ONE}}; | ||
Readonly my $END_OF_FIELD => qq{\N{LINE FEED}}; | ||
Readonly my $END_OF_RECORD => q{}; | ||
|
||
=head1 SYNOPSisis | ||
L<MAB2::Parser::Sisis> is a parser for Sisisis records. | ||
L<MAB2::Parser::Sisis> expects UTF-8 encoded files as input. Otherwise provide a | ||
filehande with a specified I/O layer. | ||
use MAB2::Parser::Sisis; | ||
my $parser = MAB2::Parser::Sisis->new( $filename ); | ||
while ( my $record_hash = $parser->next() ) { | ||
# do something | ||
} | ||
=head1 Arguments | ||
=over | ||
=item C<file> | ||
Path to file with MAB2 Sisis records. | ||
=item C<fh> | ||
Open filehandle for file with MAB2 Sisis records. | ||
=back | ||
=head1 METHODS | ||
=head2 new($filename | $filehandle) | ||
=cut | ||
|
||
sub new { | ||
my $class = shift; | ||
my $file = shift; | ||
|
||
my $self = { | ||
filename => undef, | ||
rec_number => 0, | ||
reader => undef, | ||
}; | ||
|
||
# check for file or filehandle | ||
my $ishandle = eval { fileno($file); }; | ||
if ( !$@ && defined $ishandle ) { | ||
$self->{filename} = scalar $file; | ||
$self->{reader} = $file; | ||
} | ||
elsif ( -e $file ) { | ||
open $self->{reader}, '<:encoding(UTF-8)', $file | ||
or croak "cannot read from file $file\n"; | ||
$self->{filename} = $file; | ||
} | ||
else { | ||
croak "file or filehande $file does not exists"; | ||
} | ||
return ( bless $self, $class ); | ||
} | ||
|
||
=head2 next() | ||
Reads the next record from MAB2 input stream. Returns a Perl hash. | ||
=cut | ||
|
||
sub next { | ||
my $self = shift; | ||
local $/ = $END_OF_RECORD; | ||
if ( my $data = $self->{reader}->getline() ) { | ||
$self->{rec_number}++; | ||
my $record = _decode($data); | ||
|
||
# get value from 0000 as id | ||
my ($id) = map { $_->[-1] } grep { $_->[0] =~ '0000' } @{$record}; | ||
return { _id => $id, record => $record }; | ||
} | ||
return; | ||
} | ||
|
||
=head2 _decode($record) | ||
Deserialize a raw MAB2 record to an ARRAY of ARRAYs. | ||
=cut | ||
|
||
sub _decode { | ||
my $reader = shift; | ||
chomp($reader); | ||
|
||
my @record; | ||
|
||
my @fields = split($END_OF_FIELD, $reader); | ||
|
||
foreach my $field (@fields) { | ||
my ($tag, $subfield, $value); | ||
# 0015.001:ger | ||
# 0027:S | ||
if ($field =~ m/^(\d{4})(\.(\d{3}))?:(.*)?$/){ | ||
$tag = $1; | ||
$subfield = $3; | ||
$value = $4; | ||
}else{ | ||
# "##### ..." fields are skipped | ||
next; | ||
} | ||
next if $tag eq '9999'; | ||
if (defined $subfield) { | ||
push(@record, [$tag, ' ', $subfield, $value]); | ||
} else { | ||
push(@record, [$tag, ' ', '_', $value]); | ||
} | ||
} | ||
return \@record; | ||
} | ||
|
||
=head1 SEE ALSO | ||
L<Catmandu::Importer::MAB2>. | ||
=cut | ||
|
||
1; # End of MAB2::Parser::Sisis |
Oops, something went wrong.