
Merge branch 'rc/1.112100'

Signed-off-by: Alexei Znamensky <russoz@cpan.org>
commit e03aa12ee65a14326d88ebee0db2ce7a3292b115
2 parents: f26cfbf + c4bdadc
@russoz authored
4 dist.ini
@@ -4,12 +4,10 @@ license = Perl_5
copyright_holder = Alexei Znamensky
copyright_year = 2011
-[AutoVersion]
[@Author::RUSSOZ]
+version = auto
twitter_tags = #opendata #dataflow
[Prereqs]
perl = 5.008
-[@Git]
-
134 lib/DataFlow.pm
@@ -8,10 +8,13 @@ use warnings;
# VERSION
use Moose;
+use Moose::Exporter;
with 'DataFlow::Role::Processor';
with 'DataFlow::Role::Dumper';
-use DataFlow::Types qw(ProcessorList);
+use DataFlow::Types qw(WrappedProcList);
+
+use Moose::Autobox;
use namespace::autoclean;
use Queue::Base 2.1;
@@ -23,7 +26,16 @@ with 'MooseX::OneArgNew' => { 'type' => 'DataFlow', 'init_arg' => 'procs', };
with 'MooseX::OneArgNew' =>
{ 'type' => 'DataFlow::Proc', 'init_arg' => 'procs', };
+Moose::Exporter->setup_import_methods( as_is => ['dataflow'] );
+
# attributes
+has 'default_channel' => (
+ 'is' => 'ro',
+ 'isa' => 'Str',
+ 'lazy' => 1,
+ 'default' => 'default',
+);
+
has 'auto_process' => (
'is' => 'ro',
'isa' => 'Bool',
@@ -33,7 +45,7 @@ has 'auto_process' => (
has 'procs' => (
'is' => 'ro',
- 'isa' => 'ProcessorList',
+ 'isa' => 'WrappedProcList',
'required' => 1,
'coerce' => 1,
'builder' => '_build_procs',
@@ -49,7 +61,7 @@ has '_queues' => (
'has_queued_data' =>
sub { return _count_queued_items( shift->_queues ) },
'_make_queues' => sub {
- return [ map { Queue::Base->new() } @{ shift->procs } ];
+ shift->procs->map( sub { Queue::Base->new() } );
},
},
);
@@ -61,21 +73,7 @@ has '_lastq' => (
'default' => sub { return Queue::Base->new },
);
-has 'dump_input' => (
- 'is' => 'ro',
- 'isa' => 'Bool',
- 'lazy' => 1,
- 'default' => 0,
- 'documentation' => 'Prints a dump of the input load to STDERR',
-);
-
-has 'dump_output' => (
- 'is' => 'ro',
- 'isa' => 'Bool',
- 'lazy' => 1,
- 'default' => 0,
- 'documentation' => 'Prints a dump of the output load to STDERR',
-);
+##############################################################################
sub _build_procs {
return;
@@ -86,7 +84,7 @@ sub _count_queued_items {
my $q = shift;
my $count = 0;
- map { $count = $count + $_->size } @{$q};
+ $q->map( sub { $count = $count + $_->size } );
return $count;
}
@@ -102,7 +100,8 @@ sub _process_queues {
sub _reduce {
my ( $p, @q ) = @_;
- map { _process_queues( $p->[$_], $q[$_], $q[ $_ + 1 ] ) } ( 0 .. $#q - 1 );
+ [ 0 .. $#q - 1 ]
+ ->map( sub { _process_queues( $p->[$_], $q[$_], $q[ $_ + 1 ] ) } );
return;
}
@@ -112,12 +111,19 @@ sub clone {
return DataFlow->new( procs => $self->procs );
}
-sub input {
- my ( $self, @args ) = @_;
+sub channel_input {
+ my ( $self, $channel, @args ) = @_;
$self->prefix_dumper( $self->has_name ? $self->name . ' <<' : '<<', @args )
if $self->dump_input;
- $self->_firstq->add(@args);
+ $self->_firstq->add(
+ @{ @args->map( sub { DataFlow::Item->itemize( $channel, $_ ) } ) } );
+ return;
+}
+
+sub input {
+ my ( $self, @args ) = @_;
+ $self->channel_input( $self->default_channel, @args );
return;
}
@@ -128,18 +134,40 @@ sub process_input {
return;
}
-sub output {
- my $self = shift;
+sub _unitem {
+ my ( $item, $channel ) = @_;
+ return unless defined $item;
+ return $item->get_data($channel);
+}
+sub _output_items {
+ my $self = shift;
$self->process_input if ( $self->_lastq->empty && $self->auto_process );
- my @res = wantarray ? $self->_lastq->remove_all : $self->_lastq->remove;
+ return wantarray ? $self->_lastq->remove_all : $self->_lastq->remove;
+}
+
+sub output_items {
+ my $self = shift;
+ my @res = wantarray ? $self->_output_items : scalar( $self->_output_items );
$self->prefix_dumper( $self->has_name ? $self->name . ' >>' : '>>', @res )
if $self->dump_output;
return wantarray ? @res : $res[0];
}
+sub output {
+ my $self = shift;
+ my $channel = shift || $self->default_channel;
+
+ my @res = wantarray ? $self->_output_items : scalar( $self->_output_items );
+ $self->prefix_dumper( $self->has_name ? $self->name . ' >>' : '>>', @res )
+ if $self->dump_output;
+ return wantarray
+ ? @{ @res->map( sub { _unitem( $_, $channel ) } ) }
+ : _unitem( $res[0], $channel );
+}
+
sub reset { ## no critic
- return map { $_->clear } @{ shift->_queues };
+ return shift->_queues->map( sub { $_->clear } );
}
sub flush {
@@ -160,12 +188,21 @@ sub process {
sub proc_by_index {
my ( $self, $index ) = @_;
- return $self->procs->[$index];
+ return unless $self->procs->[$index];
+ return $self->procs->[$index]->on_proc;
}
sub proc_by_name {
my ( $self, $name ) = @_;
- return ( grep { $_->name eq $name } @{ $self->procs } )[0];
+ return $self->procs->map( sub { $_->on_proc } )
+ ->grep( sub { $_->name eq $name } )->[0];
+
+ #return $procs[0];
+}
+
+sub dataflow (@) { ## no critic
+ my @args = @_;
+ return __PACKAGE__->new( procs => [@args] );
}
__PACKAGE__->meta->make_immutable;
@@ -227,6 +264,10 @@ caller.
(Str) A descriptive name for the dataflow. (OPTIONAL)
+=attr default_channel
+
+(Str) The name of the default communication channel. (DEFAULT: 'default')
+
=attr auto_process
(Bool) If there is data available in the output queue, and one calls the
@@ -277,17 +318,32 @@ Processors using the L<DataFlow::Policy::ProcessInto> policy (default) will
process the items inside an array reference, and the values (not the keys)
inside a hash reference.
+=method channel_input
+
+Accepts input data into a specific channel for the data flow:
+
+ $flow->channel_input( 'mydatachannel', qw/all the simple things/ );
+
=method process_input
Processes items in the array of queues and places at least one item in the
output (last) queue. One will typically call this to flush out some unwanted
data and/or if C<auto_process> has been disabled.
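
For instance, a minimal sketch of driving the queues by hand, with
C<auto_process> disabled at construction time:

  my $flow = DataFlow->new( auto_process => 0, procs => [ sub { uc } ] );
  $flow->input('camel');
  $flow->process_input;            # push the data through to the last queue
  my $result = $flow->output;      # 'CAMEL'
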
+=method output_items
+
+Fetches items, more specifically objects of the type L<DataFlow::Item>, from
+the data flow. If called in scalar context it will return one processed item
+from the flow. If called in list context it will return all the items from
+the last queue.
+
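+A usage sketch, assuming a flow C<$flow> built as in the examples above:
+
+  $flow->input('camel');
+  my @items = $flow->output_items;              # DataFlow::Item objects
+  my $value = $items[0]->get_data('default');   # the processed data
+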
=method output
-Fetches data from the data flow. If called in scalar context it will return
-one processed item from the flow. If called in list context it will return all
-the elements in the last queue.
+Fetches data from the data flow. It accepts an optional parameter naming the
+data channel from which the data must be fetched. If no channel is specified,
+it falls back to the C<default_channel> attribute.
+If called in scalar context it will return one processed item from the flow.
+If called in list context it will return all the elements in the last queue.
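+
+A usage sketch (assuming a flow C<$flow> as above):
+
+  my $one_item  = $flow->output;               # default channel, scalar context
+  my @all_items = $flow->output('default');    # explicit channel, list context
+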
=method reset
@@ -313,9 +369,21 @@ Expects a name (Str) as parameter. Returns the first processor in this
data flow, for which the C<name> attribute has the same value of the C<name>
parameter, or C<undef> otherwise.
+=func dataflow
+
+Syntactic sugar function that instantiates a new flow. It can be
+used like this:
+
+ my $flow = dataflow
+ [ 'Proc' => p => sub { ... } ],
+ ...
+ [ 'CSV' => direction => 'CONVERT_TO' ];
+
+ $flow->process('bananas');
+
=head1 HISTORY
-This is a framework for data flow processing. It started as a spinoff project
+This is a framework for data flow processing. It started as a spin-off project
from the L<OpenData-BR|http://www.opendatabr.org/> initiative.
As of now (Mar, 2011) it is still a 'work in progress', and there is a lot of
121 lib/DataFlow/Item.pm
@@ -3,25 +3,71 @@ package DataFlow::Item;
use strict;
use warnings;
-# ABSTRACT: A piece of information to be processed
+# ABSTRACT: A wrapper around the regular data processed by DataFlow
# VERSION
use Moose;
-use DataFlow::Meta;
+use Moose::Autobox;
+use MooseX::ChainedAccessors;
use namespace::autoclean;
has 'metadata' => (
- 'is' => 'ro',
- 'isa' => 'DataFlow::Meta',
+ 'is' => 'ro',
+ 'isa' => 'HashRef[Any]',
+ 'handles' => { metakeys => sub { shift->metadata->keys }, },
+ 'lazy' => 1,
+ 'default' => sub { {} },
);
-has 'data' => (
- 'is' => 'ro',
- 'isa' => 'Any',
+has 'channels' => (
+ 'is' => 'rw',
+ 'isa' => 'HashRef[Any]',
+ 'handles' => { channel_list => sub { shift->channels->keys }, },
+ 'lazy' => 1,
+ 'default' => sub { {} },
+ 'traits' => ['Chained'],
);
+sub get_metadata {
+ my ( $self, $key ) = @_;
+ return $self->metadata->{$key};
+}
+
+sub set_metadata {
+ my ( $self, $key, $data ) = @_;
+ $self->metadata->{$key} = $data;
+ return $self;
+}
+
+sub get_data {
+ my ( $self, $channel ) = @_;
+ return $self->channels->{$channel};
+}
+
+sub set_data {
+ my ( $self, $channel, $data ) = @_;
+ $self->channels->{$channel} = $data;
+ return $self;
+}
+
+sub itemize { ## no critic
+ return __PACKAGE__->new()->set_data( $_[1], $_[2] );
+}
+
+sub clone {
+ my $self = shift;
+ my @c = %{ $self->channels };
+ return __PACKAGE__->new( metadata => $self->metadata )->channels( {@c} );
+}
+
+sub narrow {
+ my ( $self, $channel ) = @_;
+ return __PACKAGE__->new( metadata => $self->metadata, )
+ ->set_data( $channel, $self->get_data($channel) );
+}
+
__PACKAGE__->meta->make_immutable;
1;
@@ -33,8 +79,69 @@ __END__
=head1 SYNOPSIS
use DataFlow::Item;
+ my $item = DataFlow::Item->itemize( 'channel_name', 42 );
+ say $item->get_data( 'channel_name' );
+
+ $item->set_metadata( 'somekey', q{some meta value} );
+ say $item->get_metadata( 'somekey' );
=head1 DESCRIPTION
+Wraps data and metadata for processing through DataFlow.
+
+=attr metadata
+
+A hash reference containing metadata for the DataFlow.
+
+=attr channels
+
+A hash reference containing data for each 'channel'.
+
+=method metakeys
+
+A convenience method that returns the list of keys in the metadata hash
+reference.
+
+=method channel_list
+
+A convenience method that returns the list of keys in the channels hash
+reference.
+
+=method get_metadata
+
+Returns a metadata value, identified by its key.
+
+=method set_metadata
+
+Sets a metadata value, identified by its key.
+
+=method get_data
+
+Returns a channel value, identified by the channel name.
+
+=method set_data
+
+Sets a channel value, identified by the channel name.
+
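+Both setters return the object itself, so calls can be chained (a small
+sketch with made-up keys):
+
+  my $item = DataFlow::Item->new;
+  $item->set_metadata( 'source', 'feed1' )->set_data( 'chan1', 10 );
+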
+=method itemize
+
+This is a B<class> method that creates a new C<DataFlow::Item> with a piece
+of data stored in a specific channel. As a class method, it must be called
+like this:
+
+ my $item = DataFlow::Item->itemize( 'channel1', { my => 'data' } );
+
+=method clone
+
+Makes a copy of the C<DataFlow::Item> object. Note that the whole metadata
+hash reference is shared with the new instance, while the contents of the
+channels are copied one by one into the new object.
+
+=method narrow
+
+Makes a copy of the C<DataFlow::Item> object, narrowed down to one single
+channel. In other words, it is like C<clone>, except that C<channels> will
+contain B<only> the channel specified as a parameter.
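+
+A sketch of both methods, following the definitions above:
+
+  my $item = DataFlow::Item->itemize( 'a', 1 )->set_data( 'b', 2 );
+  my $full = $item->clone;          # channels: { a => 1, b => 2 }
+  my $only = $item->narrow('b');    # channels: { b => 2 }
+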
=cut
+
1  lib/DataFlow/Policy/Scalar.pm
@@ -13,7 +13,6 @@ with 'DataFlow::Role::ProcPolicy';
use namespace::autoclean;
sub _build_handlers {
- my $self = shift;
my $type_handler = {
'SCALAR' => \&_nop_handle,
'ARRAY' => \&_nop_handle,
19 lib/DataFlow/Proc.pm
@@ -11,6 +11,7 @@ use Moose;
with 'DataFlow::Role::Processor';
with 'DataFlow::Role::Dumper';
+use Moose::Autobox;
use DataFlow::Types qw(ProcessorSub ProcPolicy);
use namespace::autoclean;
@@ -35,22 +36,6 @@ has 'deref' => (
'default' => 0,
);
-has 'dump_input' => (
- 'is' => 'ro',
- 'isa' => 'Bool',
- 'lazy' => 1,
- 'default' => 0,
- 'documentation' => 'Prints a dump of the input load to STDERR',
-);
-
-has 'dump_output' => (
- 'is' => 'ro',
- 'isa' => 'Bool',
- 'lazy' => 1,
- 'default' => 0,
- 'documentation' => 'Prints a dump of the output load to STDERR',
-);
-
has 'policy' => (
'is' => 'ro',
'isa' => 'ProcPolicy',
@@ -102,7 +87,7 @@ sub process {
my @result =
$self->deref
- ? map { _deref($_) } ( $self->_process_one($item) )
+ ? @{ [ $self->_process_one($item) ]->map( sub { _deref($_) } ) }
: $self->_process_one($item);
$self->prefix_dumper( $self->has_name ? $self->name . ' >>' : '>>',
8 lib/DataFlow/Proc/MultiPageURLGenerator.pm
@@ -10,6 +10,7 @@ use warnings;
use Moose;
extends 'DataFlow::Proc';
+use Moose::Autobox;
use namespace::autoclean;
use Carp;
@@ -73,11 +74,12 @@ sub _build_p {
my $last = $self->last_page;
$first = 1 + $last + $first if $first < 0;
- my @result =
- map { $self->make_page_url->( $self, $url, $_ ) } $first .. $last;
+ my $result =
+ [ $first .. $last ]
+ ->map( sub { $self->make_page_url->( $self, $url, $_ ) } );
$self->clear_paged_url;
- return [@result];
+ return $result;
};
}
3  lib/DataFlow/Proc/SQL.pm
@@ -10,6 +10,7 @@ use warnings;
use Moose;
extends 'DataFlow::Proc';
+use Moose::Autobox;
use namespace::autoclean;
use SQL::Abstract;
@@ -36,7 +37,7 @@ sub _build_p {
my ( $insert, @bind ) = $sql->insert( $self->table, $data );
# TODO: regex ?
- map { $insert =~ s/\?/'$_'/; } @bind;
+ @bind->map( sub { $insert =~ s/\?/'$_'/; } );
print $insert . "\n";
}
}
126 lib/DataFlow/ProcWrapper.pm
@@ -0,0 +1,126 @@
+package DataFlow::ProcWrapper;
+
+use strict;
+use warnings;
+
+# ABSTRACT: Wrapper around a processor
+
+# VERSION
+
+use Moose;
+with 'DataFlow::Role::Processor';
+
+use Moose::Autobox;
+use namespace::autoclean;
+
+use DataFlow::Item;
+use DataFlow::Types qw(Processor);
+
+has 'input_chan' => (
+ is => 'ro',
+ isa => 'Str',
+ lazy => 1,
+ default => 'default',
+);
+
+has 'output_chan' => (
+ is => 'ro',
+ isa => 'Str',
+ lazy => 1,
+ default => sub { shift->input_chan },
+);
+
+has 'on_proc' => (
+ is => 'ro',
+ isa => 'Processor',
+ required => 1,
+ init_arg => 'wraps',
+ coerce => 1,
+);
+
+sub _itemize_response {
+ my ( $self, $input_item, @response ) = @_;
+ return ($input_item) unless @response;
+ return @{
+ @response->map(
+ sub { $input_item->clone->set_data( $self->output_chan, $_ ) }
+ )
+ };
+}
+
+sub process {
+ my ( $self, $item ) = @_;
+
+ return unless defined $item;
+ if ( ref($item) eq 'DataFlow::Item' ) {
+ my $data = $item->get_data( $self->input_chan );
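+ # item carries no data in our input channel: pass it through untouched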
+ return ($item) unless $data;
+ return $self->_itemize_response( $item,
+ $self->on_proc->process($data) );
+ }
+ else {
+ my $data = $item;
+ my $empty_item = DataFlow::Item->new();
+ return ($empty_item) unless $data;
+ return $self->_itemize_response( $empty_item,
+ $self->on_proc->process($data) );
+ }
+}
+
+__PACKAGE__->meta->make_immutable;
+
+1;
+
+__END__
+
+=pod
+
+=head1 SYNOPSIS
+
+ use DataFlow::ProcWrapper;
+ use DataFlow::Item;
+
+ my $wrapper = DataFlow::ProcWrapper->new( wraps => sub { lc } );
+ my $item = DataFlow::Item->itemize( 'default', 'WAKAWAKAWAKA' );
+ my @result = $wrapper->process($item);
+ # $result[0]->get_data('default') equals to 'wakawakawaka'
+
+=head1 DESCRIPTION
+
+This class C<DataFlow::ProcWrapper> consumes the L<DataFlow::Role::Processor>
+role, but this is not a "common" processor and it should not be used as such.
+Actually, it is supposed to be used internally by DataFlow alone, so in theory,
+if not in practice, we should be able to ignore its existence.
+
+C<ProcWrapper> will, as the name suggests, wrap around a processor (a Proc,
+a DataFlow, a naked sub or a named processor), and provide a layer
+of control over the input and output channels.
+
+=attr input_chan
+
+Name of the input channel. The L<DataFlow::Item> may carry data in distinct
+"channels", and here we can select which channel we will take the data from.
+If not specified, it will default to the literal string C<< 'default' >>.
+
+=attr output_chan
+
+Similarly, the output channel's name. If not specified, it will default to
+the same channel used for input.
+
+=method process
+
+This works like the regular C<process()> method in a processor, except that
+it expects to receive an object of the type L<DataFlow::Item>.
+
+Additionally, one can pass a plain scalar as the argument, and it will be
+automagically "boxed" into a C<DataFlow::Item> object before processing.
+
+Once the data is within a C<DataFlow::Item>, it will be pulled from the
+specified input channel and passed to the wrapped processor's C<process()>
+method.
+
+It will always return a list with one or more elements, all of them of the
+C<DataFlow::Item> type.
+
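+A usage sketch with a raw scalar, complementing the SYNOPSIS above:
+
+  my $wrapper = DataFlow::ProcWrapper->new( wraps => sub { scalar reverse } );
+  my @items   = $wrapper->process('abc');
+  # $items[0]->get_data('default') is 'cba'
+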
+=cut
+
18 lib/DataFlow/QuickStart.pod
@@ -2,14 +2,14 @@ package DataFlow::QuickStart;
# ABSTRACT: DataFlow Quick Start Guide
+# VERSION
+
1;
__END__
=pod
-=head1 DataFlow Quick Start Guide
-
A guide for jumping quickly onto the DataFlow bandwagon, programming-wise. That
means that, despite our long-term goals of making a framework that can be used
by non-programmers, we are still living within the sphere of those who can
@@ -28,12 +28,12 @@ DataFlow is built upon L<Moose> and it follows the rules of that system.
If you are really serious about submitting code to DataFlow, please read the
section called "Joining the DataFlow effort" below.
-=head2 Using DataFlow
+=head1 Using DataFlow
This is covered in the POD documentation elsewhere, so here we present
just a summary.
-=head3 Proc
+=head2 Proc
A L<DataFlow::Proc> is the basic processing unit of DataFlow. It runs the
closure pointed to by the C<p> parameter with C<$_> localized to the data
@@ -55,7 +55,7 @@ that conforms to a certain structure, say "only arrays, and leave the scalars
alone" or "scalars only, throw an error if anything else comes our way". For
that refer to the L<DataFlow::Role::ProcPolicy> role.
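
A minimal Proc, as a sketch:

  use DataFlow::Proc;
  my $proc = DataFlow::Proc->new( p => sub { uc } );
  my @out  = $proc->process('camel');    # ('CAMEL')
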
-=head3 DataFlow
+=head2 DataFlow
A L<DataFlow> is a sequence of Procs, arranged so that each Proc's output is
fed into the next Proc's input. Sort of like a sequence of commands in a
singled out using proper XPath expressions for them), it will trim the
whitespace and produce CSV output, which can be used in a spreadsheet or to
load a database.
-=head2 Creating Processors and/or Flows
+=head1 Creating Processors and/or Flows
To create a new Proc, one must extend L<DataFlow::Proc>.
When doing that, do refer to Moose best practices.
More sophisticated Procs can also be constructed. Take a look at the source
code of L<DataFlow::Proc::HTMLFilter>, L<DataFlow::Proc::URLRetriever> or
L<DataFlow::Proc::Converter>.
-=head2 Tweaking the Core
+=head1 Tweaking the Core
DataFlow is not a very sophisticated piece of software on its own, much as
a Bourne shell of the 70s was not very sophisticated, but B<it allows and
promotes> extending its functionality to make for sophisticated solutions.
-=head3 A DataFlow
+=head2 A DataFlow
A DataFlow is nothing more than queues and processors:
@@ -152,7 +152,7 @@ B<Q1> is then run through B<P1> and the result (or results) is enqueued into
B<Q2>, and so forth. Upon running the last processor, B<Pn>, the resulting
data is put into B<Qlast>, the last queue in the desert.
-=head2 Code Repository
+=head1 Code Repository
DataFlow source code is hosted at the superb L<Github|http://github.com/>
service, at the address L<http://github.com/russoz/DataFlow>.
19 lib/DataFlow/Role/Dumper.pm
@@ -8,6 +8,7 @@ use warnings;
# VERSION
use Moose::Role;
+use Moose::Autobox;
has '_dumper' => (
'is' => 'ro',
@@ -17,7 +18,7 @@ has '_dumper' => (
use Data::Dumper;
return sub {
$Data::Dumper::Terse = 1;
- return join qq{\n}, map { Dumper($_) } @_;
+ return @_->map( sub { Dumper($_) } )->join(qq{\n});
};
},
'handles' => {
@@ -34,6 +35,22 @@ has '_dumper' => (
},
);
+has 'dump_input' => (
+ 'is' => 'ro',
+ 'isa' => 'Bool',
+ 'lazy' => 1,
+ 'default' => 0,
+ 'documentation' => 'Prints a dump of the input load to STDERR',
+);
+
+has 'dump_output' => (
+ 'is' => 'ro',
+ 'isa' => 'Bool',
+ 'lazy' => 1,
+ 'default' => 0,
+ 'documentation' => 'Prints a dump of the output load to STDERR',
+);
+
1;
__END__
9 lib/DataFlow/Role/ProcPolicy.pm
@@ -8,6 +8,7 @@ use warnings;
# VERSION
use Moose::Role;
+use Moose::Autobox;
use namespace::autoclean;
use Scalar::Util 'reftype';
@@ -81,16 +82,12 @@ sub _handle_scalar_ref {
sub _handle_array_ref {
my ( $p, $item ) = @_;
-
- #use Data::Dumper; warn 'handle_array_ref :: item = ' . Dumper($item);
- my @r = map { _run_p( $p, $_ ) } @{$item};
- return [@r];
+ return $item->map( sub { _run_p( $p, $_ ) } );
}
sub _handle_hash_ref {
my ( $p, $item ) = @_;
- my %r = map { $_ => _run_p( $p, $item->{$_} ) } keys %{$item};
- return {%r};
+ return { @{ $item->keys->map( sub { $_ => _run_p( $p, $item->{$_} ) } ) } };
}
sub _handle_code_ref {
76 lib/DataFlow/Types.pm
@@ -8,7 +8,7 @@ use warnings;
# VERSION
use MooseX::Types -declare => [
- qw(Processor ProcessorList ProcessorSub ProcPolicy),
+ qw(Processor ProcessorList WrappedProcList ProcessorSub ProcPolicy),
qw(ConversionSubs ConversionDirection),
qw(Encoder Decoder),
qw(HTMLFilterTypes),
@@ -19,6 +19,7 @@ use namespace::autoclean;
use MooseX::Types::Moose qw/Str CodeRef ArrayRef HashRef/;
class_type 'DataFlow';
class_type 'DataFlow::Proc';
+class_type 'DataFlow::ProcWrapper';
role_type 'DataFlow::Role::Processor';
role_type 'DataFlow::Role::ProcPolicy';
@@ -91,6 +92,13 @@ sub _any_to_proc {
return $elem;
}
+sub _wrap_proc {
+ my $proc = shift;
+ return $proc if ref($proc) eq 'DataFlow::ProcWrapper';
+ eval 'use DataFlow::ProcWrapper'; ## no critic
+ return DataFlow::ProcWrapper->new( wraps => $proc );
+}
+
# subtypes CORE
subtype 'Processor' => as 'DataFlow::Role::Processor';
@@ -110,6 +118,20 @@ coerce 'ProcessorList' => from 'ArrayRef' => via {
'CodeRef' => via { [ _any_to_proc($_) ] },
from 'DataFlow::Role::Processor' => via { [$_] };
+subtype 'WrappedProcList' => as 'ArrayRef[DataFlow::ProcWrapper]' =>
+ where { scalar @{$_} > 0 } =>
+ message { 'DataFlow must have at least one processor' };
+coerce 'WrappedProcList' => from 'ArrayRef' => via {
+ my @list = @{$_};
+ my @res = map { _wrap_proc( _any_to_proc($_) ) } @list;
+ return [@res];
+},
+ from
+ 'Str' => via { [ _wrap_proc( _str_to_proc($_) ) ] },
+ from
+ 'CodeRef' => via { [ _wrap_proc( _any_to_proc($_) ) ] },
+ from 'DataFlow::Role::Processor' => via { [ _wrap_proc($_) ] };
+
subtype 'ProcessorSub' => as 'CodeRef';
coerce 'ProcessorSub' => from 'DataFlow::Role::Processor' => via {
my $f = $_;
@@ -132,6 +154,10 @@ sub _make_policy {
# subtypes for DataFlow::Proc::Converter ######################
enum 'ConversionDirection' => [ 'CONVERT_TO', 'CONVERT_FROM' ];
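+# coerce strings like 'to_csv' or 'from_xml' into the corresponding direction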
+coerce 'ConversionDirection' => from 'Str' => via {
+ return 'CONVERT_TO' if m/to_/i;
+ return 'CONVERT_FROM' if m/from_/i;
+};
subtype 'ConversionSubs' => as 'HashRef[CodeRef]' => where {
scalar( keys %{$_} ) == 2
@@ -211,14 +237,14 @@ processor object will be created wrapping it:
=head2 ProcessorList
-An ArrayRef of L<DataFlow::Proc> objects, with at least one element.
+An ArrayRef of L<DataFlow::Role::Processor> objects, with at least one element.
=head3 Coercions
=head4 from ArrayRef
-Attempts to make DataFlow::Proc objects out of different things provided in
-an ArrayRef. It currently works for:
+Attempts to make C<DataFlow::Role::Processor> objects out of different things
+provided in an ArrayRef. It currently works for:
=begin :list
@@ -247,6 +273,48 @@ coercion section of the C<Processor> subtype above.
An ArrayRef will be created wrapping the processor, as described in the
coercion section of the C<Processor> subtype above.
+=head2 WrappedProcList
+
+An ArrayRef of L<DataFlow::ProcWrapper> objects, with at least one element.
+
+=head3 Coercions
+
+=head4 from ArrayRef
+
+Attempts to make C<DataFlow::ProcWrapper> objects out of different things
+provided in an ArrayRef. It currently works for:
+
+=begin :list
+
+* Str
+* ArrayRef
+* CodeRef
+* DataFlow::Role::Processor
+
+=end :list
+
+using the same rules as in the subtype C<Processor> described above and
+wrapping the resulting C<Processor> in a C<DataFlow::ProcWrapper> object.
+Anything else will trigger an error.
+
+=head4 from Str
+
+An ArrayRef will be created wrapping a named processor, as described in the
+coercion section of the C<Processor> subtype above and
+wrapping the resulting C<Processor> in a C<DataFlow::ProcWrapper> object.
+
+=head4 from CodeRef
+
+An ArrayRef will be created wrapping a processor, as described in the
+coercion section of the C<Processor> subtype above and
+wrapping the resulting C<Processor> in a C<DataFlow::ProcWrapper> object.
+
+=head4 from DataFlow::Role::Processor
+
+An ArrayRef will be created wrapping the processor, as described in the
+coercion section of the C<Processor> subtype above and
+wrapping the resulting C<Processor> in a C<DataFlow::ProcWrapper> object.
+
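+For example, a sketch relying on the ArrayRef coercion described above:
+
+  my $flow = DataFlow->new( procs => [ 'UC', sub { scalar reverse } ] );
+  # both elements end up wrapped in DataFlow::ProcWrapper objects
+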
=head2 ProcessorSub
A CodeRef, with coercions.
88 scripts/portaltransparencia/ceis.pl
@@ -49,54 +49,51 @@ package main;
use DataFlow;
use Encode;
-use Data::Dumper;
-my $flow = DataFlow->new(
+my $flow = dataflow(
+ CeisPages->new( first_page => -5, deref => 1 ),
+ 'URLRetriever',
[
- CeisPages->new( first_page => -5, deref => 1 ),
- 'URLRetriever',
- [
- HTMLFilter => {
- search_xpath =>
- '//div[@id="listagemEmpresasSancionadas"]/table/tbody/tr',
- }
- ],
- [
- HTMLFilter => {
- search_xpath => '//td',
- result_type => 'VALUE',
- ref_result => 1,
- }
- ],
- sub { # remove leading and trailing spaces
- s/^\s*//;
- s/\s*$//;
- s/[\r\n\t]+/ /g;
- s/\s\s+/ /g;
- return $_;
- },
- sub {
- my $internal = decode( "iso-8859-1", $_ );
- return encode( "utf8", $internal );
- },
- [ NOP => { name => 'espiando', dump_output => 1, } ],
- [
- CSV => {
- name => 'csv',
- direction => 'CONVERT_TO',
- converter_opts => { binary => 1, },
- headers => [
- 'CNPJ/CPF', 'Nome/Razão Social/Nome Fantasia',
- 'Tipo', 'Data Inicial',
- 'Data Final', 'Nome do Órgão/Entidade',
- 'UF', 'Fonte',
- 'Data'
- ],
- dump_output => 1,
- }
- ],
- [ SimpleFileOutput => { file => '> /tmp/ceis.csv', ors => "\n" } ],
+ HTMLFilter => {
+ search_xpath =>
+ '//div[@id="listagemEmpresasSancionadas"]/table/tbody/tr',
+ }
],
+ [
+ HTMLFilter => {
+ search_xpath => '//td',
+ result_type => 'VALUE',
+ ref_result => 1,
+ }
+ ],
+ sub { # remove leading and trailing spaces
+ s/^\s*//;
+ s/\s*$//;
+ s/[\r\n\t]+/ /g;
+ s/\s\s+/ /g;
+ return $_;
+ },
+ sub {
+ my $internal = decode( "iso-8859-1", $_ );
+ return encode( "utf8", $internal );
+ },
+ [ NOP => { name => 'espiando', dump_output => 1, } ],
+ [
+ CSV => {
+ name => 'csv',
+ direction => 'CONVERT_TO',
+ converter_opts => { binary => 1, },
+ headers => [
+ 'CNPJ/CPF', 'Nome/Razão Social/Nome Fantasia',
+ 'Tipo', 'Data Inicial',
+ 'Data Final', 'Nome do Órgão/Entidade',
+ 'UF', 'Fonte',
+ 'Data'
+ ],
+ dump_output => 1,
+ }
+ ],
+ [ SimpleFileOutput => { file => '> /tmp/ceis.csv', ors => "\n" } ]
);
##############################################################################
@@ -108,5 +105,6 @@ package main;
my @res = $flow->flush;
+#use Data::Dumper;
#print Dumper(\@res);
9 t/01-dataflow.t
@@ -1,4 +1,4 @@
-use Test::More tests => 22;
+use Test::More tests => 25;
use strict;
@@ -47,3 +47,10 @@ ok( !defined( $f->output ) );
$f->flush;
ok( !$f->output );
+$f->input(qw/aaa bbb ccc ddd/);
+is( $f->has_queued_data, 4 );
+$f->output;
+is( $f->has_queued_data, 3 );
+$f->flush;
+is( $f->has_queued_data, 0 );
+
4 t/02-dataflow-init-sub.t
@@ -5,8 +5,8 @@ use DataFlow;
my $flow = DataFlow->new( [ sub { uc } ] );
ok( $flow, 'Can construct a dataflow from a bare sub' );
is( $flow->process('aaa'), 'AAA', 'and it provides the correct result' );
-@data = qw/a1 b2 c3 d4 e5 f6 g7 h8 i9 j0/;
-@res = $flow->process(@data);
+my @data = qw/a1 b2 c3 d4 e5 f6 g7 h8 i9 j0/;
+my @res = $flow->process(@data);
is( scalar @res, 10, 'result has the right size' );
is_deeply( \@res, [qw/A1 B2 C3 D4 E5 F6 G7 H8 I9 J0/], 'has the right data' );
49 t/02-dataflow-init-sugar.t
@@ -0,0 +1,49 @@
+use Test::More tests => 12;
+
+use strict;
+
+use DataFlow;
+use DataFlow::Proc;
+
+# each call = 2 tests
+sub test_uc_with {
+ my $flow = dataflow @_;
+ ok( $flow, q{test_uc_with(} . join( q{,}, @_ ) . q{)} );
+ my @res = $flow->process('abcdef');
+ is( $res[0], 'ABCDEF', '...and returns the right value' );
+}
+
+my $uc = sub { uc };
+my $proc = DataFlow::Proc->new( p => $uc );
+my $flow = DataFlow->new( procs => [$proc] );
+my $nested = DataFlow->new( [$flow] );
+
+# proc
+test_uc_with($proc);
+
+# code
+test_uc_with($uc);
+
+# flow
+test_uc_with($flow);
+
+# nested
+test_uc_with($nested);
+
+# string
+test_uc_with('UC');
+
+# each call = 2 tests
+sub test_ucf_with {
+ my $flow = dataflow @_;
+ ok( $flow, q{test_ucf_with(} . join( q{,}, @_ ) . q{)} );
+ my @res = $flow->process('abcdef');
+ is( $res[0], 'Abcdef' );
+}
+
+my $ucfirst = sub { ucfirst };
+my @mix = ( $nested, $flow, $proc, 'UC', sub { lc }, $ucfirst );
+
+# mix
+test_ucf_with(@mix);
+
14 t/03-dataflow-channel.t
@@ -0,0 +1,14 @@
+use Test::More tests => 1;
+
+use strict;
+
+BEGIN {
+ use_ok('DataFlow');
+}
+
+# output_items
+
+# channel_input
+
+# different default channel
+
0  t/02-dataflow-inlinedproc.t → t/03-dataflow-inlinedproc.t
File renamed without changes
6 t/02-dataflow-many.t → t/03-dataflow-many.t
@@ -55,16 +55,10 @@ ok($flow2);
# tests: 2
$flow2->input( 'qwerty', 'yay' );
-#use Data::Dumper; diag( Dumper($chain) );
my $thirty = $flow2->output;
-
-#use Data::Dumper; diag( Dumper($thirty) );
ok( $thirty == 30 );
-#use Data::Dumper; diag( Dumper($chain2) );
my $fifteen = $flow2->output;
-
-#use Data::Dumper; diag( Dumper($fifteen) );
ok( $fifteen == 15 );
eval {
2  t/02-dataflow-procby.t → t/03-dataflow-procby.t
@@ -2,7 +2,7 @@ use Test::More tests => 10;
use DataFlow;
-$f = DataFlow->new(
+my $f = DataFlow->new(
[ sub { uc }, sub { scalar reverse }, sub { lc }, sub { scalar reverse }, ]
);
ok($f);
64 t/10-dataflow-item.t
@@ -0,0 +1,64 @@
+
+use Test::More tests => 28;
+
+use_ok('DataFlow::Item');
+
+my $item = DataFlow::Item->new;
+ok($item);
+
+# initial tests
+is_deeply( $item->metadata, {}, 'metadata is empty' );
+is_deeply( $item->channels, {}, 'channels is empty' );
+
+# metadata tests
+is( $item->set_metadata( 'testem', 123 ), $item, 'sets and returns $self' );
+is( $item->get_metadata('testem'), 123, 'gets the right metadata' );
+is_deeply( $item->metakeys, ['testem'] );
+
+is( $item->set_metadata( 'testem1', 'aaa' ), $item, 'sets and returns $self' );
+is_deeply(
+ $item->metadata,
+ {
+ 'testem' => 123,
+ 'testem1' => 'aaa',
+ }
+);
+
+# regular data tests
+is( $item->set_data( 'teste', 456 ), $item, 'sets and returns $self' );
+is( $item->get_data('teste'), 456, 'gets the right data' );
+is_deeply( $item->channel_list, ['teste'] );
+
+is( $item->set_data( 'teste1', 567 ), $item, 'sets and returns $self' );
+is( $item->set_data( 'teste2', 678 ), $item, 'sets and returns $self' );
+is_deeply(
+ $item->channels,
+ {
+ 'teste' => 456,
+ 'teste1' => 567,
+ 'teste2' => 678,
+ }
+);
+is( scalar( grep { $_ eq 'teste' } @{ $item->channel_list } ), 1 );
+is( scalar( grep { $_ eq 'teste1' } @{ $item->channel_list } ), 1 );
+is( scalar( grep { $_ eq 'teste2' } @{ $item->channel_list } ), 1 );
+
+# tests on narrow()
+my $narrow = $item->narrow('teste1');
+isa_ok( $narrow, 'DataFlow::Item' );
+is_deeply( $narrow->channels, { 'teste1' => 567, } );
+is_deeply( $narrow->metadata, $item->metadata );
+
+# tests on clone()
+my $clone = $item->clone('teste1');
+isa_ok( $clone, 'DataFlow::Item' );
+is_deeply( $clone->channels, $item->channels );
+is_deeply( $clone->metadata, $item->metadata );
+
+# tests on itemize
+my $ized = DataFlow::Item->itemize( 'teste1' => 567 );
+isa_ok( $ized, 'DataFlow::Item' );
+is_deeply( $ized->channel_list, ['teste1'] );
+is_deeply( $ized->channels, { 'teste1' => 567, } );
+is_deeply( $ized->metadata, {}, 'metadata is empty' );
+
53 t/10-proc-wrapper.t
@@ -0,0 +1,53 @@
+
+use Test::More tests => 24;
+
+use_ok('DataFlow::ProcWrapper');
+
+eval { my $fail = DataFlow::ProcWrapper->new };
+ok($@);
+
+my $wrapped = DataFlow::ProcWrapper->new( wraps => 'UC' );
+ok($wrapped);
+my @res = $wrapped->process('abc');
+is( scalar(@res), 1 );
+isnt( $res[0], 'ABC' );
+is( $res[0]->get_data('default'), 'ABC' );
+
+# non-raw input tests
+use DataFlow::Item;
+my $item = DataFlow::Item->new;
+isa_ok( $item, 'DataFlow::Item' );
+is( $item->set_data( 'teste123', 'something' ), $item );
+is_deeply( $wrapped->process($item), $item );
+
+is( $item->set_data( 'default', 'another one' ), $item );
+@res = $wrapped->process($item);
+is( scalar @res, 1 );
+
+is( $res[0]->get_data('teste123'), 'something' );
+is( $res[0]->get_data('default'), 'ANOTHER ONE' );
+
+# channel tests
+my $wrap_ch1 = DataFlow::ProcWrapper->new( wraps => 'UC', input_chan => 'ch1' );
+ok($wrap_ch1);
+@res = $wrap_ch1->process('abc');
+ok(@res);
+is( $res[0]->get_data('ch1'), 'ABC' );
+is_deeply( $res[0]->channels, { ch1 => 'ABC' } );
+
+@res = $wrap_ch1->process($item);
+ok(@res);
+is_deeply( $res[0]->channels, $item->channels );
+
+# multiple responses tests
+my $wrap_multi =
+ DataFlow::ProcWrapper->new(
+ wraps => sub { return ( 1 + $_, 2 * $_, sqrt($_) ) } );
+ok($wrap_multi);
+@res = $wrap_multi->process(34);
+is( scalar @res, 3 );
+
+is( $res[0]->get_data('default'), 35 );
+is( $res[1]->get_data('default'), 68 );
+is( $res[2]->get_data('default'), sqrt(34) );
+