Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 379 lines (286 sloc) 11.212 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
package POE::Filter::Line;

use strict;
use POE::Filter;

use vars qw($VERSION @ISA);
$VERSION = '1.299'; # NOTE - Should be #.### (three decimal places)
@ISA = qw(POE::Filter);

use Carp qw(carp croak);

sub DEBUG () { 0 }

sub FRAMING_BUFFER () { 0 }
sub INPUT_REGEXP () { 1 }
sub OUTPUT_LITERAL () { 2 }
sub AUTODETECT_STATE () { 3 }

sub AUTO_STATE_DONE () { 0x00 }
sub AUTO_STATE_FIRST () { 0x01 }
sub AUTO_STATE_SECOND () { 0x02 }

#------------------------------------------------------------------------------

sub new {
  my $type = shift;

  croak "$type requires an even number of parameters" if @_ and @_ & 1;
  my %params = @_;

  croak "$type cannot have both Regexp and Literal line endings" if (
    defined $params{Regexp} and defined $params{Literal}
  );

  my ($input_regexp, $output_literal);
  my $autodetect = AUTO_STATE_DONE;

  # Literal newline for both incoming and outgoing. Every other known
  # parameter conflicts with this one.
  if (defined $params{Literal}) {
    croak "A defined Literal must have a nonzero length"
      unless defined($params{Literal}) and length($params{Literal});
    $input_regexp = quotemeta $params{Literal};
    $output_literal = $params{Literal};
    if (
      exists $params{InputLiteral} or # undef means something
      defined $params{InputRegexp} or
      defined $params{OutputLiteral}
    ) {
      croak "$type cannot have Literal with any other parameter";
    }
  }

  # Input and output are specified separately, then.
  else {

    # Input can be either a literal or a regexp. The regexp may be
    # compiled or not; we don't rightly care at this point.
    if (exists $params{InputLiteral}) {
      $input_regexp = $params{InputLiteral};

      # InputLiteral is defined. Turn it into a regexp and be done.
      # Otherwise we will autodetect it.
      if (defined($input_regexp) and length($input_regexp)) {
        $input_regexp = quotemeta $input_regexp;
      }
      else {
        $autodetect = AUTO_STATE_FIRST;
        $input_regexp = '';
      }

      croak "$type cannot have both InputLiteral and InputRegexp"
        if defined $params{InputRegexp};
    }
    elsif (defined $params{InputRegexp}) {
      $input_regexp = $params{InputRegexp};
      croak "$type cannot have both InputLiteral and InputRegexp"
        if defined $params{InputLiteral};
    }
    else {
      $input_regexp = "(\\x0D\\x0A?|\\x0A\\x0D?)";
    }

    if (defined $params{OutputLiteral}) {
      $output_literal = $params{OutputLiteral};
    }
    else {
      $output_literal = "\x0D\x0A";
    }
  }

  delete @params{qw(Literal InputLiteral OutputLiteral InputRegexp)};
  carp("$type ignores unknown parameters: ", join(', ', sort keys %params))
    if scalar keys %params;

  my $self = bless [
    '', # FRAMING_BUFFER
    $input_regexp, # INPUT_REGEXP
    $output_literal, # OUTPUT_LITERAL
    $autodetect, # AUTODETECT_STATE
  ], $type;

  DEBUG and warn join ':', @$self;

  $self;
}


#------------------------------------------------------------------------------
# get() is inherited from POE::Filter.

#------------------------------------------------------------------------------
# 2001-07-27 RCC: Add get_one_start() and get_one() to correct filter
# changing and make input flow control possible.

sub get_one_start {
  my ($self, $stream) = @_;

  DEBUG and do {
    my $temp = join '', @$stream;
    $temp = unpack 'H*', $temp;
    warn "got some raw data: $temp\n";
  };

  $self->[FRAMING_BUFFER] .= join '', @$stream;
}

# TODO There is a lot of code duplicated here. What can be done?

sub get_one {
  my $self = shift;

  # Process as many newlines an we can find.
  LINE: while (1) {

    # Autodetect is done, or it never started. Parse some buffer!
    unless ($self->[AUTODETECT_STATE]) {
      DEBUG and warn unpack 'H*', $self->[INPUT_REGEXP];
      last LINE
        unless $self->[FRAMING_BUFFER] =~ s/^(.*?)$self->[INPUT_REGEXP]//s;
      DEBUG and warn "got line: <<", unpack('H*', $1), ">>\n";

      return [ $1 ];
    }

    # Waiting for the first line ending. Look for a generic newline.
    if ($self->[AUTODETECT_STATE] & AUTO_STATE_FIRST) {
      last LINE
        unless $self->[FRAMING_BUFFER] =~ s/^(.*?)(\x0D\x0A?|\x0A\x0D?)//;

      my $line = $1;

      # The newline can be complete under two conditions. First: If
      # it's two characters. Second: If there's more data in the
      # framing buffer. Loop around in case there are more lines.
      if ( (length($2) == 2) or
           (length $self->[FRAMING_BUFFER])
         ) {
        DEBUG and warn "detected complete newline after line: <<$1>>\n";
        $self->[INPUT_REGEXP] = $2;
        $self->[AUTODETECT_STATE] = AUTO_STATE_DONE;
      }

      # The regexp has matched a potential partial newline. Save it,
      # and move to the next state. There is no more data in the
      # framing buffer, so we're done.
      else {
        DEBUG and warn "detected suspicious newline after line: <<$1>>\n";
        $self->[INPUT_REGEXP] = $2;
        $self->[AUTODETECT_STATE] = AUTO_STATE_SECOND;
      }

      return [ $line ];
    }

    # Waiting for the second line beginning. Bail out if we don't
    # have anything in the framing buffer.
    if ($self->[AUTODETECT_STATE] & AUTO_STATE_SECOND) {
      return [ ] unless length $self->[FRAMING_BUFFER];

      # Test the first character to see if it completes the previous
      # potentially partial newline.
      if (
        substr($self->[FRAMING_BUFFER], 0, 1) eq
        ( $self->[INPUT_REGEXP] eq "\x0D" ? "\x0A" : "\x0D" )
      ) {

        # Combine the first character with the previous newline, and
        # discard the newline from the buffer. This is two statements
        # for backward compatibility.
        DEBUG and warn "completed newline after line: <<$1>>\n";
        $self->[INPUT_REGEXP] .= substr($self->[FRAMING_BUFFER], 0, 1);
        substr($self->[FRAMING_BUFFER], 0, 1) = '';
      }
      elsif (DEBUG) {
        warn "decided prior suspicious newline is okay\n";
      }

      # Regardless, whatever is in INPUT_REGEXP is now a complete
      # newline. End autodetection, post-process the found newline,
      # and loop to see if there are other lines in the buffer.
      $self->[INPUT_REGEXP] = $self->[INPUT_REGEXP];
      $self->[AUTODETECT_STATE] = AUTO_STATE_DONE;
      next LINE;
    }

    die "consistency error: AUTODETECT_STATE = $self->[AUTODETECT_STATE]";
  }

  return [ ];
}

#------------------------------------------------------------------------------
# New behavior. First translate system newlines ("\n") into whichever
# newlines are supposed to be sent. Second, add a trailing newline if
# one doesn't already exist. Since the referenced output list is
# supposed to contain one line per element, we also do a split and
# join. Bleah. ... why isn't the code doing what the comment says?

sub put {
  my ($self, $lines) = @_;

  my @raw;
  foreach (@$lines) {
    push @raw, $_ . $self->[OUTPUT_LITERAL];
  }

  \@raw;
}

#------------------------------------------------------------------------------

sub get_pending {
  my $self = shift;
  return [ $self->[FRAMING_BUFFER] ] if length $self->[FRAMING_BUFFER];
  return undef;
}

1;

__END__

=head1 NAME

POE::Filter::Line - serialize and parse terminated records (lines)

=head1 SYNOPSIS

#!perl

use POE qw(Wheel::FollowTail Filter::Line);

POE::Session->create(
inline_states => {
_start => sub {
$_[HEAP]{tailor} = POE::Wheel::FollowTail->new(
Filename => "/var/log/system.log",
InputEvent => "got_log_line",
Filter => POE::Filter::Line->new(),
);
},
got_log_line => sub {
print "Log: $_[ARG0]\n";
}
}
);

POE::Kernel->run();
exit;

=head1 DESCRIPTION

POE::Filter::Line parses stream data into terminated records. The
default parser interprets newlines as the record terminator, and the
default serializer appends network newlines (CR/LF, or "\x0D\x0A") to
outbound records.

Record terminators are removed from the data POE::Filter::Line
returns.

POE::Filter::Line supports a number of other ways to parse lines.
Constructor parameters may specify literal newlines, regular
expressions, or that the filter should detect newlines on its own.

=head1 PUBLIC FILTER METHODS

POE::Filter::Line's new() method has some interesting parameters.

=head2 new

new() accepts a list of named parameters.

In all cases, the data interpreted as the record terminator is
stripped from the data POE::Filter::Line returns.

C<InputLiteral> may be used to parse records that are terminated by
some literal string. For example, POE::Filter::Line may be used to
parse and emit C-style lines, which are terminated with an ASCII NUL:

my $c_line_filter = POE::Filter::Line->new(
InputLiteral => chr(0),
OutputLiteral => chr(0),
);

C<OutputLiteral> allows a filter to put() records with a different
record terminator than it parses. This can be useful in applications
that must translate record terminators.

C<Literal> is a shorthand for the common case where the input and
output literals are identical. The previous example may be written
as:

my $c_line_filter = POE::Filter::Line->new(
Literal => chr(0),
);

An application can also allow POE::Filter::Line to figure out which
newline to use. This is done by specifying C<InputLiteral> to be
undef:

my $whichever_line_filter = POE::Filter::Line->new(
InputLiteral => undef,
OutputLiteral => "\n",
);

C<InputRegexp> may be used in place of C<InputLiteral> to recognize
line terminators based on a regular expression. In this example,
input is terminated by two or more consecutive newlines. On output,
the paragraph separator is "---" on a line by itself.

my $paragraph_filter = POE::Filter::Line->new(
InputRegexp => "([\x0D\x0A]{2,})",
OutputLiteral => "\n---\n",
);

=head1 PUBLIC FILTER METHODS

POE::Filter::Line has no additional public methods.

=head1 SEE ALSO

Please see L<POE::Filter> for documentation regarding the base
interface.

The SEE ALSO section in L<POE> contains a table of contents covering
the entire POE distribution.

=head1 BUGS

The default input newline parser is a regexp that has an unfortunate
race condition. First the regular expression:

/(\x0D\x0A?|\x0A\x0D?)/

While it quickly recognizes most forms of newline, it can sometimes
detect an extra blank line. This happens when a two-byte newline
character is broken between two reads. Consider this situation:

some stream dataCR
LFother stream data

The regular expression will see the first CR without its corresponding
LF. The filter will properly return "some stream data" as a line.
When the next packet arrives, the leading "LF" will be treated as the
terminator for a 0-byte line. The filter will faithfully return this
empty line.

B<It is advised to specify literal newlines or use the autodetect
feature in applications where blank lines are significant.>

=head1 AUTHORS & COPYRIGHTS

Please see L<POE> for more information about authors and contributors.

=cut

# rocco // vim: ts=2 sw=2 expandtab
# TODO - Edit.
Something went wrong with that request. Please try again.