Skip to content


Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tag: RELEASE_0_5_3
Fetching contributors…

Cannot retrieve contributors at this time

220 lines (175 sloc) 5.529 kb
# Copyright (C) 2007-2008, The Perl Foundation.
# $Id$
=head1 NAME
PCT::Grammar - base grammar with useful rules
grammar MyGrammar is PCT::Grammar;
rule abc { [ word | <panic: word not found> ] }
rule quote {
[ \' <string_literal: '> \'
| \" <string_literal: "> \"
This file implements C<PCT::Grammar>, which is a basic grammar object
with a few useful methods for parsing thrown in.
=head2 Methods
=over 4
=item panic(match [, message, ...] )
Throws an exception at the current point of the match, with message
as part of the exception payload. The message doesn't end with
a newline, then the line number and offset of the match are
also included.
.namespace [ 'PCT::Grammar' ]
.sub 'onload' :anon :init :load
load_bytecode 'PGE.pbc'
load_bytecode 'PGE/Util.pbc'
$P0 = subclass 'PGE::Grammar', 'PCT::Grammar'
$P1 = get_hll_global ['PGE::Util'], 'die'
$P0.'add_method'('panic', $P1)
.return ()
=item get_scalar()
Here we overload the get_scalar() method from PGE::Match to
throw an exception if a result object hasn't been set.
.sub 'get_scalar' :method
.local pmc obj
obj = getattribute self, '$!result'
unless null obj goto end
die "No result object"
.return (obj)
=item ww()
Special-purpose rule to return true if we're in the middle
of a word -- i.e., if the previous and next character are
both "word characters". This is roughly equivalent to
C<< <?after \w><?before \w> >> except it's much quicker.
In particular, C<< <!ww> >> can be used by :sigspace rules
to enforce whitespace between lexical words.
.include 'cclass.pasm'
.sub 'ww' :method
.local pmc mob
.local int pos
.local string target
$P0 = get_hll_global ['PGE'], 'Match'
(mob, pos, target) = $P0.'new'(self)
if pos == 0 goto fail
$I0 = is_cclass .CCLASS_WORD, target, pos
unless $I0 goto fail
$I1 = pos - 1
$I0 = is_cclass .CCLASS_WORD, target, $I1
unless $I0 goto fail
.return (mob)
.sub 'string_literal' :method
.param string stop
.param pmc adverbs :slurpy :named
## create a new match object, get the new match position
.local pmc mob
.local int pos, lastpos, stoplen
.local string target, escapechars
(mob, pos, target) = self.'new'(self)
lastpos = length target
stoplen = length stop
$S0 = substr stop, 0, 1
escapechars = concat "\\", $S0
## leave space for close delimiter
lastpos -= stoplen
## now initialize and loop through target
.local string literal, litchar
literal = ''
## if we're beyond the last possible position, fail
if pos > lastpos goto fail
## if ending delimiter, then we're done
$S0 = substr target, pos, stoplen
if $S0 == stop goto literal_end
if pos >= lastpos goto fail
## get next character in literal
litchar = substr target, pos, 1
inc pos
## add non-escape characters to literal
if litchar != "\\" goto add_litchar
## look at the next character, if it's always escaped, add it and
## move on
.local string escaped
escaped = substr target, pos, 1
$I0 = index escapechars, escaped
if $I0 < 0 goto interpolated_escape
inc pos
literal .= escaped
goto literal_loop
## if not double-quoted delim, no interpolation
if stop != '"' goto add_litchar
litchar = escaped
inc pos
$I0 = index "abefnrt0xdo", litchar
if $I0 < 0 goto add_litchar
## if it's one of "xdo", then handle that specially
if $I0 >= 8 goto literal_xdo
litchar = substr "\a\b\e\f\n\r\t\0", $I0, 1
goto add_litchar
## handle \x, \d, and \o escapes. start by converting
## the 'o', 'd', or 'x' into 8, 10, or 16 (yes, it's hack
## but it works). Then loop through the characters that
## follow to compute the integer value of the codepoint,
## and add that codepoint to our literal.
.local int base, codepoint, isbracketed
base = index ' o d x', litchar
codepoint = 0
$S0 = substr target, pos, 1
isbracketed = iseq $S0, '['
pos += isbracketed
$S0 = substr target, pos, 1
$I0 = index '0123456789abcdef', $S0
if $I0 < 0 goto literal_xdo_char_end
if $I0 >= base goto literal_xdo_char_end
codepoint *= base
codepoint += $I0
inc pos
goto literal_xdo_char_loop
$S1 = chr codepoint
## FIXME: RT#50092 -- codepoints 128-255 default to iso-8859-1
## encoding, we explicitly convert them into a unicode encoding
## before concatenating. Needed for systems w/o ICU (RT#39930).
if codepoint < 128 goto literal_xdo_char_end_1
$I0 = find_charset 'unicode'
trans_charset $S1, $I0
concat literal, $S1
unless isbracketed goto literal_xdo_end
if $S0 == ']' goto literal_xdo_end
if $S0 != ',' goto fail
inc pos
codepoint = 0
goto literal_xdo_char_loop
pos += isbracketed
goto literal_loop
literal .= litchar
goto literal_loop
.return (mob)
.return (mob)
# Local Variables:
# mode: pir
# fill-column: 100
# End:
# vim: expandtab shiftwidth=4 ft=pir:
Jump to Line
Something went wrong with that request. Please try again.