Skip to content

michal-josef-spacek/Tag-Reader-Perl

Repository files navigation

NAME
     Tag::Reader::Perl - Parse SGML/HTML/XML by each "tag".

SYNOPSIS
     use Tag::Reader::Perl;

     my $obj = Tag::Reader::Perl->new;
     my @tokens = $obj->gettoken;
     $obj->set_file($file, $force);
     $obj->set_text($text, $force);

METHODS
  "new()"
     my $obj = Tag::Reader::Perl->new;

    Constructor.

    Returns instance of object.

  "gettoken"
     my @tokens = $obj->gettoken;

    Get parsed token.

    Returns structure defining parsed token in array context (see TOKEN
    STRUCTURE), e.g. <xml> → ('<xml>', 'xml', 1, 1).

    Returns parsed token in scalar context, e.g. <xml> → '<xml>'.

  "set_file"
     $obj->set_file($file, $force);

    Set file for parsing. If $force is present, replace any previously
    set text or file.

    Returns undef.

  "set_text"
     $obj->set_text($text, $force);

    Set text for parsing. If $force is present, replace any previously
    set text or file.

    Returns undef.

TOKEN STRUCTURE
     Structure contains 4 fields in array:
     - parsed data
     - tag type
     - line number
     - column number in line

     Tag types are:
     - '[\w:]+' - element name.
     - '/[\w:]+' - end of element name.
     - '!data' - data
     - '![cdata[' - cdata
     - '!--' - comment
     - '?\w+' - instruction
     - '![\w+' - conditional
     - '!attlist' - DTD attlist
     - '!element' - DTD element
     - '!entity' - DTD entity
     - '!notation' - DTD notation

ERRORS
     new():
             From Class::Utils::set_params():
                     Unknown parameter '%s'.

     set_text():
             Bad tag.
             Bad text.
             Cannot set new data if exists data.

     set_file():
             Bad tag.
             Bad file.
             Cannot set new data if exists data.
             Cannot open file '%s'.

EXAMPLE1
     use strict;
     use warnings;

     use Encode qw(decode_utf8 encode_utf8);
     use Tag::Reader::Perl;

     # Object.
     my $obj = Tag::Reader::Perl->new;

     # Example data.
     my $sgml = <<'END';
     <DOKUMENT> 
       <adresa stát="cs">
         <město>
         <ulice>Nová</ulice>
         <číslo>5</číslo>
       </adresa>
     </DOKUMENT>
     END

     # Set data to object.
     $obj->set_text(decode_utf8($sgml));

     # Tokenize.
     while (my @tag = $obj->gettoken) {
             print "[\n";
             print "\t[0]: '".encode_utf8($tag[0])."'\n";
             print "\t[1]: ".encode_utf8($tag[1])."\n";
             print "\t[2]: $tag[2]\n";
             print "\t[3]: $tag[3]\n";
             print "]\n";
     }

     # Output:
     # [
     #      [0]: '<DOKUMENT>'
     #      [1]: dokument
     #      [2]: 1
     #      [3]: 1
     # ]
     # [
     #      [0]: ' 
     #   '
     #      [1]: !data
     #      [2]: 1
     #      [3]: 11
     # ]
     # [
     #      [0]: '<adresa stát="cs">'
     #      [1]: adresa
     #      [2]: 2
     #      [3]: 3
     # ]
     # [
     #      [0]: '
     #     '
     #      [1]: !data
     #      [2]: 2
     #      [3]: 21
     # ]
     # [
     #      [0]: '<město>'
     #      [1]: město
     #      [2]: 3
     #      [3]: 5
     # ]
     # [
     #      [0]: '
     #     '
     #      [1]: !data
     #      [2]: 3
     #      [3]: 12
     # ]
     # [
     #      [0]: '<ulice>'
     #      [1]: ulice
     #      [2]: 4
     #      [3]: 5
     # ]
     # [
     #      [0]: 'Nová'
     #      [1]: !data
     #      [2]: 4
     #      [3]: 12
     # ]
     # [
     #      [0]: '</ulice>'
     #      [1]: /ulice
     #      [2]: 4
     #      [3]: 16
     # ]
     # [
     #      [0]: '
     #     '
     #      [1]: !data
     #      [2]: 4
     #      [3]: 24
     # ]
     # [
     #      [0]: '<číslo>'
     #      [1]: číslo
     #      [2]: 5
     #      [3]: 5
     # ]
     # [
     #      [0]: '5'
     #      [1]: !data
     #      [2]: 5
     #      [3]: 12
     # ]
     # [
     #      [0]: '</číslo>'
     #      [1]: /číslo
     #      [2]: 5
     #      [3]: 13
     # ]
     # [
     #      [0]: '
     #   '
     #      [1]: !data
     #      [2]: 5
     #      [3]: 21
     # ]
     # [
     #      [0]: '</adresa>'
     #      [1]: /adresa
     #      [2]: 6
     #      [3]: 3
     # ]
     # [
     #      [0]: '
     # '
     #      [1]: !data
     #      [2]: 6
     #      [3]: 12
     # ]
     # [
     #      [0]: '</DOKUMENT>'
     #      [1]: /dokument
     #      [2]: 7
     #      [3]: 1
     # ]
     # [
     #      [0]: '
     # '
     #      [1]: !data
     #      [2]: 7
     #      [3]: 12
     # ]

DEPENDENCIES
    Class::Utils, Error::Pure, Readonly.

SEE ALSO
    Tag::Reader
        Parse SGML/HTML/XML by each "tag".

    HTML::TagReader
        Perl extension module for reading html/sgml/xml files by tags.

REPOSITORY
    <https://github.com/michal-josef-spacek/Tag-Reader-Perl>

AUTHOR
    Michal Josef Špaček <mailto:skim@cpan.org>

    <http://skim.cz>

LICENSE AND COPYRIGHT
    © Michal Josef Špaček 2005-2021

    BSD 2-Clause License

VERSION
    0.03

About

Parse SGML/HTML/XML by each "tag".

Resources

License

Stars

Watchers

Forks

Packages

No packages published

Languages