Take into account the Google spec's groups of records

commit f72d6f66769b75ff506d22477e53d885e4614379 (1 parent: 6bc0f4c)
@PuerkitoBio authored and committed
Showing with 263 additions and 173 deletions.
  1. +136 −63 parser.go
  2. +81 −40 robotstxt.go
  3. +14 −41 robotstxt_test.go
  4. +26 −23 scanner.go
  5. +6 −6 scanner_test.go
199 parser.go
@@ -1,40 +1,114 @@
package robotstxt
+// Comments explaining the logic are taken either from Google's spec:
+// https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt
+//
+// or from Wikipedia's entry on robots.txt:
+// http://en.wikipedia.org/wiki/Robots.txt
+
import (
"errors"
+ "fmt"
"io"
+ "regexp"
"strconv"
"strings"
)
+type lineType uint
+
+const (
+ lIgnore lineType = iota
+ lUnknown
+ lUserAgent
+ lAllow
+ lDisallow
+ lCrawlDelay
+ lSitemap
+)
+
type parser struct {
- tokens []string
- pos int
- agent string
- sitemaps []string
+ tokens []string
+ pos int
+}
+
+type lineInfo struct {
+ t lineType
+ k string
+ vs string
+ vf float64
+ vr *regexp.Regexp
}
-func NewParser(tokens []string) *Parser {
- return &Parser{tokens: tokens}
+func newParser(tokens []string) *parser {
+ return &parser{tokens: tokens}
}
-func (p *Parser) ParseAll() (result []Rule, err error) {
- var r *Rule
- err = nil
+func (p *parser) parseAll() (groups []*group, sitemaps []string, errs []error) {
+ var curGroup *group
+
+ // Reset internal fields, tokens are assigned at creation time, never change
+ p.pos = 0
+
+ // TODO : Two successive user-agent lines are part of the same group, so a group
+ // may apply to more than one user-agent!
+ // Re: Google's spec:
+ // There are three distinct groups specified, one for "a" and one for "b"
+ // as well as one for both "e" and "f".
+
for {
- r, err = p.ParseRule()
- if r != nil {
- result = append(result, *r)
+ if li, err := p.parseLine(); err != nil {
+ if err == io.EOF {
+ // Append the current group if any
+ if curGroup != nil {
+ groups = append(groups, curGroup)
+ }
+ break
+ }
+ errs = append(errs, err)
+ } else {
+ switch li.t {
+ case lUserAgent:
+ // End previous group
+ if curGroup != nil {
+ groups = append(groups, curGroup)
+ }
+ // Start new group
+ curGroup = &group{agent: li.vs}
+ case lDisallow:
+ // Error if no current group
+ if curGroup == nil {
+ errs = append(errs, errors.New(fmt.Sprintf("Disallow before User-agent at token #%d.", p.pos)))
+ } else {
+ curGroup.rules = append(curGroup.rules, &rule{li.vs, false, nil})
+ }
+ case lAllow:
+ // Error if no current group
+ if curGroup == nil {
+ errs = append(errs, errors.New(fmt.Sprintf("Allow before User-agent at token #%d.", p.pos)))
+ } else {
+ curGroup.rules = append(curGroup.rules, &rule{li.vs, true, nil})
+ }
+ case lSitemap:
+ sitemaps = append(sitemaps, li.vs)
+ case lCrawlDelay:
+ if curGroup == nil {
+ errs = append(errs, errors.New(fmt.Sprintf("Crawl-delay before User-agent at token #%d.", p.pos)))
+ } else {
+ curGroup.crawlDelay = li.vf
+ }
+ }
}
- if err == io.EOF {
- err = nil
- break
+ }
+ if len(errs) > 0 {
+ for _, e := range errs {
+ fmt.Printf("Error: %s\n", e.Error())
}
}
- return result, err
+ return
}
-func (p *Parser) ParseRule() (r *Rule, err error) {
+func (p *parser) parseLine() (li *lineInfo, err error) {
t1, ok1 := p.popToken()
if !ok1 {
// proper EOF
@@ -42,86 +116,85 @@ func (p *Parser) ParseRule() (r *Rule, err error) {
}
t2, ok2 := p.peekToken()
+ if !ok2 {
+ // EOF, no value associated with the token, so ignore token and return
+ if strings.Trim(t1, " \t\v\n\r") != "" {
+ return nil, errors.New(fmt.Sprintf(`Unexpected EOF at token #%d namely: "%s".`, p.pos, t1))
+ } else {
+ return nil, io.EOF
+ }
+ }
+
+ // Helper closure for all string-based tokens, common behaviour:
+ // - Consume t2 token
+ // - If empty, return an ignore line info
+ // - Otherwise return the specified line info
+ returnStringVal := func(t lineType) (*lineInfo, error) {
+ p.popToken()
+ if t2 != "" {
+ return &lineInfo{t: t, k: t1, vs: t2}, nil
+ }
+ return &lineInfo{t: lIgnore}, nil
+ }
+
+ // TODO : For paths, automatically add the starting "/", ignore a trailing "*",
+ // and manage wildcards within a path (turn into a pattern)
+
switch strings.ToLower(t1) {
case "\n":
// Don't consume t2 and continue parsing
- return nil, nil
+ return &lineInfo{t: lIgnore}, nil
case "user-agent", "useragent":
// From google's spec:
// Handling of <field> elements with simple errors / typos (eg "useragent"
// instead of "user-agent") is undefined and may be interpreted as correct
// directives by some user-agents.
- if !ok2 {
- // TODO: report error
- return nil, errors.New("Unexpected EOF at token #" + strconv.Itoa(p.pos) + " namely: \"" + t1 + "\"")
- }
- p.agent = t2
- p.popToken()
- // continue parsing
- return nil, nil
+ return returnStringVal(lUserAgent)
case "disallow":
- if p.agent == "" {
- // TODO: report error
- return nil, errors.New("Disallow before User-agent.")
- }
- p.popToken()
-
// From google's spec:
- // When no path is specified, the directive is ignored.
- if t2 != "" {
- return &Rule{Agent: p.agent, Uri: t2, Allow: false}, nil
- } else {
- return nil, nil
- }
+ // When no path is specified, the directive is ignored (so an empty Disallow
+ // CAN be an allow, since allow is the default. The actual result depends
+ // on the other rules in the group).
+ return returnStringVal(lDisallow)
case "allow":
- if p.agent == "" {
- // TODO: report error
- return nil, errors.New("Allow before User-agent.")
- }
- p.popToken()
// From google's spec:
// When no path is specified, the directive is ignored.
- if t2 != "" {
- return &Rule{Agent: p.agent, Uri: t2, Allow: true}, nil
- } else {
- return nil, nil
- }
+ return returnStringVal(lAllow)
case "sitemap":
// Non-group field, applies to the host as a whole, not to a specific user-agent
- if t2 != "" {
- p.sitemaps = append(p.sitemaps, t2)
- }
- p.popToken()
- return nil, nil
+ return returnStringVal(lSitemap)
case "crawl-delay", "crawldelay":
// From http://en.wikipedia.org/wiki/Robots_exclusion_standard#Nonstandard_extensions
// Several major crawlers support a Crawl-delay parameter, set to the
// number of seconds to wait between successive requests to the same server.
- if p.agent == "" {
- return nil, errors.New("Crawl-delay before User-agent.")
- }
p.popToken()
- // TODO : Continue here with crawl-delay...
+ if cd, e := strconv.ParseFloat(t2, 64); e != nil {
+ return nil, e
+ } else {
+ return &lineInfo{t: lCrawlDelay, k: t1, vf: cd}, nil
+ }
}
- return nil, errors.New("Unknown token: " + strconv.Quote(t1))
+ // Consume t2 token
+ //p.popToken()
+ return &lineInfo{t: lUnknown, k: t1}, nil
}
-func (p *Parser) popToken() (tok string, ok bool) {
- if p.pos >= len(p.tokens) {
- return "", false
+func (p *parser) popToken() (tok string, ok bool) {
+ tok, ok = p.peekToken()
+ if !ok {
+ return
}
- tok = p.tokens[p.pos]
p.pos++
return tok, true
}
-func (p *Parser) peekToken() (tok string, ok bool) {
+func (p *parser) peekToken() (tok string, ok bool) {
if p.pos >= len(p.tokens) {
return "", false
}
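
A minimal usage sketch, not part of this commit: it exercises the new internal parser from a hypothetical test in the same package. The token slice is hand-written for illustration (in the library, tokens come from the byte scanner's ScanAll), and the test name TestParseAllSketch is invented here.

package robotstxt

import "testing"

func TestParseAllSketch(t *testing.T) {
	// Hand-written tokens standing in for the scanner's output.
	tokens := []string{
		"User-agent", "*", "\n",
		"Disallow", "/private", "\n",
		"Crawl-delay", "1.5", "\n",
		"Sitemap", "http://example.com/sitemap.xml", "\n",
	}
	groups, sitemaps, errs := newParser(tokens).parseAll()
	if len(errs) != 0 {
		t.Fatalf("unexpected parse errors: %v", errs)
	}
	// One group for "*" holding a single disallow rule and the crawl delay;
	// the sitemap URL is collected globally, outside any group.
	if len(groups) != 1 || groups[0].agent != "*" || len(groups[0].rules) != 1 {
		t.Fatal("expected one group for * with one rule")
	}
	if groups[0].crawlDelay != 1.5 || len(sitemaps) != 1 {
		t.Fatal("expected crawl-delay 1.5 and one sitemap URL")
	}
}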
121 robotstxt.go
@@ -3,16 +3,20 @@
// with various extensions.
package robotstxt
+// Comments explaining the logic are taken from Google's spec:
+// https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt
+
import (
"bytes"
"errors"
+ "regexp"
"strings"
)
type RobotsData struct {
DefaultAgent string
// private
- groups []group
+ groups []*group
allowAll bool
disallowAll bool
sitemaps []string
@@ -20,13 +24,14 @@ type RobotsData struct {
type group struct {
agent string
- rules []rule
- crawlDelay uint
+ rules []*rule
+ crawlDelay float64
}
type rule struct {
- path string
- allow bool
+ path string
+ allow bool
+ pattern *regexp.Regexp
}
var allowAll = &RobotsData{allowAll: true}
@@ -42,7 +47,7 @@ func FromResponseBytes(statusCode int, body []byte, print_errors bool) (*RobotsD
// This is a "full allow" for crawling. Note: this includes 401
// "Unauthorized" and 403 "Forbidden" HTTP result codes.
case statusCode >= 400 && statusCode < 500:
- return AllowAll, nil
+ return allowAll, nil
case statusCode >= 200 && statusCode < 300:
return FromBytes(body, print_errors)
}
@@ -52,7 +57,7 @@ func FromResponseBytes(statusCode int, body []byte, print_errors bool) (*RobotsD
//
// Server errors (5xx) are seen as temporary errors that result in a "full
// disallow" of crawling.
- return DisallowAll, nil
+ return disallowAll, nil
}
func FromResponse(statusCode int, body string, print_errors bool) (*RobotsData, error) {
@@ -60,13 +65,15 @@ func FromResponse(statusCode int, body string, print_errors bool) (*RobotsData,
}
func FromBytes(body []byte, print_errors bool) (r *RobotsData, err error) {
+ var errs []error
+
// special case (probably not worth optimization?)
trimmed := bytes.TrimSpace(body)
if len(trimmed) == 0 {
- return AllowAll, nil
+ return allowAll, nil
}
- sc := NewByteScanner("bytes", false)
+ sc := newByteScanner("bytes", false)
sc.Quiet = !print_errors
sc.Feed(body, true)
var tokens []string
@@ -77,25 +84,28 @@ func FromBytes(body []byte, print_errors bool) (r *RobotsData, err error) {
// special case worth optimization
if len(tokens) == 0 {
- return AllowAll, nil
+ return allowAll, nil
}
r = &RobotsData{}
- parser := NewParser(tokens)
- r.rules, err = parser.ParseAll()
+ parser := newParser(tokens)
+ r.groups, r.sitemaps, errs = parser.parseAll()
+ if len(errs) > 0 {
+ return nil, errors.New("Parse error.")
+ }
- return r, err
+ return r, nil
}
func FromString(body string, print_errors bool) (r *RobotsData, err error) {
return FromBytes([]byte(body), print_errors)
}
-func (r *RobotsData) Test(url string) bool {
- return r.TestAgent(url, r.DefaultAgent)
+func (r *RobotsData) Test(path string) bool {
+ return r.TestAgent(path, r.DefaultAgent)
}
-func (r *RobotsData) TestAgent(url, agent string) (allow bool) {
+func (r *RobotsData) TestAgent(path, agent string) (allow bool) {
if r.allowAll {
return true
}
@@ -103,36 +113,67 @@ func (r *RobotsData) TestAgent(url, agent string) (allow bool) {
return false
}
- // optimistic
- allow = true
- for _, rule := range r.rules {
- if rule.MatchAgent(agent) && rule.MatchUrl(url) {
- allow = rule.Allow
- // stop on first disallow as safety default
- // in absense of better algorithm
- if !rule.Allow {
- break
- }
+ // Find a group of rules that applies to this agent
+ if g := r.findGroup(agent); g != nil {
+ // Find a rule that applies to this url
+ if r := g.findRule(path); r != nil {
+ return r.allow
}
}
- return allow
-}
-
-func (rule *Rule) MatchAgent(agent string) bool {
- l_agent := strings.ToLower(agent)
- l_rule_agent := strings.ToLower(rule.Agent)
- return rule.Agent == "*" || strings.HasPrefix(l_agent, l_rule_agent)
+ // From google's spec:
+ // By default, there are no restrictions for crawling for the designated crawlers.
+ return true
}
-func (rule *Rule) MatchUrl(url string) bool {
- return strings.HasPrefix(url, rule.Uri)
+// From google's spec:
+// Only one group of group-member records is valid for a particular crawler.
+// The crawler must determine the correct group of records by finding the group
+// with the most specific user-agent that still matches. All other groups of
+// records are ignored by the crawler. The user-agent is non-case-sensitive.
+// The order of the groups within the robots.txt file is irrelevant.
+func (r *RobotsData) findGroup(agent string) (ret *group) {
+ var prefixLen int
+
+ for _, g := range r.groups {
+ if g.agent == "*" && prefixLen == 0 {
+ // Weakest match possible
+ prefixLen = 1
+ ret = g
+ } else if strings.HasPrefix(agent, g.agent) {
+ if l := len(g.agent); l > prefixLen {
+ prefixLen = l
+ ret = g
+ }
+ }
+ }
+ return
}
-func (rule *Rule) String() string {
- allow_str := "Disallow"
- if rule.Allow {
- allow_str = "Allow"
+// From google's spec:
+// The path value is used as a basis to determine whether or not a rule applies
+// to a specific URL on a site. With the exception of wildcards, the path is
+// used to match the beginning of a URL (and any valid URLs that start with the
+// same path).
+//
+// At a group-member level, in particular for allow and disallow directives,
+// the most specific rule based on the length of the [path] entry will trump
+// the less specific (shorter) rule. The order of precedence for rules with
+// wildcards is undefined.
+func (g *group) findRule(path string) (ret *rule) {
+ var prefixLen int
+
+ for _, r := range g.rules {
+ if r.path == "/" && prefixLen == 0 {
+ // Weakest match possible
+ prefixLen = 1
+ ret = r
+ } else if strings.HasPrefix(path, r.path) {
+ if l := len(r.path); l > prefixLen {
+ prefixLen = l
+ ret = r
+ }
+ }
}
- return "<" + allow_str + " " + rule.Agent + " " + rule.Uri + ">"
+ return
}
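
A minimal sketch, not part of this commit, of the longest-prefix precedence that findGroup and findRule implement, seen through the public API. The robots.txt body, the agent name and the test name are invented for illustration.

package robotstxt

import "testing"

func TestPrecedenceSketch(t *testing.T) {
	r, err := FromString("User-agent: *\nDisallow: /admin\nAllow: /admin/public\n", false)
	if err != nil {
		t.Fatal(err.Error())
	}
	// The longer "/admin/public" rule wins over "/admin", so this path is allowed.
	if !r.TestAgent("/admin/public/index.html", "SomeBot") {
		t.Fatal("expected the more specific Allow rule to apply")
	}
	// Only the shorter "/admin" Disallow rule matches here.
	if r.TestAgent("/admin/secret", "SomeBot") {
		t.Fatal("expected the Disallow rule to apply")
	}
}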
55 robotstxt_test.go
@@ -20,10 +20,7 @@ func TestFromResponseBasic(t *testing.T) {
}
func _expectAllow(r *RobotsData, t *testing.T) bool {
- allow, err := r.TestAgent("/", "Somebot")
- if err != nil {
- t.Fatal("Unexpected error.")
- }
+ allow := r.TestAgent("/", "Somebot")
return allow
}
@@ -41,12 +38,12 @@ func ExpectDisallow(r *RobotsData, t *testing.T, msg string) {
func TestResponse401(t *testing.T) {
r, _ := FromResponse(401, "", true)
- ExpectDisallow(r, t, "FromResponse(401, \"\") MUST disallow everything.")
+ ExpectAllow(r, t, "FromResponse(401, \"\") MUST allow everything.")
}
func TestResponse403(t *testing.T) {
r, _ := FromResponse(403, "", true)
- ExpectDisallow(r, t, "FromResponse(403, \"\") MUST disallow everything.")
+ ExpectAllow(r, t, "FromResponse(403, \"\") MUST allow everything.")
}
func TestResponse404(t *testing.T) {
@@ -62,7 +59,7 @@ func TestFromStringBasic(t *testing.T) {
func TestFromStringEmpty(t *testing.T) {
r, _ := FromString("", true)
- if allow, err := r.TestAgent("/", "Somebot"); err != nil || !allow {
+ if allow := r.TestAgent("/", "Somebot"); !allow {
t.Fatal("FromString(\"\") MUST allow everything.")
}
}
@@ -78,10 +75,7 @@ func TestFromString001(t *testing.T) {
if err != nil {
t.Fatal(err.Error())
}
- allow, err1 := r.TestAgent("/foobar", "SomeAgent")
- if err1 != nil {
- t.Fatal(err1.Error())
- }
+ allow := r.TestAgent("/foobar", "SomeAgent")
if allow {
t.Fatal("Must deny.")
}
@@ -92,10 +86,7 @@ func TestFromString002(t *testing.T) {
if err != nil {
t.Fatal(err.Error())
}
- allow, err1 := r.TestAgent("/foobar", "SomeAgent")
- if err1 != nil {
- t.Fatal(err1.Error())
- }
+ allow := r.TestAgent("/foobar", "SomeAgent")
if !allow {
t.Fatal("Must allow.")
}
@@ -108,10 +99,7 @@ func TestFromString003(t *testing.T) {
if err != nil {
t.Fatal(err.Error())
}
- allow, err1 := r.TestAgent("/administrator/", "SomeBot")
- if err1 != nil {
- t.Fatal(err1.Error())
- }
+ allow := r.TestAgent("/administrator/", "SomeBot")
if allow {
t.Fatal("Must deny.")
}
@@ -122,10 +110,7 @@ func TestFromString004(t *testing.T) {
if err != nil {
t.Fatal(err.Error())
}
- allow, err1 := r.TestAgent("/paruram", "SomeBot")
- if err1 != nil {
- t.Fatal(err1.Error())
- }
+ allow := r.TestAgent("/paruram", "SomeBot")
if !allow {
t.Fatal("Must allow.")
}
@@ -156,10 +141,7 @@ func TestFromString006(t *testing.T) {
if err != nil {
t.Fatal(err.Error())
}
- allow, err1 := r.TestAgent("/search", "SomeBot")
- if err1 != nil {
- t.Fatal(err1.Error())
- }
+ allow := r.TestAgent("/search", "SomeBot")
if allow {
t.Fatal("Must deny.")
}
@@ -172,10 +154,7 @@ func TestFromString007(t *testing.T) {
if err != nil {
t.Fatal(err.Error())
}
- allow, err := r.TestAgent("/random", "SomeBot")
- if err != nil {
- t.Fatal(err.Error())
- }
+ allow := r.TestAgent("/random", "SomeBot")
if !allow {
t.Fatal("Must allow.")
}
@@ -186,12 +165,10 @@ const robots_text_004 = "User-Agent: * \nDisallow: "
func TestFromString008(t *testing.T) {
r, err := FromString(robots_text_004, true)
if err != nil {
+ t.Log(robots_text_004)
t.Fatal(err.Error())
}
- allow, err := r.TestAgent("/random", "SomeBot")
- if err != nil {
- t.Fatal(err.Error())
- }
+ allow := r.TestAgent("/random", "SomeBot")
if !allow {
t.Fatal("Must allow.")
}
@@ -205,9 +182,7 @@ Disallow: /`
func TestRobotstxtOrgCase1(t *testing.T) {
if r, err := FromString(robots_text_005, false); err != nil {
t.Fatal(err.Error())
- } else if allow, err := r.TestAgent("/path/page1.html", "SomeBot"); err != nil {
- t.Fatal(err.Error())
- } else if allow {
+ } else if allow := r.TestAgent("/path/page1.html", "SomeBot"); allow {
t.Fatal("Must disallow.")
}
}
@@ -215,9 +190,7 @@ func TestRobotstxtOrgCase1(t *testing.T) {
func TestRobotstxtOrgCase2(t *testing.T) {
if r, err := FromString(robots_text_005, false); err != nil {
t.Fatal(err.Error())
- } else if allow, err := r.TestAgent("/path/page1.html", "Googlebot"); err != nil {
- t.Fatal(err.Error())
- } else if !allow {
+ } else if allow := r.TestAgent("/path/page1.html", "Googlebot"); !allow {
t.Fatal("Must allow.")
}
}
49 scanner.go
@@ -8,30 +8,28 @@ import (
"unicode/utf8"
)
-type ByteScanner struct {
+type byteScanner struct {
ErrorCount int
Quiet bool
- buf []byte
- pos token.Position
- lastChunk bool
- ch rune
- //
- //state string
+ buf []byte
+ pos token.Position
+ lastChunk bool
+ ch rune
+ keyTokenFound bool
}
var WhitespaceChars = []rune{' ', '\t', '\v'}
-func NewByteScanner(srcname string, quiet bool) *ByteScanner {
- return &ByteScanner{
+func newByteScanner(srcname string, quiet bool) *byteScanner {
+ return &byteScanner{
Quiet: quiet,
ch: -1,
pos: token.Position{Filename: srcname},
- //state: "start",
}
}
-func (s *ByteScanner) Feed(input []byte, end bool) (bool, error) {
+func (s *byteScanner) Feed(input []byte, end bool) (bool, error) {
s.buf = input
s.pos.Offset = 0
s.pos.Line = 1
@@ -42,17 +40,16 @@ func (s *ByteScanner) Feed(input []byte, end bool) (bool, error) {
return false, nil
}
-func (s *ByteScanner) GetPosition() token.Position {
+func (s *byteScanner) GetPosition() token.Position {
return s.pos
}
-func (s *ByteScanner) Scan() (string, error) {
+func (s *byteScanner) Scan() (string, error) {
//println("--- Scan(). Offset / len(s.buf): ", s.pos.Offset, len(s.buf))
- bufsize := len(s.buf)
for {
// Note Offset > len, not >=, so we can Scan last character.
- if s.lastChunk && s.pos.Offset > bufsize {
+ if s.lastChunk && s.pos.Offset > len(s.buf) {
return "", io.EOF
}
@@ -64,6 +61,7 @@ func (s *ByteScanner) Scan() (string, error) {
// EOL
if s.isEol() {
+ s.keyTokenFound = false
// skip subsequent newline chars
for s.ch != -1 && s.isEol() {
s.nextChar()
@@ -74,6 +72,7 @@ func (s *ByteScanner) Scan() (string, error) {
// skip comments
if s.ch == '#' {
+ s.keyTokenFound = false
s.skipUntilEol()
// s.state = "start"
if s.ch == -1 {
@@ -96,9 +95,13 @@ func (s *ByteScanner) Scan() (string, error) {
tok := string(s.ch)
s.nextChar()
for s.ch != -1 && !s.isSpace() && !s.isEol() {
- if s.ch == ':' {
+ // Do not consider ":" to be a token separator if a first key token
+ // has already been found on this line (avoid cutting an absolute URL
+ // after the "http:")
+ if s.ch == ':' && !s.keyTokenFound {
// s.state = "pre-value"
s.nextChar()
+ s.keyTokenFound = true
break
}
@@ -108,7 +111,7 @@ func (s *ByteScanner) Scan() (string, error) {
return tok, nil
}
-func (s *ByteScanner) ScanAll() ([]string, error) {
+func (s *byteScanner) ScanAll() ([]string, error) {
var results []string
for {
t, err := s.Scan()
@@ -125,18 +128,18 @@ func (s *ByteScanner) ScanAll() ([]string, error) {
return results, nil
}
-func (s *ByteScanner) error(pos token.Position, msg string) {
+func (s *byteScanner) error(pos token.Position, msg string) {
s.ErrorCount++
if !s.Quiet {
fmt.Fprintf(os.Stderr, "robotstxt from %s: %s\n", pos.String(), msg)
}
}
-func (s *ByteScanner) isEol() bool {
+func (s *byteScanner) isEol() bool {
return s.ch == '\n' || s.ch == '\r'
}
-func (s *ByteScanner) isSpace() bool {
+func (s *byteScanner) isSpace() bool {
for _, r := range WhitespaceChars {
if s.ch == r {
return true
@@ -145,14 +148,14 @@ func (s *ByteScanner) isSpace() bool {
return false
}
-func (s *ByteScanner) skipSpace() {
+func (s *byteScanner) skipSpace() {
//println("--- string(ch): ", s.ch, ".")
for s.ch != -1 && s.isSpace() {
s.nextChar()
}
}
-func (s *ByteScanner) skipUntilEol() {
+func (s *byteScanner) skipUntilEol() {
//println("--- string(ch): ", s.ch, ".")
for s.ch != -1 && !s.isEol() {
s.nextChar()
@@ -164,7 +167,7 @@ func (s *ByteScanner) skipUntilEol() {
}
// Reads next Unicode char.
-func (s *ByteScanner) nextChar() (rune, error) {
+func (s *byteScanner) nextChar() (rune, error) {
//println("--- nextChar(). Offset / len(s.buf): ", s.pos.Offset, len(s.buf))
if s.pos.Offset >= len(s.buf) {
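
A minimal sketch, not part of this commit, of the effect of the new keyTokenFound flag: only the first ':' on a line acts as a key/value separator, so a Sitemap URL is no longer cut after "http:". The expected token list is inferred from the scanner code above; the test name and input are invented.

package robotstxt

import "testing"

func TestScanSitemapSketch(t *testing.T) {
	sc := newByteScanner("sketch", false)
	sc.Feed([]byte("Sitemap: http://example.com/sitemap.xml\n"), true)
	tokens, err := sc.ScanAll()
	if err != nil {
		t.Fatal(err.Error())
	}
	// The ':' inside the URL is not treated as a separator because
	// keyTokenFound was already set when the "Sitemap" key was read.
	if len(tokens) < 2 || tokens[1] != "http://example.com/sitemap.xml" {
		t.Fatalf("unexpected tokens: %q", tokens)
	}
}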
12 scanner_test.go
@@ -7,14 +7,14 @@ import (
)
func TestScan001(t *testing.T) {
- sc := NewByteScanner("test-001", false)
+ sc := newByteScanner("test-001", false)
if _, err := sc.Scan(); err == nil {
t.Fatal("Empty ByteScanner should fail on Scan.")
}
}
func TestScan002(t *testing.T) {
- sc := NewByteScanner("test-002", false)
+ sc := newByteScanner("test-002", false)
sc.Feed([]byte("foo"), true)
_, err := sc.Scan()
//print("---", tok, err)
@@ -24,7 +24,7 @@ func TestScan002(t *testing.T) {
}
func TestScan004(t *testing.T) {
- sc := NewByteScanner("test-004", false)
+ sc := newByteScanner("test-004", false)
sc.Feed([]byte("\u2010"), true)
_, err := sc.Scan()
//println("---", tok, err)
@@ -34,7 +34,7 @@ func TestScan004(t *testing.T) {
}
func TestScan005(t *testing.T) {
- sc := NewByteScanner("test-005", true)
+ sc := newByteScanner("test-005", true)
sc.Feed([]byte("\xd9\xd9"), true)
_, err := sc.Scan()
//println("---", tok, err)
@@ -47,7 +47,7 @@ func TestScan005(t *testing.T) {
}
func TestScan006(t *testing.T) {
- sc := NewByteScanner("test-006", false)
+ sc := newByteScanner("test-006", false)
s := "# comment \r\nSomething: Somewhere\r\n"
sc.Feed([]byte(s), true)
tokens, err := sc.ScanAll()
@@ -64,7 +64,7 @@ func TestScan006(t *testing.T) {
}
func TestScan007(t *testing.T) {
- sc := NewByteScanner("test-007", false)
+ sc := newByteScanner("test-007", false)
s := "# comment \r\n# more comments\n\nDisallow:\r"
sc.Feed([]byte(s), true)
tokens, err := sc.ScanAll()