Skip to content
Permalink
Browse files

fix misspell, docs, and some variable names.

  • Loading branch information...
suzuken committed Feb 13, 2016
1 parent ece7cbb commit 560bffcb9ae68e85ff0f6187427460817baacf77
Showing with 6 additions and 7 deletions.
  1. +1 −1 crawler.go
  2. +1 −2 doc.go
  3. +1 −1 extractor.go
  4. +2 −2 outputformatter.go
  5. +1 −1 tokenizer.go
@@ -19,7 +19,7 @@ type Crawler struct {
helper Helper
}

- // NewCrawler returns a crawler object initialised with the URL and the [optional] raw HTML body
+ // NewCrawler returns a crawler object initialized with the URL and the [optional] raw HTML body
func NewCrawler(config Configuration, url string, RawHTML string) Crawler {
return Crawler{
config: config,
3 doc.go
@@ -1,5 +1,5 @@
/*
- This is a golang port of "Goose" originaly licensed to Gravity.com
+ Package goose is a golang port of "Goose" originally licensed to Gravity.com
under one or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.
@@ -19,5 +19,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package goose
@@ -391,7 +391,7 @@ func (extr *ContentExtractor) updateNodeCount(node *goquery.Selection, addToCoun

//a lot of times the first paragraph might be the caption under an image so we'll want to make sure if we're going to
//boost a parent node that it should be connected to other paragraphs, at least for the first n paragraphs
- //so we'll want to make sure that the next sibling is a paragraph and has at least some substatial weight to it
+ //so we'll want to make sure that the next sibling is a paragraph and has at least some substantial weight to it
func (extr *ContentExtractor) isBoostable(node *goquery.Selection) bool {
stepsAway := 0
next := node.Next()
@@ -69,8 +69,8 @@ func (formatter *outputFormatter) linksToText() []string {
node.Type = html.TextNode
// save a list of URLs
url, _ := a.Attr("href")
- isValidUrl, _ := regexp.MatchString("^http[s]?://", url)
- if isValidUrl {
+ isValidURL, _ := regexp.MatchString("^http[s]?://", url)
+ if isValidURL {
urlList = append(urlList, url)
}
}
@@ -32,7 +32,7 @@ func (m *MultilangTokenizer) Tokenize(s string) []string {
t := kagomeTokenizer.New()
tokens := t.Tokenize(s)
// tokens contains BOS and EOS as token.
- // so length is caliculated by dispite of them.
+ // so length is caliculated by despite of them.
ret := make([]string, 0, len(tokens)-2)
for _, token := range tokens {
if token.Class != kagomeTokenizer.DUMMY {

0 comments on commit 560bffc

Please sign in to comment.
You can’t perform that action at this time.