Skip to content
This repository has been archived by the owner on Apr 17, 2018. It is now read-only.

Commit

Permalink
Small refactorings, added Store interface
Browse files Browse the repository at this point in the history
  • Loading branch information
nylar committed Jan 15, 2015
1 parent 2c240a9 commit 02c15e8
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 28 deletions.
4 changes: 2 additions & 2 deletions cli/wally/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ func SearchFunc(c *cli.Context) {
content := r.Content
if r.Title != "" {
wally.Info.Printf("\n%s", r.Title)
wally.Success.Printf("\n%s\n", r.Source)
wally.Success.Printf("\n%s\n", r.Document.ID)
} else {
wally.Success.Printf("\n%s\n", r.Source)
wally.Success.Printf("\n%s\n", r.Document.ID)
}
if len(r.Content) > 150 {
content = r.Content[:150] + " ..."
Expand Down
11 changes: 5 additions & 6 deletions crawl.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,11 @@ func Crawler(url string, session *rdb.Session) error {
author := odlaw.ExtractAuthor(doc)
content := odlaw.ExtractText(doc)

d := Document{
Source: url,
Title: title,
Author: author,
Content: content,
}
d := NewDocument(url)
d.Title = title
d.Author = author
d.Content = content

_ = d.Put(session)

indexes := Indexer(content, d.ID)
Expand Down
15 changes: 15 additions & 0 deletions db.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package wally

import rdb "github.com/dancannon/gorethink"

// Store is the interface for values that can write themselves to the
// database through a RethinkDB session.
type Store interface {
// Put persists the receiver using the given session and returns an
// error if the write fails.
Put(session *rdb.Session) error
}

// NewDocument allocates a Document whose ID is the given source string.
// Every other field is left at its zero value for the caller to fill in.
func NewDocument(source string) *Document {
	doc := new(Document)
	doc.ID = source
	return doc
}

// NewIndex allocates an Index for the given word and the ID of the document
// it belongs to. The Index's own ID is left empty.
func NewIndex(word, documentID string) *Index {
	idx := new(Index)
	idx.Word = word
	idx.DocumentID = documentID
	return idx
}
21 changes: 9 additions & 12 deletions indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"strings"
"sync"

"code.google.com/p/go-uuid/uuid"
rdb "github.com/dancannon/gorethink"
)

Expand Down Expand Up @@ -190,7 +189,6 @@ var stopWords = map[string]bool{
// Document holds data about a document, ID is usually populated with a UUID.
type Document struct {
ID string `gorethink:"id"`
Source string `gorethink:"source"`
Title string `gorethink:"title"`
Author string `gorethink:"author"`
Content string `gorethink:"content"`
Expand All @@ -203,10 +201,6 @@ func (d *Document) String() string {
// Put writes a single document to the database, if an ID isn't set
// then one is set as a UUID.
func (d *Document) Put(session *rdb.Session) error {
if d.ID == "" {
d.ID = uuid.New()
}

res, _ := rdb.Db(Conf.Database.Name).Table(Conf.Tables.DocumentTable).Insert(d).RunWrite(session)
if res.Errors > 0 {
return errors.New(res.FirstError)
Expand All @@ -222,6 +216,11 @@ type Index struct {
DocumentID string `gorethink:"document_id"`
}

// GenerateID sets the index's ID to "<DocumentID>::<Word>", so the same
// word/document pair always produces the same ID. Any ID already present is
// overwritten.
func (i *Index) GenerateID() {
	i.ID = fmt.Sprintf("%s::%s", i.DocumentID, i.Word)
}

// String renders the index as the literal prefix "Index#" followed by its ID.
func (i *Index) String() string {
	id := i.ID
	return fmt.Sprintf("Index#%s", id)
}
Expand All @@ -230,7 +229,7 @@ func (i *Index) String() string {
// is set as a UUID.
func (i *Index) Put(session *rdb.Session) error {
if i.ID == "" {
i.ID = uuid.New()
i.GenerateID()
}
res, _ := rdb.Db(Conf.Database.Name).Table(Conf.Tables.IndexTable).Insert(i).RunWrite(session)
if res.Errors > 0 {
Expand Down Expand Up @@ -320,11 +319,9 @@ func Indexer(text interface{}, documentID string) []Index {
// Apply stemming

// Append to normalised word list
normalisedWords = append(normalisedWords, Index{
ID: uuid.New(),
Word: word,
DocumentID: documentID,
})
index := NewIndex(word, documentID)
index.GenerateID()
normalisedWords = append(normalisedWords, *index)
}(word)
}

Expand Down
7 changes: 3 additions & 4 deletions indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func TestIndexer_IndexPut(t *testing.T) {
err = res.One(&i)
assert.Nil(t, err)

assert.NotEqual(t, i.ID, "")
assert.Equal(t, i.ID, "12345-67890-ABCDE::hello")
assert.Equal(t, i.Word, "hello")
assert.Equal(t, i.Count, 5)
assert.Equal(t, i.DocumentID, "12345-67890-ABCDE")
Expand Down Expand Up @@ -237,7 +237,7 @@ func TestIndexer_DocumentPut(t *testing.T) {
defer tearDbDown(session)

doc := Document{
Source: "www.google.com",
ID: "www.google.com",
Content: "Lorem ipsum dolor sit amet.",
}

Expand All @@ -251,8 +251,7 @@ func TestIndexer_DocumentPut(t *testing.T) {
err = res.One(&d)
assert.Nil(t, err)

assert.NotEqual(t, d.ID, "")
assert.Equal(t, d.Source, "www.google.com")
assert.Equal(t, d.ID, "www.google.com")
assert.Equal(t, d.Content, "Lorem ipsum dolor sit amet.")
}

Expand Down
6 changes: 2 additions & 4 deletions search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,14 @@ func tearDown() {

func SearchSetup() error {
d1 := Document{
ID: "1",
Source: "http://example.com",
ID: "http://example.com",
Title: "Examples, Examples Everywhere",
Author: "John Johnson",
Content: "This is an example of some example content remember though it's just an example",
}

d2 := Document{
ID: "2",
Source: "http://example.org",
ID: "http://example.org",
Title: "Help Abandoned Examples",
Author: "",
Content: "Save the example",
Expand Down

0 comments on commit 02c15e8

Please sign in to comment.