Skip to content

Commit

Permalink
Ran go fmt for the purists...
Browse files Browse the repository at this point in the history
  • Loading branch information
mish15 committed Apr 29, 2014
1 parent bbe8aef commit b180467
Show file tree
Hide file tree
Showing 2 changed files with 190 additions and 281 deletions.
162 changes: 80 additions & 82 deletions fuzzy.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package fuzzy

import(
"fmt"
"os"
import (
"bufio"
"strings"
"log"
"encoding/json"
"fmt"
"log"
"os"
"regexp"
"strings"
"sync"
)

Expand All @@ -17,18 +17,18 @@ type Pair struct {
}

// Potential is one candidate correction for an input term, as built by
// suggestPotential.
//
//	term   - the candidate word
//	score  - the candidate's score as returned by model.score
//	leven  - Levenshtein distance between the input and the candidate
//	         (0 when the candidate is the input itself)
//	method - which of the cascading lookups produced the candidate; see
//	         the values listed on the field
type Potential struct {
term string
score int
leven int
method int // 0 - is word, 1 - suggest maps to input, 2 - input delete maps to dictionary, 3 - input delete maps to suggest
term string
score int
leven int
method int // 0 - is word, 1 - suggest maps to input, 2 - input delete maps to dictionary, 3 - input delete maps to suggest
}

// Model is the spell-checking state. Its exported fields carry JSON tags
// and it embeds a sync.RWMutex (the locking sites are outside this view).
//
//	Data      - integer value per term; presumably word -> occurrence
//	            count (TODO confirm against score/TrainWord)
//	Maxcount  - NOTE(review): use not visible here; presumably the highest
//	            count recorded in Data
//	Suggest   - maps a deletion-edit key to the terms that produce that key
//	Depth     - how many character differences are indexed
//	Threshold - a term's score must exceed this value for the term to be
//	            accepted as a dictionary word
type Model struct {
Data map[string]int `json:"data"`
Maxcount int `json:"maxcount"`
Suggest map[string][]string `json:"suggest"`
Depth int `json:"depth"`
Threshold int `json:"threshold"`
Data map[string]int `json:"data"`
Maxcount int `json:"maxcount"`
Suggest map[string][]string `json:"suggest"`
Depth int `json:"depth"`
Threshold int `json:"threshold"`
sync.RWMutex
}

Expand Down Expand Up @@ -83,7 +83,6 @@ func Load(filename string) (*Model, error) {
return model, nil
}


// Change the default depth value of the model. This sets how many
// character differences are indexed. The default is 2.
func (model *Model) SetDepth(val int) {
Expand All @@ -102,36 +101,36 @@ func (model *Model) SetThreshold(val int) {

// Levenshtein returns the edit distance between *a and *b, where an
// insertion, a deletion and a substitution each cost 1.
//
// The strings are compared byte by byte (len and indexing on a Go string
// are byte-based), so a multi-byte UTF-8 character counts as several
// positions. Only one row of len(*a)+1 ints is kept; it is overwritten in
// place for every byte of *b.
func Levenshtein(a, b *string) int {
la := len(*a)
lb := len(*b)
d := make([]int, la + 1)
var lastdiag, olddiag, temp int

for i := 1; i <= la; i++ {
d[i] = i
}
for i := 1; i <= lb; i++ {
d[0] = i
lastdiag = i - 1
for j := 1; j <= la; j++ {
olddiag = d[j]
min := d[j] + 1
if (d[j - 1] + 1) < min {
min = d[j - 1] + 1
}
if ( (*a)[j - 1] == (*b)[i - 1] ) {
temp = 0
} else {
temp = 1
}
if (lastdiag + temp) < min {
min = lastdiag + temp
}
d[j] = min
lastdiag = olddiag
}
}
return d[la]
la := len(*a)
lb := len(*b)
d := make([]int, la+1)
var lastdiag, olddiag, temp int

// Initial row: distance from the empty prefix of *b to each prefix of *a.
for i := 1; i <= la; i++ {
d[i] = i
}
for i := 1; i <= lb; i++ {
d[0] = i
lastdiag = i - 1 // what d[0] held in the previous row
for j := 1; j <= la; j++ {
olddiag = d[j] // previous-row value; becomes the diagonal for j+1
// Cost of arriving from the cell above (previous row).
min := d[j] + 1
// Cost of arriving from the cell to the left (current row).
if (d[j-1] + 1) < min {
min = d[j-1] + 1
}
// Diagonal step: free when the bytes match, 1 for a substitution.
if (*a)[j-1] == (*b)[i-1] {
temp = 0
} else {
temp = 1
}
if (lastdiag + temp) < min {
min = lastdiag + temp
}
d[j] = min
lastdiag = olddiag
}
}
// The last cell holds the distance between the full strings.
return d[la]
}

// Add an array of words to train the model in bulk
Expand All @@ -141,7 +140,7 @@ func (model *Model) Train(terms []string) {
}
}

// Manually set the count of a word. Optionally trigger the
// Manually set the count of a word. Optionally trigger the
// creation of suggestion keys for the term. This function lets
// you build a model from an existing dictionary with word popularity
// counts without needing to run "TrainWord" repeatedly
Expand Down Expand Up @@ -184,7 +183,7 @@ func (model *Model) createSuggestKeys(term string) {
if !skip && len(edit) > 1 {
model.Suggest[edit] = append(model.Suggest[edit], term)
}
}
}
}

// Edits at any depth for a given term. The depth of the model is used
Expand All @@ -208,23 +207,23 @@ func (model *Model) EditsMulti(term string, depth int) []string {
// Edits1 returns every string obtained by deleting a single byte from
// word, one per position, followed by word itself (the last split has an
// empty remainder, so nothing is removed). The result has len(word)+1
// entries and may contain duplicates, e.g. for repeated letters.
//
// Splitting is by byte index, so multi-byte UTF-8 characters are not kept
// intact.
func Edits1(word string) []string {

splits := []Pair{}
for i := 0; i <= len(word); i++ {
splits = append(splits, Pair{word[:i], word[i:]})
}
// Build every (prefix, remainder) split of word, including the two
// with an empty side.
// NOTE(review): assumes Pair declares str1 before str2 — the definition is not visible here.
splits := []Pair{}
for i := 0; i <= len(word); i++ {
splits = append(splits, Pair{word[:i], word[i:]})
}

total_set := []string{}
for _, elem := range splits {
total_set := []string{}
for _, elem := range splits {

//deletion
if len(elem.str2) > 0 {
total_set = append(total_set, elem.str1+elem.str2[1:])
} else {
total_set = append(total_set, elem.str1)
}
// deletion: drop the first byte of the remainder
if len(elem.str2) > 0 {
total_set = append(total_set, elem.str1+elem.str2[1:])
} else {
// remainder is empty, so str1 is the whole word; keep it unchanged
total_set = append(total_set, elem.str1)
}

}
return total_set
}
return total_set
}

func (model *Model) score(input string) int {
Expand All @@ -249,7 +248,7 @@ func best(input string, potential map[string]*Potential) string {
if pot.term[0] == input[0] {
bestcalc += bestcalc * 100
}

best = pot.term
}
}
Expand All @@ -262,8 +261,8 @@ func best(input string, potential map[string]*Potential) string {
return best
}

// Test an input, if we get it wrong, look at why it is wrong. This
// function returns a bool indicating if the guess was correct as well
// Test an input; if we get it wrong, look at why it is wrong. This
// function returns a bool indicating whether the guess was correct;
// the suggested term itself is not returned. Typically this function
// would be used for testing, not for production.
func (model *Model) CheckKnown(input string, correct string) bool {
Expand All @@ -277,7 +276,7 @@ func (model *Model) CheckKnown(input string, correct string) bool {
return true
}
if pot, ok := suggestions[correct]; !ok {

if model.score(correct) > 0 {
fmt.Printf("\"%v\" - %v (%v) not in the suggestions. (%v) best option.\n", input, correct, model.score(correct), best)
for _, sugg := range suggestions {
Expand All @@ -292,7 +291,6 @@ func (model *Model) CheckKnown(input string, correct string) bool {
return false
}


// For a given input term, suggest some alternatives. If exhaustive, each of the 4
// cascading checks will be performed and all potentials will be sorted accordingly
func (model *Model) suggestPotential(input string, exhaustive bool) map[string]*Potential {
Expand All @@ -301,7 +299,7 @@ func (model *Model) suggestPotential(input string, exhaustive bool) map[string]*

// 0 - If this is a dictionary term we're all good, no need to go further
if model.score(input) > model.Threshold {
suggestions[input] = &Potential{term : input, score : model.score(input), leven : 0, method : 0}
suggestions[input] = &Potential{term: input, score: model.score(input), leven: 0, method: 0}
if !exhaustive {
return suggestions
}
Expand All @@ -311,7 +309,7 @@ func (model *Model) suggestPotential(input string, exhaustive bool) map[string]*
if sugg, ok := model.Suggest[input]; ok {
for _, pot := range sugg {
if _, ok := suggestions[pot]; !ok {
suggestions[pot] = &Potential{term : pot, score : model.score(pot), leven : Levenshtein(&input, &pot), method : 1}
suggestions[pot] = &Potential{term: pot, score: model.score(pot), leven: Levenshtein(&input, &pot), method: 1}
}
}

Expand All @@ -325,11 +323,11 @@ func (model *Model) suggestPotential(input string, exhaustive bool) map[string]*
edits := model.EditsMulti(input, model.Depth)
for _, edit := range edits {
score := model.score(edit)
if score > 0 && len(edit) > 2 {
if score > 0 && len(edit) > 2 {
if _, ok := suggestions[edit]; !ok {
suggestions[edit] = &Potential{term : edit, score : score, leven : Levenshtein(&input, &edit), method : 2}
suggestions[edit] = &Potential{term: edit, score: score, leven: Levenshtein(&input, &edit), method: 2}
}
if (score > max) {
if score > max {
max = score
}
}
Expand All @@ -349,9 +347,9 @@ func (model *Model) suggestPotential(input string, exhaustive bool) map[string]*
// Is this a real transpose or replace?
for _, pot := range sugg {
lev := Levenshtein(&input, &pot)
if lev <= model.Depth + 1 { // The +1 doesn't seem to impact speed, but has greater coverage when the depth is not sufficient to make suggestions
if lev <= model.Depth+1 { // The +1 doesn't seem to impact speed, but has greater coverage when the depth is not sufficient to make suggestions
if _, ok := suggestions[pot]; !ok {
suggestions[pot] = &Potential{term : pot, score : model.score(pot), leven : lev, method : 3}
suggestions[pot] = &Potential{term: pot, score: model.score(pot), leven: lev, method: 3}
}
}
}
Expand Down Expand Up @@ -380,14 +378,14 @@ func (model *Model) SpellCheck(input string) string {
}

func SampleEnglish() []string {
var out []string
file, err := os.Open("data/big.txt")
if (err != nil) {
fmt.Println(err)
return out
}
reader := bufio.NewReader(file)
scanner := bufio.NewScanner(reader)
var out []string
file, err := os.Open("data/big.txt")
if err != nil {
fmt.Println(err)
return out
}
reader := bufio.NewReader(file)
scanner := bufio.NewScanner(reader)
scanner.Split(bufio.ScanLines)
// Count the words.
count := 0
Expand All @@ -406,4 +404,4 @@ func SampleEnglish() []string {
}

return out
}
}

0 comments on commit b180467

Please sign in to comment.