From 30b8263ac75226525d4e50493ab96b983cfb8120 Mon Sep 17 00:00:00 2001 From: mpppk Date: Mon, 1 Jul 2019 00:56:30 +0900 Subject: [PATCH] Display original words --- cmd/gen.go | 41 ++++++++++++++++------- lib/iroha.go | 46 +++++++++++++++----------- lib/katakana.go | 79 +++++++++++++++++++++++++++++++++----------- lib/katakana_test.go | 22 ++++++------ 4 files changed, 125 insertions(+), 63 deletions(-) diff --git a/cmd/gen.go b/cmd/gen.go index ef803c1..95013a9 100644 --- a/cmd/gen.go +++ b/cmd/gen.go @@ -1,6 +1,10 @@ package cmd import ( + "fmt" + "os" + "strings" + "github.com/mpppk/iroha/lib" "github.com/spf13/cobra" ) @@ -17,22 +21,35 @@ var genCmd = &cobra.Command{ panic(err) } - normalizedWords := lib.NormalizeKatakanaWords(words) - iroha := lib.NewIroha(normalizedWords) - iroha.Search() + iroha := lib.NewIroha(words) + irohaWordsList := iroha.Search() + for _, irohaWords := range irohaWordsList { + if ok, _ := IsValidIroha(irohaWords); !ok { + fmt.Fprintln(os.Stderr, "invalid result is returned", irohaWords) + os.Exit(1) + } + fmt.Println(irohaWords) + } + fmt.Println(len(irohaWordsList)) }, } -func init() { - rootCmd.AddCommand(genCmd) +func IsValidIroha(words []string) (bool, string) { + concatenatedWord := strings.Join(words, "") + n := lib.NormalizeKatakanaWord(concatenatedWord) + runes := []rune(n) - // Here you will define your flags and configuration settings. + if len(runes) != int(lib.KatakanaLen) { + return false, n + } - // Cobra supports Persistent Flags which will work for this command - // and all subcommands, e.g.: - // genCmd.PersistentFlags().String("foo", "", "A help for foo") + if lib.HasDuplicatedRune(n) { + return false, n + } - // Cobra supports local flags which will only run when this command - // is called directly, e.g.: - // genCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") + return true, n +} + +func init() { + rootCmd.AddCommand(genCmd) } diff --git a/lib/iroha.go b/lib/iroha.go index 990d391..2abbb2a 100644 --- a/lib/iroha.go +++ b/lib/iroha.go @@ -1,7 +1,6 @@ package lib import ( - "fmt" "math/bits" ) @@ -17,42 +16,49 @@ func NewIroha(words []string) *Iroha { } } -func (i *Iroha) Search() { +func (i *Iroha) Search() (wordStringsList [][]string) { katakanaAndWordBitsList := i.katakana.ListSortedKatakanaAndWordBits() - res, _ := i.searchByBits(katakanaAndWordBitsList, WordBits(0)) - fmt.Println(len(res)) + wordsList, _ := i.searchByBits(katakanaAndWordBitsList, WordBits(0)) + for _, words := range wordsList { + var wordStrings []string + for _, word := range words { + wordStrings = append(wordStrings, i.katakana.ToWord(word.Id)) + } + wordStringsList = append(wordStringsList, wordStrings) + } + return } -func (i *Iroha) searchByBits(katakanaAndWordBitsList []*KatakanaAndWordBits, remainKatakanaBits WordBits) ([][]WordBits, bool) { - if bits.OnesCount64(uint64(remainKatakanaBits)) == int(katakanaLen) { - return [][]WordBits{{}}, true +func (i *Iroha) searchByBits(katakanaBitsAndWords []*KatakanaBitsAndWords, remainKatakanaBits WordBits) ([][]*Word, bool) { + if bits.OnesCount64(uint64(remainKatakanaBits)) == int(KatakanaLen) { + return [][]*Word{{}}, true } - if len(katakanaAndWordBitsList) == 0 { + if len(katakanaBitsAndWords) == 0 { return nil, false } - katakanaAndWordBits := katakanaAndWordBitsList[0] - var irohaWordBitsLists [][]WordBits - for _, wordBits := range katakanaAndWordBits.WordBitsList { - if remainKatakanaBits.HasDuplicatedKatakana(wordBits) { + katakanaAndWordBits := katakanaBitsAndWords[0] + var irohaWordLists [][]*Word + for _, word := range katakanaAndWordBits.Words { + if remainKatakanaBits.HasDuplicatedKatakana(word.Bits) { continue } - newRemainKatakanaBits := remainKatakanaBits.Merge(wordBits) - if newIrohaWordBitsLists, ok := i.searchByBits(katakanaAndWordBitsList[1:], newRemainKatakanaBits); ok { - for _, newIrohaWordBitsList := range newIrohaWordBitsLists { - newIrohaWordBitsList = append(newIrohaWordBitsList, wordBits) - irohaWordBitsLists = append(irohaWordBitsLists, newIrohaWordBitsList) + newRemainKatakanaBits := remainKatakanaBits.Merge(word.Bits) + if newIrohaWordIdLists, ok := i.searchByBits(katakanaBitsAndWords[1:], newRemainKatakanaBits); ok { + for _, newIrohaWordList := range newIrohaWordIdLists { + newIrohaWordList = append(newIrohaWordList, word) + irohaWordLists = append(irohaWordLists, newIrohaWordList) } } } // どれも入れない場合 if remainKatakanaBits.has(katakanaAndWordBits.KatakanaBits) { - if otherIrohaWordBitsLists, ok := i.searchByBits(katakanaAndWordBitsList[1:], remainKatakanaBits); ok { - irohaWordBitsLists = append(irohaWordBitsLists, otherIrohaWordBitsLists...) + if otherIrohaWordBitsLists, ok := i.searchByBits(katakanaBitsAndWords[1:], remainKatakanaBits); ok { + irohaWordLists = append(irohaWordLists, otherIrohaWordBitsLists...) } } - return irohaWordBitsLists, len(irohaWordBitsLists) > 0 + return irohaWordLists, len(irohaWordLists) > 0 } diff --git a/lib/katakana.go b/lib/katakana.go index fa6d23b..343ac72 100644 --- a/lib/katakana.go +++ b/lib/katakana.go @@ -5,7 +5,12 @@ import ( "strings" ) +type WordId uint16 type WordBits uint64 +type Word struct { + Id WordId + Bits WordBits +} func (w WordBits) has(katakanaBits KatakanaBits) bool { return w&WordBits(katakanaBits) != 0 @@ -22,15 +27,16 @@ func (w WordBits) Merge(otherWordBits WordBits) WordBits { type KatakanaBits uint64 type KatakanaBitsMap map[rune]KatakanaBits type RKatakanaBitsMap map[KatakanaBits]rune -type WordBitsMap map[KatakanaBits][]WordBits +type WordByKatakanaMap map[KatakanaBits][]*Word +type WordMap map[WordId]string type WordCountMap map[KatakanaBits]int type KatakanaCount struct { katakanaBits KatakanaBits count int } -type KatakanaAndWordBits struct { +type KatakanaBitsAndWords struct { KatakanaBits KatakanaBits - WordBitsList []WordBits + Words []*Word } func (w WordCountMap) toSortedKatakanaBitsList() (katakanaBits []KatakanaBits) { @@ -61,36 +67,43 @@ func (w WordCountMap) toList() []*KatakanaCount { } type Katakana struct { - katakanaBitsMap KatakanaBitsMap - wordBitsMap WordBitsMap - wordCountMap WordCountMap + katakanaBitsMap KatakanaBitsMap + wordByKatakanaMap WordByKatakanaMap + wordMap WordMap + wordCountMap WordCountMap } -var katakanaLen = uint64(45) +var KatakanaLen = uint64(45) func NewKatakana(words []string) *Katakana { + normalizedWords, orgWords := NormalizeAndFilterKatakanaWords(words) katakana := &Katakana{ katakanaBitsMap: newKatakanaBitsMap(), + wordMap: toWordMap(orgWords), } - wordBitsList := katakana.loadWords(words) + wordBitsList := katakana.loadWords(normalizedWords) wordCountMap := countWordBitsFrequency(wordBitsList) katakana.wordCountMap = wordCountMap - katakana.wordBitsMap = katakana.createWordBitsMap(wordBitsList) + katakana.wordByKatakanaMap = katakana.createWordBitsMap(wordBitsList) return katakana } -func (k *Katakana) ListSortedKatakanaAndWordBits() (katakanaAndWordBitsList []*KatakanaAndWordBits) { +func (k *Katakana) ListSortedKatakanaAndWordBits() (katakanaAndWordBitsList []*KatakanaBitsAndWords) { katakanaBitsList := k.wordCountMap.toSortedKatakanaBitsList() for _, katakanaBits := range katakanaBitsList { - katakanaAndWordBitsList = append(katakanaAndWordBitsList, &KatakanaAndWordBits{ + katakanaAndWordBitsList = append(katakanaAndWordBitsList, &KatakanaBitsAndWords{ KatakanaBits: katakanaBits, - WordBitsList: k.wordBitsMap[katakanaBits], + Words: k.wordByKatakanaMap[katakanaBits], }) } return katakanaAndWordBitsList } +func (k *Katakana) ToWord(wordId WordId) string { + return k.wordMap[wordId] +} + func (k *Katakana) loadWords(words []string) (wordBits []WordBits) { for _, word := range words { wordBits = append(wordBits, k.toWordBits(word)) @@ -98,24 +111,35 @@ func (k *Katakana) loadWords(words []string) (wordBits []WordBits) { return wordBits } +func toWordMap(words []string) WordMap { + wordMap := WordMap{} + for wordId, word := range words { + wordMap[WordId(wordId)] = word + } + return wordMap +} + func (k *Katakana) toWordBits(word string) WordBits { return toWordBits(k.katakanaBitsMap, word) } -func (k *Katakana) createWordBitsMap(wordBitsList []WordBits) WordBitsMap { +func (k *Katakana) createWordBitsMap(wordBitsList []WordBits) WordByKatakanaMap { sortedKatakanaBitsList := k.wordCountMap.toSortedKatakanaBitsList() return newWordBitsMap(sortedKatakanaBitsList, wordBitsList) } -func newWordBitsMap(sortedKatakanaBits []KatakanaBits, wordBitsList []WordBits) WordBitsMap { +func newWordBitsMap(sortedKatakanaBits []KatakanaBits, wordBitsList []WordBits) WordByKatakanaMap { var newWordBitsList []WordBits copy(newWordBitsList, wordBitsList) - wordBitsMap := WordBitsMap{} - for _, wordBits := range wordBitsList { + wordBitsMap := WordByKatakanaMap{} + for wordId, wordBits := range wordBitsList { for _, katakanaBits := range sortedKatakanaBits { if wordBits.has(katakanaBits) { - wordBitsMap[katakanaBits] = append(wordBitsMap[katakanaBits], wordBits) + wordBitsMap[katakanaBits] = append(wordBitsMap[katakanaBits], &Word{ + Id: WordId(wordId), + Bits: wordBits, + }) break } } @@ -134,7 +158,7 @@ func toWordBits(bitsMap KatakanaBitsMap, word string) WordBits { func countWordBitsFrequency(wordBitsList []WordBits) WordCountMap { wordCountMaps := WordCountMap{} for _, wb := range wordBitsList { - for i := uint64(0); i < katakanaLen; i++ { + for i := uint64(0); i < KatakanaLen; i++ { katakanaBits := KatakanaBits(1 << i) if wb.has(katakanaBits) { wordCountMaps[katakanaBits]++ @@ -207,9 +231,13 @@ func newNormalizeKatakanaMap() map[rune]rune { return m } -func NormalizeKatakanaWords(words []string) (newWords []string) { +func NormalizeAndFilterKatakanaWords(words []string) (normalizedWords, orgWords []string) { for _, word := range words { - newWords = append(newWords, NormalizeKatakanaWord(word)) + normalizedWord := NormalizeKatakanaWord(word) + if !HasDuplicatedRune(normalizedWord) { + normalizedWords = append(normalizedWords, normalizedWord) + orgWords = append(orgWords, word) + } } return } @@ -227,3 +255,14 @@ func NormalizeKatakanaWord(word string) string { newWord := string(runes) return strings.Replace(newWord, "ー", "", -1) } + +func HasDuplicatedRune(word string) bool { + m := map[rune]struct{}{} + for _, r := range word { + if _, ok := m[r]; ok { + return true + } + m[r] = struct{}{} + } + return false +} diff --git a/lib/katakana_test.go b/lib/katakana_test.go index 2726490..b949aec 100644 --- a/lib/katakana_test.go +++ b/lib/katakana_test.go @@ -29,7 +29,7 @@ func TestNewKatakana(t *testing.T) { katakanaBitsMap['ウ']: 2, katakanaBitsMap['エ']: 1, }, - wordBitsMap: WordBitsMap{ + wordByKatakanaMap: WordByKatakanaMap{ katakanaBitsMap['ア']: []WordBits{ toWordBits(katakanaBitsMap, "アイウ"), }, @@ -46,8 +46,8 @@ func TestNewKatakana(t *testing.T) { if !reflect.DeepEqual(katakana.wordCountMap, tt.want.wordCountMap) { t.Errorf("wordCountMap() = %v, want %v", katakana.wordCountMap, tt.want.wordCountMap) } - if !reflect.DeepEqual(katakana.wordBitsMap, tt.want.wordBitsMap) { - t.Errorf("wordBitsMap() = %v, want %v", katakana.wordBitsMap, tt.want.wordBitsMap) + if !reflect.DeepEqual(katakana.wordByKatakanaMap, tt.want.wordByKatakanaMap) { + t.Errorf("wordByKatakanaMap() = %v, want %v", katakana.wordByKatakanaMap, tt.want.wordByKatakanaMap) } }) } @@ -57,13 +57,13 @@ func TestKatakana_ToSortedKatakanaAndWordBits(t *testing.T) { katakanaBitsMap := newKatakanaBitsMap() type fields struct { katakanaBitsMap KatakanaBitsMap - wordBitsMap WordBitsMap + wordBitsMap WordByKatakanaMap wordCountMap WordCountMap } tests := []struct { name string fields fields - wantKatakanaAndWordBitsList []*KatakanaAndWordBits + wantKatakanaAndWordBitsList []*KatakanaBitsAndWords }{ { name: "", @@ -74,7 +74,7 @@ func TestKatakana_ToSortedKatakanaAndWordBits(t *testing.T) { katakanaBitsMap['ウ']: 2, katakanaBitsMap['エ']: 1, }, - wordBitsMap: WordBitsMap{ + wordBitsMap: WordByKatakanaMap{ katakanaBitsMap['ア']: []WordBits{ toWordBits(katakanaBitsMap, "アイウ"), }, @@ -84,7 +84,7 @@ func TestKatakana_ToSortedKatakanaAndWordBits(t *testing.T) { }, katakanaBitsMap: katakanaBitsMap, }, - wantKatakanaAndWordBitsList: []*KatakanaAndWordBits{ + wantKatakanaAndWordBitsList: []*KatakanaBitsAndWords{ { KatakanaBits: katakanaBitsMap['ア'], WordBitsList: []WordBits{ @@ -109,7 +109,7 @@ func TestKatakana_ToSortedKatakanaAndWordBits(t *testing.T) { }, } - contains := func(list []*KatakanaAndWordBits, v *KatakanaAndWordBits) bool { + contains := func(list []*KatakanaBitsAndWords, v *KatakanaBitsAndWords) bool { for _, nv := range list { if nv.KatakanaBits == v.KatakanaBits { // FIXME @@ -125,9 +125,9 @@ func TestKatakana_ToSortedKatakanaAndWordBits(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { k := &Katakana{ - katakanaBitsMap: tt.fields.katakanaBitsMap, - wordBitsMap: tt.fields.wordBitsMap, - wordCountMap: tt.fields.wordCountMap, + katakanaBitsMap: tt.fields.katakanaBitsMap, + wordByKatakanaMap: tt.fields.wordBitsMap, + wordCountMap: tt.fields.wordCountMap, } gotKatakanaAndWordBitsList := k.ListSortedKatakanaAndWordBits()