Skip to content
This repository has been archived by the owner on Sep 3, 2023. It is now read-only.

Commit

Permalink
change: directory structure
Browse files Browse the repository at this point in the history
  • Loading branch information
kawa1214 committed May 9, 2021
1 parent b74c47d commit beca8f0
Show file tree
Hide file tree
Showing 15 changed files with 392,350 additions and 155 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@

build/

lib/assets/dic.csv
#lib/assets/dic.csv
coverage/
16 changes: 10 additions & 6 deletions example/lib/main.dart
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import 'package:flutter/material.dart';
import 'dart:async';

import 'package:flutter/services.dart';
import 'package:ringo/ringo.dart';

void main() {
Expand All @@ -24,23 +23,28 @@ class _MyAppState extends State<MyApp> {
super.initState();
_initRingo();
controller.addListener(_controllerListener);
//_ringo = await Ringo.init();
}

//final result = _ringo.tokenize('吾輩はRingoである');
//print(result);
Future _lattice() async {

}

Future _initRingo() async {
Future<void> _initRingo() async {
_ringo = await Ringo.init();
_ringo.analyzeMorphologic('私は昨日ワインを飲んだ気がする');
}

void _controllerListener() {
tokenized.clear();

// debug
_ringo.analyzeMorphologic('私は昨日ワインを飲んだ気がする');

final words = _ringo.tokenize(controller.text);
for (final word in words) {
tokenized.write('$word\n');
setState(() {});
}
setState(() {});
}

@override
Expand Down
392,126 changes: 392,126 additions & 0 deletions lib/assets/dic.csv

Large diffs are not rendered by default.

127 changes: 17 additions & 110 deletions lib/ringo.dart
Original file line number Diff line number Diff line change
@@ -1,127 +1,34 @@
library ringo;

import 'src/utils/path.dart';
import 'src/utils/string_utils.dart';
import 'src/utils/list_utils.dart';
import 'package:ringo/src/lattice/lattice.dart';

/*
main() async {
final ringo = await Ringo.init();
final tokenized = ringo.tokenize('吾輩はRingoである');
print('tokenized: $tokenized');
}
*/
import 'src/data/dictionary.dart';
import 'src/data/double_array.dart';
import 'src/data/tokenized_word.dart';
import 'src/search/longest_match.dart';

class Ringo {
const Ringo(this._doubleArray);
const Ringo(this._doubleArray, this._dictionary);
final DoubleArray _doubleArray;
final Dictionary _dictionary;

static Future<Ringo> init() async {
final base = await ListUtils.loadList(Path.base);
final check = await ListUtils.loadList(Path.check);
final doubleArray = DoubleArray(base: base, check: check);
return Ringo(doubleArray);
final doubleArray = await DoubleArray.init();
final dictionary = await Dictionary.init();
return Ringo(doubleArray, dictionary);
}

List<String> tokenize(String query) {
final searchdResult = _searchPrefix(query);
final longestMatchResult = _longestMatch(searchdResult, query);
final searchdResult = _doubleArray.searchCommonPrefix(query);
final longestMatchResult = LongestMatch.simpleTokenize(searchdResult, query);

return longestMatchResult;
}

List<String> _longestMatch(List<SearchedWord> words, String query) {
final result = <String>[];
int current = 0;
words.sort((a, b) => b.end.compareTo(a.end));
final unknownWordBuffer = StringBuffer();
//print('query.length: ${query.length}');
for (var i = 0; i < query.length && current < query.length; i++) {
//print(current);
final startWords = words.where((e) => e.start == current).toList();
if (startWords.isNotEmpty) {
/// 未知語を追加
if (unknownWordBuffer.toString() != '') {
result.add(unknownWordBuffer.toString());
unknownWordBuffer.clear();
}

/// 辞書にある単語を追加
final word = startWords.first;
current = word.end;
result.add(word.word);
} else {
/// 未知語は一つにまとめるため,StringWordBufferに保存
unknownWordBuffer.write(query[current]);
current++;
}
}

/// 最後に未知後があったケースの処理
if (unknownWordBuffer.toString() != '') {
result.add(unknownWordBuffer.toString());
unknownWordBuffer.clear();
}
return result;
}

List<SearchedWord> _searchPrefix(String word) {
final searchResult = <SearchedWord>[];
for (var start = 0; start < word.length; start++) {
for (var end = 1; end < word.length + 1; end++) {
final result = _searchPrefixHelper(word, start, end);
if (result != null) {
searchResult.add(result);
}
}
}
return searchResult;
}

SearchedWord? _searchPrefixHelper(String word, int start, int end) {
if (start == end || start > end) {
return null;
}
final query = word.substring(start, end);
if (_search(query)) {
final word = SearchedWord(word: query, start: start, end: end);
return word;
}
return null;
/// Looks up every trie match inside [query], attaches the dictionary entries
/// for each match, and prints the entries of the first match.
///
/// This is still debugging output; nothing is returned to the caller.
/// When [query] produces no matches (for example an empty string) the
/// method returns without printing.
void analyzeMorphologic(String query) {
  final words = _doubleArray.searchCommonPrefix(query);

  // `Iterable.first` throws a StateError on an empty list, so bail out
  // before touching it.
  if (words.isEmpty) {
    return;
  }

  // Fills `dicWords` on each element of `words` in place.
  _dictionary.allWordsFromTokenizedWords(words);
  print(words.first.dicWords);
}

bool _search(String word) {
int current = 0;
final codes = StringUtils.stringToUint8List(word);
for (var i = 0; i < codes.length; i++) {
final code = codes[i];
final next = _doubleArray.base[current] + code;
final checkData = _doubleArray.check[next] - 1;
if (current != checkData) {
return false;
}
current = next;
}
return true;
}
}

class DoubleArray {
const DoubleArray({
required this.base,
required this.check,
});
final List<int> base;
final List<int> check;
}

class SearchedWord {
const SearchedWord({
required this.word,
required this.start,
required this.end,
});
final String word;
final int start;
final int end;
}
22 changes: 22 additions & 0 deletions lib/src/data/dictionary.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import 'package:ringo/src/utils/list_utils.dart';
import 'tokenized_word.dart';
import 'word.dart';

/// An in-memory list of dictionary entries that can be attached to tokens.
class Dictionary {
  /// Creates a dictionary backed by [words].
  const Dictionary(this.words);

  /// All dictionary entries, in the order they were loaded.
  final List<Word> words;

  /// Loads the entries through [ListUtils.loadWords] and wraps them in a
  /// [Dictionary].
  static Future<Dictionary> init() async {
    final words = await ListUtils.loadWords();
    return Dictionary(words);
  }

  /// Appends to each token's `dicWords` every entry of [words] whose `word`
  /// equals the token's `word`.
  ///
  /// Entries are appended in dictionary order. Tokens without any matching
  /// entry are left untouched. The tokens in [tokenizedWords] are mutated in
  /// place; nothing is returned.
  void allWordsFromTokenizedWords(List<TokenizedWord> tokenizedWords) {
    if (tokenizedWords.isEmpty) {
      return;
    }

    // One bucket per distinct surface form that was asked for. This keeps the
    // work to a single pass over the (large) dictionary instead of one full
    // scan per token.
    final matches = <String, List<Word>>{
      for (final tokenizedWord in tokenizedWords) tokenizedWord.word: <Word>[],
    };
    for (final entry in words) {
      matches[entry.word]?.add(entry);
    }

    for (final tokenizedWord in tokenizedWords) {
      final found = matches[tokenizedWord.word];
      if (found != null && found.isNotEmpty) {
        tokenizedWord.dicWords.addAll(found);
      }
    }
  }
}
59 changes: 59 additions & 0 deletions lib/src/data/double_array.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import '../utils/string_utils.dart';
import '../utils/list_utils.dart';
import '../utils/path.dart';
import 'tokenized_word.dart';

/// A double-array trie described by the parallel [base] and [check] tables.
///
/// Lookups start at state `0`. From a state `s` and an input code `c` the
/// candidate next state is `base[s] + c`, and the transition is accepted only
/// when `check[next] - 1 == s`.
class DoubleArray {
  /// Creates a trie over already-loaded [base] and [check] tables.
  const DoubleArray({
    required this.base,
    required this.check,
  });

  /// Per-state offsets added to an input code to find the next state.
  final List<int> base;

  /// Per-state owner table; stores the parent state plus one.
  final List<int> check;

  /// Loads both tables through [ListUtils.loadList] from [Path.base] and
  /// [Path.check].
  static Future<DoubleArray> init() async {
    final base = await ListUtils.loadList(Path.base);
    final check = await ListUtils.loadList(Path.check);
    return DoubleArray(base: base, check: check);
  }

  /// Returns every substring of [word] that can be followed through the trie.
  ///
  /// Results are ordered by ascending `start`, then ascending `end`, where
  /// each result's text is `word.substring(start, end)`. An empty [word]
  /// yields an empty list.
  List<TokenizedWord> searchCommonPrefix(String word) {
    final searchResult = <TokenizedWord>[];
    for (var start = 0; start < word.length; start++) {
      // Spans with `end <= start` are empty and can never match, so begin
      // directly at the first non-empty span.
      for (var end = start + 1; end <= word.length; end++) {
        final result = _searchCommonPrefixHelper(word, start, end);
        if (result != null) {
          searchResult.add(result);
        }
      }
    }
    return searchResult;
  }

  /// Returns a [TokenizedWord] for `word.substring(start, end)` when that
  /// text is accepted by [_search], or `null` otherwise (including when the
  /// span is empty or inverted).
  TokenizedWord? _searchCommonPrefixHelper(String word, int start, int end) {
    if (start >= end) {
      return null;
    }
    final query = word.substring(start, end);
    if (!_search(query)) {
      return null;
    }
    return TokenizedWord(word: query, start: start, end: end);
  }

  /// Whether every code of [word] can be consumed by walking the trie from
  /// the root state.
  ///
  /// NOTE(review): this only verifies that the path exists; no terminal
  /// marker is checked here, so proper prefixes of stored keys also match.
  bool _search(String word) {
    var current = 0;
    final codes = StringUtils.stringToUint8List(word);
    for (var i = 0; i < codes.length; i++) {
      // A state or transition outside the tables means the trie has no such
      // edge; report "not found" instead of raising a RangeError.
      if (current >= base.length) {
        return false;
      }
      final next = base[current] + codes[i];
      if (next < 0 || next >= check.length) {
        return false;
      }
      if (check[next] - 1 != current) {
        return false;
      }
      current = next;
    }
    return true;
  }
}
14 changes: 14 additions & 0 deletions lib/src/data/tokenized_word.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import 'package:ringo/src/data/word.dart';

/// A piece of text together with the offsets it was found at and the
/// dictionary entries attached to it.
class TokenizedWord {
  /// Creates a token for [word] spanning [start] to [end].
  ///
  /// [dicWords] optionally seeds the attached dictionary entries. The
  /// elements are copied into a fresh growable list, so later additions never
  /// affect the caller's list. When omitted the token starts with no entries.
  TokenizedWord({
    required this.word,
    required this.start,
    required this.end,
    List<Word>? dicWords,
  }) : dicWords = <Word>[...?dicWords];

  /// The matched text.
  final String word;

  /// Offset at which the match begins.
  final int start;

  /// Offset at which the match ends.
  final int end;

  /// Dictionary entries associated with this token; mutable so that entries
  /// can be appended after construction.
  final List<Word> dicWords;
}
12 changes: 12 additions & 0 deletions lib/src/data/word.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/// One immutable dictionary entry.
class Word {
  /// The entry's text.
  final String word;

  /// Left context identifier of the entry.
  final int leftContextId;

  /// Right context identifier of the entry.
  final int rightContextId;

  /// Occurrence cost of the entry.
  final int occurrenceCost;

  /// Creates an entry; every field is required.
  const Word({
    required this.word,
    required this.leftContextId,
    required this.rightContextId,
    required this.occurrenceCost,
  });
}
51 changes: 25 additions & 26 deletions lib/src/lattice/lattice.dart
Original file line number Diff line number Diff line change
@@ -1,37 +1,35 @@
import 'dart:io';

import 'package:ringo/src/data/tokenized_word.dart';
import 'package:ringo/src/data/word.dart';
import 'package:ringo/src/output_double_array/array_utils.dart';
import 'package:ringo/src/output_double_array/trie.dart';
import 'package:ringo/src/utils/path.dart';

main() async {
final current = '/lib/src/output_double_array/outputs/';
final base =
await ListUtils.loadList(Directory.current.path + current + 'base.csv');
final check =
await ListUtils.loadList(Directory.current.path + current + 'check.csv');
/// A lattice vertex that wraps a single [TokenizedWord].
class Node {
  /// The token carried by this node.
  final TokenizedWord word;

  /// Other nodes attached to this one; starts out empty.
  ///
  /// NOTE(review): presumably the successor nodes in the lattice — confirm
  /// once the linking code exists.
  final List<Node> nodes = <Node>[];

  /// Creates a node for [word] with no attached nodes.
  Node(this.word);
}

final dic = await ListUtils.loadDic();
class Lattice {
Lattice(this.node);
final Node node;

final wordDic = dic.map((e) {
final parts = e.split(',');
return Word(
word: parts[0],
leftContextId: int.tryParse(parts[1]) ?? 0,
rightContextId: int.tryParse(parts[2]) ?? 0,
occurrenceCost: int.tryParse(parts[3]) ?? 0,
);
}).toList();
static Lattice setWords(List<TokenizedWord> words) {
final node = bosNode();



final doubleArray = SearchDoubleArray(
base: base,
check: check,
);
final lattice = Lattice(doubleArray: doubleArray, dic: wordDic);
final result = lattice.searchPrefix('はなしたら元気になった');
//result.forEach((e) {print(e.word.word);});
print(result);
return Lattice(node);
}

/// Builds the sentinel node whose token text is `'BOS'` and whose `start` and
/// `end` are both `0`.
static Node bosNode() => Node(TokenizedWord(word: 'BOS', start: 0, end: 0));
}

/*
class Word {
const Word(
{required this.word,
Expand Down Expand Up @@ -105,3 +103,4 @@ class Lattice {
return true;
}
}
*/
4 changes: 2 additions & 2 deletions lib/src/output_double_array/array_utils.dart
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ class ListUtils {
return data;
}

static Future<List<String>> loadDic() async {
final file = File(Directory.current.path + '/lib/src/dictionaly/dic.csv');
static Future<List<String>> loadDic(String path) async {
final file = File(path);
final read = file.openRead();

final lines =
Expand Down
Loading

0 comments on commit beca8f0

Please sign in to comment.