Skip to content

Commit

Permalink
change to es
Browse files Browse the repository at this point in the history
  • Loading branch information
taisukef committed Oct 8, 2022
1 parent b85b21e commit 42f32a3
Show file tree
Hide file tree
Showing 26 changed files with 120 additions and 253 deletions.
3 changes: 2 additions & 1 deletion example/load-deno.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
* limitations under the License.
*/

import { kuromoji } from "../build/kuromoji.js";
//import { kuromoji } from "../build/kuromoji.js";
import { kuromoji } from "../src/kuromoji.js";
const DIC_DIR = "../dict/";

// Load dictionaries from file, and prepare tokenizer
Expand Down
2 changes: 1 addition & 1 deletion example/load-deno2.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@
import { kuromoji } from "../kuromoji.js";

const tokenizer = await kuromoji.createTokenizer();
const path = tokenizer.tokenize("すもももももももものうち");
const path = tokenizer.tokenize("私は家にいる");
console.log(path);
3 changes: 2 additions & 1 deletion kuromoji.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { kuromoji as _kuromoji } from "./build/kuromoji.js";
//import { kuromoji as _kuromoji } from "./build/kuromoji.js";
import { kuromoji as _kuromoji } from "./src/kuromoji.js";

const DIC_URL = "https://code4fukui.github.io/kuromoji-es/dict/";

Expand Down
12 changes: 4 additions & 8 deletions src/Tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@
* limitations under the License.
*/

"use strict";

var ViterbiBuilder = require("./viterbi/ViterbiBuilder");
var ViterbiSearcher = require("./viterbi/ViterbiSearcher");
var IpadicFormatter = require("./util/IpadicFormatter");
import { ViterbiBuilder } from "./viterbi/ViterbiBuilder.js";
import { ViterbiSearcher } from "./viterbi/ViterbiSearcher.js";
import { IpadicFormatter } from "./util/IpadicFormatter.js";

var PUNCTUATION = /、|。/;

Expand All @@ -28,7 +26,7 @@ var PUNCTUATION = /、|。/;
* @param {DynamicDictionaries} dic Dictionaries used by this tokenizer
* @constructor
*/
function Tokenizer(dic) {
export function Tokenizer(dic) {
this.token_info_dictionary = dic.token_info_dictionary;
this.unknown_dictionary = dic.unknown_dictionary;
this.viterbi_builder = new ViterbiBuilder(dic);
Expand Down Expand Up @@ -125,5 +123,3 @@ Tokenizer.prototype.tokenizeForSentence = function (sentence, tokens) {
Tokenizer.prototype.getLattice = function (text) {
// Hand the text to the ViterbiBuilder stored on this tokenizer (set up in the
// Tokenizer constructor) and return whatever its build() call produces, unchanged.
return this.viterbi_builder.build(text);
};

module.exports = Tokenizer;
12 changes: 5 additions & 7 deletions src/TokenizerBuilder.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@
* limitations under the License.
*/

"use strict";

var Tokenizer = require("./Tokenizer");
var DictionaryLoader = require("./loader/NodeDictionaryLoader");
import { Tokenizer } from "./Tokenizer.js";
//import { DictionaryLoader } from "./loader/NodeDictionaryLoader.js";
import { BrowserDictionaryLoader as DictionaryLoader } from "./loader/BrowserDictionaryLoader.js";
//import { DictionaryLoader } from "./loader/DictionaryLoader.js";

/**
* TokenizerBuilder creates a Tokenizer instance.
* @param {Object} option JSON object holding key-value settings
* @param {string} option.dicPath Dictionary directory path (or URL when used in a browser)
* @constructor
*/
function TokenizerBuilder(option) {
export function TokenizerBuilder(option) {
if (option.dicPath == null) {
this.dic_path = "dict/";
} else {
Expand All @@ -51,5 +51,3 @@ TokenizerBuilder.prototype.build = function (callback) {
* @param {Object} err Error object
* @param {Tokenizer} tokenizer Prepared Tokenizer
*/

module.exports = TokenizerBuilder;
6 changes: 1 addition & 5 deletions src/dict/CharacterClass.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
* limitations under the License.
*/

"use strict";

/**
* CharacterClass
* @param {number} class_id
Expand All @@ -26,12 +24,10 @@
* @param {number} max_length
* @constructor
*/
function CharacterClass(class_id, class_name, is_always_invoke, is_grouping, max_length) {
export function CharacterClass(class_id, class_name, is_always_invoke, is_grouping, max_length) {
  // Store every constructor argument on the instance under its own name,
  // in the same order as the parameter list.
  Object.assign(this, {
    class_id,
    class_name,
    is_always_invoke,
    is_grouping,
    max_length,
  });
}

module.exports = CharacterClass;
12 changes: 4 additions & 8 deletions src/dict/CharacterDefinition.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@
* limitations under the License.
*/

"use strict";

var InvokeDefinitionMap = require("./InvokeDefinitionMap");
var CharacterClass = require("./CharacterClass");
var SurrogateAwareString = require("../util/SurrogateAwareString");
import { InvokeDefinitionMap } from "./InvokeDefinitionMap.js";
import { CharacterClass } from "./CharacterClass.js";
import { SurrogateAwareString } from "../util/SurrogateAwareString.js";

var DEFAULT_CATEGORY = "DEFAULT";

Expand All @@ -28,7 +26,7 @@ var DEFAULT_CATEGORY = "DEFAULT";
* defines behavior of unknown word processing
* @constructor
*/
function CharacterDefinition() {
export function CharacterDefinition() {
this.character_category_map = new Uint8Array(65536); // for all UCS2 code points
this.compatible_category_map = new Uint32Array(65536); // for all UCS2 code points
this.invoke_definition_map = null;
Expand Down Expand Up @@ -201,5 +199,3 @@ CharacterDefinition.prototype.lookup = function (ch) {

return this.invoke_definition_map.getCharacterClass(class_id);
};

module.exports = CharacterDefinition;
6 changes: 1 addition & 5 deletions src/dict/ConnectionCosts.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,14 @@
* limitations under the License.
*/

"use strict";

/**
* Connection cost matrix from the cc.dat file.
* Two-dimensional matrix: [forward_id][backward_id] -> cost
* @constructor
* @param {number} forward_dimension
* @param {number} backward_dimension
*/
function ConnectionCosts(forward_dimension, backward_dimension) {
export function ConnectionCosts(forward_dimension, backward_dimension) {
this.forward_dimension = forward_dimension;
this.backward_dimension = backward_dimension;

Expand Down Expand Up @@ -55,5 +53,3 @@ ConnectionCosts.prototype.loadConnectionCosts = function (connection_costs_buffe
this.backward_dimension = connection_costs_buffer[1];
this.buffer = connection_costs_buffer;
};

module.exports = ConnectionCosts;
14 changes: 5 additions & 9 deletions src/dict/DynamicDictionaries.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,10 @@
* limitations under the License.
*/

"use strict";

var doublearray = require("doublearray");
var TokenInfoDictionary = require("./TokenInfoDictionary");
var ConnectionCosts = require("./ConnectionCosts");
var UnknownDictionary = require("./UnknownDictionary");
import { doublearray } from "https://code4fukui.github.io/doublearray-es/doublearray.js";
import { TokenInfoDictionary } from "./TokenInfoDictionary.js";
import { ConnectionCosts } from "./ConnectionCosts.js";
import { UnknownDictionary } from "./UnknownDictionary.js";

/**
* Dictionaries container for Tokenizer
Expand All @@ -30,7 +28,7 @@ var UnknownDictionary = require("./UnknownDictionary");
* @param {UnknownDictionary} unknown_dictionary
* @constructor
*/
function DynamicDictionaries(trie, token_info_dictionary, connection_costs, unknown_dictionary) {
export function DynamicDictionaries(trie, token_info_dictionary, connection_costs, unknown_dictionary) {
if (trie != null) {
this.trie = trie;
} else {
Expand Down Expand Up @@ -78,5 +76,3 @@ DynamicDictionaries.prototype.loadUnknownDictionaries = function (unk_buffer, un
this.unknown_dictionary.loadUnknownDictionaries(unk_buffer, unk_pos_buffer, unk_map_buffer, cat_map_buffer, compat_cat_map_buffer, invoke_def_buffer);
return this;
};

module.exports = DynamicDictionaries;
10 changes: 3 additions & 7 deletions src/dict/InvokeDefinitionMap.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,14 @@
* limitations under the License.
*/

"use strict";

var ByteBuffer = require("../util/ByteBuffer");
var CharacterClass = require("./CharacterClass");
import { ByteBuffer } from "../util/ByteBuffer.js";
import { CharacterClass } from "./CharacterClass.js";

/**
* InvokeDefinitionMap represents the invoke-definition part of char.def
* @constructor
*/
function InvokeDefinitionMap() {
export function InvokeDefinitionMap() {
  // Every instance starts with its own empty containers.
  Object.assign(this, {
    map: [],
    // Used only while building the dictionary.
    lookup_table: {},
  });
}
Expand Down Expand Up @@ -106,5 +104,3 @@ InvokeDefinitionMap.prototype.toBuffer = function () {
buffer.shrink();
return buffer.buffer;
};

module.exports = InvokeDefinitionMap;
8 changes: 2 additions & 6 deletions src/dict/TokenInfoDictionary.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,13 @@
* limitations under the License.
*/

"use strict";

var ByteBuffer = require("../util/ByteBuffer");
import { ByteBuffer } from "../util/ByteBuffer.js";

/**
* TokenInfoDictionary
* @constructor
*/
function TokenInfoDictionary() {
export function TokenInfoDictionary() {
this.dictionary = new ByteBuffer(10 * 1024 * 1024);
this.target_map = {}; // trie_id (of surface form) -> token_info_id (of token)
this.pos_buffer = new ByteBuffer(10 * 1024 * 1024);
Expand Down Expand Up @@ -148,5 +146,3 @@ TokenInfoDictionary.prototype.getFeatures = function (token_info_id_str) {
var pos_id = this.dictionary.getInt(token_info_id + 6);
return this.pos_buffer.getString(pos_id);
};

module.exports = TokenInfoDictionary;
12 changes: 4 additions & 8 deletions src/dict/UnknownDictionary.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,15 @@
* limitations under the License.
*/

"use strict";

var TokenInfoDictionary = require("./TokenInfoDictionary");
var CharacterDefinition = require("./CharacterDefinition");
var ByteBuffer = require("../util/ByteBuffer");
import { TokenInfoDictionary } from "./TokenInfoDictionary.js";
import { CharacterDefinition } from "./CharacterDefinition.js";
import { ByteBuffer } from "../util/ByteBuffer.js";

/**
* UnknownDictionary
* @constructor
*/
function UnknownDictionary() {
export function UnknownDictionary() {
this.dictionary = new ByteBuffer(10 * 1024 * 1024);
this.target_map = {}; // class_id (of CharacterClass) -> token_info_id (of unknown class)
this.pos_buffer = new ByteBuffer(10 * 1024 * 1024);
Expand Down Expand Up @@ -54,5 +52,3 @@ UnknownDictionary.prototype.loadUnknownDictionaries = function (unk_buffer, unk_
this.loadTargetMap(unk_map_buffer);
this.character_definition = CharacterDefinition.load(cat_map_buffer, compat_cat_map_buffer, invoke_def_buffer);
};

module.exports = UnknownDictionary;
10 changes: 3 additions & 7 deletions src/dict/builder/CharacterDefinitionBuilder.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@
* limitations under the License.
*/

"use strict";

var CharacterDefinition = require("../CharacterDefinition");
var InvokeDefinitionMap = require("../InvokeDefinitionMap");
import { CharacterDefinition } from "../CharacterDefinition.js";
import { InvokeDefinitionMap } from "../InvokeDefinitionMap.js";

var CATEGORY_DEF_PATTERN = /^(\w+)\s+(\d)\s+(\d)\s+(\d)/;
var CATEGORY_MAPPING_PATTERN = /^(0x[0-9A-F]{4})(?:\s+([^#\s]+))(?:\s+([^#\s]+))*/;
Expand All @@ -28,7 +26,7 @@ var RANGE_CATEGORY_MAPPING_PATTERN = /^(0x[0-9A-F]{4})\.\.(0x[0-9A-F]{4})(?:\s+(
* CharacterDefinitionBuilder
* @constructor
*/
function CharacterDefinitionBuilder() {
export function CharacterDefinitionBuilder() {
this.char_def = new CharacterDefinition();
this.char_def.invoke_definition_map = new InvokeDefinitionMap();
this.character_category_definition = [];
Expand Down Expand Up @@ -64,5 +62,3 @@ CharacterDefinitionBuilder.prototype.build = function () {
this.char_def.initCategoryMappings(this.category_mapping);
return this.char_def;
};

module.exports = CharacterDefinitionBuilder;
8 changes: 2 additions & 6 deletions src/dict/builder/ConnectionCostsBuilder.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,13 @@
* limitations under the License.
*/

"use strict";

var ConnectionCosts = require("../ConnectionCosts");
import { ConnectionCosts } from "../ConnectionCosts.js";

/**
* Builder class for constructing ConnectionCosts object
* @constructor
*/
function ConnectionCostsBuilder() {
export function ConnectionCostsBuilder() {
  // Initial state: a zeroed `lines` counter and no connection cost object yet.
  Object.assign(this, {
    lines: 0,
    connection_cost: null,
  });
}
Expand Down Expand Up @@ -66,5 +64,3 @@ ConnectionCostsBuilder.prototype.putLine = function (line) {
ConnectionCostsBuilder.prototype.build = function () {
// Return the current value of `connection_cost` as-is; the constructor sets it
// to null, so null comes back if nothing has assigned it in the meantime.
return this.connection_cost;
};

module.exports = ConnectionCostsBuilder;
18 changes: 7 additions & 11 deletions src/dict/builder/DictionaryBuilder.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,12 @@
* limitations under the License.
*/

"use strict";

var doublearray = require("doublearray");
var DynamicDictionaries = require("../DynamicDictionaries");
var TokenInfoDictionary = require("../TokenInfoDictionary");
var ConnectionCostsBuilder = require("./ConnectionCostsBuilder");
var CharacterDefinitionBuilder = require("./CharacterDefinitionBuilder");
var UnknownDictionary = require("../UnknownDictionary");
import { doublearray } from "https://code4fukui.github.io/doublearray-es/doublearray.js";
import { DynamicDictionaries } from "../DynamicDictionaries.js";
import { TokenInfoDictionary } from "../TokenInfoDictionary.js";
import { ConnectionCostsBuilder } from "./ConnectionCostsBuilder.js";
import { CharacterDefinitionBuilder } from "./CharacterDefinitionBuilder.js";
import { UnknownDictionary } from "../UnknownDictionary.js";

/**
* Build dictionaries (token info, connection costs)
Expand All @@ -36,7 +34,7 @@ var UnknownDictionary = require("../UnknownDictionary");
* tid_map.dat: targetMap
* tid_pos.dat: posList (part of speech)
*/
function DictionaryBuilder() {
export function DictionaryBuilder() {
// Array of entries, each entry in Mecab form
// (0: surface form, 1: left id, 2: right id, 3: word cost, 4: part of speech id, 5-: other features)
this.tid_entries = [];
Expand Down Expand Up @@ -154,5 +152,3 @@ DictionaryBuilder.prototype.buildDoubleArray = function () {
var builder = doublearray.builder(1024 * 1024);
return builder.build(words);
};

module.exports = DictionaryBuilder;
10 changes: 3 additions & 7 deletions src/kuromoji.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,15 @@
* limitations under the License.
*/

"use strict";

var TokenizerBuilder = require("./TokenizerBuilder");
var DictionaryBuilder = require("./dict/builder/DictionaryBuilder");
import { TokenizerBuilder } from "./TokenizerBuilder.js";
import { DictionaryBuilder } from "./dict/builder/DictionaryBuilder.js";

// Public methods
var kuromoji = {
/**
 * Create a TokenizerBuilder from the given option object.
 * @param {Object} option Passed through unchanged to the TokenizerBuilder constructor
 * @returns {TokenizerBuilder} Newly constructed builder
 */
function builder(option) {
  return new TokenizerBuilder(option);
}

/**
 * Create a DictionaryBuilder.
 * @returns {DictionaryBuilder} Newly constructed builder
 */
function dictionaryBuilder() {
  return new DictionaryBuilder();
}

// Public entry points, exposed under the same property names as before.
export const kuromoji = { builder, dictionaryBuilder };

module.exports = kuromoji;
Loading

0 comments on commit 42f32a3

Please sign in to comment.