Skip to content

Commit

Permalink
feat(deinflect): Add logic to handle all inflections of the copula だ (#…
Browse files Browse the repository at this point in the history
…988)

- Adds a new type of deinflection for the copula, だ.
- Adds mappings for all common conjugations to deinflect.dat.
    - See http://www.japaneseprofessor.com/reference/grammar/conjugations-of-the-japanese-copula/
- To handle the polite negative じゃないです, updates the general adjective handling to recognize an appended polite です.

Fixes #89
  • Loading branch information
maawisul committed May 1, 2022
1 parent 69c7fe7 commit 52d30c5
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 4 deletions.
5 changes: 4 additions & 1 deletion extension/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ class RcxDict {
if (type & rule.typeMask && end === rule.from) {
const newWord =
word.substr(0, word.length - rule.from.length) + rule.to;
if (newWord.length <= 1) {
if (newWord.length <= 0) {
continue;
}
o = { word: word, type: 0xff, reason: '' } as Deinflection;
Expand Down Expand Up @@ -493,6 +493,9 @@ class RcxDict {
if (y & 8 && w === 'vk') {
break;
}
if (y & 32 && w === 'cop') {
break;
}
}
ok = z !== -1;
}
Expand Down
33 changes: 31 additions & 2 deletions extension/data/deinflect.dat
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Deinflect Rules 20081220-0509 | by Jonathan Zarate | http://www.polarcloud.com
Deinflect Rules 20081220-0509 | by Jonathan Zarate | http://www.polarcloud.com
polite past negative
polite negative
polite volitional
Expand Down Expand Up @@ -27,6 +27,7 @@ masu stem
adv
noun
imperative negative
past negative
くありませんでした い 1152 0
いませんでした う 640 0
きませんでした く 640 0
Expand Down Expand Up @@ -123,6 +124,10 @@ imperative negative
かせる く 513 9
がせる ぐ 513 9
かった い 1152 14
ではありません ではない 1152 13
ではありませんでした ではない 1152 6
じゃありません じゃない 1152 13
じゃありませんでした じゃない 1152 6
かない く 516 15
がない ぐ 516 15
かれる く 513 16
Expand All @@ -139,6 +144,8 @@ imperative negative
きます くる 2176 13
ぎます ぐ 640 13
くない い 1028 15
です 1152 13
かったです い 1152 6
ければ い 1152 17
こない くる 2052 15
こよう くる 2176 18
Expand Down Expand Up @@ -333,4 +340,26 @@ imperative negative
り る 640 24
れ る 640 23
れ れる 384 24
ろ る 384 23
ろ る 384 23
です だ 8320 13
だった だ 8320 14
でした だ 8320 6
ではない だ 8196 15
じゃない だ 8196 15
ではありません だ 8320 1
じゃありません だ 8320 1
ではありませんでした だ 8320 0
じゃありませんでした だ 8320 0
だろう だ 8320 18
でしょう だ 8320 2
で だ 8320 21
でありまして だ 8320 21
なら だ 8320 17
ならば だ 8320 17
であれば だ 8320 17
でございました でございます 8320 14
ではございません でございます 8320 15
ではございませんでした でございます 8320 28
でございましょう でございます 8320 18
でございまして でございます 8320 21
であれば でございます 8320 17
66 changes: 66 additions & 0 deletions extension/test/data_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { Config } from '../configuration';
import { RcxDict } from '../data';
import { expect, use } from '@esm-bundle/chai';
import chaiLike from 'chai-like';
import chaiThings from 'chai-things';
import sinonChrome from 'sinon-chrome';

// Teach chai-like to accept a RegExp on the expected side so that string
// values nested inside objects can be fuzzy matched.
chaiLike.extend({
  // This validator applies only when a string value is paired with a RegExp.
  match: (actual, pattern) =>
    typeof actual === 'string' && pattern instanceof RegExp,
  // The comparison passes when the RegExp matches the string value.
  assert: (actual, pattern) =>
    pattern instanceof RegExp && pattern.test(actual),
});

// Register both chai plugins; the assertions in this file chain
// `.something` and `.like`.
use(chaiLike);
use(chaiThings);

// Dictionary instance shared by every test; assigned once in the `before` hook.
let rcxDict: RcxDict;

describe('data.ts', function () {
  // Data tests can take longer than the 2000ms default, so double the timeout.
  // Scaling the current value (rather than hard-coding one) keeps config level
  // changes (ie browserstack) in effect.
  const inheritedTimeout = this.timeout();
  this.timeout(inheritedTimeout * 2);

  before(async function () {
    // Make the sinon chrome getURL stub return the path it is given.
    // Required to load dictionary files.
    sinonChrome.extension.getURL.returnsArg(0);
    const emptyConfig = {} as Config;
    rcxDict = await RcxDict.create(emptyConfig);
  });

  describe('deinflect', function () {
    it('should include deinflections of length one or more', function () {
      const candidates = rcxDict.deinflect('です');
      expect(candidates).to.include.something.like({ word: 'だ' });
    });

    it('should not include empty deinflections', function () {
      const candidates = rcxDict.deinflect('な');
      expect(candidates).to.not.include.something.like({ word: '' });
    });
  });

  describe('wordSearch', function () {
    it('should return results for deinflected copula', function () {
      const entries = rcxDict.wordSearch('です', /* doNames= */ false)?.data;
      expect(entries).to.include.something.like({
        entry: /^だ .*/,
        reason: '&lt; polite',
      });
    });

    it('should not include copula deinflections for non-copula words', function () {
      const entries = rcxDict.wordSearch(
        'ぼんです',
        /* doNames= */ false
      )?.data;
      expect(entries).to.not.include.something.like({ entry: /^凡打 .*/ });
    });
  });
});
68 changes: 68 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"stylelint:check": "stylelint '**/*.css' '**/*.ts' --ignore-path '.gitignore'",
"stylelint:fix": "npm run stylelint:check --silent -- --fix",
"test": "wtr \"extension/test/*_test.ts\"",
"test:small": "wtr --files \"extension/test/background_test.ts\" \"extension/test/docs-annotate-canvas_test.ts\" \"extension/test/rikaicontent_test.ts\"",
"test:small": "wtr --files \"extension/test/background_test.ts\" \"extension/test/data_test.ts\" \"extension/test/docs-annotate-canvas_test.ts\" \"extension/test/rikaicontent_test.ts\"",
"test:browserstack": "npm run test -- --browserstack",
"test:watch": "npm run test -- --watch",
"test:update-baselines": "npm run test -- --update-visual-baseline",
Expand Down Expand Up @@ -62,6 +62,8 @@
"@stylelint/postcss-css-in-js": "^0.37.2",
"@types/byline": "^4.2.33",
"@types/chai": "^4.3.1",
"@types/chai-like": "^1.1.1",
"@types/chai-things": "^0.0.35",
"@types/chrome": "0.0.148",
"@types/mocha": "^9.1.1",
"@types/node": "^16.4.4",
Expand All @@ -77,6 +79,8 @@
"@web/test-runner-puppeteer": "^0.10.5",
"@web/test-runner-visual-regression": "^0.6.5",
"byline": "^5.0.0",
"chai-like": "^1.1.1",
"chai-things": "^0.2.0",
"conventional-changelog-conventionalcommits": "^4.6.3",
"csv-parse": "^5.0.4",
"eslint": "^8.14.0",
Expand Down

0 comments on commit 52d30c5

Please sign in to comment.