Skip to content

Commit

Permalink
feat(module): add support for Japanese language
Browse files Browse the repository at this point in the history
  • Loading branch information
rhahao committed Dec 21, 2023
1 parent 6d675d7 commit 647b5e0
Show file tree
Hide file tree
Showing 8 changed files with 358 additions and 25 deletions.
3 changes: 2 additions & 1 deletion README.md
Expand Up @@ -102,8 +102,9 @@ Currently, we only support enhanced parsing for the following languages:
```bash
English
French
Italian
German
Italian
Japanese
Madagascar Sign Language, Malagasy
Malagasy
Portuguese Brazil
Expand Down
3 changes: 2 additions & 1 deletion src/browser/utils.browser.ts
Expand Up @@ -3,6 +3,7 @@ import * as path from 'path-browserify';
import E from '../locales/en/text.json';
import F from '../locales/fr-FR/text.json';
import I from '../locales/it-IT/text.json';
import J from '../locales/ja-JP/text.json';
import K from '../locales/uk-UA/text.json';
import MG from '../locales/mg-MG/text.json';
import T from '../locales/pt-BR/text.json';
Expand All @@ -19,6 +20,6 @@ declare global {
}

window.jw_epub_parser = {
languages: { E, F, I, K, MG, T, TND, TNK, TTM, VZ, X },
languages: { E, F, I, J, K, MG, T, TND, TNK, TTM, VZ, X },
path: path,
};
84 changes: 62 additions & 22 deletions src/common/parsing_rules.ts
Expand Up @@ -6,18 +6,35 @@ export const extractMonthName = (src: string, lang: string) => {
let monthIndex;

const text = src.toLowerCase();
const split = text.split(/[–-—]/);
const separators = ['bis', '–', '-', '—'];
const regex = new RegExp(separators.join('|'), 'gi');
const split = text.split(regex);
const monthNames = getMonthNames(lang);

outerLoop: for (const splitted of split) {
for (const month of monthNames) {
const monthLang = month.name.toLowerCase();
const regex = new RegExp(`(${monthLang})`);
let searchKey = `(${monthLang})`;

if (lang === 'J') {
searchKey = `\\b${searchKey}\\b`;
}

const regex = new RegExp(searchKey);
const array = regex.exec(splitted);

if (Array.isArray(array)) {
varDay = +text.match(/(\d+)/)![0];
const regex = /\d+/g;
const match = text.match(regex);

if (lang === 'J') {
varDay = +match![1];
}

if (lang !== 'J') {
varDay = +match![0];
}

monthIndex = month.index;
break outerLoop;
}
Expand Down Expand Up @@ -53,8 +70,8 @@ export const extractSourceEnhanced = (src: string, lang: string) => {
if (match) {
const splits = src.split(regex);
const duration = +match[0].match(/\d+/)![0];
const regexStartColumn = /^[:.]/;
const regexEndColumn = /:$/;
const regexStartColumn = /^[:.]/;
const regexEndColumn = /[:」]$/;

const tmpAssignment = splits[0].trim();
const source = splits[1].trim().replace(regexStartColumn, '').replace(regexEndColumn, '').trim();
Expand Down Expand Up @@ -87,16 +104,16 @@ export const extractLastSong = (src: string) => {
};

export const extractWTStudyDate = (src: string, lang: string) => {
let varDay;
let monthIndex;
let varYear;

const variations = getStudyArticleDateVariations(lang).split('|');

const patternNumber = '{{ number }}';
const patternDate = '{{ date }}';

let varDay;
let monthIndex;
let varYear;

for (const variation of variations) {
outerLoop: for (const variation of variations) {
let textSearch = variation.replace(patternDate, '');
textSearch = textSearch.replace(patternNumber, '\\d+');

Expand All @@ -110,23 +127,46 @@ export const extractWTStudyDate = (src: string, lang: string) => {

textSearch = dateValue.trim();

const text = textSearch.toLowerCase();
const separators = ['bis', '–', '-', '—'];
const regex = new RegExp(separators.join('|'), 'gi');
const split = text.split(regex);
const monthNames = getMonthNames(lang);

for (const month of monthNames) {
const monthLang = month.name.toLowerCase();
const regex = new RegExp(`(${monthLang})`);
const array2 = regex.exec(textSearch.toLowerCase());
for (const splitted of split) {
for (const month of monthNames) {
const monthLang = month.name.toLowerCase();
let searchKey = `(${monthLang})`;

if (lang === 'J') {
searchKey = `\\b${searchKey}\\b`;
}

const regex = new RegExp(searchKey);
const array2 = regex.exec(splitted);

if (Array.isArray(array2)) {
const regex = /\d+/g;
const match = textSearch.match(regex);

if (lang === 'J') {
varDay = +match![2];
}

if (lang !== 'J') {
varDay = +match![0];
}

monthIndex = month.index;

if (Array.isArray(array2)) {
varDay = +textSearch.match(/(\d+)/)![0];
monthIndex = month.index;
const findYear = /\b\d{4}\b/;
const array3 = findYear.exec(dateValue);
if (array3 !== null) {
varYear = +array3[0];
}

const findYear = /\b\d{4}\b/;
const array3 = findYear.exec(dateValue);
if (array3 !== null) {
varYear = +array3[0];
break outerLoop;
}
break;
}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/locales/languages.ts
Expand Up @@ -2,6 +2,7 @@ export default [
{ locale: 'en', code: 'E' },
{ locale: 'fr-FR', code: 'F' },
{ locale: 'it-IT', code: 'I' },
{ locale: 'ja-JP', code: 'J' },
{ locale: 'uk-UA', code: 'K' },
{ locale: 'mg-MG', code: 'MG' },
{ locale: 'pt-BR', code: 'T' },
Expand Down
3 changes: 2 additions & 1 deletion src/node/utils.node.ts
Expand Up @@ -4,6 +4,7 @@ import { readFile } from 'fs/promises';
import E from '../locales/en/text.json' assert { type: 'json' };
import F from '../locales/fr-FR/text.json' assert { type: 'json' };
import I from '../locales/it-IT/text.json' assert { type: 'json' };
import J from '../locales/ja-JP/text.json' assert { type: 'json' };
import K from '../locales/uk-UA/text.json' assert { type: 'json' };
import MG from '../locales/mg-MG/text.json' assert { type: 'json' };
import T from '../locales/pt-BR/text.json' assert { type: 'json' };
Expand All @@ -18,7 +19,7 @@ declare global {
}

global.jw_epub_parser = {
languages: { E, F, I, K, MG, T, TND, TNK, TTM, VZ, X },
languages: { E, F, I, J, K, MG, T, TND, TNK, TTM, VZ, X },
path: path,
readFile: readFile,
};
1 change: 1 addition & 0 deletions test/enhancedParsing/list.json
Expand Up @@ -2,6 +2,7 @@
{ "language": "E", "issue": "202309" },
{ "language": "F", "issue": "202309" },
{ "language": "I", "issue": "202309" },
{ "language": "J", "issue": "202309" },
{ "language": "K", "issue": "202309" },
{ "language": "MG", "issue": "202309" },
{ "language": "TND", "issue": "202309" },
Expand Down

0 comments on commit 647b5e0

Please sign in to comment.