Skip to content

Commit

Permalink
add zip to cache instead unzipped files
Browse files Browse the repository at this point in the history
  • Loading branch information
leyhline committed Jun 23, 2023
1 parent c4d34e1 commit 29d6fea
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 132 deletions.
5 changes: 0 additions & 5 deletions src/Dictionary.ts
Original file line number Diff line number Diff line change
@@ -1,29 +1,24 @@
/**
 * Describes a MeCab dictionary archive and, optionally, how the raw feature
 * list of a token is turned into a typed object.
 *
 * @typeParam T type produced by `wrapper`; stays `null` for dictionaries
 * that are declared without a wrapper.
 */
export interface Dictionary<T extends Features | null = null> {
/** Location of the dictionary zip file; forwarded to the worker in its "init" message. */
url: string;
/** Name of the Cache Storage cache associated with this dictionary. */
cacheName: string;
/** Optional converter from a token's feature strings to `T` (or `null`). */
wrapper?: (features: string[]) => T | null;
}

/** Descriptor for the UniDic 2.1.2 archive; features are typed through `createUnidicFeature26`. */
export const UNIDIC2: Dictionary<UnidicFeature26> = {
url: "/unidic-mecab-2.1.2_bin.zip",
cacheName: "unidic-2.1.2_bin",
wrapper: createUnidicFeature26,
};

/** Descriptor for the UniDic 3.1.0 archive; features are typed through `createUnidicFeature29`. */
export const UNIDIC3: Dictionary<UnidicFeature29> = {
url: "/unidic-3.1.0.zip",
cacheName: "unidic-3.1.0",
wrapper: createUnidicFeature29,
};

/** Descriptor for the IPADIC 2.7.0 archive; no wrapper is configured, so the feature type stays `null`. */
export const IPADIC: Dictionary = {
url: "/ipadic-2.7.0_bin.zip",
cacheName: "ipadic-2.7.0_bin",
};

/** Descriptor for the JUMANDIC 7.0 archive; no wrapper is configured, so the feature type stays `null`. */
export const JUMANDIC: Dictionary = {
url: "/jumandic-7.0_bin.zip",
cacheName: "jumandic-7.0_bin",
};

/** String-keyed map of string values; the upper bound for every typed feature object. */
export type Features = { [key: string]: string };
Expand Down
28 changes: 13 additions & 15 deletions src/MecabWorker.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ import { IPADIC, JUMANDIC, UNIDIC2 } from "./Dictionary.js";

describe("MecabWorker integration tests", function () {
it("fails to create a worker when no dictionary was found", function (done) {
MecabWorker.create({ url: "not-a-real-path", cacheName: "test" })
MecabWorker.create({ url: "not-a-real-path" })
.then(() => {
done(new Error("Should not have created a worker"));
})
.catch((error) => {
expect(error).to.equal(
"Failed to fetch dictionary: not-a-real-path (404 Not Found)"
);
expect(error).to.have.length.above(0);
done();
});
});
Expand Down Expand Up @@ -72,39 +70,39 @@ describe("MecabWorker integration tests", function () {
});
expect(onLoadLog).has.length(9);
expect(onLoadLog[0]).to.deep.equal({
type: "unzip",
type: "network",
name: "ipadic-2.7.0_bin/",
});
expect(onLoadLog[1]).to.deep.equal({
type: "unzip",
type: "network",
name: "ipadic-2.7.0_bin/dicrc",
});
expect(onLoadLog[2]).to.deep.equal({
type: "unzip",
type: "network",
name: "ipadic-2.7.0_bin/README",
});
expect(onLoadLog[3]).to.deep.equal({
type: "unzip",
type: "network",
name: "ipadic-2.7.0_bin/unk.dic",
});
expect(onLoadLog[4]).to.deep.equal({
type: "unzip",
type: "network",
name: "ipadic-2.7.0_bin/AUTHORS",
});
expect(onLoadLog[5]).to.deep.equal({
type: "unzip",
type: "network",
name: "ipadic-2.7.0_bin/COPYING",
});
expect(onLoadLog[6]).to.deep.equal({
type: "unzip",
type: "network",
name: "ipadic-2.7.0_bin/sys.dic",
});
expect(onLoadLog[7]).to.deep.equal({
type: "unzip",
type: "network",
name: "ipadic-2.7.0_bin/matrix.bin",
});
expect(onLoadLog[8]).to.deep.equal({
type: "unzip",
type: "network",
name: "ipadic-2.7.0_bin/char.bin",
});
});
Expand Down Expand Up @@ -154,7 +152,7 @@ describe("MecabWorker integration tests", function () {
});

it("creates a worker with IPADIC and parses a string, inserting spaces", async function () {
const worker = await MecabWorker.create(IPADIC);
const worker = await MecabWorker.create(IPADIC, undefined, true);
const result = await worker.parse(
"青森県と秋田県にまたがり所在する十和田湖、御鼻部山展望台からの展望"
);
Expand All @@ -164,7 +162,7 @@ describe("MecabWorker integration tests", function () {
});

it("creates a worker with IPADIC and parses a string, returning a node for each word", async function () {
const worker = await MecabWorker.create(IPADIC);
const worker = await MecabWorker.create(IPADIC, undefined, true);
const nodes = await worker.parseToNodes(
"青森県と秋田県にまたがり所在する十和田湖、御鼻部山展望台からの展望"
);
Expand Down
14 changes: 6 additions & 8 deletions src/MecabWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ export interface MecabReady extends MecabDataType {
type: "ready";
}

export interface MecabUnzip extends MecabDataType {
type: "unzip";
export interface MecabNetwork extends MecabDataType {
type: "network";
name: string;
size: number;
total: number | null;
Expand Down Expand Up @@ -43,15 +43,14 @@ export type MecabData =
| MecabReady
| MecabParse
| MecabParseToNodes
| MecabUnzip
| MecabNetwork
| MecabCache
| MecabError;

export type MecabMessageEvent = MessageEvent<MecabData>;

/**
 * Request sent to the worker to load a dictionary; the worker handles it in
 * `initTagger` and answers with a "ready" message once the tagger exists.
 */
export interface MecabCallInit extends MecabDataType {
/** Discriminant identifying this message as the initialisation request. */
type: "init";
/** Name of the Cache Storage cache to use for the dictionary. */
cacheName: string;
/** Location of the dictionary zip file. */
url: string;
/** When true, the dictionary is always loaded from the network instead of from the cache. */
noCache: boolean;
}
Expand Down Expand Up @@ -94,7 +93,7 @@ export class MecabWorker<T extends Features | null = null> {
*/
static async create<T extends Features | null = null>(
dictionary: Dictionary<T>,
onLoad?: (message: MecabUnzip | MecabCache) => void,
onLoad?: (message: MecabNetwork | MecabCache) => void,
noCache = false
): Promise<MecabWorker<T>> {
const mecabWorker = new MecabWorker<T>(dictionary.wrapper, onLoad);
Expand All @@ -103,7 +102,7 @@ export class MecabWorker<T extends Features | null = null> {

constructor(
wrapper?: (feature: string[]) => T | null,
onLoad?: (message: MecabUnzip | MecabCache) => void
onLoad?: (message: MecabNetwork | MecabCache) => void
) {
if (!testModuleWorkerSupport()) {
throw new Error(
Expand All @@ -118,7 +117,7 @@ export class MecabWorker<T extends Features | null = null> {
type: "module",
});
this.worker.onmessage = (e: MecabMessageEvent) => {
if (onLoad && (e.data.type === "unzip" || e.data.type === "cache")) {
if (onLoad && (e.data.type === "network" || e.data.type === "cache")) {
onLoad(e.data);
}
const callback = this.pending.get(e.data.id);
Expand All @@ -135,7 +134,6 @@ export class MecabWorker<T extends Features | null = null> {
const message: MecabCallInit = {
id: this.counter,
type: "init",
cacheName: dictionary.cacheName,
url: dictionary.url,
noCache: noCache,
};
Expand Down
151 changes: 53 additions & 98 deletions src/mecab-worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import createModule from "./mecab.js";

import type {
MecabReady,
MecabUnzip,
MecabNetwork,
MecabCache,
MecabData,
MecabCallData,
Expand Down Expand Up @@ -52,11 +52,7 @@ onmessage = (e: MecabMessageCallEvent) => {
async function initTagger(data: MecabCallInit) {
try {
const Module = await createModule();
const files = await loadDictionaryFiles(
data.cacheName,
data.url,
data.noCache
);
const files = await loadDictionaryFiles(data.url, data.noCache);
mountDicionaryFiles(Module, files);
mecabTagger = new MecabTagger(Module);
const readyMessage: MecabReady = { id: data.id, type: "ready" };
Expand Down Expand Up @@ -235,110 +231,80 @@ function mountDicionaryFiles(Module: Module, files: File[]): void {
Module.FS.chdir("/mecab/" + baseDir);
}

/** Pairs a `Response` with the file path it represents, for moving files in and out of the cache. */
interface ResponseWithPath {
/** Path of the file; used as the `File` name and, prefixed with "/", as the cache key. */
pathname: string;
/** Response whose body holds the file contents. */
response: Response;
}

async function loadDictionaryFiles(
cacheName: string,
url: string,
noCache = false
noCache = false,
cacheName = "v0.2.2"
): Promise<File[]> {
if (noCache || !(await caches.has(cacheName))) {
return loadDictionaryFilesFromNetwork(cacheName, url, noCache);
} else {
return loadDictionaryFilesFromCache(cacheName);
return loadDictionaryFilesFromCache(cacheName, url);
}
}

/**
 * Rebuilds the dictionary files from every entry stored in the named cache.
 *
 * Entries are processed one after another; for each restored file a "cache"
 * message (with `total` set to `null`) is posted before the next entry is read.
 *
 * @param cacheName name of the Cache Storage cache to read
 * @returns one `File` per cache entry, named after the entry's URL path
 * without its leading slash
 * @throws Error "Cache is empty" when the cache has no entries
 */
async function loadDictionaryFilesFromCache(
  cacheName: string
): Promise<File[]> {
  const storage = tryForCachesApi();
  const cache = await storage.open(cacheName);
  const requests = await cache.keys();
  if (requests.length === 0) throw new Error("Cache is empty");

  const restored: File[] = [];
  for (const request of requests) {
    // Drop the leading "/" so the file name matches the path inside the archive.
    const pathname = new URL(request.url).pathname.slice(1);
    const response = (await cache.match(request))!;
    const file = await responseToFile({ pathname, response });

    const notice: MecabCache = {
      id: 0,
      type: "cache",
      name: file.name,
      size: file.size,
      total: null,
    };
    postMessage(notice);
    restored.push(file);
  }
  return restored;
}

/**
* Downloads the dictionary zip file from the given url, places the extracted files
* in the cache and returns the files as File objects.
*
* @param cacheName the name of the newly created cache
* @param url the url of the dictionary zip file
* @returns the extracted files
*/
async function loadDictionaryFilesFromNetwork(
cacheName: string,
url: string,
noCache: boolean
): Promise<File[]> {
const [stream, contentLength] = await unzipDictionary(url);
const reader = stream.getReader();
const files: File[] = [];
if (noCache) {
while (true) {
const { done, value } = await reader.read();
if (done) break;
const message: MecabUnzip = {
id: 0,
type: "unzip",
name: value.name,
size: value.size,
total: contentLength,
};
postMessage(message);
files.push(value);
}
if (files.length === 0) {
throw new Error("No files extracted");
}
const response = await fetch(url);
return collectFiles(response, "network");
} else {
const c = tryForCachesApi();
const cache = await c.open(cacheName);
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
const message: MecabUnzip = {
id: 0,
type: "unzip",
name: value.name,
size: value.size,
total: contentLength,
};
postMessage(message);
const { pathname, response } = fileToResponse(value);
await cache.put("/" + pathname, response);
files.push(value);
}
if (files.length === 0) {
throw new Error("No files extracted");
}
const cache = await c.open(cacheName);
await cache.add(url);
const response = await cache.match(url);
if (!response) throw new Error("Dictionary not cached: " + url);
return collectFiles(response, "network");
} catch (error) {
c.delete(cacheName);
throw error;
}
}
}

/**
 * Reads the dictionary zip stored under `url` in the named cache and returns
 * the files extracted from it; each extracted file is reported through a
 * "cache" message by `collectFiles`.
 *
 * If opening the cache, looking up the entry, or extracting the archive
 * fails, the whole named cache is deleted and the original error is rethrown.
 *
 * @param cacheName name of the Cache Storage cache to read from
 * @param url request url the dictionary zip was stored under
 * @returns the files extracted from the cached zip
 * @throws Error "Dictionary not cached: <url>" when the cache has no entry
 * for `url`; any error raised while opening the cache or extracting is
 * rethrown unchanged
 */
async function loadDictionaryFilesFromCache(
  cacheName: string,
  url: string
): Promise<File[]> {
  const c = tryForCachesApi();
  try {
    const cache = await c.open(cacheName);
    const response = await cache.match(url);
    // NOTE(review): loadDictionaryFiles routes here whenever the named cache
    // exists, so a cache that currently holds a different dictionary's zip
    // ends up in this miss branch (and is wiped below). Consider falling back
    // to the network instead of failing — confirm the intended behavior.
    if (!response) throw new Error("Dictionary not cached: " + url);
    // `await` is required: returning the bare promise would let a rejection
    // from collectFiles escape without passing through the catch block.
    return await collectFiles(response, "cache");
  } catch (error) {
    // Finish the cleanup before reporting the failure. A failing delete is
    // ignored so that the original error is the one the caller sees.
    await c.delete(cacheName).catch(() => false);
    throw error;
  }
}

/**
 * Extracts all files from a dictionary zip response.
 *
 * The response is handed to `unzipDictionary`; every file read from the
 * resulting stream is announced with a message of the given `type` and then
 * collected.
 *
 * @param response response containing the dictionary zip
 * @param type message type to report progress with ("network" or "cache")
 * @returns the extracted files, in the order the stream delivered them
 * @throws Error "No files extracted" when the stream yields no file
 */
async function collectFiles(
  response: Response,
  type: "network" | "cache"
): Promise<File[]> {
  const extracted: File[] = [];
  const [entries, totalSize] = await unzipDictionary(response);
  const entryReader = entries.getReader();

  let step = await entryReader.read();
  while (!step.done) {
    const file = step.value;
    const progress: MecabNetwork | MecabCache = {
      id: 0,
      type,
      name: file.name,
      size: file.size,
      total: totalSize,
    };
    postMessage(progress);
    extracted.push(file);
    step = await entryReader.read();
  }

  if (extracted.length > 0) return extracted;
  throw new Error("No files extracted");
}

Expand All @@ -352,15 +318,4 @@ function tryForCachesApi(): CacheStorage {
}
}

/**
 * Converts a cached response back into a `File`.
 *
 * @param entry response together with the path it was stored under
 * @returns a `File` holding the response body, named after `entry.pathname`
 */
async function responseToFile(entry: ResponseWithPath): Promise<File> {
  const body = await entry.response.blob();
  return new File([body], entry.pathname);
}

/**
 * Wraps a file in a `Response` so it can be stored in a cache.
 *
 * @param file file to wrap
 * @returns the file's name as `pathname` together with a response whose body is the file
 */
function fileToResponse(file: File): ResponseWithPath {
  const pathname = file.name;
  const response = new Response(file);
  return { pathname, response };
}

export type { MecabNode };
Loading

0 comments on commit 29d6fea

Please sign in to comment.