Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Procedures/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ export { type RequestExecRegExp, type ResponseExecRegExp, createRequestExecRegEx
export { type RequestMatchAllRegExpArray, createRequestMatchRegExpArray } from './procMatchAllRegExpArray.js';
export { createRequestMatchRegExp, type RequestMatchRegExp } from './procMatchRegExp.js';
export { type RequestMatchAllRegExp, createRequestMatchAllRegExp } from './procMatchAllRegExp.js';
export { type RequestMatchAllRegExpAsRange, createRequestMatchAllRegExpAsRange } from './procMatchAllRegExpAsRange.js';
4 changes: 3 additions & 1 deletion src/Procedures/procMatchAllRegExp.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { describe, test, expect } from 'vitest';
import type { RequestMatchAllRegExp } from './procMatchAllRegExp.js';
import { createRequestMatchAllRegExp, procMatchAllRegExp, isMatchAllRegExpResponse } from './procMatchAllRegExp.js';
import type { Request } from './procedure.js';
import { isErrorResponse } from './procedure.js';
Expand All @@ -24,7 +25,8 @@ describe('procMatchAllRegExp', () => {
});

test('RequestExecRegExp bad regex', () => {
const req: Request = createRequestMatchAllRegExp({ text: 'two words', regexp: '/[/g' });
const req: RequestMatchAllRegExp = createRequestMatchAllRegExp({ text: 'two words', regexp: /\[/g });
Object.assign(req.data, { regexp: '/[/g' });
const result = procMatchAllRegExp(req);
const response = isErrorResponse(result) ? result : undefined;
expect(isMatchAllRegExpResponse(result)).toBe(false);
Expand Down
2 changes: 1 addition & 1 deletion src/Procedures/procMatchAllRegExp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export type MatchAllRegExpResponseType = MatchAllRegExpRequestType;

export interface RequestMatchAllRegExpData {
text: string;
regexp: RegExp | string;
regexp: RegExp;
}

export type RequestMatchAllRegExp = Request<MatchAllRegExpRequestType, RequestMatchAllRegExpData>;
Expand Down
44 changes: 44 additions & 0 deletions src/Procedures/procMatchAllRegExpAsRange.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import { describe, test, expect } from 'vitest';
import type { RequestMatchAllRegExpAsRange } from './procMatchAllRegExpAsRange.js';
import {
createRequestMatchAllRegExpAsRange,
procMatchAllRegExpAsRange,
isMatchAllRegExpAsRangeResponse,
} from './procMatchAllRegExpAsRange.js';
import type { Request } from './procedure.js';
import { isErrorResponse } from './procedure.js';
import { createId } from './uniqueId.js';

describe('procMatchAllRegExpAsRange', () => {
  // Happy path: the response carries the matches as a flat Uint32Array of
  // [start, end, start, end, ...] offsets, where end = start + match length.
  test('basic', () => {
    const text = 'two words';
    const regexp = /w\w+/g;
    const req = createRequestMatchAllRegExpAsRange({ text, regexp });
    const result = procMatchAllRegExpAsRange(req);
    expect(isMatchAllRegExpAsRangeResponse(result)).toBe(true);
    const response = isMatchAllRegExpAsRangeResponse(result) ? result : undefined;
    expect(response?.data.elapsedTimeMs).toBeGreaterThan(0);
    expect(response?.data.ranges).toEqual(
      Uint32Array.from(Array.from(text.matchAll(regexp)).flatMap((match) => [match.index, match.index + match[0].length])),
    );
  });

  // A request with a different requestType must be ignored by this procedure.
  test('non-RequestMatchAllRegExpAsRange', () => {
    const req: Request = { id: createId(), requestType: 'unknown', data: { text: 'two words', regexp: /w\w+/g } };
    const result = procMatchAllRegExpAsRange(req);
    expect(isMatchAllRegExpAsRangeResponse(result)).toBe(false);
    expect(result).toBeUndefined();
  });

  // Title fixed: this suite exercises RequestMatchAllRegExpAsRange, not RequestExecRegExp.
  test('RequestMatchAllRegExpAsRange bad regex', () => {
    const req: RequestMatchAllRegExpAsRange = createRequestMatchAllRegExpAsRange({ text: 'two words', regexp: /\[/g });
    // `regexp` is typed as RegExp, so the invalid pattern string is injected
    // after construction to get past the type checker.
    Object.assign(req.data, { regexp: '/[/g' });
    const result = procMatchAllRegExpAsRange(req);
    const response = isErrorResponse(result) ? result : undefined;
    expect(isMatchAllRegExpAsRangeResponse(result)).toBe(false);
    expect(isErrorResponse(result)).toBe(true);
    expect(response?.id).toBe(req.id);
    expect(response?.data.requestType).toBe(req.requestType);
    expect(response?.data.message).toContain('SyntaxError');
  });
});
55 changes: 55 additions & 0 deletions src/Procedures/procMatchAllRegExpAsRange.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { format } from 'util';
import type { MatchAllToRangesRegExpResult } from '../helpers/evaluateRegExp.js';
import { matchAllToRangesRegExp, toRegExp } from '../helpers/evaluateRegExp.js';
import type { ErrorResponse, Request, Response } from './procedure.js';
import { createErrorResponse, createRequest, createResponse, isRequestType, isResponseType } from './procedure.js';

/** Discriminator string identifying a "match all, return ranges" request. */
export const requestTypeMatchAllRegExpAsRange = 'MatchAllRegExpAsRange';
/** Literal type of the request discriminator. */
export type MatchAllRegExpAsRangeRequestType = typeof requestTypeMatchAllRegExpAsRange;
/** The response uses the same discriminator as the request. */
export type MatchAllRegExpAsRangeResponseType = MatchAllRegExpAsRangeRequestType;

/** Payload of a MatchAllRegExpAsRange request. */
export interface RequestMatchAllRegExpAsRangeData {
/** The text to search within. */
text: string;
/** The regular expression to match against `text`. */
regexp: RegExp;
}

/** A request whose `requestType` is `MatchAllRegExpAsRange` and whose data is {@link RequestMatchAllRegExpAsRangeData}. */
export type RequestMatchAllRegExpAsRange = Request<MatchAllRegExpAsRangeRequestType, RequestMatchAllRegExpAsRangeData>;
/** The matching response; its data is the result produced by `matchAllToRangesRegExp`. */
export type ResponseMatchAllRegExpAsRange = Response<MatchAllRegExpAsRangeResponseType, MatchAllToRangesRegExpResult>;

/**
 * Type guard for MatchAllRegExpAsRange requests.
 * Delegates to `isRequestType` with the `MatchAllRegExpAsRange` discriminator.
 * @param v - The value to test.
 * @returns `true` when `isRequestType` accepts `v` for this request type.
 */
export function isMatchAllRegExpAsRangeRequest(v: unknown): v is RequestMatchAllRegExpAsRange {
return isRequestType(v, requestTypeMatchAllRegExpAsRange);
}
/**
 * Type guard for MatchAllRegExpAsRange responses.
 * Delegates to `isResponseType` with the `MatchAllRegExpAsRange` discriminator.
 * @param v - The value to test.
 * @returns `true` when `isResponseType` accepts `v` for this response type.
 */
export function isMatchAllRegExpAsRangeResponse(v: unknown): v is ResponseMatchAllRegExpAsRange {
return isResponseType(v, requestTypeMatchAllRegExpAsRange);
}

/**
 * Procedure that handles `MatchAllRegExpAsRange` requests.
 *
 * Requests of any other type are not handled and yield `undefined`.
 * For a matching request the regexp is passed through `toRegExp`, the text is
 * matched with `matchAllToRangesRegExp`, and the result is wrapped in a response.
 * Anything thrown along the way is caught and returned as an error response
 * whose message is the `util.format` rendering of the thrown value.
 *
 * @param r - The incoming request.
 * @returns A response, an error response, or `undefined` when `r` is not a
 *   MatchAllRegExpAsRange request.
 */
export function procMatchAllRegExpAsRange(
  r: RequestMatchAllRegExpAsRange,
): ResponseMatchAllRegExpAsRange | ErrorResponse<MatchAllRegExpAsRangeResponseType>;
export function procMatchAllRegExpAsRange(
  r: Request,
): ResponseMatchAllRegExpAsRange | ErrorResponse<MatchAllRegExpAsRangeResponseType> | undefined;
export function procMatchAllRegExpAsRange(
  r: RequestMatchAllRegExpAsRange | Request,
): ResponseMatchAllRegExpAsRange | ErrorResponse | undefined {
  if (isMatchAllRegExpAsRangeRequest(r)) {
    try {
      const { text, regexp } = r.data;
      // NOTE(review): toRegExp presumably normalizes a non-RegExp value (e.g. a
      // pattern string) into a RegExp and throws on invalid input — confirm.
      const ranges = matchAllToRangesRegExp(text, toRegExp(regexp));
      return createResponseMatchAllRegExpAsRange(r, ranges);
    } catch (err) {
      return createErrorResponse(r, format(err));
    }
  }
  return undefined;
}

/**
 * Builds a MatchAllRegExpAsRange request via `createRequest`.
 * @param data - The text and regular expression to send.
 * @returns The request produced by `createRequest` for the `MatchAllRegExpAsRange` type.
 */
export function createRequestMatchAllRegExpAsRange(data: RequestMatchAllRegExpAsRange['data']): RequestMatchAllRegExpAsRange {
return createRequest(requestTypeMatchAllRegExpAsRange, data);
}

/**
 * Builds the response for a MatchAllRegExpAsRange request.
 * The request's `id` and `requestType` are passed on to `createResponse`.
 * @param request - The request being answered.
 * @param data - The match result to return.
 * @returns The response produced by `createResponse`.
 */
export function createResponseMatchAllRegExpAsRange(
request: RequestMatchAllRegExpAsRange,
data: ResponseMatchAllRegExpAsRange['data'],
): ResponseMatchAllRegExpAsRange {
return createResponse(request.id, request.requestType, data);
}
13 changes: 12 additions & 1 deletion src/Procedures/procedures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@ import type { Procedure } from './procedure.js';
import { procExecRegExp } from './procExecRegExp.js';
import { procMatchAllRegExp } from './procMatchAllRegExp.js';
import { procMatchAllRegExpArray } from './procMatchAllRegExpArray.js';
import { procMatchAllRegExpAsRange } from './procMatchAllRegExpAsRange.js';
import { procMatchRegExp } from './procMatchRegExp.js';

export const procedures: Procedure[] = [procExecRegExp, procMatchAllRegExp, procMatchAllRegExpArray, procMatchRegExp];
/**
 * Collection of all procedures related to regular expressions.
 * Order is important for the worker, as it processes them sequentially,
 * so the procedures expected to be used most often are listed first.
 */
export const procedures: Procedure[] = [
procMatchAllRegExp, // expected to be the most frequently used
procMatchAllRegExpAsRange,
procExecRegExp,
procMatchAllRegExpArray,
procMatchRegExp,
];
19 changes: 18 additions & 1 deletion src/RegExpWorker.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
import { describe, test, expect } from 'vitest';
import { RegExpWorker, workerExec, workerMatch, workerMatchAll, workerMatchAllArray, timeoutRejection } from './RegExpWorker.js';
import {
RegExpWorker,
workerExec,
workerMatch,
workerMatchAll,
workerMatchAllArray,
timeoutRejection,
workerMatchAllAsRangePairs,
} from './RegExpWorker.js';
import { TimeoutError } from './TimeoutError.js';
import { catchErrors } from './helpers/errors.js';

Expand Down Expand Up @@ -86,6 +94,15 @@ describe('RegExpWorker', () => {
const response = await workerMatchAllArray('Good Morning', [/\b\w+/g]);
expect(response.results.flatMap((r) => r.matches.map((m) => m[0]))).toEqual(['Good', 'Morning']);
});

// Each word match is reported as a [start, end) pair:
// 'Good' -> [0, 4], 'Morning' -> [5, 12], 'sunshine' -> [14, 22].
test('workerMatchAllAsRangePairs', async () => {
const response = await workerMatchAllAsRangePairs('Good Morning, sunshine.', /\b\w+/g);
expect(response.ranges).toEqual([
[0, 4],
[5, 12],
[14, 22],
]);
});
});

describe('timeoutRejection', () => {
Expand Down
72 changes: 68 additions & 4 deletions src/RegExpWorker.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
import type { ExecRegExpResult, MatchAllRegExpArrayResult, MatchRegExpResult } from './helpers/evaluateRegExp.js';
import type {
ExecRegExpResult,
MatchAllRegExpArrayResult,
MatchAllToRangesRegExpResult,
MatchRegExpResult,
} from './helpers/evaluateRegExp.js';
import { type MatchAllRegExpResult } from './helpers/evaluateRegExp.js';
import type { RequestMatchRegExp } from './Procedures/index.js';
import {
type RequestExecRegExp,
type RequestMatchAllRegExp,
type RequestMatchAllRegExpArray,
type RequestMatchAllRegExpAsRange,
type Response,
createRequestExecRegExp,
createRequestMatchAllRegExp,
createRequestMatchRegExp,
createRequestMatchRegExpArray,
} from './Procedures/index.js';
import { createRequestMatchAllRegExpAsRange } from './Procedures/procMatchAllRegExpAsRange.js';
import { Scheduler } from './scheduler/index.js';
import { isTimeoutErrorLike, TimeoutError } from './TimeoutError.js';
import type { CreateWorker } from './worker/di.js';
import { createWorker as defaultCreateWorker } from './worker/di.js';

export { toRegExp } from './helpers/evaluateRegExp.js';
export type { ExecRegExpResult, MatchAllRegExpArrayResult, MatchRegExpResult, MatchAllRegExpResult } from './helpers/evaluateRegExp.js';
export type { ExecRegExpResult, MatchAllRegExpArrayResult, MatchAllRegExpResult, MatchRegExpResult } from './helpers/evaluateRegExp.js';

export class RegExpWorker {
private scheduler: Scheduler;
Expand Down Expand Up @@ -71,14 +78,35 @@ export class RegExpWorker {
return this.makeRequest(req, timeLimitMs);
}

/**
 * Runs text.matchAll against a single RegExp in a worker and returns the matches as [start, end] range pairs.
 * @param text - The text to search within.
 * @param regexp - The regular expression to match against the text.
 * @param timeLimitMs - Optional time limit in milliseconds for the operation.
 * @returns The elapsed time reported with the result and the match ranges as [start, end] pairs.
 */
public async matchAllAsRangePairs(text: string, regexp: RegExp, timeLimitMs?: number): Promise<MatchAllAsRangePairsResult> {
const req = createRequestMatchAllRegExpAsRange({ regexp, text });
const result = await this.makeRequest(req, timeLimitMs);
return {
elapsedTimeMs: result.elapsedTimeMs,
// The result carries the ranges as one flat Uint32Array; regroup it into pairs.
ranges: mapToRanges(result.ranges),
};
}

private makeRequest(req: RequestExecRegExp, timeLimitMs: number | undefined): Promise<ExecRegExpResult>;
private makeRequest(req: RequestMatchRegExp, timeLimitMs: number | undefined): Promise<MatchRegExpResult>;
private makeRequest(req: RequestMatchAllRegExp, timeLimitMs: number | undefined): Promise<MatchAllRegExpResult>;
private makeRequest(req: RequestMatchAllRegExpAsRange, timeLimitMs: number | undefined): Promise<MatchAllToRangesRegExpResult>;
private makeRequest(req: RequestMatchAllRegExpArray, timeLimitMs: number | undefined): Promise<MatchAllRegExpArrayResult>;
private makeRequest(
req: RequestExecRegExp | RequestMatchAllRegExp | RequestMatchRegExp | RequestMatchAllRegExpArray,
req: RequestExecRegExp | RequestMatchAllRegExp | RequestMatchAllRegExpArray | RequestMatchAllRegExpAsRange | RequestMatchRegExp,
timeLimitMs: number | undefined,
): Promise<ExecRegExpResult> | Promise<MatchRegExpResult> | Promise<MatchAllRegExpResult> | Promise<MatchAllRegExpArrayResult> {
):
| Promise<ExecRegExpResult>
| Promise<MatchAllRegExpArrayResult>
| Promise<MatchAllRegExpResult>
| Promise<MatchAllToRangesRegExpResult>
| Promise<MatchRegExpResult> {
return this.scheduler.scheduleRequest(req, timeLimitMs).then(extractResult, timeoutRejection) as Promise<MatchAllRegExpResult>;
}

Expand Down Expand Up @@ -120,6 +148,17 @@ export async function workerMatchAll(text: string, regExp: RegExp, timeLimitMs?:
return await worker.matchAll(text, regExp, timeLimitMs);
}

/**
 * Run text.matchAll against a RegExp in a worker and return the matches as [start, end] range pairs.
 * A new RegExpWorker is created for each call.
 * @param text - The text to search within.
 * @param regExp - The regular expression to match against the text.
 * @param timeLimitMs - Optional time limit in milliseconds for the operation.
 * @returns The result of `RegExpWorker.matchAllAsRangePairs` for the given arguments.
 */
export async function workerMatchAllAsRangePairs(text: string, regExp: RegExp, timeLimitMs?: number): Promise<MatchAllAsRangePairsResult> {
// NOTE(review): the worker created here is not disposed in this function — confirm cleanup happens elsewhere.
const worker = new RegExpWorker();
return await worker.matchAllAsRangePairs(text, regExp, timeLimitMs);
}

/**
* Runs text.matchAll against an array of RegExps in a worker.
* @param text - The text to search within.
Expand Down Expand Up @@ -152,3 +191,28 @@ export async function workerMatch(text: string, regExp: RegExp, timeLimitMs?: nu
const worker = new RegExpWorker();
return await worker.match(text, regExp, timeLimitMs);
}

/**
 * Each range is represented as a tuple of [start, end] indices.
 * The start index is inclusive, and the end index is exclusive.
 */
export type RangePair = [start: number, end: number];

/**
 * Result of a "match all" request whose matches are reported as range pairs.
 */
export interface MatchAllAsRangePairsResult {
/**
 * Elapsed time in milliseconds, copied from the underlying worker result.
 * NOTE(review): presumably the time spent evaluating the RegExp — confirm.
 */
elapsedTimeMs: number;
/**
 * The ranges of matches in the text.
 * Each range is represented as a tuple of [start, end] indices.
 * The start index is inclusive, and the end index is exclusive.
 */
ranges: RangePair[];
}

/**
 * Regroups a flat `[start0, end0, start1, end1, ...]` array into `[start, end]` pairs.
 *
 * Only complete pairs are emitted: if `flatRange` has an odd number of
 * elements, the trailing unpaired value is ignored. (Previously an odd length
 * produced a fractional array size and an opaque `RangeError: Invalid array length`.)
 *
 * @param flatRange - Flattened range boundaries, two entries per match.
 * @returns One `RangePair` per complete pair, in input order.
 */
function mapToRanges(flatRange: Uint32Array): RangePair[] {
  // Floor so the array length is always a valid non-negative integer.
  const pairCount = Math.floor(flatRange.length / 2);
  const result: RangePair[] = new Array<RangePair>(pairCount);
  for (let pair = 0; pair < pairCount; pair++) {
    const offset = pair * 2;
    result[pair] = [flatRange[offset], flatRange[offset + 1]];
  }
  return result;
}
Loading