Skip to content
This repository has been archived by the owner on Mar 24, 2022. It is now read-only.

Commit

Permalink
refactor: Merge LocationInfoTokenizerMixin into Tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 committed Nov 24, 2021
1 parent 5639c01 commit 758f13c
Show file tree
Hide file tree
Showing 14 changed files with 218 additions and 309 deletions.
7 changes: 1 addition & 6 deletions packages/html-rewriting-stream/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import { EndTag, StartTag, Doctype, Text, Comment, SaxToken } from '../../sax-pa
import type { Token, Location } from '@parse5/parse5/lib/common/token.js';
import { SAXParser } from '@parse5/sax-parser/lib/index.js';
import { escapeString } from '@parse5/parse5/lib/serializer/index.js';
import type { Preprocessor } from '@parse5/parse5/lib/tokenizer/preprocessor.js';

/**
* Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML rewriter.
Expand Down Expand Up @@ -54,13 +53,9 @@ import type { Preprocessor } from '@parse5/parse5/lib/tokenizer/preprocessor.js'
* ```
*/
export class RewritingStream extends SAXParser {
posTracker: Preprocessor;

/** Note: The `sourceCodeLocationInfo` is always enabled. */
constructor() {
super({ sourceCodeLocationInfo: true });

this.posTracker = this.locInfoMixin!.posTracker;
}

override _transformChunk(chunk: string) {
Expand All @@ -71,7 +66,7 @@ export class RewritingStream extends SAXParser {
}

_getRawHtml(location: Location) {
const { droppedBufferSize } = this.posTracker;
const { droppedBufferSize } = this.tokenizer!.preprocessor;
const start = location.startOffset - droppedBufferSize;
const end = location.endOffset - droppedBufferSize;

Expand Down
6 changes: 3 additions & 3 deletions packages/parse5/lib/common/token.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ export interface Location {
}

export interface LocationWithAttributes extends Location {
attrs: Record<string, Location>;
attrs?: Record<string, Location>;
}

interface TokenBase {
readonly type: TokenType;
location?: Location;
location: Location | null;
}

export interface DoctypeToken extends TokenBase {
Expand Down Expand Up @@ -59,7 +59,7 @@ export interface TagToken extends TokenBase {
selfClosing: boolean;
ackSelfClosing: boolean;
attrs: Attribute[];
location?: LocationWithAttributes;
location: LocationWithAttributes | null;
}

export function getTokenAttr(token: TagToken, attrName: string) {
Expand Down
21 changes: 11 additions & 10 deletions packages/parse5/lib/extensions/error-reporting/mixin-base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,18 @@ export abstract class ErrorReportingMixinBase<Host extends ClassWithErrorReporti
this.onParseError = opts.onParseError;
}

_setErrorLocation(err: ParserError) {
err.startLine = err.endLine = this.posTracker.line;
err.startCol = err.endCol = this.posTracker.col;
err.startOffset = err.endOffset = this.posTracker.offset;
}

_reportError(code: ERR) {
const err = { ...BASE_ERROR, code };

this._setErrorLocation(err);
this.onParseError(err);
const { line, col, offset } = this.posTracker;

this.onParseError({
code,
startLine: line,
endLine: line,
startCol: col,
endCol: col,
startOffset: offset,
endOffset: offset,
});
}

override _getOverriddenMethods(mxn: ErrorReportingMixinBase<Host>, _originalMethods: Host): Partial<Host> {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { ErrorReportingMixinOptions, BASE_ERROR } from './mixin-base.js';
import { ErrorReportingTokenizerMixin } from './tokenizer-mixin.js';
import { LocationInfoTokenizerMixin } from '../location-info/tokenizer-mixin.js';
import { Mixin } from '../../utils/mixin.js';
import type { Location, Token } from '../../common/token.js';
import type { Parser } from '../../parser/index.js';
Expand Down Expand Up @@ -39,7 +38,6 @@ export class ErrorReportingParserMixin<T extends TreeAdapterTypeMap> extends Mix
orig._bootstrap.call(this, document, fragmentContext);

Mixin.install(this.tokenizer, ErrorReportingTokenizerMixin, mxn.opts);
Mixin.install(this.tokenizer, LocationInfoTokenizerMixin);
},

_processInputToken(token: Token) {
Expand Down
15 changes: 4 additions & 11 deletions packages/parse5/lib/extensions/location-info/parser-mixin.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import { CommentToken, DoctypeToken, CharacterToken } from '../../common/token';
import { Mixin } from '../../utils/mixin.js';
import { LocationInfoTokenizerMixin } from './tokenizer-mixin.js';
import { TAG_NAMES as $, NAMESPACES as NS } from '../../common/html.js';
import type { TreeAdapter, TreeAdapterTypeMap, ElementLocation } from '../../tree-adapters/interface';
import type { Preprocessor } from './../../tokenizer/preprocessor.js';
import type { Parser } from '../../parser/index.js';
import { TokenType, Token, TagToken } from '../../common/token.js';

export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin<Parser<T>> {
treeAdapter: TreeAdapter<T>;
posTracker: Preprocessor | null = null;
lastStartTagToken: null | TagToken = null;
lastFosterParentingLocation: null | ReturnType<Parser<T>['_findFosterParentingLocation']> = null;
currentToken: Token | null = null;
Expand All @@ -26,7 +23,7 @@ export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin
if (this.lastStartTagToken) {
loc = {
...this.lastStartTagToken.location!,
startTag: this.lastStartTagToken.location,
startTag: this.lastStartTagToken.location!,
};
}

Expand Down Expand Up @@ -70,9 +67,6 @@ export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin
mxn.lastFosterParentingLocation = null;
mxn.currentToken = null;

const tokenizerMixin = Mixin.install(this.tokenizer, LocationInfoTokenizerMixin);

mxn.posTracker = tokenizerMixin.posTracker;
this.openElements.onItemPop = (element) => mxn._setEndLocation(element, mxn.currentToken!);
},

Expand All @@ -98,11 +92,10 @@ export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin

//NOTE: <body> and <html> are never popped from the stack, so we need to update
//their end location explicitly.
const requireExplicitUpdate =
if (
token.type === TokenType.END_TAG &&
(token.tagName === $.HTML || (token.tagName === $.BODY && this.openElements.hasInScope($.BODY)));

if (requireExplicitUpdate) {
(token.tagName === $.HTML || (token.tagName === $.BODY && this.openElements.hasInScope($.BODY)))
) {
for (let i = this.openElements.stackTop; i >= 0; i--) {
const element = this.openElements.items[i];

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import * as assert from 'assert';
import { Tokenizer, TokenizerMode } from '../../tokenizer/index.js';
import { LocationInfoTokenizerMixin } from './tokenizer-mixin.js';
import { Mixin } from '../../utils/mixin.js';
import { TokenType } from './../../common/token.js';
import { getSubstringByLineCol, normalizeNewLine } from '../../../../../test/utils/common.js';

Expand Down Expand Up @@ -84,11 +82,9 @@ it('Location Info (Tokenizer)', () => {
testCases.forEach((testCase) => {
const html = testCase.htmlChunks.join('');
const lines = html.split(/\r?\n/g);
const tokenizer = new Tokenizer();
const tokenizer = new Tokenizer({ sourceCodeLocationInfo: true });
const lastChunkIdx = testCase.htmlChunks.length - 1;

Mixin.install(tokenizer, LocationInfoTokenizerMixin);

for (let i = 0; i < testCase.htmlChunks.length; i++) {
tokenizer.write(testCase.htmlChunks[i], i === lastChunkIdx);
}
Expand Down
143 changes: 0 additions & 143 deletions packages/parse5/lib/extensions/location-info/tokenizer-mixin.ts

This file was deleted.

1 change: 1 addition & 0 deletions packages/parse5/lib/parser/formatting-element-list.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ generateTestsForEachTreeAdapter('FormattingElementList', (treeAdapter) => {
ackSelfClosing: false,
selfClosing: false,
attrs: [],
location: null,
};
}

Expand Down
11 changes: 6 additions & 5 deletions packages/parse5/lib/parser/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,13 @@ export class Parser<T extends TreeAdapterTypeMap> {
this.treeAdapter = this.options.treeAdapter!;
this.pendingScript = null;

if (this.options.sourceCodeLocationInfo) {
Mixin.install(this, LocationInfoParserMixin as any);
}

if (this.options.onParseError) {
Mixin.install(this, ErrorReportingParserMixin as any, { onParseError: this.options.onParseError });
this.options.sourceCodeLocationInfo = true;
}

if (this.options.sourceCodeLocationInfo) {
Mixin.install(this, LocationInfoParserMixin as any);
}
}

Expand Down Expand Up @@ -219,7 +220,7 @@ export class Parser<T extends TreeAdapterTypeMap> {

//Bootstrap parser
_bootstrap(document: T['document'], fragmentContext: T['element'] | null) {
this.tokenizer = new Tokenizer();
this.tokenizer = new Tokenizer(this.options);

this.stopped = false;

Expand Down
2 changes: 1 addition & 1 deletion packages/parse5/lib/tokenizer/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { generateTokenizationTests } from '../../../../test/utils/generate-token
const dataPath = new URL('../../../../test/data/html5lib-tests/tokenizer', import.meta.url);

generateTokenizationTests('tokenizer', 'Tokenizer', dataPath.pathname, ({ errors }) => {
const tokenizer = new Tokenizer();
const tokenizer = new Tokenizer({ sourceCodeLocationInfo: true });

Mixin.install(tokenizer, ErrorReportingTokenizerMixin, {
onParseError(err) {
Expand Down

0 comments on commit 758f13c

Please sign in to comment.