Skip to content
This repository has been archived by the owner on Mar 24, 2022. It is now read-only.

Commit

Permalink
refactor: Move ErrorReportingParserMixin into parser
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 committed Nov 24, 2021
1 parent 54e1feb commit 269ef1c
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 108 deletions.
51 changes: 0 additions & 51 deletions packages/parse5/lib/extensions/error-reporting/parser-mixin.ts

This file was deleted.

53 changes: 24 additions & 29 deletions packages/parse5/lib/extensions/location-info/parser-mixin.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,17 @@
import { CommentToken, DoctypeToken, CharacterToken } from '../../common/token';
import { Mixin } from '../../utils/mixin.js';
import { TAG_NAMES as $, NAMESPACES as NS } from '../../common/html.js';
import type { TreeAdapter, TreeAdapterTypeMap, ElementLocation } from '../../tree-adapters/interface';
import type { TreeAdapterTypeMap, ElementLocation } from '../../tree-adapters/interface';
import type { Parser } from '../../parser/index.js';
import { TokenType, Token, TagToken } from '../../common/token.js';

export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin<Parser<T>> {
treeAdapter: TreeAdapter<T>;
lastStartTagToken: null | TagToken = null;
lastFosterParentingLocation: null | ReturnType<Parser<T>['_findFosterParentingLocation']> = null;
currentToken: Token | null = null;

constructor(parser: Parser<T>) {
constructor(private parser: Parser<T>) {
super(parser);

this.treeAdapter = parser.treeAdapter;
}

_setStartLocation(element: T['element']) {
Expand All @@ -27,34 +24,32 @@ export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin
};
}

this.treeAdapter.setNodeSourceCodeLocation(element, loc);
this.parser.treeAdapter.setNodeSourceCodeLocation(element, loc);
}

_setEndLocation(element: T['element'], closingToken: Token) {
const loc = this.treeAdapter.getNodeSourceCodeLocation(element);

if (loc) {
if (closingToken.location) {
const ctLoc = closingToken.location;
const tn = this.treeAdapter.getTagName(element);

// NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing
// tag and for cases like <td> <p> </td> - 'p' closes without a closing tag.
const isClosingEndTag = closingToken.type === TokenType.END_TAG && tn === closingToken.tagName;
const endLoc: Partial<ElementLocation> = {};
if (isClosingEndTag) {
endLoc.endTag = { ...ctLoc };
endLoc.endLine = ctLoc.endLine;
endLoc.endCol = ctLoc.endCol;
endLoc.endOffset = ctLoc.endOffset;
} else {
endLoc.endLine = ctLoc.startLine;
endLoc.endCol = ctLoc.startCol;
endLoc.endOffset = ctLoc.startOffset;
}

this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc);
const loc = this.parser.treeAdapter.getNodeSourceCodeLocation(element);

if (loc && closingToken.location) {
const ctLoc = closingToken.location;
const tn = this.parser.treeAdapter.getTagName(element);

// NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing
// tag and for cases like <td> <p> </td> - 'p' closes without a closing tag.
const isClosingEndTag = closingToken.type === TokenType.END_TAG && tn === closingToken.tagName;
const endLoc: Partial<ElementLocation> = {};
if (isClosingEndTag) {
endLoc.endTag = { ...ctLoc };
endLoc.endLine = ctLoc.endLine;
endLoc.endCol = ctLoc.endCol;
endLoc.endOffset = ctLoc.endOffset;
} else {
endLoc.endLine = ctLoc.startLine;
endLoc.endCol = ctLoc.startCol;
endLoc.endOffset = ctLoc.startOffset;
}

this.parser.treeAdapter.updateNodeSourceCodeLocation(element, endLoc);
}
}

Expand Down
78 changes: 50 additions & 28 deletions packages/parse5/lib/parser/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@ import { Tokenizer, TokenizerMode } from '../tokenizer/index.js';
import { OpenElementStack } from './open-element-stack.js';
import { FormattingElementList, ElementEntry, EntryType } from './formatting-element-list.js';
import { LocationInfoParserMixin } from '../extensions/location-info/parser-mixin.js';
import { ErrorReportingParserMixin } from '../extensions/error-reporting/parser-mixin.js';
import { Mixin } from '../utils/mixin.js';
import * as defaultTreeAdapter from '../tree-adapters/default.js';
import * as doctype from '../common/doctype.js';
import * as foreignContent from '../common/foreign-content.js';
import { ERR } from '../common/error-codes.js';
import { ERR, ParserErrorHandler } from '../common/error-codes.js';
import * as unicode from '../common/unicode.js';
import {
TAG_NAMES as $,
Expand All @@ -18,7 +17,6 @@ import {
isNumberedHeader,
} from '../common/html.js';
import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface.js';
import type { ParserError } from '../common/error-codes.js';
import {
TokenType,
getTokenAttr,
Expand Down Expand Up @@ -89,6 +87,15 @@ const TEMPLATE_INSERTION_MODE_SWITCH_MAP = new Map<string, InsertionMode>([
[$.TH, InsertionMode.IN_ROW],
]);

/**
 * Placeholder location in which every position is `-1`.
 *
 * Used by `Parser#_err` as the fallback when the token being reported carries
 * no `location`. The object is shared by every such report, so it is frozen to
 * guarantee that no consumer can accidentally alter the sentinel values.
 */
const BASE_LOC = Object.freeze({
    startLine: -1,
    startCol: -1,
    startOffset: -1,
    endLine: -1,
    endCol: -1,
    endOffset: -1,
});

const TABLE_STRUCTURE_TAGS = new Set<string>([$.TABLE, $.TBODY, $.TFOOT, $.THEAD, $.TR]);

export interface ParserOptions<T extends TreeAdapterTypeMap> {
Expand Down Expand Up @@ -124,14 +131,15 @@ export interface ParserOptions<T extends TreeAdapterTypeMap> {
*
* @default `null`
*/
onParseError?: ((err: ParserError) => void) | null;
onParseError?: ParserErrorHandler | null;
}

//Parser
export class Parser<T extends TreeAdapterTypeMap> {
options: ParserOptions<T>;
treeAdapter: TreeAdapter<T>;
pendingScript: null | T['element'];
private onParseError: ParserErrorHandler | null;

constructor(options?: ParserOptions<T>) {
this.options = {
Expand All @@ -145,8 +153,9 @@ export class Parser<T extends TreeAdapterTypeMap> {
this.treeAdapter = this.options.treeAdapter!;
this.pendingScript = null;

this.onParseError = this.options.onParseError ?? null;

if (this.options.onParseError) {
Mixin.install(this, ErrorReportingParserMixin as any, { onParseError: this.options.onParseError });
this.options.sourceCodeLocationInfo = true;
}

Expand Down Expand Up @@ -255,8 +264,21 @@ export class Parser<T extends TreeAdapterTypeMap> {
}

//Errors
_err(_err: ERR, _opts?: { beforeToken: boolean }) {
// NOTE: err reporting is noop by default. Enabled by mixin.
/**
 * Reports a parse error through the `onParseError` handler; does nothing when
 * no handler is set.
 *
 * @param token Token whose location is attached to the error. When the token
 *   carries no location, `BASE_LOC` is used in its place.
 * @param code Error code to report.
 * @param beforeToken When truthy, the reported end position is the token's
 *   start position rather than its end position.
 */
_err(token: Token, code: ERR, beforeToken?: boolean) {
    if (this.onParseError) {
        const { startLine, startCol, startOffset, endLine, endCol, endOffset } = token.location ?? BASE_LOC;

        // The handler is invoked as a method so its `this` binding stays the parser.
        this.onParseError(
            beforeToken
                ? {
                      code,
                      startLine,
                      startCol,
                      startOffset,
                      // Collapse the range onto the token's start.
                      endLine: startLine,
                      endCol: startCol,
                      endOffset: startOffset,
                  }
                : { code, startLine, startCol, startOffset, endLine, endCol, endOffset }
        );
    }
}

//Parsing loop
Expand Down Expand Up @@ -572,7 +594,7 @@ export class Parser<T extends TreeAdapterTypeMap> {
}

if (token.type === TokenType.START_TAG && token.selfClosing && !token.ackSelfClosing) {
this._err(ERR.nonVoidHtmlElementStartTagWithTrailingSolidus);
this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus);
}
}

Expand Down Expand Up @@ -911,8 +933,8 @@ function callAdoptionAgency<T extends TreeAdapterTypeMap>(p: Parser<T>, token: T
//Generic token handlers
//------------------------------------------------------------------

function misplacedDoctype<T extends TreeAdapterTypeMap>(p: Parser<T>) {
p._err(ERR.misplacedDoctype);
/**
 * Reports a `misplacedDoctype` parse error for the given DOCTYPE token.
 *
 * Shared by the insertion-mode handlers (e.g. before head, in head, after head)
 * that receive a DOCTYPE token; the token is forwarded to `p._err` together
 * with the error code.
 */
function misplacedDoctype<T extends TreeAdapterTypeMap>(p: Parser<T>, token: DoctypeToken) {
p._err(token, ERR.misplacedDoctype);
}

function appendComment<T extends TreeAdapterTypeMap>(p: Parser<T>, token: CommentToken) {
Expand Down Expand Up @@ -949,7 +971,7 @@ function doctypeInInitialMode<T extends TreeAdapterTypeMap>(p: Parser<T>, token:
const mode = token.forceQuirks ? DOCUMENT_MODE.QUIRKS : doctype.getDocumentMode(token);

if (!doctype.isConforming(token)) {
p._err(ERR.nonConformingDoctype);
p._err(token, ERR.nonConformingDoctype);
}

p.treeAdapter.setDocumentMode(p.document, mode);
Expand All @@ -958,7 +980,7 @@ function doctypeInInitialMode<T extends TreeAdapterTypeMap>(p: Parser<T>, token:
}

function tokenInInitialMode<T extends TreeAdapterTypeMap>(p: Parser<T>, token: Token) {
p._err(ERR.missingDoctype, { beforeToken: true });
p._err(token, ERR.missingDoctype, true);
p.treeAdapter.setDocumentMode(p.document, DOCUMENT_MODE.QUIRKS);
p.insertionMode = InsertionMode.BEFORE_HTML;
p._processToken(token);
Expand Down Expand Up @@ -1009,7 +1031,7 @@ function modeBeforeHead<T extends TreeAdapterTypeMap>(p: Parser<T>, token: Token
} else if (token.type === TokenType.COMMENT) {
appendComment(p, token);
} else if (token.type === TokenType.DOCTYPE) {
misplacedDoctype(p);
misplacedDoctype(p, token);
} else if (token.type === TokenType.START_TAG) {
startTagBeforeHead(p, token);
} else if (token.type === TokenType.END_TAG) {
Expand Down Expand Up @@ -1037,7 +1059,7 @@ function endTagBeforeHead<T extends TreeAdapterTypeMap>(p: Parser<T>, token: Tag
if (tn === $.HEAD || tn === $.BODY || tn === $.HTML || tn === $.BR) {
tokenBeforeHead(p, token);
} else {
p._err(ERR.endTagWithoutMatchingOpenElement);
p._err(token, ERR.endTagWithoutMatchingOpenElement);
}
}

Expand All @@ -1058,7 +1080,7 @@ function modeInHead<T extends TreeAdapterTypeMap>(p: Parser<T>, token: Token) {
} else if (token.type === TokenType.COMMENT) {
appendComment(p, token);
} else if (token.type === TokenType.DOCTYPE) {
misplacedDoctype(p);
misplacedDoctype(p, token);
} else if (token.type === TokenType.START_TAG) {
startTagInHead(p, token);
} else if (token.type === TokenType.END_TAG) {
Expand Down Expand Up @@ -1094,7 +1116,7 @@ function startTagInHead<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagTo
p.insertionMode = InsertionMode.IN_TEMPLATE;
p.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
} else if (tn === $.HEAD) {
p._err(ERR.misplacedStartTagForHeadElement);
p._err(token, ERR.misplacedStartTagForHeadElement);
} else {
tokenInHead(p, token);
}
Expand All @@ -1113,18 +1135,18 @@ function endTagInHead<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagToke
p.openElements.generateImpliedEndTagsThoroughly();

if (p.openElements.currentTagName !== $.TEMPLATE) {
p._err(ERR.closingOfElementWithOpenChildElements);
p._err(token, ERR.closingOfElementWithOpenChildElements);
}

p.openElements.popUntilTagNamePopped($.TEMPLATE);
p.activeFormattingElements.clearToLastMarker();
p.tmplInsertionModeStack.shift();
p._resetInsertionMode();
} else {
p._err(ERR.endTagWithoutMatchingOpenElement);
p._err(token, ERR.endTagWithoutMatchingOpenElement);
}
} else {
p._err(ERR.endTagWithoutMatchingOpenElement);
p._err(token, ERR.endTagWithoutMatchingOpenElement);
}
}

Expand All @@ -1144,7 +1166,7 @@ function modeInHeadNoScript<T extends TreeAdapterTypeMap>(p: Parser<T>, token: T
} else if (token.type === TokenType.COMMENT) {
appendComment(p, token);
} else if (token.type === TokenType.DOCTYPE) {
misplacedDoctype(p);
misplacedDoctype(p, token);
} else if (token.type === TokenType.START_TAG) {
startTagInHeadNoScript(p, token);
} else if (token.type === TokenType.END_TAG) {
Expand All @@ -1168,7 +1190,7 @@ function startTagInHeadNoScript<T extends TreeAdapterTypeMap>(p: Parser<T>, toke
) {
startTagInHead(p, token);
} else if (tn === $.NOSCRIPT) {
p._err(ERR.nestedNoscriptInHead);
p._err(token, ERR.nestedNoscriptInHead);
} else {
tokenInHeadNoScript(p, token);
}
Expand All @@ -1183,14 +1205,14 @@ function endTagInHeadNoScript<T extends TreeAdapterTypeMap>(p: Parser<T>, token:
} else if (tn === $.BR) {
tokenInHeadNoScript(p, token);
} else {
p._err(ERR.endTagWithoutMatchingOpenElement);
p._err(token, ERR.endTagWithoutMatchingOpenElement);
}
}

function tokenInHeadNoScript<T extends TreeAdapterTypeMap>(p: Parser<T>, token: Token) {
const errCode = token.type === TokenType.EOF ? ERR.openElementsLeftAfterEof : ERR.disallowedContentInNoscriptInHead;

p._err(errCode);
p._err(token, errCode);
p.openElements.pop();
p.insertionMode = InsertionMode.IN_HEAD;
p._processToken(token);
Expand All @@ -1206,7 +1228,7 @@ function modeAfterHead<T extends TreeAdapterTypeMap>(p: Parser<T>, token: Token)
} else if (token.type === TokenType.COMMENT) {
appendComment(p, token);
} else if (token.type === TokenType.DOCTYPE) {
misplacedDoctype(p);
misplacedDoctype(p, token);
} else if (token.type === TokenType.START_TAG) {
startTagAfterHead(p, token);
} else if (token.type === TokenType.END_TAG) {
Expand Down Expand Up @@ -1240,12 +1262,12 @@ function startTagAfterHead<T extends TreeAdapterTypeMap>(p: Parser<T>, token: Ta
p._insertElement(token, NS.HTML);
p.insertionMode = InsertionMode.IN_FRAMESET;
} else if (ABANDONED_HEAD_ELEMENT_CHILDS.has(tn)) {
p._err(ERR.abandonedHeadElementChild);
p._err(token, ERR.abandonedHeadElementChild);
p.openElements.push(p.headElement!);
startTagInHead(p, token);
p.openElements.remove(p.headElement!);
} else if (tn === $.HEAD) {
p._err(ERR.misplacedStartTagForHeadElement);
p._err(token, ERR.misplacedStartTagForHeadElement);
} else {
tokenAfterHead(p, token);
}
Expand All @@ -1259,7 +1281,7 @@ function endTagAfterHead<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagT
} else if (tn === $.TEMPLATE) {
endTagInHead(p, token);
} else {
p._err(ERR.endTagWithoutMatchingOpenElement);
p._err(token, ERR.endTagWithoutMatchingOpenElement);
}
}

Expand Down Expand Up @@ -2126,7 +2148,7 @@ function endTagInText<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagToke
}

function eofInText<T extends TreeAdapterTypeMap>(p: Parser<T>, token: Token) {
p._err(ERR.eofInElementThatCanContainOnlyText);
p._err(token, ERR.eofInElementThatCanContainOnlyText);
p.openElements.pop();
p.insertionMode = p.originalInsertionMode;
p._processToken(token);
Expand Down

0 comments on commit 269ef1c

Please sign in to comment.