From b5de926dd0d8d3425fc79fc08bcae339f78eb55f Mon Sep 17 00:00:00 2001 From: "Rodolfo M. Raya" Date: Fri, 17 Nov 2023 17:11:44 -0300 Subject: [PATCH 01/11] Added FileReader --- package.json | 4 ++-- ts/FileReader.ts | 47 +++++++++++++++++++++++++++++++++++++++++++++++ ts/index.ts | 1 + 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 ts/FileReader.ts diff --git a/package.json b/package.json index 5fcc5c6..aa7ad78 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,7 @@ "url": "https://github.com/rmraya/TypesXML.git" }, "devDependencies": { - "@types/node": "^18.11.19", - "typescript": "^4.9.5" + "@types/node": "^20.9.1", + "typescript": "^5.2.2" } } \ No newline at end of file diff --git a/ts/FileReader.ts b/ts/FileReader.ts new file mode 100644 index 0000000..3e5831b --- /dev/null +++ b/ts/FileReader.ts @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (c) 2023 Maxprograms. + * + * This program and the accompanying materials + * are made available under the terms of the Eclipse License 1.0 + * which accompanies this distribution, and is available at + * https://www.eclipse.org/org/documents/epl-v10.html + * + * Contributors: + * Maxprograms - initial API and implementation + *******************************************************************************/ + +import { openSync, readSync, closeSync, statSync, Stats } from "fs"; + +export class FileReader { + + fileHandle: number; + encoding: BufferEncoding; + blockSize: number; + fileSize: number; + position: number; + + constructor(path: string, encoding: BufferEncoding) { + let stats: Stats = statSync(path, { bigint: false, throwIfNoEntry: true }); + this.fileSize = stats.size; + this.blockSize = stats.blksize; + this.fileHandle = openSync(path, 'r'); + this.encoding = encoding; + this.position = 0; + } + + readData(): string { + let buffer: Buffer = Buffer.alloc(this.blockSize, this.encoding); + let amount: number = 
this.blockSize <= this.fileSize - this.position ? this.blockSize : this.fileSize - this.position; + let bytesRead: number = readSync(this.fileHandle, buffer, 0, amount, this.position); + this.position += bytesRead; + return buffer.toString(this.encoding, 0, bytesRead); + } + + dataAvailable(): boolean { + return this.position < this.fileSize; + } + + closeFile(): void { + closeSync(this.fileHandle); + } +} \ No newline at end of file diff --git a/ts/index.ts b/ts/index.ts index 02fa83d..e158cd6 100644 --- a/ts/index.ts +++ b/ts/index.ts @@ -19,6 +19,7 @@ export { AttlistDecl } from "./dtd/AttlistDecl"; export { DTDParser } from "./dtd/DTDParser"; export { ElementDecl } from "./dtd/ElementDecl"; export { EntityDecl } from "./dtd/EntityDecl"; +export { FileReader } from "./FileReader"; export { InternalSubset } from "./dtd/InternalSubset"; export { NotationDecl } from "./dtd/NotationDecl"; export { ContentModel } from "./grammar/ContentModel"; From b3d7bb9e9c8750b8a4803263e6ada8a09f907f6a Mon Sep 17 00:00:00 2001 From: "Rodolfo M. 
Raya" Date: Sat, 18 Nov 2023 07:29:17 -0300 Subject: [PATCH 02/11] Detected encoding from BOM --- ts/FileReader.ts | 38 ++++++++++++++++++++++++++++++++++++-- ts/XMLUtils.ts | 4 +++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/ts/FileReader.ts b/ts/FileReader.ts index 3e5831b..49dd82a 100644 --- a/ts/FileReader.ts +++ b/ts/FileReader.ts @@ -20,15 +20,49 @@ export class FileReader { fileSize: number; position: number; - constructor(path: string, encoding: BufferEncoding) { + constructor(path: string, encoding?: BufferEncoding) { let stats: Stats = statSync(path, { bigint: false, throwIfNoEntry: true }); this.fileSize = stats.size; this.blockSize = stats.blksize; this.fileHandle = openSync(path, 'r'); - this.encoding = encoding; + if (encoding) { + this.encoding = encoding; + } else { + this.encoding = this.detectEncoding(path); + } this.position = 0; } + detectEncoding(path: string): BufferEncoding { + const fd = openSync(path, "r"); + let buffer = Buffer.alloc(3); + let bytesRead: number = readSync(fd, buffer, 0, 3, 0); + closeSync(fd); + + if (bytesRead < 2) { + throw new Error("File too small to detect encoding"); + } + + const UTF8 = Buffer.from([-17, -69, -65]); + const UTF16 = Buffer.from([-2, -1]); + + if (buffer.toString().startsWith(UTF8.toString())) { + return 'utf8'; + } + if (buffer.toString().startsWith(UTF16.toString())) { + return 'utf16le'; + } + return "utf8"; + } + + getEncoding(): BufferEncoding { + return this.encoding; + } + + setEncoding(encoding: BufferEncoding): void { + this.encoding = encoding; + } + readData(): string { let buffer: Buffer = Buffer.alloc(this.blockSize, this.encoding); let amount: number = this.blockSize <= this.fileSize - this.position ? 
this.blockSize : this.fileSize - this.position; diff --git a/ts/XMLUtils.ts b/ts/XMLUtils.ts index 26b0adc..885560e 100644 --- a/ts/XMLUtils.ts +++ b/ts/XMLUtils.ts @@ -12,6 +12,8 @@ export class XMLUtils { + static SPACES: string = ' \t\r\n'; + static cleanString(text: string): string { let result: string = text.replace('&', '&'); return result.replace('<', '<'); @@ -27,7 +29,7 @@ export class XMLUtils { } static isXmlSpace(char: string): boolean { - return char.charCodeAt(0) === 0x20 || char.charCodeAt(0) === 0x9 || char.charCodeAt(0) === 0xA; + return this.SPACES.indexOf(char) > -1; } static normalizeSpaces(text: string): string { From e009f3fce52cc3ad38ae6a59054bf4503dd0b239 Mon Sep 17 00:00:00 2001 From: "Rodolfo M. Raya" Date: Sat, 18 Nov 2023 07:45:36 -0300 Subject: [PATCH 03/11] Code cleanup --- ts/FileReader.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ts/FileReader.ts b/ts/FileReader.ts index 49dd82a..0ce4652 100644 --- a/ts/FileReader.ts +++ b/ts/FileReader.ts @@ -34,13 +34,13 @@ export class FileReader { } detectEncoding(path: string): BufferEncoding { - const fd = openSync(path, "r"); + const fd = openSync(path, 'r'); let buffer = Buffer.alloc(3); let bytesRead: number = readSync(fd, buffer, 0, 3, 0); closeSync(fd); if (bytesRead < 2) { - throw new Error("File too small to detect encoding"); + throw new Error('Error reading BOM: not enough bytes'); } const UTF8 = Buffer.from([-17, -69, -65]); @@ -52,7 +52,7 @@ export class FileReader { if (buffer.toString().startsWith(UTF16.toString())) { return 'utf16le'; } - return "utf8"; + return 'utf8'; } getEncoding(): BufferEncoding { From 2f0afab8fa18b2a72e48b051288293a86ba72bf2 Mon Sep 17 00:00:00 2001 From: "Rodolfo M. 
Raya" Date: Sat, 18 Nov 2023 07:46:25 -0300 Subject: [PATCH 04/11] Code cleanup --- ts/FileReader.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ts/FileReader.ts b/ts/FileReader.ts index 0ce4652..6ef4946 100644 --- a/ts/FileReader.ts +++ b/ts/FileReader.ts @@ -39,7 +39,7 @@ export class FileReader { let bytesRead: number = readSync(fd, buffer, 0, 3, 0); closeSync(fd); - if (bytesRead < 2) { + if (bytesRead < 3) { throw new Error('Error reading BOM: not enough bytes'); } From 3d9eeb70df7e81ac6b74bfdcb0b64f4a1f3a496e Mon Sep 17 00:00:00 2001 From: "Rodolfo M. Raya" Date: Sun, 19 Nov 2023 20:05:09 -0300 Subject: [PATCH 05/11] Implemented SAXParser --- package.json | 2 +- target/npmlist.json | 1 - ts/Constants.ts | 4 - ts/ContentHandler.ts | 38 +++ ts/DOMBuilder.ts | 132 ++++++++++ ts/DocumentType.ts | 116 --------- ts/FileReader.ts | 31 ++- ts/ProcessingInstruction.ts | 38 +-- ts/SAXParser.ts | 488 ++++++++++++++++++++++++++++++++++++ ts/XMLComment.ts | 10 +- ts/XMLDeclaration.ts | 61 +---- ts/XMLDocument.ts | 63 +++-- ts/XMLDocumentType.ts | 78 ++++++ ts/XMLNode.ts | 1 - ts/XMLParser.ts | 340 ------------------------- ts/XMLUtils.ts | 3 +- ts/XMLWriter.ts | 2 +- ts/dtd/AttDecl.ts | 42 ---- ts/dtd/AttlistDecl.ts | 92 ------- ts/dtd/DTDParser.ts | 122 --------- ts/dtd/ElementDecl.ts | 54 ---- ts/dtd/EntityDecl.ts | 76 ------ ts/dtd/InternalSubset.ts | 139 ---------- ts/dtd/NotationDecl.ts | 54 ---- ts/grammar/ContentModel.ts | 24 -- ts/grammar/Grammar.ts | 34 --- ts/index.ts | 17 +- 27 files changed, 820 insertions(+), 1242 deletions(-) delete mode 100644 target/npmlist.json create mode 100644 ts/ContentHandler.ts create mode 100644 ts/DOMBuilder.ts delete mode 100644 ts/DocumentType.ts create mode 100644 ts/SAXParser.ts create mode 100644 ts/XMLDocumentType.ts delete mode 100644 ts/XMLParser.ts delete mode 100644 ts/dtd/AttDecl.ts delete mode 100644 ts/dtd/AttlistDecl.ts delete mode 100644 ts/dtd/DTDParser.ts delete mode 100644 
ts/dtd/ElementDecl.ts delete mode 100644 ts/dtd/EntityDecl.ts delete mode 100644 ts/dtd/InternalSubset.ts delete mode 100644 ts/dtd/NotationDecl.ts delete mode 100644 ts/grammar/ContentModel.ts delete mode 100644 ts/grammar/Grammar.ts diff --git a/package.json b/package.json index aa7ad78..12a8e0a 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "typesxml", "productName": "TypesXML", - "version": "1.0.3", + "version": "1.1.0", "description": "Open source XML library written in TypeScript", "scripts": { "build": "tsc" diff --git a/target/npmlist.json b/target/npmlist.json deleted file mode 100644 index c874a80..0000000 --- a/target/npmlist.json +++ /dev/null @@ -1 +0,0 @@ -{"version":"1.0.2","name":"typesxml"} \ No newline at end of file diff --git a/ts/Constants.ts b/ts/Constants.ts index 94ad206..b5700e9 100644 --- a/ts/Constants.ts +++ b/ts/Constants.ts @@ -23,8 +23,4 @@ export class Constants { static readonly XML_DECLARATION_NODE: number = 8; static readonly ATTRIBUTE_LIST_DECL_NODE: number = 9; static readonly DOCUMENT_TYPE_NODE: number = 10; - static readonly NOTATION_DECL_NODE: number = 11; - static readonly INTERNAL_SUBSET_NODE: number = 12; - static readonly ELEMENT_DECL_NODE: number = 13; - static readonly ATTRIBUTE_DECL_NODE: number = 14; } \ No newline at end of file diff --git a/ts/ContentHandler.ts b/ts/ContentHandler.ts new file mode 100644 index 0000000..034b6a9 --- /dev/null +++ b/ts/ContentHandler.ts @@ -0,0 +1,38 @@ +/******************************************************************************* + * Copyright (c) 2023 Maxprograms. 
+ * + * This program and the accompanying materials + * are made available under the terms of the Eclipse License 1.0 + * which accompanies this distribution, and is available at + * https://www.eclipse.org/org/documents/epl-v10.html + * + * Contributors: + * Maxprograms - initial API and implementation + *******************************************************************************/ + +import { XMLAttribute } from "./XMLAttribute"; + +export interface ContentHandler { + startDocument(): void; + endDocument(): void; + + xmlDeclaration(version: string, encoding: string, standalone: string): void; + + startElement(name: string, atts: Array ): void; + endElement(name: string): void; + internalSubset(declaration: string): void; + + characters(ch: string): void; + ignorableWhitespace(ch: string): void; + + comment(ch: string): void; + processingInstruction(target: string, data: string): void; + + startCDATA(): void; + endCDATA(): void; + + startDTD(name: string, publicId: string, systemId: string): void; + endDTD(): void; + + skippedEntity(name: string): void; +} \ No newline at end of file diff --git a/ts/DOMBuilder.ts b/ts/DOMBuilder.ts new file mode 100644 index 0000000..5f04f37 --- /dev/null +++ b/ts/DOMBuilder.ts @@ -0,0 +1,132 @@ +import { ContentHandler } from "./ContentHandler"; +import { TextNode } from "./TextNode"; +import { XMLAttribute } from "./XMLAttribute"; +import { XMLComment } from "./XMLComment"; +import { XMLDeclaration } from "./XMLDeclaration"; +import { XMLDocument } from "./XMLDocument"; +import { XMLElement } from "./XMLElement"; +import { ProcessingInstruction } from "./ProcessingInstruction"; +import { CData } from "./CData"; +import { XMLDocumentType } from "./XMLDocumentType"; + +export class DOMBuilder implements ContentHandler { + + inCdData: boolean; + currentCData: CData; + document: XMLDocument; + stack: Array; + + constructor() { + this.document = new XMLDocument(); + this.stack = new Array(); + this.inCdData = false; + } + + 
getDocument(): XMLDocument { + return this.document; + } + + startDocument(): void { + // do nothing + } + + endDocument(): void { + // do nothing + } + + xmlDeclaration(version: string, encoding: string, standalone: string): void { + let xmlDclaration = new XMLDeclaration(version, encoding, standalone); + this.document.setXmlDeclaration(xmlDclaration); + } + + startElement(name: string, atts: XMLAttribute[]): void { + let element: XMLElement = new XMLElement(name); + atts.forEach((att) => { + element.setAttribute(att); + }); + if (this.stack.length > 0) { + this.stack[this.stack.length - 1].addElement(element); + } else { + this.document.setRoot(element); + } + this.stack.push(element); + } + + endElement(name: string): void { + this.stack.pop(); + } + + internalSubset(declaration: string): void { + let docType: XMLDocumentType = this.document.getDocumentType(); + if (docType) { + docType.setInternalSubset(declaration); + } + } + + characters(ch: string): void { + if (this.inCdData) { + this.currentCData.setValue(this.currentCData.getValue() + ch); + return; + } + let textNode: TextNode = new TextNode(ch); + if (this.stack.length > 0) { + this.stack[this.stack.length - 1].addTextNode(textNode); + } else { + this.document.addTextNode(textNode); + } + } + + ignorableWhitespace(ch: string): void { + let textNode: TextNode = new TextNode(ch); + if (this.stack.length > 0) { + this.stack[this.stack.length - 1].addTextNode(textNode); + } else { + this.document.addTextNode(textNode); + } + } + + comment(ch: string): void { + let comment: XMLComment = new XMLComment(ch); + if (this.stack.length > 0) { + this.stack[this.stack.length - 1].addComment(comment); + } else { + this.document.addComment(comment); + } + } + + processingInstruction(target: string, data: string): void { + let pi = new ProcessingInstruction(target, data); + if (this.stack.length > 0) { + this.stack[this.stack.length - 1].addProcessingInstruction(pi); + } else { + 
this.document.addProcessingInstruction(pi); + } + } + + startCDATA(): void { + this.currentCData = new CData(''); + this.inCdData = true; + } + + endCDATA(): void { + if (this.stack.length > 0) { + this.stack[this.stack.length - 1].addCData(this.currentCData); + } else { + throw new Error("CData section outside of root element"); + } + this.inCdData = false; + } + + startDTD(name: string, publicId: string, systemId: string): void { + let docType: XMLDocumentType = new XMLDocumentType(name, publicId, systemId); + this.document.setDocumentType(docType); + } + + endDTD(): void { + // do nothing + } + + skippedEntity(name: string): void { + throw new Error("Method not implemented."); + } +} \ No newline at end of file diff --git a/ts/DocumentType.ts b/ts/DocumentType.ts deleted file mode 100644 index 1fab091..0000000 --- a/ts/DocumentType.ts +++ /dev/null @@ -1,116 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2023 Maxprograms. - * - * This program and the accompanying materials - * are made available under the terms of the Eclipse License 1.0 - * which accompanies this distribution, and is available at - * https://www.eclipse.org/org/documents/epl-v10.html - * - * Contributors: - * Maxprograms - initial API and implementation - *******************************************************************************/ - -import { Constants } from "./Constants"; -import { DTDParser } from "./dtd/DTDParser"; -import { InternalSubset } from "./dtd/InternalSubset"; -import { Grammar } from "./grammar/Grammar"; -import { XMLNode } from "./XMLNode"; -import { XMLUtils } from "./XMLUtils"; - -export class DocumentType implements XMLNode { - - private name: string; - private systemId: string; - private publicId: string; - private internalSubset: InternalSubset; - private internalGrammar: Grammar; - - constructor(declaration: string) { - this.name = ''; - let i: number = ''; - } - - equals(node: XMLNode): boolean { - if (node 
instanceof DocumentType) { - return this.publicId === node.publicId && this.systemId === node.systemId; - } - return false; - } -} \ No newline at end of file diff --git a/ts/FileReader.ts b/ts/FileReader.ts index 6ef4946..c908478 100644 --- a/ts/FileReader.ts +++ b/ts/FileReader.ts @@ -19,6 +19,7 @@ export class FileReader { blockSize: number; fileSize: number; position: number; + firstRead: boolean; constructor(path: string, encoding?: BufferEncoding) { let stats: Stats = statSync(path, { bigint: false, throwIfNoEntry: true }); @@ -28,12 +29,13 @@ export class FileReader { if (encoding) { this.encoding = encoding; } else { - this.encoding = this.detectEncoding(path); + this.encoding = FileReader.detectEncoding(path); } this.position = 0; + this.firstRead = true; } - detectEncoding(path: string): BufferEncoding { + static detectEncoding(path: string): BufferEncoding { const fd = openSync(path, 'r'); let buffer = Buffer.alloc(3); let bytesRead: number = readSync(fd, buffer, 0, 3, 0); @@ -42,9 +44,8 @@ export class FileReader { if (bytesRead < 3) { throw new Error('Error reading BOM: not enough bytes'); } - - const UTF8 = Buffer.from([-17, -69, -65]); - const UTF16 = Buffer.from([-2, -1]); + const UTF8: Buffer = Buffer.from([-17, -69, -65]); + const UTF16: Buffer = Buffer.from([-2, -1]); if (buffer.toString().startsWith(UTF8.toString())) { return 'utf8'; @@ -63,11 +64,25 @@ export class FileReader { this.encoding = encoding; } - readData(): string { + read(): string { let buffer: Buffer = Buffer.alloc(this.blockSize, this.encoding); let amount: number = this.blockSize <= this.fileSize - this.position ? this.blockSize : this.fileSize - this.position; let bytesRead: number = readSync(this.fileHandle, buffer, 0, amount, this.position); this.position += bytesRead; + return this.firstRead ? 
this.skipBOM(buffer, bytesRead) : buffer.toString(this.encoding, 0, bytesRead); + } + + skipBOM(buffer: Buffer, bytesRead: number): string { + this.firstRead = false; + const utf8Bom: string = Buffer.from([-17, -69, -65]).toString(); + const utf16Bom: string = Buffer.from([-2, -1]).toString(); + let result: string = buffer.toString(this.encoding, 0, bytesRead); + if (result.startsWith(utf8Bom)) { + return result.substring(utf8Bom.length); + } + if (result.startsWith(utf16Bom)) { + return result.substring(utf16Bom.length); + } return buffer.toString(this.encoding, 0, bytesRead); } @@ -75,6 +90,10 @@ export class FileReader { return this.position < this.fileSize; } + getFileSize(): number { + return this.fileSize; + } + closeFile(): void { closeSync(this.fileHandle); } diff --git a/ts/ProcessingInstruction.ts b/ts/ProcessingInstruction.ts index f78756f..aec9fdf 100644 --- a/ts/ProcessingInstruction.ts +++ b/ts/ProcessingInstruction.ts @@ -17,43 +17,23 @@ import { XMLUtils } from "./XMLUtils"; export class ProcessingInstruction implements XMLNode { private target: string; - private value: string; + private data: string; - constructor(target: string, value: string) { + constructor(target: string, data: string) { this.target = target; - this.value = value; - } - - static parse(instructionText: string): ProcessingInstruction { - let target: string = ''; - let i: number = '')); - return new ProcessingInstruction(target, value); + this.data = data; } getTarget(): string { return this.target; } - getValue(): string { - return this.value; + getData(): string { + return this.data; } - setValue(value: string): void { - this.value = value; + setData(data: string): void { + this.data = data; } getNodeType(): number { @@ -61,12 +41,12 @@ export class ProcessingInstruction implements XMLNode { } toString(): string { - return ''; + return ''; } equals(node: XMLNode): boolean { if (node instanceof ProcessingInstruction) { - return this.target === node.target && this.value === 
node.value; + return this.target === node.target && this.data === node.data; } return false; } diff --git a/ts/SAXParser.ts b/ts/SAXParser.ts new file mode 100644 index 0000000..e5f93b4 --- /dev/null +++ b/ts/SAXParser.ts @@ -0,0 +1,488 @@ +/******************************************************************************* + * Copyright (c) 2023 Maxprograms. + * + * This program and the accompanying materials + * are made available under the terms of the Eclipse License 1.0 + * which accompanies this distribution, and is available at + * https://www.eclipse.org/org/documents/epl-v10.html + * + * Contributors: + * Maxprograms - initial API and implementation + *******************************************************************************/ + +import { ContentHandler } from "./ContentHandler"; +import { FileReader } from "./FileReader"; +import { XMLAttribute } from "./XMLAttribute"; +import { XMLUtils } from "./XMLUtils"; + +export class SAXParser { + + contentHandler: ContentHandler; + reader: FileReader; + pointer: number; + buffer: string; + fileSize: number; + encoding: BufferEncoding; + elementStack: number; + characterRun: string; + rootParsed: boolean; + + constructor() { + this.characterRun = ''; + this.elementStack = 0; + this.pointer = 0; + this.rootParsed = false; + } + + setContentHandler(contentHandler: ContentHandler): void { + this.contentHandler = contentHandler; + } + + parse(path: string, encoding?: BufferEncoding): void { + this.encoding = encoding ? 
encoding : FileReader.detectEncoding(path); + this.reader = new FileReader(path, encoding); + this.fileSize = this.reader.getFileSize(); + this.buffer = this.reader.read(); + this.readDocument(); + } + + readDocument(): void { + this.contentHandler.startDocument(); + while (this.pointer < this.buffer.length) { + if (this.lookingAt('')) { + this.endCDATA(); + continue; + } + if (this.lookingAt('&')) { + this.parseEntityReference(); + continue; + } + if (this.lookingAt('<')) { + this.startElement(); + continue; + } + let char: string = this.buffer.charAt(this.pointer); + if (!this.rootParsed && !XMLUtils.isXmlSpace(char)) { + throw new Error('Malformed XML document: text found in prolog'); + } + if (this.rootParsed && this.elementStack === 0 && !XMLUtils.isXmlSpace(char)) { + throw new Error('Malformed XML document: text found after root element'); + } + this.characterRun += char; + this.pointer++; + if (this.pointer > this.buffer.length && this.reader.dataAvailable()) { + this.buffer += this.reader.read(); + } + if (this.rootParsed && this.elementStack === 0) { + this.contentHandler.endDocument(); + } + } + if (this.elementStack !== 0) { + throw new Error('Malformed XML document: unclosed elements'); + } + this.cleanCharacterRun(); + } + + parseEntityReference() { + this.cleanCharacterRun(); + this.pointer++; // skip '&' + let name: string = ''; + while (!this.lookingAt(';')) { + name += this.buffer.charAt(this.pointer++); + if (this.pointer > this.buffer.length && this.reader.dataAvailable()) { + this.buffer += this.reader.read(); + } + } + if (name === 'lt') { + this.contentHandler.characters('<'); + } else if (name === 'gt') { + this.contentHandler.characters('>'); + } else if (name === 'amp') { + this.contentHandler.characters('&'); + } else if (name === 'apos') { + this.contentHandler.characters('\''); + } else if (name === 'quot') { + this.contentHandler.characters('"'); + } else if (name.startsWith('#x')) { + let code: number = parseInt(name.substring(2), 
16); + this.contentHandler.characters(String.fromCharCode(code)); + } else if (name.startsWith('#')) { + let code: number = parseInt(name.substring(1)); + this.contentHandler.characters(String.fromCharCode(code)); + } else { + this.contentHandler.skippedEntity(name); + } + this.pointer++; // skip ';' + this.buffer = this.buffer.substring(this.pointer); + this.pointer = 0; + } + + startElement() { + this.cleanCharacterRun(); + this.pointer++; // skip '<' + let name: string = ''; + while (!XMLUtils.isXmlSpace(this.buffer.charAt(this.pointer)) && !this.lookingAt('>') && !this.lookingAt('/>')) { + name += this.buffer.charAt(this.pointer++); + if (this.pointer > this.buffer.length && this.reader.dataAvailable()) { + this.buffer += this.reader.read(); + } + } + let rest: string = ''; + while (!this.lookingAt('>') && !this.lookingAt('/>')) { + rest += this.buffer.charAt(this.pointer++); + if (this.pointer > this.buffer.length && this.reader.dataAvailable()) { + this.buffer += this.reader.read(); + } + } + rest = rest.trim(); + let attributesMap: Map = this.parseAttributes(rest); + let attributes: Array = []; + attributesMap.forEach((value: string, key: string) => { + let attribute: XMLAttribute = new XMLAttribute(key, value); + attributes.push(attribute); + }); + this.contentHandler.startElement(name, attributes); + this.elementStack++; + if (!this.rootParsed) { + this.rootParsed = true; + } + if (this.lookingAt('/>')) { + this.contentHandler.endElement(name); + this.elementStack--; + this.pointer += 2; // skip '/>' + } else { + this.pointer++; // skip '>' + } + this.buffer = this.buffer.substring(this.pointer); + this.pointer = 0; + } + + endElement() { + this.cleanCharacterRun(); + this.pointer += 2; // skip '')) { + name += this.buffer.charAt(this.pointer++); + } + this.contentHandler.endElement(name); + this.elementStack--; + this.pointer++; // skip '>' + this.buffer = this.buffer.substring(this.pointer); + this.pointer = 0; + } + + cleanCharacterRun(): void { + if 
(this.characterRun !== '') { + if (this.rootParsed) { + if (this.elementStack === 0) { + // document ended + this.contentHandler.ignorableWhitespace(this.characterRun); + } else { + // in an element + this.contentHandler.characters(this.characterRun); + } + } else { + // in prolog + this.contentHandler.ignorableWhitespace(this.characterRun); + } + this.characterRun = ''; + } + } + + parseComment(): void { + this.cleanCharacterRun(); + let comment: string = ''; + this.pointer += 4; // skip '')) { + comment += this.buffer.charAt(this.pointer++); + } + this.buffer = this.buffer.substring(this.pointer + 3); // skip '-->' + this.pointer = 0; + this.contentHandler.comment(comment); + } + + parseProcessingInstruction(): void { + this.cleanCharacterRun(); + let instructionText: string = ''; + let target: string = ''; + let data: string = ''; + this.pointer += 2; // skip '')) { + instructionText += this.buffer.charAt(this.pointer++); + } + instructionText = instructionText.trim(); + let i: number = 0; + // read target + for (; i < instructionText.length; i++) { + let char: string = instructionText[i]; + if (XMLUtils.isXmlSpace(char)) { + break; + } + target += char; + } + // skip spaces + for (; i < instructionText.length; i++) { + let char: string = instructionText[i]; + if (!XMLUtils.isXmlSpace(char)) { + break; + } + } + // set data + data = instructionText.substring(i); + this.buffer = this.buffer.substring(this.pointer + 2); // skip '?>' + this.pointer = 0; + this.contentHandler.processingInstruction(target, data); + } + + parseDoctype() { + let declaration: string = ''; + this.pointer += 9; // skip '= this.buffer.length && this.reader.dataAvailable()) { + this.buffer += this.reader.read(); + } + } + this.pointer += i; + i = 0; + // read name + let name: string = ''; + for (; i < this.buffer.length; i++) { + let char: string = this.buffer.charAt(this.pointer + i); + if (XMLUtils.isXmlSpace(char)) { + break; + } + name += char; + if (this.pointer + i + 1 >= 
this.buffer.length && this.reader.dataAvailable()) { + this.buffer += this.reader.read(); + } + } + this.pointer += i; + i = 0; + // skip spaces after root name + for (; i < this.buffer.length; i++) { + let char: string = this.buffer.charAt(this.pointer + i); + if (!XMLUtils.isXmlSpace(char)) { + break; + } + if (this.pointer + i + 1 >= this.buffer.length && this.reader.dataAvailable()) { + this.buffer += this.reader.read(); + } + } + this.pointer += i; + i = 0; + // read the rest of the declaration + let stack: number = 1; + for (; this.pointer < this.buffer.length; this.pointer++) { + let char: string = this.buffer[this.pointer]; + if ('<' === char) { + stack++; + } + if ('>' === char) { + stack--; + if (stack === 0) { + break; + } + } + declaration += char; + if (this.pointer + 1 > this.buffer.length && this.reader.dataAvailable()) { + this.buffer += this.reader.read(); + } + } + this.buffer = this.buffer.substring(this.pointer + 1); // skip '>' + this.pointer = 0; + let systemId: string = this.extractSystem(declaration); + let publicId: string = this.extractPublic(declaration); + let internalSubset: string = this.extractInternal(declaration); + this.contentHandler.startDTD(name, publicId, systemId); + if (internalSubset !== '') { + this.contentHandler.internalSubset(internalSubset); + } + this.contentHandler.endDTD(); + } + + extractInternal(declaration: string): string { + let index = declaration.indexOf('['); + if (index === -1) { + return ''; + } + let end = declaration.indexOf(']'); + if (end === -1) { + return ''; + } + return declaration.substring(index + 1, end); + } + + extractPublic(declaration: string): string { + let index = declaration.indexOf('PUBLIC'); + if (index === -1) { + return ''; + } + // skip spaces after PUBLIC + let i: number = 6; + for (; i < declaration.length; i++) { + let char = declaration[i]; + if (!XMLUtils.isXmlSpace(char)) { + break; + } + } + let separator: string = ''; + let publicId: string = ''; + for (; i < 
declaration.length; i++) { + let char = declaration[i]; + if (separator === '' && ('\'' === char || '"' === char)) { + separator = char; + continue; + } + if (char === separator) { + break; + } + publicId += char; + } + return publicId; + } + + extractSystem(declaration: string): string { + let index: number = declaration.indexOf('SYSTEM'); + if (index === -1) { + return ''; + } + // skip spaces after SYSTEM + let i: number = 6; + for (; i < declaration.length; i++) { + let char = declaration[i]; + if (!XMLUtils.isXmlSpace(char)) { + break; + } + } + let separator: string = ''; + let systemId: string = ''; + for (; i < declaration.length; i++) { + let char = declaration[i]; + if (separator === '' && ('\'' === char || '"' === char)) { + separator = char; + continue; + } + if (char === separator) { + break; + } + systemId += char; + } + return systemId; + } + + parseXMLDecl() { + let declarationText: string = ''; + this.pointer += 6; // skip '')) { + declarationText += this.buffer.charAt(this.pointer++); + } + declarationText = declarationText.trim(); + let attributes: Map = this.parseAttributes(declarationText); + this.buffer = this.buffer.substring(this.pointer + 2); // skip '?>' + this.pointer = 0; + this.contentHandler.xmlDeclaration(attributes.get('version'), attributes.get('encoding'), attributes.get('standalone')); + } + + lookingAt(text: string): boolean { + let length: number = text.length; + if (this.pointer + length > this.buffer.length && this.reader.dataAvailable()) { + this.buffer += this.reader.read(); + } + if (this.pointer + length > this.buffer.length) { + return false; + } + for (let i = 0; i < length; i++) { + if (this.buffer[this.pointer + i] !== text[i]) { + return false; + } + } + return true; + } + + parseAttributes(text: string): Map { + let map = new Map(); + let pairs: string[] = []; + let separator: string = ''; + while (text.indexOf('=') != -1) { + let i: number = 0; + for (; i < text.length; i++) { + let char = text[i]; + if 
(XMLUtils.isXmlSpace(char) || '=' === char) { + break; + } + } + for (; i < text.length; i++) { + let char = text[i]; + if (separator === '' && ('\'' === char || '"' === char)) { + separator = char; + continue; + } + if (char === separator) { + break; + } + } + // end of value + let pair = text.substring(0, i + 1).trim(); + pairs.push(pair); + text = text.substring(pair.length).trim(); + separator = ''; + } + pairs.forEach((pair: string) => { + let index = pair.indexOf('='); + if (index === -1) { + throw new Error('Malformed attributes list'); + } + let name = pair.substring(0, index).trim(); + let value = pair.substring(index + 2, pair.length - 1); + map.set(name, value); + }); + return map; + } + + startCDATA() { + this.cleanCharacterRun(); + this.pointer += 9; // skip '' + this.buffer = this.buffer.substring(this.pointer); + this.pointer = 0; + this.contentHandler.endCDATA(); + } + +} \ No newline at end of file diff --git a/ts/XMLComment.ts b/ts/XMLComment.ts index 1fe73f5..269e227 100644 --- a/ts/XMLComment.ts +++ b/ts/XMLComment.ts @@ -17,11 +17,8 @@ export class XMLComment implements XMLNode { private value: string; - constructor(comment: string) { - if (comment.startsWith('')) { - comment = comment.substring(''.length); - } - this.value = comment; + constructor(value: string) { + this.value = value; } setValue(value: string) { @@ -37,7 +34,7 @@ export class XMLComment implements XMLNode { } toString(): string { - return ''; + return ''; } equals(node: XMLNode): boolean { @@ -46,5 +43,4 @@ export class XMLComment implements XMLNode { } return false; } - } \ No newline at end of file diff --git a/ts/XMLDeclaration.ts b/ts/XMLDeclaration.ts index 6084fca..49f99b1 100644 --- a/ts/XMLDeclaration.ts +++ b/ts/XMLDeclaration.ts @@ -12,7 +12,6 @@ import { Constants } from "./Constants"; import { XMLNode } from "./XMLNode"; -import { XMLUtils } from "./XMLUtils"; export class XMLDeclaration implements XMLNode { @@ -21,7 +20,7 @@ export class XMLDeclaration implements 
XMLNode { private standalone: string; constructor(version: string, encoding: string, standalone?: string) { - if (!('1.0' === version || '1.1' === version)) { + if (version !== '' && !('1.0' === version || '1.1' === version)) { throw new Error('Incorrect XML version'); } this.version = version; @@ -34,64 +33,6 @@ export class XMLDeclaration implements XMLNode { } } - static parse(declarationText: string): XMLDeclaration { - let declaration: XMLDeclaration = new XMLDeclaration('1.0', 'UTF-8'); - let attributesPortion = declarationText.substring(''.length); - declaration.parseAttributes(attributesPortion.trim()); - return declaration; - } - - parseAttributes(text: string): void { - let pairs: string[] = []; - let separator: string = ''; - while (text.indexOf('=') != -1) { - let i: number = 0; - for (; i < text.length; i++) { - let char = text[i]; - if (XMLUtils.isXmlSpace(char) || '=' === char) { - break; - } - } - for (; i < text.length; i++) { - let char = text[i]; - if (separator === '' && ('\'' === char || '"' === char)) { - separator = char; - continue; - } - if (char === separator) { - break; - } - } - // end of value - let pair = text.substring(0, i + 1).trim(); - pairs.push(pair); - text = text.substring(pair.length).trim(); - separator = ''; - } - pairs.forEach((pair: string) => { - this.setValues(pair); - }); - } - - setValues(pair: string): void { - let index = pair.indexOf('='); - if (index === -1) { - throw new Error('Malformed XML declaration'); - } - let name = pair.substring(0, index).trim(); - let value = pair.substring(index + 1).trim(); - value = value.substring(1, value.length - 1); - if (name === 'version') { - this.version = value; - } - if (name === 'encoding') { - this.encoding = value; - } - if (name === 'standalone') { - this.standalone = value; - } - } - getVersion(): string { return this.version; } diff --git a/ts/XMLDocument.ts b/ts/XMLDocument.ts index 5f8f638..8be330b 100644 --- a/ts/XMLDocument.ts +++ b/ts/XMLDocument.ts @@ -11,49 
+11,62 @@ *******************************************************************************/ import { Constants } from "./Constants"; -import { DocumentType } from "./DocumentType"; import { ProcessingInstruction } from "./ProcessingInstruction"; import { TextNode } from "./TextNode"; import { XMLComment } from "./XMLComment"; import { XMLDeclaration } from "./XMLDeclaration"; +import { XMLDocumentType } from "./XMLDocumentType"; import { XMLElement } from "./XMLElement"; import { XMLNode } from "./XMLNode"; import { XMLUtils } from "./XMLUtils"; export class XMLDocument implements XMLNode { - xmlDeclaration: XMLDeclaration | undefined; - documentType: DocumentType; - private root: XMLElement; private content: Array; - constructor(name: string, xmlDeclaration?: XMLDeclaration, prologContent?: Array) { - if (xmlDeclaration !== undefined) { - this.xmlDeclaration = xmlDeclaration; - } + constructor() { this.content = new Array(); - if (prologContent !== undefined) { - prologContent.forEach((node: XMLNode) => { - if (node instanceof DocumentType) { - this.documentType = node; - } - this.content.push(node); - }); + } + + contentIterator(): IterableIterator { + return this.content.values(); + } + + setRoot(root: XMLElement): void { + this.content.push(root); + } + + getRoot(): XMLElement | undefined { + for (let i = 0; i < this.content.length; i++) { + if (this.content[i] instanceof XMLElement) { + return this.content[i] as XMLElement; + } } - this.root = new XMLElement(name); - this.content.push(this.root); + return undefined; } - getRoot(): XMLElement { - return this.root; + setDocumentType(documentType: XMLDocumentType): void { + this.content.push(documentType); + } + + getDocumentType(): XMLDocumentType | undefined { + for (let i = 0; i < this.content.length; i++) { + if (this.content[i] instanceof XMLDocumentType) { + return this.content[i] as XMLDocumentType; + } + } + return undefined; } setXmlDeclaration(declaration: XMLDeclaration): void { - this.xmlDeclaration = 
declaration; + this.content.unshift(declaration); } getXmlDeclaration(): XMLDeclaration | undefined { - return this.xmlDeclaration; + if (this.content[0] instanceof XMLDeclaration) { + return this.content[0] as XMLDeclaration; + } + return undefined; } addComment(comment: XMLComment): void { @@ -75,9 +88,9 @@ export class XMLDocument implements XMLNode { toString(): string { let result: string = ''; let isXml10: boolean = true; - if (this.xmlDeclaration) { - result += this.xmlDeclaration.toString() + '\n'; - isXml10 = this.xmlDeclaration.getVersion() === '1.0'; + let xmlDeclaration: XMLDeclaration | undefined = this.getXmlDeclaration(); + if (xmlDeclaration) { + isXml10 = xmlDeclaration.getVersion() === '1.0'; } this.content.forEach((node: XMLNode) => { result += isXml10 ? XMLUtils.validXml10Chars(node.toString()) : XMLUtils.validXml11Chars(node.toString()); @@ -87,7 +100,7 @@ export class XMLDocument implements XMLNode { equals(node: XMLNode): boolean { if (node instanceof XMLDocument) { - if (this.xmlDeclaration !== node.xmlDeclaration || this.content.length !== node.content.length) { + if (this.content.length !== node.content.length) { return false; } for (let i = 0; i < this.content.length; i++) { diff --git a/ts/XMLDocumentType.ts b/ts/XMLDocumentType.ts new file mode 100644 index 0000000..64a1d1c --- /dev/null +++ b/ts/XMLDocumentType.ts @@ -0,0 +1,78 @@ +/******************************************************************************* + * Copyright (c) 2023 Maxprograms. 
+ * + * This program and the accompanying materials + * are made available under the terms of the Eclipse License 1.0 + * which accompanies this distribution, and is available at + * https://www.eclipse.org/org/documents/epl-v10.html + * + * Contributors: + * Maxprograms - initial API and implementation + *******************************************************************************/ + +import { Constants } from "./Constants"; +import { XMLNode } from "./XMLNode"; + +export class XMLDocumentType implements XMLNode { + + private name: string; + private systemId: string; + private publicId: string; + private internalSubset: string; + + constructor(name: string, publicId: string, systemId: string) { + this.name = name; + this.publicId = publicId; + this.systemId = systemId; + } + + setSystemId(systemId: string): void { + this.systemId = systemId; + } + + getSystemId(): string { + return this.systemId; + } + + setPublicId(publicId: string): void { + this.publicId = publicId; + } + + getPublicId(): string { + return this.publicId; + } + + setInternalSubset(subset: string): void { + this.internalSubset = subset; + } + + getInternalSubset(): string { + return this.internalSubset; + } + + getNodeType(): number { + return Constants.DOCUMENT_TYPE_NODE; + } + + toString(): string { + let doctype: string = ''; + } + + equals(node: XMLNode): boolean { + if (node instanceof DocumentType) { + return this.publicId === node.publicId && this.systemId === node.systemId; + } + return false; + } +} \ No newline at end of file diff --git a/ts/XMLNode.ts b/ts/XMLNode.ts index 3a3945f..b977784 100644 --- a/ts/XMLNode.ts +++ b/ts/XMLNode.ts @@ -15,5 +15,4 @@ export interface XMLNode { getNodeType(): number; toString(): string; equals(node: XMLNode): boolean; - } \ No newline at end of file diff --git a/ts/XMLParser.ts b/ts/XMLParser.ts deleted file mode 100644 index cec2e43..0000000 --- a/ts/XMLParser.ts +++ /dev/null @@ -1,340 +0,0 @@ 
-/******************************************************************************* - * Copyright (c) 2023 Maxprograms. - * - * This program and the accompanying materials - * are made available under the terms of the Eclipse License 1.0 - * which accompanies this distribution, and is available at - * https://www.eclipse.org/org/documents/epl-v10.html - * - * Contributors: - * Maxprograms - initial API and implementation - *******************************************************************************/ - -import { CData } from "./CData"; -import { Constants } from "./Constants"; -import { DocumentType } from "./DocumentType"; -import { ProcessingInstruction } from "./ProcessingInstruction"; -import { TextNode } from "./TextNode"; -import { XMLAttribute } from "./XMLAttribute"; -import { XMLComment } from "./XMLComment"; -import { XMLDeclaration } from "./XMLDeclaration"; -import { XMLDocument } from "./XMLDocument"; -import { XMLElement } from "./XMLElement"; -import { XMLNode } from "./XMLNode"; -import { XMLUtils } from "./XMLUtils"; - -export class XMLParser { - - private source: string; - private pointer: number; - private document: XMLDocument; - private inProlog: boolean; - private prologContent: Array; - private xmlDeclaration: XMLDeclaration; - private stack: Array; - private currentElement: XMLElement; - - constructor() { - this.source = ''; - } - - parse(source: string): XMLDocument { - this.source = source; - this.pointer = 0; - this.stack = new Array(); - this.readProlog(); - this.readDocument(); - return this.document; - } - - readProlog(): void { - this.inProlog = true; - this.prologContent = new Array(); - while (this.inProlog) { - if (this.lookingAt(' 0 && this.prologContent[this.prologContent.length - 1].getNodeType() === Constants.TEXT_NODE) { - let lastNode: TextNode = this.prologContent[this.prologContent.length - 1] as TextNode; - lastNode.setValue(lastNode.getValue() + char); - } else { - this.prologContent.push(new TextNode(char)); - } - 
this.pointer++; - continue; - } - this.inProlog = false; - } - } - - readDocument(): void { - let inDocument: boolean = true; - while (inDocument) { - if (this.lookingAt('', this.pointer); - if (index === -1) { - throw new Error('Malformed XML comment'); - } - let commentText: string = this.source.substring(this.pointer, index + '-->'.length); - this.pointer += commentText.length; - let comment: XMLComment = new XMLComment(commentText); - if (this.inProlog) { - this.prologContent.push(comment); - } else { - if (this.stack.length === 0) { - this.document.addComment(comment); - } else { - this.currentElement.addComment(comment); - } - } - } - - parseProcessingInstruction(): void { - let index: number = this.source.indexOf('?>', this.pointer); - if (index === -1) { - throw new Error('Malformed Processing Instruction'); - } - let instructionText = this.source.substring(this.pointer, this.pointer + index + '?>'.length); - this.pointer += instructionText.length; - let pi: ProcessingInstruction = ProcessingInstruction.parse(instructionText); - if (this.inProlog) { - this.prologContent.push(pi); - } else { - if (this.stack.length === 0) { - this.document.addProcessingInstruction(pi); - } else { - this.currentElement.addProcessingInstruction(pi); - } - } - } - - parseAttributes(original: string): Map { - let attributes: Map = new Map(); - let text: string = original.trim(); - let pairs: string[] = []; - let separator: string = ''; - while (text.indexOf('=') != -1) { - let i: number = 0; - for (; i < text.length; i++) { - let char = text[i]; - if (XMLUtils.isXmlSpace(char) || '=' === char) { - break; - } - } - for (; i < text.length; i++) { - let char = text[i]; - if (separator === '' && ('\'' === char || '"' === char)) { - separator = char; - continue; - } - if (char === separator) { - break; - } - } - // end of value - let pair = text.substring(0, i + 1).trim(); - pairs.push(pair); - text = text.substring(pair.length).trim(); - separator = ''; - } - pairs.forEach((pair: 
string) => { - let index = pair.indexOf('='); - if (index === -1) { - throw new Error('Malformed attribute'); - } - let name = pair.substring(0, index).trim(); - let value = pair.substring(index + 1).trim(); - attributes.set(name, new XMLAttribute(name, value.substring(1, value.length - 1))); - }); - return attributes; - } - - parseDoctype(): void { - let stack: number = 0; - let i = this.pointer - for (; i < this.source.length; i++) { - let char: string = this.source[i]; - if ('<' === char) { - stack++; - } - if ('>' === char) { - stack--; - if (stack === 0) { - break; - } - } - } - let declaration: string = this.source.substring(this.pointer, i + 1); - this.prologContent.push(new DocumentType(declaration)); - this.pointer += declaration.length; - } - - parseCData(): void { - let index: number = this.source.indexOf(']]>', this.pointer); - if (index === -1) { - throw new Error('Malformed CData'); - } - let instructionText = this.source.substring(this.pointer, this.pointer + index + ']]>'.length); - instructionText = instructionText.substring(''.length); - this.currentElement.addCData(new CData(instructionText)); - this.pointer += instructionText.length; - } -} \ No newline at end of file diff --git a/ts/XMLUtils.ts b/ts/XMLUtils.ts index 885560e..d838765 100644 --- a/ts/XMLUtils.ts +++ b/ts/XMLUtils.ts @@ -16,7 +16,8 @@ export class XMLUtils { static cleanString(text: string): string { let result: string = text.replace('&', '&'); - return result.replace('<', '<'); + result = result.replace('<', '<'); + return result.replace('>', '>'); } static unquote(text: string): string { diff --git a/ts/XMLWriter.ts b/ts/XMLWriter.ts index f71fb5e..4f79d76 100644 --- a/ts/XMLWriter.ts +++ b/ts/XMLWriter.ts @@ -18,7 +18,7 @@ export class XMLWriter { static writeDocument(doc: XMLDocument, file: string): void { let options: any = { - encoding: 'UTF-8' + encoding: 'utf8' }; let decl: XMLDeclaration = doc.getXmlDeclaration(); if (decl) { diff --git a/ts/dtd/AttDecl.ts 
b/ts/dtd/AttDecl.ts deleted file mode 100644 index 494cea8..0000000 --- a/ts/dtd/AttDecl.ts +++ /dev/null @@ -1,42 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2023 Maxprograms. - * - * This program and the accompanying materials - * are made available under the terms of the Eclipse License 1.0 - * which accompanies this distribution, and is available at - * https://www.eclipse.org/org/documents/epl-v10.html - * - * Contributors: - * Maxprograms - initial API and implementation - *******************************************************************************/ - -import { Constants } from "../Constants"; -import { XMLNode } from "../XMLNode"; - -export class AttDecl implements XMLNode { - - private name: string; - private type: string; - private defaultType: string; - private defaultValue: string; - - constructor() { - // TODO - } - - getNodeType(): number { - return Constants.ATTRIBUTE_DECL_NODE; - } - - equals(node: XMLNode): boolean { - if (node instanceof AttDecl) { - return this.name === node.name && this.type === node.type && this.defaultType === node.defaultType && this.defaultValue === node.defaultValue; - } - return false; - } - - toString(): string { - // TODO - return this.name; - } -} \ No newline at end of file diff --git a/ts/dtd/AttlistDecl.ts b/ts/dtd/AttlistDecl.ts deleted file mode 100644 index 7b14d95..0000000 --- a/ts/dtd/AttlistDecl.ts +++ /dev/null @@ -1,92 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2023 Maxprograms. 
- * - * This program and the accompanying materials - * are made available under the terms of the Eclipse License 1.0 - * which accompanies this distribution, and is available at - * https://www.eclipse.org/org/documents/epl-v10.html - * - * Contributors: - * Maxprograms - initial API and implementation - *******************************************************************************/ - -import { Constants } from "../Constants"; -import { XMLNode } from "../XMLNode"; -import { XMLUtils } from "../XMLUtils"; -import { AttDecl } from "./AttDecl"; - -export class AttlistDecl implements XMLNode { - - private listName: string; - private attributes: Map; - - private attTypes: string[] = ['CDATA', 'ID', 'IDREF', 'IDREFS', 'ENTITY', 'ENTITIES', 'NMTOKEN', 'NMTOKENS']; - - constructor(declaration: string) { - this.listName = ''; - this.attributes = new Map(); - let i: number = ' { - return this.attributes; - } - - parseAttributes(text: string) { - let parts: string[] = text.split(/[ \t\r\n]/); // (#x20 | #x9 | #xD | #xA) - let index: number = 0; - while (index < parts.length) { - let name: string = parts[index++]; - let type: string = parts[index++]; - let defaultType: string = parts[index++]; - let defaultValue: string = ''; - if ('#FIXED' === defaultType) { - defaultValue = parts[index++]; - } - } - - } - - getNodeType(): number { - return Constants.ATTRIBUTE_LIST_DECL_NODE; - } - - toString(): string { - let result: string = ' { - result += ' ' + a.toString() + '\n'; - }); - return result + '>'; - } - - equals(node: XMLNode): boolean { - if (node instanceof AttlistDecl) { - let nodeAtts: Map = node.getAttributes(); - if (this.listName !== node.getListName() || this.attributes.size !== nodeAtts.size) { - return false; - } - this.attributes.forEach((value: AttDecl, key: string) => { - if (!value.equals(nodeAtts.get(key))) { - return false; - } - }); - return true; - } - return false; - } -} \ No newline at end of file diff --git a/ts/dtd/DTDParser.ts b/ts/dtd/DTDParser.ts 
deleted file mode 100644 index b3a107e..0000000 --- a/ts/dtd/DTDParser.ts +++ /dev/null @@ -1,122 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2023 Maxprograms. - * - * This program and the accompanying materials - * are made available under the terms of the Eclipse License 1.0 - * which accompanies this distribution, and is available at - * https://www.eclipse.org/org/documents/epl-v10.html - * - * Contributors: - * Maxprograms - initial API and implementation - *******************************************************************************/ - -import { XMLUtils } from "../XMLUtils"; -import { Grammar } from "../grammar/Grammar"; -import { AttlistDecl } from "./AttlistDecl"; -import { ElementDecl } from "./ElementDecl"; -import { EntityDecl } from "./EntityDecl"; -import { NotationDecl } from "./NotationDecl"; - -export class DTDParser { - - - private grammar: Grammar; - private elementDeclMap: Map; - private attributeListMap: Map; - private entitiesMap: Map; - private notationsMap: Map; - - constructor() { - this.elementDeclMap = new Map(); - this.attributeListMap = new Map(); - this.entitiesMap = new Map(); - this.notationsMap = new Map(); - } - - parse(source: string): Grammar { - let pointer: number = 0; - this.grammar = new Grammar(); - - while (pointer < source.length) { - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed element declaration'); - } - let elementText: string = source.substring(pointer, index + '>'.length); - let elementDecl: ElementDecl = new ElementDecl(elementText); - this.elementDeclMap.set(elementDecl.getName(), elementDecl); - pointer += elementText.length; - continue; - } - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed attribute declaration'); - } - let attListText: string = source.substring(pointer, index + '>'.length); - let attList: AttlistDecl = new AttlistDecl(attListText); - 
this.attributeListMap.set(attList.getListName(), attList); - pointer += attListText.length; - continue; - } - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed entity declaration'); - } - let entityDeclText: string = source.substring(pointer, index + '>'.length); - let entityDecl: EntityDecl = new EntityDecl(entityDeclText); - this.entitiesMap.set(entityDecl.getName(), entityDecl); - pointer += entityDeclText.length; - continue; - } - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed notation declaration'); - } - let notationDeclText: string = source.substring(pointer, index + '>'.length); - let notation: NotationDecl = new NotationDecl(notationDeclText); - this.notationsMap.set(notation.getName(), notation); - pointer += notationDeclText.length; - continue; - } - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed processing instruction'); - } - let piText: string = source.substring(pointer, index + '?>'.length); - // ignore processing instructions - pointer += piText.length; - continue; - } - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed comment'); - } - let commentText: string = source.substring(pointer, index + '-->'.length); - // ignore comments - pointer += commentText.length; - continue; - } - if (XMLUtils.lookingAt('%', source, pointer)) { - let index: number = source.indexOf(';', pointer); - if (index == -1) { - throw new Error('Malformed entity reference'); - } - let entityName: string = source.substring(pointer + '%'.length, index); - pointer += '%'.length + entityName.length + ';'.length; - } - let char: string = source.charAt(pointer); - if (XMLUtils.isXmlSpace(char)) { - pointer++; - continue; - } - throw new Error('Error parsing DTD'); - } - return this.grammar; - } -} \ No newline at end of file diff --git a/ts/dtd/ElementDecl.ts b/ts/dtd/ElementDecl.ts deleted file mode 100644 index 
0ceae73..0000000 --- a/ts/dtd/ElementDecl.ts +++ /dev/null @@ -1,54 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2023 Maxprograms. - * - * This program and the accompanying materials - * are made available under the terms of the Eclipse License 1.0 - * which accompanies this distribution, and is available at - * https://www.eclipse.org/org/documents/epl-v10.html - * - * Contributors: - * Maxprograms - initial API and implementation - *******************************************************************************/ - -import { Constants } from "../Constants"; -import { XMLNode } from "../XMLNode"; -import { XMLUtils } from "../XMLUtils"; - -export class ElementDecl implements XMLNode { - - private name: string; - - constructor(declaration: string) { - this.name = ''; - let i: number = ''; - } - - equals(node: XMLNode): boolean { - // TODO - throw new Error("Method not implemented."); - } -} \ No newline at end of file diff --git a/ts/dtd/EntityDecl.ts b/ts/dtd/EntityDecl.ts deleted file mode 100644 index 785cc0a..0000000 --- a/ts/dtd/EntityDecl.ts +++ /dev/null @@ -1,76 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2023 Maxprograms. 
- * - * This program and the accompanying materials - * are made available under the terms of the Eclipse License 1.0 - * which accompanies this distribution, and is available at - * https://www.eclipse.org/org/documents/epl-v10.html - * - * Contributors: - * Maxprograms - initial API and implementation - *******************************************************************************/ - -import { Constants } from "../Constants"; -import { XMLNode } from "../XMLNode"; -import { XMLUtils } from "../XMLUtils"; - -export class EntityDecl implements XMLNode { - - private name: string; - private value: string; - private type: string; - - constructor(declaration: string) { - this.name = ''; - let i: number = '' - } - - equals(node: XMLNode): boolean { - if (node instanceof EntityDecl) { - return this.name === node.name && this.type === node.type && this.value === node.value; - } - return false; - } -} \ No newline at end of file diff --git a/ts/dtd/InternalSubset.ts b/ts/dtd/InternalSubset.ts deleted file mode 100644 index de69574..0000000 --- a/ts/dtd/InternalSubset.ts +++ /dev/null @@ -1,139 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2023 Maxprograms. 
- * - * This program and the accompanying materials - * are made available under the terms of the Eclipse License 1.0 - * which accompanies this distribution, and is available at - * https://www.eclipse.org/org/documents/epl-v10.html - * - * Contributors: - * Maxprograms - initial API and implementation - *******************************************************************************/ - -import { XMLComment } from "../XMLComment"; -import { ProcessingInstruction } from "../ProcessingInstruction"; -import { TextNode } from "../TextNode"; -import { XMLNode } from "../XMLNode"; -import { XMLUtils } from "../XMLUtils"; -import { AttlistDecl } from "./AttlistDecl"; -import { ElementDecl } from "./ElementDecl"; -import { EntityDecl } from "./EntityDecl"; -import { NotationDecl } from "./NotationDecl"; -import { Constants } from "../Constants"; - -export class InternalSubset implements XMLNode { - - content: Array; - - constructor(declaration: string) { - this.content = new Array(); - this.parseDeclaration(declaration.substring(1, declaration.length - 1)); - } - - parseDeclaration(declaration: string) { - let pointer: number = 0; - let inSubset: boolean = true; - while (inSubset) { - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed element declaration'); - } - let elementText: string = declaration.substring(pointer, index + '>'.length); - this.content.push(new ElementDecl(elementText)); - pointer += elementText.length; - continue; - } - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed attribute declaration'); - } - let attListText: string = declaration.substring(pointer, index + '>'.length); - this.content.push(new AttlistDecl(attListText)); - pointer += attListText.length; - continue; - } - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed entity declaration'); - } - let entityDeclText: string = declaration.substring(pointer, index + '>'.length); - 
this.content.push(new EntityDecl(entityDeclText)); - pointer += entityDeclText.length; - continue; - } - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed notation declaration'); - } - let notationDeclText: string = declaration.substring(pointer, index + '>'.length); - this.content.push(new NotationDecl(notationDeclText)); - pointer += notationDeclText.length; - continue; - } - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed processing instruction in internal subset'); - } - let piText: string = declaration.substring(pointer, index + '?>'.length); - this.content.push(ProcessingInstruction.parse(piText)); - pointer += piText.length; - continue; - } - if (XMLUtils.lookingAt('', pointer); - if (index === -1) { - throw new Error('Malformed comment in internal subset'); - } - let commentText: string = declaration.substring(pointer, index + '-->'.length); - this.content.push(new XMLComment(commentText)); - pointer += commentText.length; - continue; - } - if (XMLUtils.lookingAt('%', declaration, pointer)) { - // Parameter-entity references - // TODO - } - let char: string = declaration.charAt(pointer); - if (XMLUtils.isXmlSpace(char)) { - if (this.content.length > 0 && this.content[this.content.length - 1].getNodeType() === Constants.TEXT_NODE) { - let lastNode: TextNode = this.content[this.content.length - 1] as TextNode; - lastNode.setValue(lastNode.getValue() + char); - } else { - this.content.push(new TextNode(char)); - } - pointer++; - continue; - } - inSubset = false; - } - } - - getNodeType(): number { - return Constants.INTERNAL_SUBSET_NODE; - } - - toString(): string { - let result: string = '['; - this.content.forEach((value: XMLNode) => { - result += value.toString(); - }); - return result + ']'; - } - - equals(node: XMLNode): boolean { - if (node instanceof InternalSubset) { - for (let i: number = 0; i < this.content.length; i++) { - if (!this.content[i].equals(node.content[i])) { - 
return false; - } - } - return true; - } - return false; - } -} \ No newline at end of file diff --git a/ts/dtd/NotationDecl.ts b/ts/dtd/NotationDecl.ts deleted file mode 100644 index 0cd25de..0000000 --- a/ts/dtd/NotationDecl.ts +++ /dev/null @@ -1,54 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2023 Maxprograms. - * - * This program and the accompanying materials - * are made available under the terms of the Eclipse License 1.0 - * which accompanies this distribution, and is available at - * https://www.eclipse.org/org/documents/epl-v10.html - * - * Contributors: - * Maxprograms - initial API and implementation - *******************************************************************************/ - -import { Constants } from "../Constants"; -import { XMLNode } from "../XMLNode"; -import { XMLUtils } from "../XMLUtils"; - -export class NotationDecl implements XMLNode { - - private name: string; - - constructor(declaration: string) { - this.name = ''; - let i: number = '; - - constructor() { - this.models = new Map(); - } - - getContentModel(elementName: string): ContentModel { - return this.models.get(elementName); - } - - toString(): string { - let result: string; - this.models.forEach((value: ContentModel) => { - result = result + value.toString() + '\n'; - }); - return result; - } -} \ No newline at end of file diff --git a/ts/index.ts b/ts/index.ts index e158cd6..bf30b68 100644 --- a/ts/index.ts +++ b/ts/index.ts @@ -12,19 +12,12 @@ export { CData } from "./CData"; export { Constants } from "./Constants"; -export { DocumentType } from "./DocumentType"; +export { ContentHandler } from "./ContentHandler"; +export { XMLDocumentType } from "./XMLDocumentType"; export { Indenter } from "./Indenter"; -export { AttDecl } from "./dtd/AttDecl"; -export { AttlistDecl } from "./dtd/AttlistDecl"; -export { DTDParser } from "./dtd/DTDParser"; -export { ElementDecl } from "./dtd/ElementDecl"; -export { EntityDecl } 
from "./dtd/EntityDecl"; export { FileReader } from "./FileReader"; -export { InternalSubset } from "./dtd/InternalSubset"; -export { NotationDecl } from "./dtd/NotationDecl"; -export { ContentModel } from "./grammar/ContentModel"; -export { Grammar } from "./grammar/Grammar"; export { ProcessingInstruction } from "./ProcessingInstruction"; +export { SAXParser } from "./SAXParser"; export { TextNode } from "./TextNode"; export { XMLAttribute } from "./XMLAttribute"; export { XMLComment } from "./XMLComment"; @@ -32,6 +25,4 @@ export { XMLDeclaration } from "./XMLDeclaration"; export { XMLDocument } from "./XMLDocument"; export { XMLElement } from "./XMLElement"; export { XMLNode } from "./XMLNode"; -export { XMLParser } from "./XMLParser"; -export { XMLUtils } from "./XMLUtils"; -export { XMLWriter } from "./XMLWriter"; \ No newline at end of file +export { XMLUtils } from "./XMLUtils"; \ No newline at end of file From 0e7d0370c87b4d7a62f5a0afeaf6e088037d56e5 Mon Sep 17 00:00:00 2001 From: "Rodolfo M. Raya" Date: Tue, 21 Nov 2023 19:10:48 -0300 Subject: [PATCH 06/11] Implemented OASIS Catalog --- ts/Catalog.ts | 276 +++++++++++++++++++++++++++++++++++++++++++ ts/ContentHandler.ts | 18 +-- ts/DOMBuilder.ts | 56 +++++++++ 3 files changed, 343 insertions(+), 7 deletions(-) create mode 100644 ts/Catalog.ts diff --git a/ts/Catalog.ts b/ts/Catalog.ts new file mode 100644 index 0000000..8e09027 --- /dev/null +++ b/ts/Catalog.ts @@ -0,0 +1,276 @@ +/******************************************************************************* + * Copyright (c) 2023 Maxprograms. 
+ * + * This program and the accompanying materials + * are made available under the terms of the Eclipse License 1.0 + * which accompanies this distribution, and is available at + * https://www.eclipse.org/org/documents/epl-v10.html + * + * Contributors: + * Maxprograms - initial API and implementation + *******************************************************************************/ + +import path = require("path"); +import { existsSync } from "fs"; +import { ContentHandler } from "./ContentHandler"; +import { DOMBuilder } from "./DOMBuilder"; +import { SAXParser } from "./SAXParser"; +import { XMLDocument } from "./XMLDocument"; +import { XMLElement } from "./XMLElement"; + +export class Catalog { + + systemCatalog: Map; + publicCatalog: Map; + uriCatalog: Map; + dtdCatalog: Map; + + uriRewrites: Array; + systemRewrites: Array; + workDir: string; + base: string; + + constructor(catalogFile: string) { + if (!path.isAbsolute(catalogFile)) { + throw new Error('Catalog file must be absolute: ' + catalogFile); + } + if (!existsSync(catalogFile)) { + throw new Error('Catalog file ' + catalogFile + ' not found'); + } + + this.systemCatalog = new Map(); + this.publicCatalog = new Map(); + this.uriCatalog = new Map(); + this.dtdCatalog = new Map(); + this.uriRewrites = new Array(); + this.systemRewrites = new Array(); + this.workDir = path.dirname(catalogFile); + this.base = ''; + + let contentHandler: ContentHandler = new DOMBuilder(); + let parser: SAXParser = new SAXParser(); + parser.setContentHandler(contentHandler); + parser.parse(catalogFile); + let catalogDocument: XMLDocument = (contentHandler as DOMBuilder).getDocument(); + let catalogRoot: XMLElement = catalogDocument.getRoot(); + if (catalogRoot.getName() !== 'catalog') { + throw new Error('Catalog root element must be '); + } + this.recurse(catalogRoot); + } + + recurse(catalogRoot: XMLElement) { + for (let child of catalogRoot.getChildren()) { + let currentBase: string = this.base; + if 
(child.hasAttribute('xml:base') && child.getAttribute("xml:base").getValue() !== '') { + this.base = child.getAttribute("xml:base").getValue(); + if (!this.base.endsWith('/')) { + this.base += '/'; + } + if (!path.isAbsolute(this.base)) { + this.base = path.resolve(this.workDir, this.base); + } + if (!existsSync(this.base)) { + throw new Error('Invalid xml:base: ' + this.base); + } + } + if (child.getName() === 'public') { + let publicId: string = child.getAttribute("publicId").getValue(); + if (publicId.startsWith("urn:publicid:")) { + publicId = this.unwrapUrn(publicId); + } + if (!this.publicCatalog.has(publicId)) { + let uri: string = this.makeAbsolute(child.getAttribute("uri").getValue()); + if (existsSync(uri)) { + this.publicCatalog.set(publicId, uri); + if (uri.endsWith(".dtd")) { + let name: string = path.basename(uri); + if (!this.dtdCatalog.has(name)) { + this.dtdCatalog.set(name, uri); + } + } + } + } + } + if (child.getName() === 'system') { + let uri: string = this.makeAbsolute(child.getAttribute("uri").getValue()); + if (existsSync(uri)) { + this.systemCatalog.set(child.getAttribute("systemId").getValue(), uri); + if (uri.endsWith(".dtd")) { + let name: string = path.basename(uri); + if (!this.dtdCatalog.has(name)) { + this.dtdCatalog.set(name, uri); + } + } + } + } + if (child.getName() === 'uri') { + let uri: string = this.makeAbsolute(child.getAttribute("uri").getValue()); + if (existsSync(uri)) { + this.uriCatalog.set(child.getAttribute("name").getValue(), uri); + if (uri.endsWith(".dtd")) { + let name: string = path.basename(uri); + if (!this.dtdCatalog.has(name)) { + this.dtdCatalog.set(name, uri); + } + } + } + } + if (child.getName() === 'rewriteURI') { + let uri: string = this.makeAbsolute(child.getAttribute("rewritePrefix").getValue()); + let pair: string[] = [child.getAttribute("uriStartString").getValue(), uri]; + if (!this.uriRewrites.includes(pair)) { + this.uriRewrites.push(pair); + } + } + if (child.getName() === 'rewriteSystem') { + 
let uri: string = this.makeAbsolute(child.getAttribute("rewritePrefix").getValue()); + let pair: string[] = [child.getAttribute("systemIdStartString").getValue(), uri]; + if (!this.systemRewrites.includes(pair)) { + this.systemRewrites.push(pair); + } + } + if (child.getName() === 'nextCatalog') { + let nextCatalog: string = this.makeAbsolute(child.getAttribute("catalog").getValue()); + let catalog: Catalog = new Catalog(nextCatalog); + let map: Map = catalog.getSystemCatalog(); + map.forEach((key, value) => { + if (!this.systemCatalog.has(key)) { + this.systemCatalog.set(key, value); + } + }); + map = catalog.getPublicCatalog(); + map.forEach((key, value) => { + if (!this.publicCatalog.has(key)) { + this.publicCatalog.set(key, value); + } + }); + map = catalog.getUriCatalog(); + map.forEach((key, value) => { + if (!this.uriCatalog.has(key)) { + this.uriCatalog.set(key, value); + } + }); + map = catalog.getDtdCatalog(); + map.forEach((key, value) => { + if (!this.dtdCatalog.has(key)) { + this.dtdCatalog.set(key, value); + } + }); + let array: Array = catalog.getUriRewrites(); + array.forEach((value) => { + if (!this.uriRewrites.includes(value)) { + this.uriRewrites.push(value); + } + }); + array = catalog.getSystemRewrites(); + array.forEach((value) => { + if (!this.systemRewrites.includes(value)) { + this.systemRewrites.push(value); + } + }); + } + this.recurse(child); + this.base = currentBase; + } + } + + makeAbsolute(uri: string): string { + let file: string = this.base + uri; + if (!path.isAbsolute(file)) { + if (this.base !== '') { + return path.resolve(this.base, uri); + } + return path.resolve(this.workDir, uri); + } + return this.base + uri; + } + + unwrapUrn(urn: string): string { + if (!urn.startsWith('urn:publicid:')) { + return urn; + } + let publicId: string = urn.trim().substring('urn:publicid:'.length); + publicId = publicId.replace(/\\+/, ' '); + publicId = publicId.replace(/\\:/, '//'); + publicId = publicId.replace(';', '::'); + publicId = 
publicId.replace('%2B', '+'); + publicId = publicId.replace('%3A', ':'); + publicId = publicId.replace('%2F', '/'); + publicId = publicId.replace('%3B', ';'); + publicId = publicId.replace('%27', '\''); + publicId = publicId.replace('%3F', '?'); + publicId = publicId.replace('%23', '#'); + publicId = publicId.replace('%25', '%'); + return publicId; + } + + getSystemCatalog(): Map { + return this.systemCatalog; + } + + getPublicCatalog(): Map { + return this.publicCatalog; + } + + getUriCatalog(): Map { + return this.uriCatalog; + } + + getDtdCatalog(): Map { + return this.dtdCatalog; + } + + getUriRewrites(): Array { + return this.uriRewrites; + } + + getSystemRewrites(): Array { + return this.systemRewrites; + } + + resolveEntity(publicId: string, systemId: string): string { + if (publicId) { + let location: string = this.matchPublic(publicId); + if (location) { + return location; + } + } + let location: string = this.matchSystem(systemId); + if (location) { + return location; + } + return undefined; + } + + matchSystem(systemId: string): string { + if (systemId) { + for (let i: number = 0; i < this.systemRewrites.length; i++) { + let pair: string[] = this.systemRewrites[i]; + if (systemId.startsWith(pair[0])) { + systemId = pair[1] + systemId.substring(pair[0].length); + } + } + if (this.systemCatalog.has(systemId)) { + return this.systemCatalog.get(systemId); + } + let fileName: string = path.basename(systemId); + if (this.dtdCatalog.has(fileName)) { + return this.dtdCatalog.get(fileName); + } + } + return undefined; + } + + matchPublic(publicId: string): string { + if (publicId.startsWith("urn:publicid:")) { + publicId = this.unwrapUrn(publicId); + } + if (this.publicCatalog.has(publicId)) { + return this.publicCatalog.get(publicId); + } + return undefined; + } +} + + diff --git a/ts/ContentHandler.ts b/ts/ContentHandler.ts index 034b6a9..398a128 100644 --- a/ts/ContentHandler.ts +++ b/ts/ContentHandler.ts @@ -10,29 +10,33 @@ * Maxprograms - initial API and 
implementation *******************************************************************************/ +import { Catalog } from "./Catalog"; import { XMLAttribute } from "./XMLAttribute"; export interface ContentHandler { + + setCatalog(catalog: Catalog): void; + startDocument(): void; endDocument(): void; - + xmlDeclaration(version: string, encoding: string, standalone: string): void; - startElement(name: string, atts: Array ): void; + startElement(name: string, atts: Array): void; endElement(name: string): void; internalSubset(declaration: string): void; - + characters(ch: string): void; ignorableWhitespace(ch: string): void; - + comment(ch: string): void; processingInstruction(target: string, data: string): void; - + startCDATA(): void; endCDATA(): void; - + startDTD(name: string, publicId: string, systemId: string): void; endDTD(): void; - + skippedEntity(name: string): void; } \ No newline at end of file diff --git a/ts/DOMBuilder.ts b/ts/DOMBuilder.ts index 5f04f37..70178ef 100644 --- a/ts/DOMBuilder.ts +++ b/ts/DOMBuilder.ts @@ -8,6 +8,8 @@ import { XMLElement } from "./XMLElement"; import { ProcessingInstruction } from "./ProcessingInstruction"; import { CData } from "./CData"; import { XMLDocumentType } from "./XMLDocumentType"; +import { Catalog } from "./Catalog"; +import { XMLUtils } from "./XMLUtils"; export class DOMBuilder implements ContentHandler { @@ -15,6 +17,8 @@ export class DOMBuilder implements ContentHandler { currentCData: CData; document: XMLDocument; stack: Array; + catalog: Catalog; + grammarUrl: string; constructor() { this.document = new XMLDocument(); @@ -22,6 +26,10 @@ export class DOMBuilder implements ContentHandler { this.inCdData = false; } + setCatalog(catalog: Catalog): void { + this.catalog = catalog; + } + getDocument(): XMLDocument { return this.document; } @@ -101,6 +109,51 @@ export class DOMBuilder implements ContentHandler { } else { this.document.addProcessingInstruction(pi); } + if (target === 'xml-model' && this.catalog) { + 
let atts: Map = this.parseXmlModel(data); + let href: string = atts.get('href'); + let schematypens: string = atts.get('schematypens'); + } + } + + parseXmlModel(text: string): Map { + let map = new Map(); + let pairs: string[] = []; + let separator: string = ''; + while (text.indexOf('=') != -1) { + let i: number = 0; + for (; i < text.length; i++) { + let char = text[i]; + if (XMLUtils.isXmlSpace(char) || '=' === char) { + break; + } + } + for (; i < text.length; i++) { + let char = text[i]; + if (separator === '' && ('\'' === char || '"' === char)) { + separator = char; + continue; + } + if (char === separator) { + break; + } + } + // end of value + let pair = text.substring(0, i + 1).trim(); + pairs.push(pair); + text = text.substring(pair.length).trim(); + separator = ''; + } + pairs.forEach((pair: string) => { + let index = pair.indexOf('='); + if (index === -1) { + throw new Error('Malformed attributes list'); + } + let name = pair.substring(0, index).trim(); + let value = pair.substring(index + 2, pair.length - 1); + map.set(name, value); + }); + return map; } startCDATA(): void { @@ -120,6 +173,9 @@ export class DOMBuilder implements ContentHandler { startDTD(name: string, publicId: string, systemId: string): void { let docType: XMLDocumentType = new XMLDocumentType(name, publicId, systemId); this.document.setDocumentType(docType); + if (this.catalog) { + this.grammarUrl = this.catalog.resolveEntity(publicId, systemId); + } } endDTD(): void { From dd77f6c9c9018fa5cdd83ce5f7ef28c8af3ceab2 Mon Sep 17 00:00:00 2001 From: "Rodolfo M. 
Raya" Date: Thu, 23 Nov 2023 08:08:51 -0300 Subject: [PATCH 07/11] Add and update exports in index.ts --- ts/index.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ts/index.ts b/ts/index.ts index bf30b68..c698051 100644 --- a/ts/index.ts +++ b/ts/index.ts @@ -10,12 +10,13 @@ * Maxprograms - initial API and implementation *******************************************************************************/ +export { Catalog } from "./Catalog"; export { CData } from "./CData"; export { Constants } from "./Constants"; export { ContentHandler } from "./ContentHandler"; -export { XMLDocumentType } from "./XMLDocumentType"; -export { Indenter } from "./Indenter"; +export { DOMBuilder } from "./DOMBuilder"; export { FileReader } from "./FileReader"; +export { Indenter } from "./Indenter"; export { ProcessingInstruction } from "./ProcessingInstruction"; export { SAXParser } from "./SAXParser"; export { TextNode } from "./TextNode"; @@ -23,6 +24,8 @@ export { XMLAttribute } from "./XMLAttribute"; export { XMLComment } from "./XMLComment"; export { XMLDeclaration } from "./XMLDeclaration"; export { XMLDocument } from "./XMLDocument"; +export { XMLDocumentType } from "./XMLDocumentType"; export { XMLElement } from "./XMLElement"; export { XMLNode } from "./XMLNode"; -export { XMLUtils } from "./XMLUtils"; \ No newline at end of file +export { XMLUtils } from "./XMLUtils"; +export { XMLWriter } from "./XMLWriter"; \ No newline at end of file From 0366496b83a06adf6d739cec4c49a8ba457b426d Mon Sep 17 00:00:00 2001 From: "Rodolfo M. 
Raya" Date: Thu, 23 Nov 2023 08:11:04 -0300 Subject: [PATCH 08/11] Updated dependencies --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 12a8e0a..f10557a 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,7 @@ "url": "https://github.com/rmraya/TypesXML.git" }, "devDependencies": { - "@types/node": "^20.9.1", - "typescript": "^5.2.2" + "@types/node": "^20.9.4", + "typescript": "^5.3.2" } } \ No newline at end of file From 384d7bf4ec11b1816f64555340fea2a59ca78f68 Mon Sep 17 00:00:00 2001 From: "Rodolfo M. Raya" Date: Thu, 23 Nov 2023 08:55:38 -0300 Subject: [PATCH 09/11] Updated README.md --- README.md | 62 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 80642f6..bef275a 100644 --- a/README.md +++ b/README.md @@ -2,41 +2,67 @@ Open source XML library written in TypeScript +Implements a SAX parser that exposes these methods from the `ContentHandler` interface: + +* setCatalog(catalog: Catalog): void; +* startDocument(): void; +* endDocument(): void; +* xmlDeclaration(version: string, encoding: string, standalone: string): void; +* startElement(name: string, atts: Array\<XMLAttribute>): void; +* endElement(name: string): void; +* internalSubset(declaration: string): void; +* characters(ch: string): void; +* ignorableWhitespace(ch: string): void; +* comment(ch: string): void; +* processingInstruction(target: string, data: string): void; +* startCDATA(): void; +* endCDATA(): void; +* startDTD(name: string, publicId: string, systemId: string): void; +* endDTD(): void; +* skippedEntity(name: string): void; + +Class `DOMBuilder` implements the `ContentHandler` interface and builds a DOM tree from an XML document.
+ ## Features currently in development -- Parsing of the Internal Subset specified in the declaration +* Parsing of the Internal Subset specified in the declaration ## Limitations -- Validation not supported yet -- Default values for attributes are not set when parsing an element +* Validation not supported yet +* Default values for attributes are not set when parsing an element ## On the Roadmap -- Support for XML Schemas -- Support for RelaxNG +* Support for XML Schemas +* Support for RelaxNG ## Example ```TypeScript -import { XMLParser } from "./XMLParser"; +import { ContentHandler } from "./ContentHandler"; +import { DOMBuilder } from "./DOMBuilder"; +import { SAXParser } from "./SAXParser"; import { XMLDocument } from "./XMLDocument"; -import { readFile } from "fs"; +import { XMLElement } from "./XMLElement"; -class Test { +export class Test { constructor() { try { - readFile('sample.xml', 'utf-8', (err, data) => { - if (err) { - throw new Error(err.message); - } - let parser: XMLParser = new XMLParser(); - let document: XMLDocument = parser.parse(data); - console.log(document.toString()); - }); - } catch (e) { - console.log(e.message) + let contentHandler: ContentHandler = new DOMBuilder(); + let xmlParser = new SAXParser(); + xmlParser.setContentHandler(contentHandler); + xmlParser.parse("test.xml"); + let doc: XMLDocument = (contentHandler as DOMBuilder).getDocument(); + let root: XMLElement = doc.getRoot(); + console.log(root.toString()); + } catch (error: any) { + if (error instanceof Error) { + console.log(error.message); + } else { + console.log(error); + } } } } From fad47d77eaf4e68156214cfb4cb005158fe6c527 Mon Sep 17 00:00:00 2001 From: "Rodolfo M. 
Raya" Date: Thu, 23 Nov 2023 09:33:34 -0300 Subject: [PATCH 10/11] Code cleanup with SonarQube --- sonar-project.properties | 2 +- ts/ProcessingInstruction.ts | 1 - ts/SAXParser.ts | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sonar-project.properties b/sonar-project.properties index 46e72bf..2083246 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -2,7 +2,7 @@ sonar.projectKey=TypesXML # this is the name displayed in the SonarQube UI sonar.projectName=TypesXML -sonar.projectVersion=1.0.0 +sonar.projectVersion=1.1.0 # Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows. # Since SonarQube 4.2, this property is optional if sonar.modules is set. diff --git a/ts/ProcessingInstruction.ts b/ts/ProcessingInstruction.ts index aec9fdf..975260c 100644 --- a/ts/ProcessingInstruction.ts +++ b/ts/ProcessingInstruction.ts @@ -12,7 +12,6 @@ import { Constants } from "./Constants"; import { XMLNode } from "./XMLNode"; -import { XMLUtils } from "./XMLUtils"; export class ProcessingInstruction implements XMLNode { diff --git a/ts/SAXParser.ts b/ts/SAXParser.ts index e5f93b4..f0357bf 100644 --- a/ts/SAXParser.ts +++ b/ts/SAXParser.ts @@ -299,7 +299,6 @@ export class SAXParser { } } this.pointer += i; - i = 0; // read the rest of the declaration let stack: number = 1; for (; this.pointer < this.buffer.length; this.pointer++) { From 5efc8a5d03f0d677649c61135ac20b874502feb8 Mon Sep 17 00:00:00 2001 From: "Rodolfo M. 
Raya" Date: Thu, 23 Nov 2023 11:42:36 -0300 Subject: [PATCH 11/11] Improved text replacement --- ts/Catalog.ts | 24 ++++++++++++------------ ts/XMLUtils.ts | 30 ++++++++++++++++++++++++------ 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/ts/Catalog.ts b/ts/Catalog.ts index 8e09027..ef3af35 100644 --- a/ts/Catalog.ts +++ b/ts/Catalog.ts @@ -17,6 +17,7 @@ import { DOMBuilder } from "./DOMBuilder"; import { SAXParser } from "./SAXParser"; import { XMLDocument } from "./XMLDocument"; import { XMLElement } from "./XMLElement"; +import { XMLUtils } from "./XMLUtils"; export class Catalog { @@ -191,18 +192,17 @@ export class Catalog { return urn; } let publicId: string = urn.trim().substring('urn:publicid:'.length); - publicId = publicId.replace(/\\+/, ' '); - publicId = publicId.replace(/\\:/, '//'); - publicId = publicId.replace(';', '::'); - publicId = publicId.replace('%2B', '+'); - publicId = publicId.replace('%3A', ':'); - publicId = publicId.replace('%2F', '/'); - publicId = publicId.replace('%3B', ';'); - publicId = publicId.replace('%27', '\''); - publicId = publicId.replace('%3F', '?'); - publicId = publicId.replace('%23', '#'); - publicId = publicId.replace('%25', '%'); - return publicId; + publicId = XMLUtils.replaceAll(publicId, '+', ' '); + publicId = XMLUtils.replaceAll(publicId, ':', '//'); + publicId = XMLUtils.replaceAll(publicId, ';', '::'); + publicId = XMLUtils.replaceAll(publicId, '%2B', '+'); + publicId = XMLUtils.replaceAll(publicId, '%3A', ':'); + publicId = XMLUtils.replaceAll(publicId, '%2F', '/'); + publicId = XMLUtils.replaceAll(publicId, '%3B', ';'); + publicId = XMLUtils.replaceAll(publicId, '%27', '\''); + publicId = XMLUtils.replaceAll(publicId, '%3F', '?'); + publicId = XMLUtils.replaceAll(publicId, '%23', '#'); + return XMLUtils.replaceAll(publicId, '%25', '%'); } getSystemCatalog(): Map { diff --git a/ts/XMLUtils.ts b/ts/XMLUtils.ts index d838765..01b8302 100644 --- a/ts/XMLUtils.ts +++ b/ts/XMLUtils.ts @@ -15,18 
+15,18 @@ export class XMLUtils { static SPACES: string = ' \t\r\n'; static cleanString(text: string): string { - let result: string = text.replace('&', '&'); - result = result.replace('<', '<'); - return result.replace('>', '>'); + let result: string = XMLUtils.replaceAll(text, '&', '&'); + result = XMLUtils.replaceAll(result, '<', '<'); + return XMLUtils.replaceAll(result, '>', '>'); } static unquote(text: string): string { - return text.replace('"', '"'); + return XMLUtils.replaceAll(text, '"', '"'); } static normalizeLines(text: string): string { - let result: string = text.replace('\r\n', '\n'); - return result.replace('\r', '\n'); + let result: string = XMLUtils.replaceAll(text, '\r\n', '\n'); + return XMLUtils.replaceAll(result, '\r', '\n'); } static isXmlSpace(char: string): boolean { @@ -37,6 +37,24 @@ export class XMLUtils { return text.replace(/[\r\n\t]/s, ' '); } + static replaceAll(text: string, search: string, replacement: string): string { + let re: RegExp = new RegExp(XMLUtils.escapeRegExpChars(search), 'g'); + return text.replace(re, replacement); + } + + static escapeRegExpChars(text: string): string { + let result: string = ''; + let length: number = text.length; + for (let i = 0; i < length; i++) { + let c: string = text.charAt(i); + if ('[]{}()^$?*+.'.indexOf(c) > -1) { + result += '\\'; + } + result += c; + } + return result; + } + static validXml10Chars(text: string): string { let result: string = ''; let length: number = text.length;