From 18c7d5b9a203fc909109310d053d6f742bc9d69a Mon Sep 17 00:00:00 2001
From: modesty <modestyz@hotmail.com>
Date: Fri, 23 May 2025 16:32:46 -0700
Subject: [PATCH 1/2] maint: remove dependency on @xmldom/xmldom

---
 lib/ptixmlinject.js         |  46 ++++++---
 lib/simpleXmlParser.js      | 190 ++++++++++++++++++++++++++++++++++++
 package.json                |   4 +-
 rollup.config.js            |   1 -
 rollup/bundle-pdfjs-base.js |   2 +-
 5 files changed, 226 insertions(+), 17 deletions(-)
 create mode 100644 lib/simpleXmlParser.js
diff --git a/lib/ptixmlinject.js b/lib/ptixmlinject.js
index 2034586f..13669bbf 100644
--- a/lib/ptixmlinject.js
+++ b/lib/ptixmlinject.js
@@ -1,29 +1,43 @@
 import fs from "fs";
-import { DOMParser } from "@xmldom/xmldom";
+import { DOMParser } from "./simpleXmlParser.js";
 
+/**
+ * XML Parser for PTI format
+ * @class
+ */
 export default class PTIXmlParser {
+    /** @type {string|null} */
     xmlData = null;
+	/** @type {Array<any>} */
 	ptiPageArray = [];
 
-	// constructor
+	/**
+	 * Create a new PTIXmlParser
+	 */
 	constructor() {
         this.xmlData = null;
         this.ptiPageArray = [];
     }
 
+	/**
+	 * Parse an XML file
+	 * @param {string} filePath - The path to the XML file
+	 * @param {Function} callback - The callback function
+	 */
 	parseXml(filePath, callback) {
 		fs.readFile(filePath, 'utf8', (err, data) => {
 			if (err) {
                 callback(err);
 			}
 			else {
+				/** @type {string} */
 				this.xmlData = data;
 
 				var parser = new DOMParser();
 				var dom = parser.parseFromString(this.xmlData);
 				var root = dom.documentElement;
 
-				var xmlFields = root.getElementsByTagName("field");
+				var xmlFields = root ? root.getElementsByTagName("field") : [];
 				var fields = [];
 
 				for (var i = 0; i < xmlFields.length; i++) {
@@ -37,31 +51,34 @@ export default class PTIXmlParser {
 					var fontName = xmlFields[i].getAttribute('fontName');
 					var fontSize = xmlFields[i].getAttribute('fontSize');
 
+					/** @type {Record<string, any>} */
 					var item = {};
 
-					var rectLeft = parseInt(xPos) - 21; //was 23.5
-					var rectTop = parseInt(yPos) - 20;//was 23
-					var rectRight = parseInt(rectLeft) + parseInt(width) - 4;
-					var rectBottom = parseInt(rectTop) + parseInt(height) - 4;
+					var rectLeft = parseInt(xPos || '0') - 21; //was 23.5
+					var rectTop = parseInt(yPos || '0') - 20;//was 23
+					var rectRight = parseInt(String(rectLeft)) + parseInt(width || '0') - 4;
+					var rectBottom = parseInt(String(rectTop)) + parseInt(height || '0') - 4;
 
 					item.fieldType = "Tx";
 					if (type === "Boolean") {
 						item.fieldType="Btn";
 					}
 					else if (type === "SSN" ||  type === "Phone" || type === "zip") {
-						item.TName = type.toLowerCase();
+						item.TName = type ? type.toLowerCase() : '';
 					}
 					item.alternativeText = "";
-					item.fullName = id;
-					item.fontSize = fontSize;
-					item.fontName = fontName;
+					item.fullName = id || '';
+					item.fontSize = fontSize || '';
+					item.fontName = fontName || '';
 					item.subtype = "Widget";
 
 					item.rect = [rectLeft, rectTop, rectRight, rectBottom];
 
 					fields.push(item);
 
-					this.ptiPageArray[parseInt(page)]=fields;
+					if (page) {
+						this.ptiPageArray[parseInt(page)] = fields;
+					}
 				}
 
 			}
@@ -69,6 +86,11 @@ export default class PTIXmlParser {
 		});
 	}
 
+	/**
+	 * Get fields for a specific page
+	 * @param {number} pageNum - The page number
+	 * @returns {Array<any>|undefined} The fields for the page
+	 */
 	getFields(pageNum) {
 		return this.ptiPageArray[pageNum];
 	}
diff --git a/lib/simpleXmlParser.js b/lib/simpleXmlParser.js
new file mode 100644
index 00000000..09114d30
--- /dev/null
+++ b/lib/simpleXmlParser.js
@@ -0,0 +1,190 @@
+// A simple XML parser to replace @xmldom/xmldom dependency
+// This implements just enough functionality to support the existing code
+
+/**
+ * Minimal XML element: a tag name, attributes, child elements and direct text.
+ * @class
+ */
+class Element {
+  /**
+   * Create a new Element
+   * @param {string} nodeName - The name of the node/tag
+   */
+  constructor(nodeName) {
+    /** @type {string} */
+    this.nodeName = nodeName;
+    /** @type {Array<Element>} */
+    this.childNodes = [];
+    /** @type {Object.<string, string>} */
+    this.attributes = {};
+    /** @type {string} */
+    this.textContent = "";
+  }
+
+  /**
+   * Get attribute value by name. A present-but-empty attribute yields "",
+   * not null; keys inherited from Object.prototype are never attributes.
+   * @param {string} name - The attribute name
+   * @returns {string|null} The attribute value, or null when absent
+   */
+  getAttribute(name) {
+    const own = Object.prototype.hasOwnProperty.call(this.attributes, name);
+    const value = own ? this.attributes[name] : null;
+    return typeof value === "string" ? value : null;
+  }
+
+  /**
+   * Collect descendant elements with the given tag name in document order.
+   * As in the DOM, the element itself is never part of the result and "*"
+   * matches every descendant.
+   * @param {string} tagName - The tag name to search for, or "*"
+   * @returns {Array<Element>} The matching descendants, each listed once
+   */
+  getElementsByTagName(tagName) {
+    /** @type {Array<Element>} */
+    let results = [];
+
+    for (const child of this.childNodes) {
+      if (!(child instanceof Element)) {
+        continue;
+      }
+
+      // Record the child here only; the recursive call covers strictly
+      // deeper levels, so no element is ever reported twice.
+      if (tagName === "*" || child.nodeName === tagName) {
+        results.push(child);
+      }
+      results = results.concat(child.getElementsByTagName(tagName));
+    }
+
+    return results;
+  }
+}
+
+/**
+ * A simple XML Document implementation: a container for a parsed tree
+ * that exposes only its root element. documentElement starts as null
+ * and stays null until a parser assigns the root.
+ * @class
+ */
+class Document {
+  /** @type {Element|null} */
+  documentElement = null;
+}
+
+/**
+ * A minimal DOMParser implementation that supports the basic features needed.
+ * It is deliberately non-validating: comments, processing instructions and
+ * "<!...>" declarations are skipped, and entity references are not decoded.
+ * @class
+ */
+class SimpleDOMParser {
+  /**
+   * Parse XML string into a Document
+   * @param {string} xmlString - The XML string to parse
+   * @returns {Document} The parsed document (documentElement may be null)
+   */
+  parseFromString(xmlString) {
+    const doc = new Document();
+
+    // parseElement itself skips the XML declaration (a processing instruction)
+    doc.documentElement = this.parseElement(xmlString);
+
+    return doc;
+  }
+
+  /**
+   * Parse the first element found in the string, with its whole subtree.
+   * The markup is scanned once, left to right, while a stack tracks the
+   * elements that are still open. This handles self-closing tags and
+   * elements nested inside an element of the same name, which a search
+   * for the first matching end tag cannot do.
+   *
+   * Each run of direct text is trimmed and appended to the textContent of
+   * the enclosing element; CDATA content is appended as-is.
+   * @param {string} xmlString - The XML string to parse
+   * @returns {Element|null} The parsed element, or null when the input is
+   *   not a string or contains no start tag
+   */
+  parseElement(xmlString) {
+    if (typeof xmlString !== "string") {
+      return null;
+    }
+
+    // Alternatives, in order: comment | processing instruction |
+    // CDATA (group 1) | other "<!...>" declaration | end tag (group 2) |
+    // start tag (group 3 = name, group 4 = raw attribute text). Quoted
+    // attribute values are consumed whole so a ">" inside one is harmless.
+    const tokenRegex =
+      /<!--[\s\S]*?-->|<\?[\s\S]*?\?>|<!\[CDATA\[([\s\S]*?)\]\]>|<![^>]*>|<\/([^\s>]+)\s*>|<([^\s/>!?][^\s/>]*)((?:"[^"]*"|'[^']*'|[^>"'])*)>/g;
+    const attributeRegex = /([^\s=]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
+
+    /** @type {Element|null} */
+    let root = null;
+    /** @type {Array<Element>} Elements whose end tag is still pending */
+    const openElements = [];
+    // Offset just past the previous token, i.e. where pending text begins
+    let textStart = 0;
+    let token;
+
+    while ((token = tokenRegex.exec(xmlString)) !== null) {
+      const cdata = token[1];
+      const endTagName = token[2];
+      const startTagName = token[3];
+      const parent = openElements[openElements.length - 1];
+
+      if (parent) {
+        // Text between the previous token and this one belongs to parent
+        parent.textContent += xmlString.slice(textStart, token.index).trim();
+        if (cdata !== undefined) {
+          parent.textContent += cdata;
+        }
+      }
+      textStart = tokenRegex.lastIndex;
+
+      if (startTagName !== undefined) {
+        const element = new Element(startTagName);
+        const rawAttributes = token[4];
+
+        let attributeMatch;
+        while ((attributeMatch = attributeRegex.exec(rawAttributes)) !== null) {
+          // Exactly one of the two value groups is set; compare against
+          // undefined so that an empty value ("") is preserved
+          element.attributes[attributeMatch[1]] =
+            attributeMatch[2] !== undefined ? attributeMatch[2] : attributeMatch[3];
+        }
+
+        if (parent) {
+          parent.childNodes.push(element);
+        } else {
+          root = element;
+        }
+
+        // A tag ending in "/>" has no content and is never left open
+        if (!/\/\s*$/.test(rawAttributes)) {
+          openElements.push(element);
+        }
+      } else if (endTagName !== undefined) {
+        // Close the nearest open element with this name, together with any
+        // unclosed elements inside it; a stray end tag is ignored
+        let depth = openElements.length - 1;
+        while (depth >= 0 && openElements[depth].nodeName !== endTagName) {
+          depth--;
+        }
+        if (depth >= 0) {
+          openElements.length = depth;
+        }
+      }
+
+      // Stop as soon as the root element is complete
+      if (root !== null && openElements.length === 0) {
+        break;
+      }
+    }
+
+    return root;
+  }
+}
+
+// Export DOMParser as a class
+export { SimpleDOMParser as DOMParser };
diff --git a/package.json b/package.json
index b8cc0423..b469077f 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "pdf2json",
-	"version": "3.1.5",
+	"version": "3.1.6",
 	"description": "PDF file parser that converts PDF binaries to JSON and text, powered by porting a fork of PDF.JS to Node.js",
 	"keywords": [
 		"pdf",
@@ -68,10 +68,8 @@
 		"pdf2json": "./bin/pdf2json.js"
 	},
 	"dependencies": {
-		"@xmldom/xmldom": "^0.9.6"
 	},
 	"bundleDependencies": [
-		"@xmldom/xmldom"
 	],
 	"devDependencies": {
 		"@rollup/plugin-commonjs": "^28.0.2",
diff --git a/rollup.config.js b/rollup.config.js
index 12897812..aed05be5 100644
--- a/rollup.config.js
+++ b/rollup.config.js
@@ -18,7 +18,6 @@ const external = [
 	"url",
 	"buffer",
 	"stream",
-	"@xmldom/xmldom",
 ];
 
 export default [
diff --git a/rollup/bundle-pdfjs-base.js b/rollup/bundle-pdfjs-base.js
index f6e983dd..8053d20b 100644
--- a/rollup/bundle-pdfjs-base.js
+++ b/rollup/bundle-pdfjs-base.js
@@ -64,7 +64,7 @@ const _baseCode = _pdfjsFiles.reduce(
 
 fs.writeFileSync(path.join(__dirname, "../lib/pdfjs-code.js"),
 	`
-  ${"import nodeUtil from 'util';import { Blob } from 'buffer';import { DOMParser } from '@xmldom/xmldom';import PDFAnno from './pdfanno.js';import Image from './pdfimage.js';import { createScratchCanvas } from './pdfcanvas.js';"}
+  ${"import nodeUtil from 'util';import { Blob } from 'buffer';import { DOMParser } from './simpleXmlParser.js';import PDFAnno from './pdfanno.js';import Image from './pdfimage.js';import { createScratchCanvas } from './pdfcanvas.js';"}
   ${"export const PDFJS = {};"}
   ${"const globalScope = { console };"}
   ${_baseCode}

From a2fcd7f9c3689233592554ca7869c019b764843c Mon Sep 17 00:00:00 2001
From: modesty <modestyz@hotmail.com>
Date: Fri, 23 May 2025 16:49:05 -0700
Subject: [PATCH 2/2] doc: update readme for zero dependency update

---
 readme.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/readme.md b/readme.md
index 3af884ff..3647504b 100644
--- a/readme.md
+++ b/readme.md
@@ -8,15 +8,16 @@
 ![GitHub top language](https://img.shields.io/github/languages/top/modesty/pdf2json)
 ![GitHub last commit](https://img.shields.io/github/last-commit/modesty/pdf2json?color=red)
 
-pdf2json is a [node.js](http://nodejs.org/) module converts binary PDF to JSON and text. Built with [pdf.js](https://github.com/mozilla/pdf.js/), it extracts text content and interactive form elements for server-side processing and command-line use.
+pdf2json is a [node.js](http://nodejs.org/) module that converts binary PDF to JSON and text. Built with [pdf.js](https://github.com/mozilla/pdf.js/), it extracts text content and interactive form elements for server-side processing and command-line use.
 
 ## Features
 
 - **PDF text extraction**: extracts textual content of PDF documents into structured JSON.
 - **Form element handling**: parses interactive form fields within PDFs for flexible data capture.
 - **Server-side and command-line versatility**: Integrate with web services for remote PDF processing or use as a standalone command-line tool for local file conversion.
-- **Swift Performance**: fast performance with minimal depdendencies
+- **Swift Performance**: fast performance with zero dependencies (since v3.1.6)
 - **Community driven**: decade+ long community driven development ensures continuous improvement.
+- **Zero dependencies**: no runtime dependencies since v3.1.6; the library is pure JavaScript.
 
 ## Install