path: unwind regular expressions in POSIX

This is the first step in removing ReDoS vulnerabilities from v4.x. The function `splitPathRe` exposed a ReDoS vulnerability. It was only used in the POSIX implementation of a number of the path utilities. In v6.x a change landed that unwound this regular expression and, in turn, patched the vulnerability. This commit copies the unwound implementation currently found on v8.x. It is completely self-contained. I attempted to keep all warnings and deprecations the same as in the v4.x implementation, but may have missed something buried in the large unwound functions. Refs: b212be08f6
nodejs · Feb 22, 2018 · 4196fcf · 4196fcf
1 parent b39ba55
commit 4196fcf
Showing 1 changed file with 220 additions and 38 deletions.
diff --git a/lib/path.js b/lib/path.js
@@ -399,21 +399,8 @@ win32.parse = function(pathString) {
 win32.sep = '\\';
 win32.delimiter = ';';
 
-
-// Split a filename into [root, dir, basename, ext], unix version
-// 'root' is just a slash, or nothing.
-const splitPathRe =
-    /^(\/?|)([\s\S]*?)((?:\.{1,2}|[^\/]+?|)(\.[^.\/]*|))(?:[\/]*)$/;
 var posix = {};
 
-
-function posixSplitPath(filename) {
-  const out = splitPathRe.exec(filename);
-  out.shift();
-  return out;
-}
-
-
 // path.resolve([from ...], to)
 // posix version
 posix.resolve = function() {
@@ -527,39 +514,159 @@ posix._makeLong = function(path) {
 
 
 posix.dirname = function(path) {
-  const result = posixSplitPath(path);
-  const root = result[0];
-  var dir = result[1];
-
-  if (!root && !dir) {
-    // No dirname whatsoever
+  if (path.length === 0)
     return '.';
+  var code = path.charCodeAt(0);
+  var hasRoot = (code === 47);
+  var end = -1;
+  var matchedSlash = true;
+  for (var i = path.length - 1; i >= 1; --i) {
+    code = path.charCodeAt(i);
+    if (code === 47) {
+      if (!matchedSlash) {
+        end = i;
+        break;
+      }
+    } else {
+      // We saw the first non-path separator
+      matchedSlash = false;
+    }
   }
 
-  if (dir) {
-    // It has a dirname, strip trailing slash
-    dir = dir.substr(0, dir.length - 1);
-  }
-
-  return root + dir;
+  if (end === -1)
+    return hasRoot ? '/' : '.';
+  if (hasRoot && end === 1)
+    return '//';
+  return path.slice(0, end);
 };
 
 
 posix.basename = function(path, ext) {
   if (ext !== undefined && typeof ext !== 'string')
     throw new TypeError('ext must be a string');
 
-  var f = posixSplitPath(path)[2];
+  var start = 0;
+  var end = -1;
+  var matchedSlash = true;
+  var i;
+
+  if (ext !== undefined && ext.length > 0 && ext.length <= path.length) {
+    if (ext.length === path.length && ext === path)
+      return '';
+    var extIdx = ext.length - 1;
+    var firstNonSlashEnd = -1;
+    for (i = path.length - 1; i >= 0; --i) {
+      const code = path.charCodeAt(i);
+      if (code === 47/*/*/) {
+        // If we reached a path separator that was not part of a set of path
+        // separators at the end of the string, stop now
+        if (!matchedSlash) {
+          start = i + 1;
+          break;
+        }
+      } else {
+        if (firstNonSlashEnd === -1) {
+          // We saw the first non-path separator, remember this index in case
+          // we need it if the extension ends up not matching
+          matchedSlash = false;
+          firstNonSlashEnd = i + 1;
+        }
+        if (extIdx >= 0) {
+          // Try to match the explicit extension
+          if (code === ext.charCodeAt(extIdx)) {
+            if (--extIdx === -1) {
+              // We matched the extension, so mark this as the end of our path
+              // component
+              end = i;
+            }
+          } else {
+            // Extension does not match, so our result is the entire path
+            // component
+            extIdx = -1;
+            end = firstNonSlashEnd;
+          }
+        }
+      }
+    }
+
+    if (start === end)
+      end = firstNonSlashEnd;
+    else if (end === -1)
+      end = path.length;
+    return path.slice(start, end);
+  } else {
+    for (i = path.length - 1; i >= 0; --i) {
+      if (path.charCodeAt(i) === 47/*/*/) {
+        // If we reached a path separator that was not part of a set of path
+        // separators at the end of the string, stop now
+        if (!matchedSlash) {
+          start = i + 1;
+          break;
+        }
+      } else if (end === -1) {
+        // We saw the first non-path separator, mark this as the end of our
+        // path component
+        matchedSlash = false;
+        end = i + 1;
+      }
+    }
 
-  if (ext && f.substr(-1 * ext.length) === ext) {
-    f = f.substr(0, f.length - ext.length);
+    if (end === -1)
+      return '';
+    return path.slice(start, end);
   }
-  return f;
 };
 
 
 posix.extname = function(path) {
-  return posixSplitPath(path)[3];
+  var startDot = -1;
+  var startPart = 0;
+  var end = -1;
+  var matchedSlash = true;
+  // Track the state of characters (if any) we see before our first dot and
+  // after any path separator we find
+  var preDotState = 0;
+  for (var i = path.length - 1; i >= 0; --i) {
+    const code = path.charCodeAt(i);
+    if (code === 47) {
+      // If we reached a path separator that was not part of a set of path
+      // separators at the end of the string, stop now
+      if (!matchedSlash) {
+        startPart = i + 1;
+        break;
+      }
+      continue;
+    }
+    if (end === -1) {
+      // We saw the first non-path separator, mark this as the end of our
+      // extension
+      matchedSlash = false;
+      end = i + 1;
+    }
+    if (code === 46) {
+      // If this is our first dot, mark it as the start of our extension
+      if (startDot === -1)
+        startDot = i;
+      else if (preDotState !== 1)
+        preDotState = 1;
+    } else if (startDot !== -1) {
+      // We saw a non-dot and non-path separator before our dot, so we should
+      // have a good chance at having a non-empty extension
+      preDotState = -1;
+    }
+  }
+
+  if (startDot === -1 ||
+      end === -1 ||
+      // We saw a non-dot character immediately before the dot
+      preDotState === 0 ||
+      // The (right-most) trimmed path component is exactly '..'
+      (preDotState === 1 &&
+       startDot === end - 1 &&
+       startDot === startPart + 1)) {
+    return '';
+  }
+  return path.slice(startDot, end);
 };
 
 
@@ -587,15 +694,90 @@ posix.format = function(pathObject) {
 
 posix.parse = function(pathString) {
   assertPath(pathString);
+  var ret = { root: '', dir: '', base: '', ext: '', name: '' };
+  if (pathString.length === 0)
+    return ret;
+  var code = pathString.charCodeAt(0);
+  var isAbsolute = (code === 47);
+  var start;
+  if (isAbsolute) {
+    ret.root = '/';
+    start = 1;
+  } else {
+    start = 0;
+  }
+  var startDot = -1;
+  var startPart = 0;
+  var end = -1;
+  var matchedSlash = true;
+  var i = pathString.length - 1;
+
+  // Track the state of characters (if any) we see before our first dot and
+  // after any path separator we find
+  var preDotState = 0;
+
+  // Get non-dir info
+  for (; i >= start; --i) {
+    code = pathString.charCodeAt(i);
+    if (code === 47) {
+      // If we reached a path separator that was not part of a set of path
+      // separators at the end of the string, stop now
+      if (!matchedSlash) {
+        startPart = i + 1;
+        break;
+      }
+      continue;
+    }
+    if (end === -1) {
+      // We saw the first non-path separator, mark this as the end of our
+      // extension
+      matchedSlash = false;
+      end = i + 1;
+    }
+    if (code === 46) {
+      // If this is our first dot, mark it as the start of our extension
+      if (startDot === -1)
+        startDot = i;
+      else if (preDotState !== 1)
+        preDotState = 1;
+    } else if (startDot !== -1) {
+      // We saw a non-dot and non-path separator before our dot, so we should
+      // have a good chance at having a non-empty extension
+      preDotState = -1;
+    }
+  }
 
-  var allParts = posixSplitPath(pathString);
-  return {
-    root: allParts[0],
-    dir: allParts[0] + allParts[1].slice(0, -1),
-    base: allParts[2],
-    ext: allParts[3],
-    name: allParts[2].slice(0, allParts[2].length - allParts[3].length)
-  };
+  if (startDot === -1 ||
+      end === -1 ||
+      // We saw a non-dot character immediately before the dot
+      preDotState === 0 ||
+      // The (right-most) trimmed path component is exactly '..'
+      (preDotState === 1 &&
+       startDot === end - 1 &&
+       startDot === startPart + 1)) {
+    if (end !== -1) {
+      if (startPart === 0 && isAbsolute)
+        ret.base = ret.name = pathString.slice(1, end);
+      else
+        ret.base = ret.name = pathString.slice(startPart, end);
+    }
+  } else {
+    if (startPart === 0 && isAbsolute) {
+      ret.name = pathString.slice(1, startDot);
+      ret.base = pathString.slice(1, end);
+    } else {
+      ret.name = pathString.slice(startPart, startDot);
+      ret.base = pathString.slice(startPart, end);
+    }
+    ret.ext = pathString.slice(startDot, end);
+  }
+
+  if (startPart > 0)
+    ret.dir = pathString.slice(0, startPart - 1);
+  else if (isAbsolute)
+    ret.dir = '/';
+
+  return ret;
 };