Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: transform negative lookaheads #560

Merged
merged 1 commit into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 47 additions & 1 deletion node/declaration.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { test, expect } from 'vitest'
import { describe, test, expect } from 'vitest'

import { FunctionConfig } from './config.js'
import { Declaration, mergeDeclarations, parsePattern } from './declaration.js'
Expand Down Expand Up @@ -183,3 +183,49 @@ test('Ensures pattern match on the whole path', () => {

expect(actual).toEqual(expected)
})

describe('Transforms negative lookaheads', () => {
test('Throws if a `extractedExclusionPatterns` property is not supplied', () => {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In practice, this is asserting that if this property (which is set by the feature flag) is not used, we'll keep the existing behaviour of throwing on lookaheads.

const input = "'/((?!api|_next/static|_next/image|favicon.ico).*)'"

expect(() => parsePattern(input)).toThrowError('Regular expressions with lookaheads are not supported')
})

test('With a disjunction inside the lookahead', () => {
const input = "'/((?!api|_next/static|_next/image|favicon.ico).*)'"
const exclusions: string[] = []
const actual = parsePattern(input, exclusions)

expect(actual).toEqual("^'\\/(.*)'$")
expect(exclusions).toStrictEqual(["/^'\\/(api|_next\\/static|_next\\/image|favicon.ico.*)'$/"])
})

test('With multiple lookaheads inside a disjunction', () => {
const input = 'one(two(?!three)|four|five(?!six)|seven)'
const exclusions: string[] = []
const expected = '^one(two|four|five|seven)$'
const actual = parsePattern(input, exclusions)

expect(actual).toEqual(expected)
expect(exclusions).toStrictEqual(['/^one(twothree)$/', '/^one(fivesix)$/'])
})

test('With a lookahead outside of a disjunction', () => {
const input = 'a(b|c)(?!d)'
const exclusions: string[] = []
const expected = '^a(b|c)$'
const actual = parsePattern(input, exclusions)

expect(actual).toEqual(expected)
expect(exclusions).toStrictEqual(['/^a(b|c)d$/'])
})

test('Throws on nested lookaheads', () => {
const exclusions: string[] = []

expect(() => parsePattern('one(?!two(?!three))', exclusions)).toThrowError(
'Regular expressions with nested lookaheads are not supported',
)
expect(exclusions).toStrictEqual([])
})
})
148 changes: 137 additions & 11 deletions node/declaration.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import regexpAST from 'regexp-tree'
import regexpAST, { NodePath } from 'regexp-tree'

import { FunctionConfig, HTTPMethod, Path } from './config.js'
import { FeatureFlags } from './feature_flags.js'
Expand Down Expand Up @@ -116,20 +116,73 @@ const createDeclarationsFromFunctionConfigs = (
return declarations
}

// Validates and normalizes a pattern so that it's a valid regular expression
// in Go, which is the engine used by our edge nodes.
export const parsePattern = (pattern: string) => {
/**
* Validates and normalizes a pattern so that it's a valid regular expression
* in Go, which is the engine used by our edge nodes.
*
* @param pattern Original regular expression
* @param extractedExclusionPatterns If set, negative lookaheads (which are
* not supported in Go) are transformed into a list of exclusion patterns to
* be stored in this array
* @returns Normalized regular expression
*/
export const parsePattern = (pattern: string, extractedExclusionPatterns?: string[]) => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Could we return extractedExclusionPatterns as an array, instead of mutating the parameter?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be the cleanest way, for sure. But parsePattern is used in a few different contexts and I wanted to change the API surface as little as possible to contain the change behind the flag. We can revisit this in the future.

let enclosedPattern = pattern
if (!pattern.startsWith('^')) enclosedPattern = `^${enclosedPattern}`
if (!pattern.endsWith('$')) enclosedPattern = `${enclosedPattern}$`

if (!pattern.startsWith('^')) {
enclosedPattern = `^${enclosedPattern}`
}

if (!pattern.endsWith('$')) {
enclosedPattern = `${enclosedPattern}$`
}

let lookaheadDepth = 0

// Holds the location of every lookahead expression found.
const lookaheads = new Set<string>()
const regexp = new RegExp(enclosedPattern)
const newRegexp = regexpAST.transform(regexp, {
Assertion(path) {
// Lookaheads are not supported. If we find one, throw an error.
if (path.node.kind === 'Lookahead') {
throw new Error('Regular expressions with lookaheads are not supported')
}
Assertion: {
// If we're entering a negative lookahead expression, register its
// location.
pre(path) {
if (path.node.kind !== 'Lookahead') {
return
}

if (!extractedExclusionPatterns) {
throw new Error('Regular expressions with lookaheads are not supported')
}

if (!path.node.negative) {
throw new Error('Regular expressions with positive lookaheads are not supported')
}

lookaheadDepth += 1

if (lookaheadDepth > 1) {
throw new Error('Regular expressions with nested lookaheads are not supported')
}

const lookahead = serializeNodeLocation(path.node)

if (lookahead) {
lookaheads.add(lookahead)
}
},

// If we're leaving a negative lookahead expression, remove it from the
// AST. We'll later replace its functionality with an exclusion pattern.
post(path) {
if (path.node.kind !== 'Lookahead' || !path.node.negative) {
return
}

lookaheadDepth -= 1

path.remove()
},
},

Group(path) {
Expand All @@ -146,6 +199,79 @@ export const parsePattern = (pattern: string) => {
},
})

// The `extractedExclusionPatterns` property works as a shut-off valve: if
// it's not supplied, don't even traverse the AST again to further process
// lookaheads.
if (extractedExclusionPatterns) {
const exclusionPatterns = [...lookaheads].map((lookahead) => getExclusionPatternFromLookahead(regexp, lookahead))

extractedExclusionPatterns.push(...exclusionPatterns)
Comment on lines +202 to +208
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

have you thought about introducing a logging-only mechanism that can give you data on real-world examples so you can spot check if this code is correct?

}

// Strip leading and forward slashes.
return newRegexp.toString().slice(1, -1)
}

/**
* Takes a regular expression and a lookahead inside it and returns a new
* regular expression that acts as an exclusion pattern to replace the
* lookahead.
*
* @param regexp Original regular expression
* @param location Serialized location of the lookahead
* @returns Exclusion pattern regular expression
*/
const getExclusionPatternFromLookahead = (regexp: RegExp, location: string) => {
const exclusionRegexp = regexpAST.transform(regexp, {
Assertion(path) {
if (
path.node.kind !== 'Lookahead' ||
path.node.assertion === null ||
serializeNodeLocation(path.node) !== location
) {
return
}

// Unwrap the lookahead by replacing it with the expression it holds —
// e.g. `(?!foo)` becomes `foo`.
path.replace(path.node.assertion)

// Traverse the parents of the lookahead all the way up to the root. When
// we find a disjunction, replace it with the child we travelled from. In
// practice this means getting rid of all the branches that are not the
// lookahead.
// For example, in `(a|b(?!c)|d)` the exclusion patterns cannot contain
// the `a` or `d` branches of the disjunction, otherwise `ab` and `ad`
// would incorrectly be excluded. The exclusion must be `bc` only.
let visitor: NodePath | null = path

while (visitor !== null) {
const child = visitor

visitor = visitor.parentPath

if (visitor?.node.type !== 'Disjunction') {
continue
}

visitor.replace(child.node)
}
},
})

return exclusionRegexp.toString()
}

/**
* Creates a string representation of a regexp AST node in the format
* `<start line>,<start column>,<start offset>,<end line>,<end column>,<end offset>`
*/
const serializeNodeLocation = (node: NodePath['node']) => {
if (!node.loc) {
return ''
}

const { start, end } = node.loc

return [start.line, start.column, start.offset, end.line, end.column, end.offset].join(',')
}
4 changes: 3 additions & 1 deletion node/feature_flags.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
const defaultFlags = {}
const defaultFlags = {
edge_bundler_transform_lookaheads: false,
}

type FeatureFlag = keyof typeof defaultFlags
type FeatureFlags = Partial<Record<FeatureFlag, boolean>>
Expand Down
19 changes: 19 additions & 0 deletions node/manifest.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -486,3 +486,22 @@ test('Returns functions without a declaration and unrouted functions', () => {
expect(declarationsWithoutFunction).toEqual(['func-3'])
expect(unroutedFunctions).toEqual(['func-2', 'func-4'])
})

test('Generates exclusion patterns from negative lookaheads', () => {
const functions = [{ name: 'func-1', path: '/path/to/func-1.ts' }]
const declarations = [{ function: 'func-1', pattern: "'/((?!api|_next/static|_next/image|favicon.ico).*)'" }]
const { manifest } = generateManifest({
bundles: [],
declarations,
featureFlags: { edge_bundler_transform_lookaheads: true },
functions,
})

expect(manifest.routes).toEqual([
{
function: 'func-1',
pattern: "^'/(.*)'$",
excluded_patterns: ["/^'/(api|_next/static|_next/image|favicon.ico.*)'$/"],
},
])
})
10 changes: 6 additions & 4 deletions node/manifest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ const normalizeMethods = (method: unknown, name: string): string[] | undefined =
const generateManifest = ({
bundles = [],
declarations = [],
featureFlags,
functions,
userFunctionConfig = {},
internalFunctionConfig = {},
Expand Down Expand Up @@ -153,7 +154,8 @@ const generateManifest = ({
return
}

const pattern = getRegularExpression(declaration)
const extractedExclusionPatterns = featureFlags?.edge_bundler_transform_lookaheads ? [] : undefined
const pattern = getRegularExpression(declaration, extractedExclusionPatterns)

// If there is no `pattern`, the declaration will never be triggered, so we
// can discard it.
Expand All @@ -163,7 +165,7 @@ const generateManifest = ({

routedFunctions.add(declaration.function)

const excludedPattern = getExcludedRegularExpressions(declaration)
const excludedPattern = [...getExcludedRegularExpressions(declaration), ...(extractedExclusionPatterns ?? [])]
const route: Route = {
function: func.name,
pattern: serializePattern(pattern),
Expand Down Expand Up @@ -225,10 +227,10 @@ const pathToRegularExpression = (path: string) => {
}
}

const getRegularExpression = (declaration: Declaration) => {
const getRegularExpression = (declaration: Declaration, extractedExclusionPatterns?: string[]) => {
if ('pattern' in declaration) {
try {
return parsePattern(declaration.pattern)
return parsePattern(declaration.pattern, extractedExclusionPatterns)
} catch (error: unknown) {
throw wrapBundleError(
new Error(
Expand Down