From a04c29766e3165534a5789d59a3a7bef5a8f5b5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Fri, 5 Jun 2026 10:09:45 +0200 Subject: [PATCH 1/2] fix(memory-storage): prevent storage names from escaping the storage directory (#3715) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Storage names and key-value-store record keys are used directly as on-disk path components in `@crawlee/memory-storage`. A value containing `..` or an absolute path could therefore resolve outside the intended storage directory and create/write files elsewhere: - **Storage names** — `KeyValueStore`/`Dataset`/`RequestQueue` `getOrCreate(name)` and `update({ name })` resolved `path.resolve(baseDir, name)`, so e.g. `KeyValueStore.open('../escaped')` escaped the `key_value_stores` / `datasets` / `request_queues` directory. - **Record keys** — the storage client's `setRecord({ key })` built the file path as `resolve(storeDir, key)`. The core `KeyValueStore.setValue` already validates keys against a restricted charset, but the lower-level memory-storage client did not, so a key like `../escaped` escaped the store directory. Adds a shared `resolveWithinDirectory(baseDirectory, segment)` helper that resolves the candidate path and asserts it stays within the base directory, throwing otherwise. All name- and key-based path construction (resource-client constructors, rename in `update`, the `findOrCache*` lookup helpers, and the key-value-store filesystem entry) now routes through it. Legitimate names/keys are unaffected — including nested segments that stay within the directory; only values that actually escape are rejected. Dataset entity IDs (sequential index) and request IDs (hashed) are internally generated and were already safe. --- packages/memory-storage/src/cache-helpers.ts | 8 +-- .../src/fs/key-value-store/fs.ts | 10 ++-- .../src/resource-clients/dataset.ts | 7 ++- .../src/resource-clients/key-value-store.ts | 14 ++++-- .../src/resource-clients/request-queue.ts | 7 ++- packages/memory-storage/src/utils.ts | 20 ++++++++ .../record-key-path-traversal.test.ts | 47 +++++++++++++++++ .../test/storage-name-path-traversal.test.ts | 50 +++++++++++++++++++ 8 files changed, 142 insertions(+), 21 deletions(-) create mode 100644 packages/memory-storage/test/key-value-store/record-key-path-traversal.test.ts create mode 100644 packages/memory-storage/test/storage-name-path-traversal.test.ts diff --git a/packages/memory-storage/src/cache-helpers.ts b/packages/memory-storage/src/cache-helpers.ts index 157e69847c6c..d392d6acdf9d 100644 --- a/packages/memory-storage/src/cache-helpers.ts +++ b/packages/memory-storage/src/cache-helpers.ts @@ -22,7 +22,7 @@ export async function findOrCacheDatasetByPossibleId(client: MemoryStorage, entr return found; } - const datasetDir = resolve(client.datasetsDirectory, entryNameOrId); + const datasetDir = resolveWithinDirectory(client.datasetsDirectory, entryNameOrId); try { // Check if directory exists @@ -125,7 +125,7 @@ export async function findOrCacheKeyValueStoreByPossibleId(client: MemoryStorage return found; } - const keyValueStoreDir = resolve(client.keyValueStoresDirectory, entryNameOrId); + const keyValueStoreDir = resolveWithinDirectory(client.keyValueStoresDirectory, entryNameOrId); try { // Check if directory exists @@ -278,7 +278,7 @@ export async function findRequestQueueByPossibleId(client: MemoryStorage, entryN return found; } - const requestQueueDir = resolve(client.requestQueuesDirectory, entryNameOrId); + const requestQueueDir = resolveWithinDirectory(client.requestQueuesDirectory, entryNameOrId); try { // Check if directory exists @@ -392,4 +392,4 @@ import { DatasetClient } from './resource-clients/dataset'; import type { InternalKeyRecord } from './resource-clients/key-value-store'; import { KeyValueStoreClient } from './resource-clients/key-value-store'; import { RequestQueueClient } from './resource-clients/request-queue'; -import { memoryStorageLog } from './utils'; +import { memoryStorageLog, resolveWithinDirectory } from './utils'; diff --git a/packages/memory-storage/src/fs/key-value-store/fs.ts b/packages/memory-storage/src/fs/key-value-store/fs.ts index 48b727d639ca..5d1e499c7d38 100644 --- a/packages/memory-storage/src/fs/key-value-store/fs.ts +++ b/packages/memory-storage/src/fs/key-value-store/fs.ts @@ -1,5 +1,5 @@ import { readFile, rm } from 'node:fs/promises'; -import { dirname, resolve } from 'node:path'; +import { dirname } from 'node:path'; import { basename } from 'node:path/win32'; import { AsyncQueue } from '@sapphire/async-queue'; @@ -8,7 +8,7 @@ import mime from 'mime-types'; import { lockAndWrite } from '../../background-handler/fs-utils'; import type { InternalKeyRecord } from '../../resource-clients/key-value-store'; -import { memoryStorageLog } from '../../utils'; +import { memoryStorageLog, resolveWithinDirectory } from '../../utils'; import type { StorageImplementation } from '../common'; import type { CreateStorageImplementationOptions } from '.'; @@ -35,7 +35,7 @@ export class KeyValueFileSystemEntry implements StorageImplementation constructor(options: DatasetClientOptions) { super(options.id ?? randomUUID()); this.name = options.name; - this.datasetDirectory = resolve(options.baseStorageDirectory, this.name ?? this.id); + this.datasetDirectory = resolveWithinDirectory(options.baseStorageDirectory, this.name ?? this.id); this.client = options.client; } @@ -100,7 +99,7 @@ export class DatasetClient const previousDir = existingStoreById.datasetDirectory; - existingStoreById.datasetDirectory = resolve( + existingStoreById.datasetDirectory = resolveWithinDirectory( this.client.datasetsDirectory, parsed.name ?? existingStoreById.name ?? existingStoreById.id, ); diff --git a/packages/memory-storage/src/resource-clients/key-value-store.ts b/packages/memory-storage/src/resource-clients/key-value-store.ts index 66c6a3101e70..f2ec0b19fd17 100644 --- a/packages/memory-storage/src/resource-clients/key-value-store.ts +++ b/packages/memory-storage/src/resource-clients/key-value-store.ts @@ -1,6 +1,5 @@ import { randomUUID } from 'node:crypto'; import { rm } from 'node:fs/promises'; -import { resolve } from 'node:path'; import { Readable } from 'node:stream'; import type * as storage from '@crawlee/types'; @@ -16,7 +15,14 @@ import { DEFAULT_API_PARAM_LIMIT, StorageTypes } from '../consts'; import type { StorageImplementation } from '../fs/common'; import { createKeyValueStorageImplementation } from '../fs/key-value-store'; import type { MemoryStorage } from '../index'; -import { createKeyList, createKeyStringList, createLazyIterablePromise, isBuffer, isStream } from '../utils'; +import { + createKeyList, + createKeyStringList, + createLazyIterablePromise, + isBuffer, + isStream, + resolveWithinDirectory, +} from '../utils'; import { BaseClient } from './common/base-client'; const DEFAULT_LOCAL_FILE_EXTENSION = 'bin'; @@ -49,7 +55,7 @@ export class KeyValueStoreClient extends BaseClient { constructor(options: KeyValueStoreClientOptions) { super(options.id ?? randomUUID()); this.name = options.name; - this.keyValueStoreDirectory = resolve(options.baseStorageDirectory, this.name ?? this.id); + this.keyValueStoreDirectory = resolveWithinDirectory(options.baseStorageDirectory, this.name ?? this.id); this.client = options.client; } @@ -96,7 +102,7 @@ export class KeyValueStoreClient extends BaseClient { const previousDir = existingStoreById.keyValueStoreDirectory; - existingStoreById.keyValueStoreDirectory = resolve( + existingStoreById.keyValueStoreDirectory = resolveWithinDirectory( this.client.keyValueStoresDirectory, parsed.name ?? existingStoreById.name ?? existingStoreById.id, ); diff --git a/packages/memory-storage/src/resource-clients/request-queue.ts b/packages/memory-storage/src/resource-clients/request-queue.ts index 7f0666f96f77..dfc02e98707d 100644 --- a/packages/memory-storage/src/resource-clients/request-queue.ts +++ b/packages/memory-storage/src/resource-clients/request-queue.ts @@ -1,6 +1,5 @@ import { randomUUID } from 'node:crypto'; import { rm } from 'node:fs/promises'; -import { resolve } from 'node:path'; import type * as storage from '@crawlee/types'; import { AsyncQueue } from '@sapphire/async-queue'; @@ -14,7 +13,7 @@ import { createRequestQueueStorageImplementation } from '../fs/request-queue'; import type { RequestQueueFileSystemEntry } from '../fs/request-queue/fs'; import type { RequestQueueMemoryEntry } from '../fs/request-queue/memory'; import type { MemoryStorage } from '../index'; -import { purgeNullsFromObject, uniqueKeyToRequestId } from '../utils'; +import { purgeNullsFromObject, resolveWithinDirectory, uniqueKeyToRequestId } from '../utils'; import { BaseClient } from './common/base-client'; const requestShape = s.object({ @@ -68,7 +67,7 @@ export class RequestQueueClient extends BaseClient implements storage.RequestQue constructor(options: RequestQueueClientOptions) { super(options.id ?? randomUUID()); this.name = options.name; - this.requestQueueDirectory = resolve(options.baseStorageDirectory, this.name ?? this.id); + this.requestQueueDirectory = resolveWithinDirectory(options.baseStorageDirectory, this.name ?? this.id); this.client = options.client; } @@ -128,7 +127,7 @@ export class RequestQueueClient extends BaseClient implements storage.RequestQue const previousDir = existingQueueById.requestQueueDirectory; - existingQueueById.requestQueueDirectory = resolve( + existingQueueById.requestQueueDirectory = resolveWithinDirectory( this.client.requestQueuesDirectory, parsed.name ?? existingQueueById.name ?? existingQueueById.id, ); diff --git a/packages/memory-storage/src/utils.ts b/packages/memory-storage/src/utils.ts index 8a74bb33b43d..c0a7241696bd 100644 --- a/packages/memory-storage/src/utils.ts +++ b/packages/memory-storage/src/utils.ts @@ -1,4 +1,5 @@ import { createHash } from 'node:crypto'; +import { resolve, sep } from 'node:path'; import type * as storage from '@crawlee/types'; import { s } from '@sapphire/shapeshift'; @@ -7,6 +8,25 @@ import defaultLog from '@apify/log'; import { REQUEST_ID_LENGTH } from './consts'; +/** + * Resolves `segment` against `baseDirectory` and ensures the result stays within `baseDirectory`. + * Storage names and record keys are used as filesystem path components, so a value containing `..` + * or an absolute path could otherwise escape the intended directory. + */ +export function resolveWithinDirectory(baseDirectory: string, segment: string): string { + const base = resolve(baseDirectory); + const resolved = resolve(base, segment); + + if (resolved !== base && !resolved.startsWith(`${base}${sep}`)) { + throw new Error( + `"${segment}" is not allowed because it would resolve outside of the storage directory. ` + + `Storage names and record keys must not contain path traversal segments ("..") or absolute paths.`, + ); + } + + return resolved; +} + /** * Removes all properties with a null value * from the provided object. diff --git a/packages/memory-storage/test/key-value-store/record-key-path-traversal.test.ts b/packages/memory-storage/test/key-value-store/record-key-path-traversal.test.ts new file mode 100644 index 000000000000..e52e9fba0322 --- /dev/null +++ b/packages/memory-storage/test/key-value-store/record-key-path-traversal.test.ts @@ -0,0 +1,47 @@ +import { rm } from 'node:fs/promises'; +import { resolve } from 'node:path'; + +import { MemoryStorage } from '@crawlee/memory-storage'; +import { pathExists } from 'fs-extra'; + +import { waitTillWrittenToDisk } from '../__shared__'; + +describe('record key path traversal', () => { + const tmpLocation = resolve(__dirname, './tmp/record-key-path-traversal'); + + afterAll(async () => { + await rm(tmpLocation, { force: true, recursive: true }); + }); + + const storage = new MemoryStorage({ + localDataDirectory: tmpLocation, + persistStorage: true, + writeMetadata: true, + }); + + test('setRecord rejects a key that escapes the store directory', async () => { + const info = await storage.keyValueStores().getOrCreate('record-key-store'); + const client = storage.keyValueStore(info.id); + + await expect( + client.setRecord({ key: '../escaped-record', value: 'pwned', contentType: 'text/plain' }), + ).rejects.toThrow(); + + // The escaped file must not have been created outside the store directory. + await expect(pathExists(resolve(storage.keyValueStoresDirectory, 'escaped-record.txt'))).resolves.toBe(false); + }); + + test('setRecord still works for a regular key', async () => { + const info = await storage.keyValueStores().getOrCreate('record-key-store-ok'); + const client = storage.keyValueStore(info.id); + + await expect( + client.setRecord({ key: 'SAFEKEY', value: 'value', contentType: 'text/plain' }), + ).resolves.not.toThrow(); + + // The store was opened by name, so it lives under its name directory. + const storePath = resolve(storage.keyValueStoresDirectory, info.name!); + await waitTillWrittenToDisk(resolve(storePath, 'SAFEKEY.txt')); + await expect(pathExists(resolve(storePath, 'SAFEKEY.txt'))).resolves.toBe(true); + }); +}); diff --git a/packages/memory-storage/test/storage-name-path-traversal.test.ts b/packages/memory-storage/test/storage-name-path-traversal.test.ts new file mode 100644 index 000000000000..9afd816756c7 --- /dev/null +++ b/packages/memory-storage/test/storage-name-path-traversal.test.ts @@ -0,0 +1,50 @@ +import { rm } from 'node:fs/promises'; +import { resolve, sep } from 'node:path'; + +import { MemoryStorage } from '@crawlee/memory-storage'; + +describe('storage name path traversal', () => { + const tmpLocation = resolve(__dirname, './tmp/storage-name-path-traversal'); + + afterAll(async () => { + await rm(tmpLocation, { force: true, recursive: true }); + }); + + const storage = new MemoryStorage({ + localDataDirectory: tmpLocation, + persistStorage: true, + writeMetadata: true, + }); + + const traversalNames = ['../escaped', `..${sep}escaped`, resolve(tmpLocation, '..', 'escaped-absolute')]; + + describe('getOrCreate rejects names that escape the storage directory', () => { + test.each(traversalNames)('key-value store name %s', async (name) => { + await expect(storage.keyValueStores().getOrCreate(name)).rejects.toThrow(); + }); + + test.each(traversalNames)('dataset name %s', async (name) => { + await expect(storage.datasets().getOrCreate(name)).rejects.toThrow(); + }); + + test.each(traversalNames)('request queue name %s', async (name) => { + await expect(storage.requestQueues().getOrCreate(name)).rejects.toThrow(); + }); + }); + + test('rename via update rejects names that escape the storage directory', async () => { + const info = await storage.keyValueStores().getOrCreate('legit-store'); + const client = storage.keyValueStore(info.id); + + await expect(client.update({ name: '../escaped-rename' })).rejects.toThrow(); + }); + + test('legitimate names still work', async () => { + const info = await storage.keyValueStores().getOrCreate('normal-name'); + const client = storage.keyValueStore(info.id); + + await expect( + client.setRecord({ key: 'SAFEKEY', value: 'value', contentType: 'text/plain' }), + ).resolves.not.toThrow(); + }); +}); From 31512a3514f9321fa93ab84364c0a4d9221ecb71 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 5 Jun 2026 10:10:04 +0200 Subject: [PATCH 2/2] chore(deps): bump hono from 4.12.18 to 4.12.23 (#3718) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [hono](https://github.com/honojs/hono) from 4.12.18 to 4.12.23.
Release notes

Sourced from hono's releases.

v4.12.23

What's Changed

Full Changelog: https://github.com/honojs/hono/compare/v4.12.22...v4.12.23

v4.12.22

What's Changed

New Contributors

Full Changelog: https://github.com/honojs/hono/compare/v4.12.21...v4.12.22

v4.12.21

Security fixes

This release includes fixes for the following security issues:

app.mount() strips mount prefix using undecoded path, causing incorrect routing for percent-encoded paths

Affects: app.mount(). Fixes prefix stripping using the raw URL pathname instead of the decoded path, where percent-encoded characters in the mount prefix or path could cause the prefix to be removed at the wrong position, resulting in the sub-application receiving an incorrect path. GHSA-2gcr-mfcq-wcc3

IP Restriction bypasses static deny rules for non-canonical IPv6

Affects: hono/ip-restriction. Fixes IP address comparison using string equality, where non-canonical IPv6 representations of a denied address — such as compressed forms or hex-notation IPv4-mapped addresses — could bypass static deny rules. GHSA-xrhx-7g5j-rcj5

Cookie helper does not sanitize sameSite and priority, allowing Set-Cookie injection

Affects: hono/cookie. Fixes missing validation of sameSite and priority options against injection characters (;, \r, \n), where user-controlled input passed to either option could inject additional attributes into the Set-Cookie response header. GHSA-3hrh-pfw6-9m5x

JWT middleware accepts any Authorization scheme, not only Bearer

Affects: hono/jwt, hono/jwk. Fixes missing scheme validation in the Authorization header, where any two-part header value was accepted regardless of the scheme name, allowing non-Bearer schemes to pass JWT authentication. GHSA-f577-qrjj-4474


Users who use app.mount(), hono/ip-restriction, hono/cookie, or hono/jwt/hono/jwk are encouraged to upgrade to this version.

... (truncated)

Commits
  • 83bfb3b 4.12.23
  • bcd290a fix(utils/ipaddr): do not compress a single 0 group to :: (#4971)
  • c968177 feat(compress): add contentTypeFilter option and `COMPRESSIBLE_CONTENT_TYPE_R...
  • 0265a54 docs(contribution): add AI Usage Policy (#4970)
  • c84c5d2 feat(context): export the Context class publicly (#4543)
  • 82dad62 fix(serve-static): normalize all backslashes in file paths, not just the firs...
  • 2f01b77 4.12.22
  • 6bc0dff feat: add msgpack as a compressible content type (#4957)
  • 7e0555d fix(deno): echo negotiated WebSocket subprotocol in upgrade response (#4955)
  • f0ed246 fix(compress): respect Accept-Encoding when encoding option is set (#4951)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=hono&package-manager=npm_and_yarn&previous-version=4.12.18&new-version=4.12.23)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/apify/crawlee/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yarn.lock b/yarn.lock index ddbff3adf9bc..36015348ac5a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7897,9 +7897,9 @@ __metadata: linkType: hard "hono@npm:^4.11.4": - version: 4.12.18 - resolution: "hono@npm:4.12.18" - checksum: 10c0/b0b9688fd9e41a1847b077d579dc0e92a28b67c247c6ee7d1e751c0bae269824c30c7773feff1a2874e40ea36a3d2f9d1fc5ba618a28ecdf2ca1b33ed2473864 + version: 4.12.23 + resolution: "hono@npm:4.12.23" + checksum: 10c0/58945bb3aeb16d710b4a6c2809ba02b86b7269f6a3a67e126fc81cee5e9ae8ad2d9aa553db03c4f1a104ec03e423072e33932cb23eb3bbc48774acc72fc9c346 languageName: node linkType: hard