diff --git a/packages/memory-storage/src/cache-helpers.ts b/packages/memory-storage/src/cache-helpers.ts index 157e69847c6c..d392d6acdf9d 100644 --- a/packages/memory-storage/src/cache-helpers.ts +++ b/packages/memory-storage/src/cache-helpers.ts @@ -22,7 +22,7 @@ export async function findOrCacheDatasetByPossibleId(client: MemoryStorage, entr return found; } - const datasetDir = resolve(client.datasetsDirectory, entryNameOrId); + const datasetDir = resolveWithinDirectory(client.datasetsDirectory, entryNameOrId); try { // Check if directory exists @@ -125,7 +125,7 @@ export async function findOrCacheKeyValueStoreByPossibleId(client: MemoryStorage return found; } - const keyValueStoreDir = resolve(client.keyValueStoresDirectory, entryNameOrId); + const keyValueStoreDir = resolveWithinDirectory(client.keyValueStoresDirectory, entryNameOrId); try { // Check if directory exists @@ -278,7 +278,7 @@ export async function findRequestQueueByPossibleId(client: MemoryStorage, entryN return found; } - const requestQueueDir = resolve(client.requestQueuesDirectory, entryNameOrId); + const requestQueueDir = resolveWithinDirectory(client.requestQueuesDirectory, entryNameOrId); try { // Check if directory exists @@ -392,4 +392,4 @@ import { DatasetClient } from './resource-clients/dataset'; import type { InternalKeyRecord } from './resource-clients/key-value-store'; import { KeyValueStoreClient } from './resource-clients/key-value-store'; import { RequestQueueClient } from './resource-clients/request-queue'; -import { memoryStorageLog } from './utils'; +import { memoryStorageLog, resolveWithinDirectory } from './utils'; diff --git a/packages/memory-storage/src/fs/key-value-store/fs.ts b/packages/memory-storage/src/fs/key-value-store/fs.ts index 48b727d639ca..5d1e499c7d38 100644 --- a/packages/memory-storage/src/fs/key-value-store/fs.ts +++ b/packages/memory-storage/src/fs/key-value-store/fs.ts @@ -1,5 +1,5 @@ import { readFile, rm } from 'node:fs/promises'; -import { dirname, resolve } from 'node:path'; +import { dirname } from 'node:path'; import { basename } from 'node:path/win32'; import { AsyncQueue } from '@sapphire/async-queue'; @@ -8,7 +8,7 @@ import mime from 'mime-types'; import { lockAndWrite } from '../../background-handler/fs-utils'; import type { InternalKeyRecord } from '../../resource-clients/key-value-store'; -import { memoryStorageLog } from '../../utils'; +import { memoryStorageLog, resolveWithinDirectory } from '../../utils'; import type { StorageImplementation } from '../common'; import type { CreateStorageImplementationOptions } from '.'; @@ -35,7 +35,7 @@ export class KeyValueFileSystemEntry implements StorageImplementation constructor(options: DatasetClientOptions) { super(options.id ?? randomUUID()); this.name = options.name; - this.datasetDirectory = resolve(options.baseStorageDirectory, this.name ?? this.id); + this.datasetDirectory = resolveWithinDirectory(options.baseStorageDirectory, this.name ?? this.id); this.client = options.client; } @@ -100,7 +99,7 @@ export class DatasetClient const previousDir = existingStoreById.datasetDirectory; - existingStoreById.datasetDirectory = resolve( + existingStoreById.datasetDirectory = resolveWithinDirectory( this.client.datasetsDirectory, parsed.name ?? existingStoreById.name ?? existingStoreById.id, ); diff --git a/packages/memory-storage/src/resource-clients/key-value-store.ts b/packages/memory-storage/src/resource-clients/key-value-store.ts index 66c6a3101e70..f2ec0b19fd17 100644 --- a/packages/memory-storage/src/resource-clients/key-value-store.ts +++ b/packages/memory-storage/src/resource-clients/key-value-store.ts @@ -1,6 +1,5 @@ import { randomUUID } from 'node:crypto'; import { rm } from 'node:fs/promises'; -import { resolve } from 'node:path'; import { Readable } from 'node:stream'; import type * as storage from '@crawlee/types'; @@ -16,7 +15,14 @@ import { DEFAULT_API_PARAM_LIMIT, StorageTypes } from '../consts'; import type { StorageImplementation } from '../fs/common'; import { createKeyValueStorageImplementation } from '../fs/key-value-store'; import type { MemoryStorage } from '../index'; -import { createKeyList, createKeyStringList, createLazyIterablePromise, isBuffer, isStream } from '../utils'; +import { + createKeyList, + createKeyStringList, + createLazyIterablePromise, + isBuffer, + isStream, + resolveWithinDirectory, +} from '../utils'; import { BaseClient } from './common/base-client'; const DEFAULT_LOCAL_FILE_EXTENSION = 'bin'; @@ -49,7 +55,7 @@ export class KeyValueStoreClient extends BaseClient { constructor(options: KeyValueStoreClientOptions) { super(options.id ?? randomUUID()); this.name = options.name; - this.keyValueStoreDirectory = resolve(options.baseStorageDirectory, this.name ?? this.id); + this.keyValueStoreDirectory = resolveWithinDirectory(options.baseStorageDirectory, this.name ?? this.id); this.client = options.client; } @@ -96,7 +102,7 @@ export class KeyValueStoreClient extends BaseClient { const previousDir = existingStoreById.keyValueStoreDirectory; - existingStoreById.keyValueStoreDirectory = resolve( + existingStoreById.keyValueStoreDirectory = resolveWithinDirectory( this.client.keyValueStoresDirectory, parsed.name ?? existingStoreById.name ?? existingStoreById.id, ); diff --git a/packages/memory-storage/src/resource-clients/request-queue.ts b/packages/memory-storage/src/resource-clients/request-queue.ts index 7f0666f96f77..dfc02e98707d 100644 --- a/packages/memory-storage/src/resource-clients/request-queue.ts +++ b/packages/memory-storage/src/resource-clients/request-queue.ts @@ -1,6 +1,5 @@ import { randomUUID } from 'node:crypto'; import { rm } from 'node:fs/promises'; -import { resolve } from 'node:path'; import type * as storage from '@crawlee/types'; import { AsyncQueue } from '@sapphire/async-queue'; @@ -14,7 +13,7 @@ import { createRequestQueueStorageImplementation } from '../fs/request-queue'; import type { RequestQueueFileSystemEntry } from '../fs/request-queue/fs'; import type { RequestQueueMemoryEntry } from '../fs/request-queue/memory'; import type { MemoryStorage } from '../index'; -import { purgeNullsFromObject, uniqueKeyToRequestId } from '../utils'; +import { purgeNullsFromObject, resolveWithinDirectory, uniqueKeyToRequestId } from '../utils'; import { BaseClient } from './common/base-client'; const requestShape = s.object({ @@ -68,7 +67,7 @@ export class RequestQueueClient extends BaseClient implements storage.RequestQue constructor(options: RequestQueueClientOptions) { super(options.id ?? randomUUID()); this.name = options.name; - this.requestQueueDirectory = resolve(options.baseStorageDirectory, this.name ?? this.id); + this.requestQueueDirectory = resolveWithinDirectory(options.baseStorageDirectory, this.name ?? this.id); this.client = options.client; } @@ -128,7 +127,7 @@ export class RequestQueueClient extends BaseClient implements storage.RequestQue const previousDir = existingQueueById.requestQueueDirectory; - existingQueueById.requestQueueDirectory = resolve( + existingQueueById.requestQueueDirectory = resolveWithinDirectory( this.client.requestQueuesDirectory, parsed.name ?? existingQueueById.name ?? existingQueueById.id, ); diff --git a/packages/memory-storage/src/utils.ts b/packages/memory-storage/src/utils.ts index 8a74bb33b43d..c0a7241696bd 100644 --- a/packages/memory-storage/src/utils.ts +++ b/packages/memory-storage/src/utils.ts @@ -1,4 +1,5 @@ import { createHash } from 'node:crypto'; +import { resolve, sep } from 'node:path'; import type * as storage from '@crawlee/types'; import { s } from '@sapphire/shapeshift'; @@ -7,6 +8,25 @@ import defaultLog from '@apify/log'; import { REQUEST_ID_LENGTH } from './consts'; +/** + * Resolves `segment` against `baseDirectory` and ensures the result stays within `baseDirectory`. + * Storage names and record keys are used as filesystem path components, so a value containing `..` + * or an absolute path could otherwise escape the intended directory. + */ +export function resolveWithinDirectory(baseDirectory: string, segment: string): string { + const base = resolve(baseDirectory); + const resolved = resolve(base, segment); + + if (resolved !== base && !resolved.startsWith(`${base}${sep}`)) { + throw new Error( + `"${segment}" is not allowed because it would resolve outside of the storage directory. ` + + `Storage names and record keys must not contain path traversal segments ("..") or absolute paths.`, + ); + } + + return resolved; +} + /** * Removes all properties with a null value * from the provided object. diff --git a/packages/memory-storage/test/key-value-store/record-key-path-traversal.test.ts b/packages/memory-storage/test/key-value-store/record-key-path-traversal.test.ts new file mode 100644 index 000000000000..e52e9fba0322 --- /dev/null +++ b/packages/memory-storage/test/key-value-store/record-key-path-traversal.test.ts @@ -0,0 +1,47 @@ +import { rm } from 'node:fs/promises'; +import { resolve } from 'node:path'; + +import { MemoryStorage } from '@crawlee/memory-storage'; +import { pathExists } from 'fs-extra'; + +import { waitTillWrittenToDisk } from '../__shared__'; + +describe('record key path traversal', () => { + const tmpLocation = resolve(__dirname, './tmp/record-key-path-traversal'); + + afterAll(async () => { + await rm(tmpLocation, { force: true, recursive: true }); + }); + + const storage = new MemoryStorage({ + localDataDirectory: tmpLocation, + persistStorage: true, + writeMetadata: true, + }); + + test('setRecord rejects a key that escapes the store directory', async () => { + const info = await storage.keyValueStores().getOrCreate('record-key-store'); + const client = storage.keyValueStore(info.id); + + await expect( + client.setRecord({ key: '../escaped-record', value: 'pwned', contentType: 'text/plain' }), + ).rejects.toThrow(); + + // The escaped file must not have been created outside the store directory. + await expect(pathExists(resolve(storage.keyValueStoresDirectory, 'escaped-record.txt'))).resolves.toBe(false); + }); + + test('setRecord still works for a regular key', async () => { + const info = await storage.keyValueStores().getOrCreate('record-key-store-ok'); + const client = storage.keyValueStore(info.id); + + await expect( + client.setRecord({ key: 'SAFEKEY', value: 'value', contentType: 'text/plain' }), + ).resolves.not.toThrow(); + + // The store was opened by name, so it lives under its name directory. + const storePath = resolve(storage.keyValueStoresDirectory, info.name!); + await waitTillWrittenToDisk(resolve(storePath, 'SAFEKEY.txt')); + await expect(pathExists(resolve(storePath, 'SAFEKEY.txt'))).resolves.toBe(true); + }); +}); diff --git a/packages/memory-storage/test/storage-name-path-traversal.test.ts b/packages/memory-storage/test/storage-name-path-traversal.test.ts new file mode 100644 index 000000000000..9afd816756c7 --- /dev/null +++ b/packages/memory-storage/test/storage-name-path-traversal.test.ts @@ -0,0 +1,50 @@ +import { rm } from 'node:fs/promises'; +import { resolve, sep } from 'node:path'; + +import { MemoryStorage } from '@crawlee/memory-storage'; + +describe('storage name path traversal', () => { + const tmpLocation = resolve(__dirname, './tmp/storage-name-path-traversal'); + + afterAll(async () => { + await rm(tmpLocation, { force: true, recursive: true }); + }); + + const storage = new MemoryStorage({ + localDataDirectory: tmpLocation, + persistStorage: true, + writeMetadata: true, + }); + + const traversalNames = ['../escaped', `..${sep}escaped`, resolve(tmpLocation, '..', 'escaped-absolute')]; + + describe('getOrCreate rejects names that escape the storage directory', () => { + test.each(traversalNames)('key-value store name %s', async (name) => { + await expect(storage.keyValueStores().getOrCreate(name)).rejects.toThrow(); + }); + + test.each(traversalNames)('dataset name %s', async (name) => { + await expect(storage.datasets().getOrCreate(name)).rejects.toThrow(); + }); + + test.each(traversalNames)('request queue name %s', async (name) => { + await expect(storage.requestQueues().getOrCreate(name)).rejects.toThrow(); + }); + }); + + test('rename via update rejects names that escape the storage directory', async () => { + const info = await storage.keyValueStores().getOrCreate('legit-store'); + const client = storage.keyValueStore(info.id); + + await expect(client.update({ name: '../escaped-rename' })).rejects.toThrow(); + }); + + test('legitimate names still work', async () => { + const info = await storage.keyValueStores().getOrCreate('normal-name'); + const client = storage.keyValueStore(info.id); + + await expect( + client.setRecord({ key: 'SAFEKEY', value: 'value', contentType: 'text/plain' }), + ).resolves.not.toThrow(); + }); +}); diff --git a/yarn.lock b/yarn.lock index ddbff3adf9bc..36015348ac5a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7897,9 +7897,9 @@ __metadata: linkType: hard "hono@npm:^4.11.4": - version: 4.12.18 - resolution: "hono@npm:4.12.18" - checksum: 10c0/b0b9688fd9e41a1847b077d579dc0e92a28b67c247c6ee7d1e751c0bae269824c30c7773feff1a2874e40ea36a3d2f9d1fc5ba618a28ecdf2ca1b33ed2473864 + version: 4.12.23 + resolution: "hono@npm:4.12.23" + checksum: 10c0/58945bb3aeb16d710b4a6c2809ba02b86b7269f6a3a67e126fc81cee5e9ae8ad2d9aa553db03c4f1a104ec03e423072e33932cb23eb3bbc48774acc72fc9c346 languageName: node linkType: hard