Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions packages/memory-storage/src/cache-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export async function findOrCacheDatasetByPossibleId(client: MemoryStorage, entr
return found;
}

const datasetDir = resolve(client.datasetsDirectory, entryNameOrId);
const datasetDir = resolveWithinDirectory(client.datasetsDirectory, entryNameOrId);

try {
// Check if directory exists
Expand Down Expand Up @@ -125,7 +125,7 @@ export async function findOrCacheKeyValueStoreByPossibleId(client: MemoryStorage
return found;
}

const keyValueStoreDir = resolve(client.keyValueStoresDirectory, entryNameOrId);
const keyValueStoreDir = resolveWithinDirectory(client.keyValueStoresDirectory, entryNameOrId);

try {
// Check if directory exists
Expand Down Expand Up @@ -278,7 +278,7 @@ export async function findRequestQueueByPossibleId(client: MemoryStorage, entryN
return found;
}

const requestQueueDir = resolve(client.requestQueuesDirectory, entryNameOrId);
const requestQueueDir = resolveWithinDirectory(client.requestQueuesDirectory, entryNameOrId);

try {
// Check if directory exists
Expand Down Expand Up @@ -392,4 +392,4 @@ import { DatasetClient } from './resource-clients/dataset';
import type { InternalKeyRecord } from './resource-clients/key-value-store';
import { KeyValueStoreClient } from './resource-clients/key-value-store';
import { RequestQueueClient } from './resource-clients/request-queue';
import { memoryStorageLog } from './utils';
import { memoryStorageLog, resolveWithinDirectory } from './utils';
10 changes: 5 additions & 5 deletions packages/memory-storage/src/fs/key-value-store/fs.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { readFile, rm } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { dirname } from 'node:path';
import { basename } from 'node:path/win32';

import { AsyncQueue } from '@sapphire/async-queue';
Expand All @@ -8,7 +8,7 @@ import mime from 'mime-types';

import { lockAndWrite } from '../../background-handler/fs-utils';
import type { InternalKeyRecord } from '../../resource-clients/key-value-store';
import { memoryStorageLog } from '../../utils';
import { memoryStorageLog, resolveWithinDirectory } from '../../utils';
import type { StorageImplementation } from '../common';
import type { CreateStorageImplementationOptions } from '.';

Expand All @@ -35,7 +35,7 @@ export class KeyValueFileSystemEntry implements StorageImplementation<InternalKe
} catch {
try {
// Try without extension
file = await readFile(resolve(this.storeDirectory, this.rawRecord.key));
file = await readFile(resolveWithinDirectory(this.storeDirectory, this.rawRecord.key));
memoryStorageLog.warning(
[
`Key-value entry "${this.rawRecord.key}" for store ${basename(
Expand Down Expand Up @@ -68,8 +68,8 @@ export class KeyValueFileSystemEntry implements StorageImplementation<InternalKe
? data.key
: `${data.key}.${data.extension}`;

this.filePath ??= resolve(this.storeDirectory, fileName);
this.fileMetadataPath ??= resolve(this.storeDirectory, `${data.key}.__metadata__.json`);
this.filePath ??= resolveWithinDirectory(this.storeDirectory, fileName);
this.fileMetadataPath ??= resolveWithinDirectory(this.storeDirectory, `${data.key}.__metadata__.json`);

const { value, ...rest } = data;
this.rawRecord = rest;
Expand Down
7 changes: 3 additions & 4 deletions packages/memory-storage/src/resource-clients/dataset.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
/* eslint-disable import/no-duplicates */
import { randomUUID } from 'node:crypto';
import { rm } from 'node:fs/promises';
import { resolve } from 'node:path';

import type * as storage from '@crawlee/types';
import type { Dictionary } from '@crawlee/types';
Expand All @@ -14,7 +13,7 @@ import { StorageTypes } from '../consts';
import type { StorageImplementation } from '../fs/common';
import { createDatasetStorageImplementation } from '../fs/dataset';
import type { MemoryStorage } from '../index';
import { createPaginatedEntryList, createPaginatedList } from '../utils';
import { createPaginatedEntryList, createPaginatedList, resolveWithinDirectory } from '../utils';
import { BaseClient } from './common/base-client';

/**
Expand Down Expand Up @@ -53,7 +52,7 @@ export class DatasetClient<Data extends Dictionary = Dictionary>
constructor(options: DatasetClientOptions) {
super(options.id ?? randomUUID());
this.name = options.name;
this.datasetDirectory = resolve(options.baseStorageDirectory, this.name ?? this.id);
this.datasetDirectory = resolveWithinDirectory(options.baseStorageDirectory, this.name ?? this.id);
this.client = options.client;
}

Expand Down Expand Up @@ -100,7 +99,7 @@ export class DatasetClient<Data extends Dictionary = Dictionary>

const previousDir = existingStoreById.datasetDirectory;

existingStoreById.datasetDirectory = resolve(
existingStoreById.datasetDirectory = resolveWithinDirectory(
this.client.datasetsDirectory,
parsed.name ?? existingStoreById.name ?? existingStoreById.id,
);
Expand Down
14 changes: 10 additions & 4 deletions packages/memory-storage/src/resource-clients/key-value-store.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { randomUUID } from 'node:crypto';
import { rm } from 'node:fs/promises';
import { resolve } from 'node:path';
import { Readable } from 'node:stream';

import type * as storage from '@crawlee/types';
Expand All @@ -16,7 +15,14 @@ import { DEFAULT_API_PARAM_LIMIT, StorageTypes } from '../consts';
import type { StorageImplementation } from '../fs/common';
import { createKeyValueStorageImplementation } from '../fs/key-value-store';
import type { MemoryStorage } from '../index';
import { createKeyList, createKeyStringList, createLazyIterablePromise, isBuffer, isStream } from '../utils';
import {
createKeyList,
createKeyStringList,
createLazyIterablePromise,
isBuffer,
isStream,
resolveWithinDirectory,
} from '../utils';
import { BaseClient } from './common/base-client';

const DEFAULT_LOCAL_FILE_EXTENSION = 'bin';
Expand Down Expand Up @@ -49,7 +55,7 @@ export class KeyValueStoreClient extends BaseClient {
constructor(options: KeyValueStoreClientOptions) {
super(options.id ?? randomUUID());
this.name = options.name;
this.keyValueStoreDirectory = resolve(options.baseStorageDirectory, this.name ?? this.id);
this.keyValueStoreDirectory = resolveWithinDirectory(options.baseStorageDirectory, this.name ?? this.id);
this.client = options.client;
}

Expand Down Expand Up @@ -96,7 +102,7 @@ export class KeyValueStoreClient extends BaseClient {

const previousDir = existingStoreById.keyValueStoreDirectory;

existingStoreById.keyValueStoreDirectory = resolve(
existingStoreById.keyValueStoreDirectory = resolveWithinDirectory(
this.client.keyValueStoresDirectory,
parsed.name ?? existingStoreById.name ?? existingStoreById.id,
);
Expand Down
7 changes: 3 additions & 4 deletions packages/memory-storage/src/resource-clients/request-queue.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { randomUUID } from 'node:crypto';
import { rm } from 'node:fs/promises';
import { resolve } from 'node:path';

import type * as storage from '@crawlee/types';
import { AsyncQueue } from '@sapphire/async-queue';
Expand All @@ -14,7 +13,7 @@ import { createRequestQueueStorageImplementation } from '../fs/request-queue';
import type { RequestQueueFileSystemEntry } from '../fs/request-queue/fs';
import type { RequestQueueMemoryEntry } from '../fs/request-queue/memory';
import type { MemoryStorage } from '../index';
import { purgeNullsFromObject, uniqueKeyToRequestId } from '../utils';
import { purgeNullsFromObject, resolveWithinDirectory, uniqueKeyToRequestId } from '../utils';
import { BaseClient } from './common/base-client';

const requestShape = s.object({
Expand Down Expand Up @@ -68,7 +67,7 @@ export class RequestQueueClient extends BaseClient implements storage.RequestQue
constructor(options: RequestQueueClientOptions) {
super(options.id ?? randomUUID());
this.name = options.name;
this.requestQueueDirectory = resolve(options.baseStorageDirectory, this.name ?? this.id);
this.requestQueueDirectory = resolveWithinDirectory(options.baseStorageDirectory, this.name ?? this.id);
this.client = options.client;
}

Expand Down Expand Up @@ -128,7 +127,7 @@ export class RequestQueueClient extends BaseClient implements storage.RequestQue

const previousDir = existingQueueById.requestQueueDirectory;

existingQueueById.requestQueueDirectory = resolve(
existingQueueById.requestQueueDirectory = resolveWithinDirectory(
this.client.requestQueuesDirectory,
parsed.name ?? existingQueueById.name ?? existingQueueById.id,
);
Expand Down
20 changes: 20 additions & 0 deletions packages/memory-storage/src/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { createHash } from 'node:crypto';
import { resolve, sep } from 'node:path';

import type * as storage from '@crawlee/types';
import { s } from '@sapphire/shapeshift';
Expand All @@ -7,6 +8,25 @@ import defaultLog from '@apify/log';

import { REQUEST_ID_LENGTH } from './consts';

/**
* Resolves `segment` against `baseDirectory` and ensures the result stays within `baseDirectory`.
* Storage names and record keys are used as filesystem path components, so a value containing `..`
* or an absolute path could otherwise escape the intended directory.
*/
export function resolveWithinDirectory(baseDirectory: string, segment: string): string {
const base = resolve(baseDirectory);
const resolved = resolve(base, segment);

if (resolved !== base && !resolved.startsWith(`${base}${sep}`)) {
throw new Error(
`"${segment}" is not allowed because it would resolve outside of the storage directory. ` +
`Storage names and record keys must not contain path traversal segments ("..") or absolute paths.`,
);
}

return resolved;
}

/**
* Removes all properties with a null value
* from the provided object.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { rm } from 'node:fs/promises';
import { resolve } from 'node:path';

import { MemoryStorage } from '@crawlee/memory-storage';
import { pathExists } from 'fs-extra';

import { waitTillWrittenToDisk } from '../__shared__';

describe('record key path traversal', () => {
const tmpLocation = resolve(__dirname, './tmp/record-key-path-traversal');

afterAll(async () => {
await rm(tmpLocation, { force: true, recursive: true });
});

const storage = new MemoryStorage({
localDataDirectory: tmpLocation,
persistStorage: true,
writeMetadata: true,
});

test('setRecord rejects a key that escapes the store directory', async () => {
const info = await storage.keyValueStores().getOrCreate('record-key-store');
const client = storage.keyValueStore(info.id);

await expect(
client.setRecord({ key: '../escaped-record', value: 'pwned', contentType: 'text/plain' }),
).rejects.toThrow();

// The escaped file must not have been created outside the store directory.
await expect(pathExists(resolve(storage.keyValueStoresDirectory, 'escaped-record.txt'))).resolves.toBe(false);
});

test('setRecord still works for a regular key', async () => {
const info = await storage.keyValueStores().getOrCreate('record-key-store-ok');
const client = storage.keyValueStore(info.id);

await expect(
client.setRecord({ key: 'SAFEKEY', value: 'value', contentType: 'text/plain' }),
).resolves.not.toThrow();

// The store was opened by name, so it lives under its name directory.
const storePath = resolve(storage.keyValueStoresDirectory, info.name!);
await waitTillWrittenToDisk(resolve(storePath, 'SAFEKEY.txt'));
await expect(pathExists(resolve(storePath, 'SAFEKEY.txt'))).resolves.toBe(true);
});
});
50 changes: 50 additions & 0 deletions packages/memory-storage/test/storage-name-path-traversal.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { rm } from 'node:fs/promises';
import { resolve, sep } from 'node:path';

import { MemoryStorage } from '@crawlee/memory-storage';

describe('storage name path traversal', () => {
const tmpLocation = resolve(__dirname, './tmp/storage-name-path-traversal');

afterAll(async () => {
await rm(tmpLocation, { force: true, recursive: true });
});

const storage = new MemoryStorage({
localDataDirectory: tmpLocation,
persistStorage: true,
writeMetadata: true,
});

const traversalNames = ['../escaped', `..${sep}escaped`, resolve(tmpLocation, '..', 'escaped-absolute')];

describe('getOrCreate rejects names that escape the storage directory', () => {
test.each(traversalNames)('key-value store name %s', async (name) => {
await expect(storage.keyValueStores().getOrCreate(name)).rejects.toThrow();
});

test.each(traversalNames)('dataset name %s', async (name) => {
await expect(storage.datasets().getOrCreate(name)).rejects.toThrow();
});

test.each(traversalNames)('request queue name %s', async (name) => {
await expect(storage.requestQueues().getOrCreate(name)).rejects.toThrow();
});
});

test('rename via update rejects names that escape the storage directory', async () => {
const info = await storage.keyValueStores().getOrCreate('legit-store');
const client = storage.keyValueStore(info.id);

await expect(client.update({ name: '../escaped-rename' })).rejects.toThrow();
});

test('legitimate names still work', async () => {
const info = await storage.keyValueStores().getOrCreate('normal-name');
const client = storage.keyValueStore(info.id);

await expect(
client.setRecord({ key: 'SAFEKEY', value: 'value', contentType: 'text/plain' }),
).resolves.not.toThrow();
});
});
6 changes: 3 additions & 3 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -7897,9 +7897,9 @@ __metadata:
linkType: hard

"hono@npm:^4.11.4":
version: 4.12.18
resolution: "hono@npm:4.12.18"
checksum: 10c0/b0b9688fd9e41a1847b077d579dc0e92a28b67c247c6ee7d1e751c0bae269824c30c7773feff1a2874e40ea36a3d2f9d1fc5ba618a28ecdf2ca1b33ed2473864
version: 4.12.23
resolution: "hono@npm:4.12.23"
checksum: 10c0/58945bb3aeb16d710b4a6c2809ba02b86b7269f6a3a67e126fc81cee5e9ae8ad2d9aa553db03c4f1a104ec03e423072e33932cb23eb3bbc48774acc72fc9c346
languageName: node
linkType: hard

Expand Down
Loading