Skip to content

Commit

Permalink
API: Increase protection of files downloaded via the REST API (#9676)
Browse files Browse the repository at this point in the history
  • Loading branch information
pedr committed Jan 22, 2024
1 parent a863f92 commit d4d4002
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 51 deletions.
95 changes: 72 additions & 23 deletions packages/lib/services/rest/routes/notes.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import Logger from '@joplin/utils/Logger';
import shim from '../../../shim';
import uuid from '../../../uuid';
import { downloadMediaFile } from './notes';
import Setting from '../../../models/Setting';
import { readFile, readdir, remove, writeFile } from 'fs-extra';
const md5 = require('md5');

const imagePath = `${__dirname}/../../../images/SideMenuHeader.png`;
const jpgBase64Content = '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/wAALCAAFAAUBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EAB8QAAEEAQUBAAAAAAAAAAAAAAQBAgUGAwAREiExM//aAAgBAQAAPwBJarVpGHm7KWbapCSwyZ6FDjkLyYE1W/LHyV2zfOk2TrzX/9k=';

describe('routes/notes', () => {

Expand All @@ -21,44 +26,50 @@ describe('routes/notes', () => {
'https://joplinapp.org/valid/image_url.png',
'http://joplinapp.org/valid/image_url.png',
])('should try to download and return a local path to a valid URL', async (url) => {
const fetchBlobSpy = jest.fn();
jest.spyOn(shim, 'fetchBlob').mockImplementation(fetchBlobSpy);
jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value');
const fetchBlobSpy = jest.fn(async (_url, options) => {
await writeFile(options.path, Buffer.from(jpgBase64Content, 'base64'));
});
const spy = jest.spyOn(shim, 'fetchBlob').mockImplementation(fetchBlobSpy);

const response = await downloadMediaFile(url);

expect(response.endsWith('mocked_uuid_value.png')).toBe(true);
expect(fetchBlobSpy).toBeCalledTimes(1);
const files = await readdir(Setting.value('tempDir'));

expect(files.length).toBe(1);
expect(fetchBlobSpy).toHaveBeenCalledTimes(1);
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);
await remove(response);
spy.mockRestore();
});

test('should get file from local drive if protocol allows it', async () => {
const url = 'file://valid/image.png';
const url = `file:///${imagePath}`;
const originalFileContent = await readFile(imagePath);

const fsDriverCopySpy = jest.fn();
jest.spyOn(shim, 'fsDriver').mockImplementation(() => {
return {
copy: fsDriverCopySpy,
} as any;
});
jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value');
const response = await downloadMediaFile(url, null, ['file:']);

const response = await downloadMediaFile(url);
const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(1);
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);

expect(response.endsWith('mocked_uuid_value.png')).toBe(true);
expect(fsDriverCopySpy).toBeCalledTimes(1);
const responseFileContent = await readFile(response);
expect(md5(responseFileContent)).toBe(md5(originalFileContent));
await remove(response);
});

test('should be able to handle URLs with data', async () => {
const url = '';

const imageFromDataUrlSpy = jest.fn();
jest.spyOn(shim, 'imageFromDataUrl').mockImplementation(imageFromDataUrlSpy);
jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value');
const originalFileContent = Buffer.from(url.split('data:image/gif;base64,')[1], 'base64');

const response = await downloadMediaFile(url);

expect(response.endsWith('mocked_uuid_value.gif')).toBe(true);
expect(imageFromDataUrlSpy).toBeCalledTimes(1);
const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(1);
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);

const responseFileContent = await readFile(response);
expect(md5(responseFileContent)).toBe(md5(originalFileContent));
await remove(response);
});

test('should not process URLs with data that is not image type', async () => {
Expand All @@ -68,6 +79,8 @@ describe('routes/notes', () => {
const response = await downloadMediaFile(url);
Logger.globalLogger.enabled = true;

const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(0);
expect(response).toBe('');
});

Expand All @@ -76,6 +89,42 @@ describe('routes/notes', () => {

const response = await downloadMediaFile(url);

const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(0);
expect(response).toBe('');
});

test('should not copy content from invalid protocols', async () => {
const url = 'file:///home/user/file.db';

const allowedProtocols: string[] = [];
const mediaFilePath = await downloadMediaFile(url, null, allowedProtocols);

const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(0);
expect(mediaFilePath).toBe('');
});

test.each([
'https://joplinapp.org/valid/image_url',
'https://joplinapp.org/valid/image_url.invalid_url',
])('should correct the file extension in filename from files without or invalid ones', async (url) => {
const spy = jest.spyOn(shim, 'fetchBlob').mockImplementation(async (_url, options) => {
await writeFile(options.path, Buffer.from(jpgBase64Content, 'base64'));
return {
headers: {
'content-type': 'image/jpg',
},
};
});

const response = await downloadMediaFile(url);

const files = await readdir(Setting.value('tempDir'));
expect(files.length).toBe(1);
expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`);

await remove(response);
spy.mockRestore();
});
});
82 changes: 54 additions & 28 deletions packages/lib/services/rest/routes/notes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -189,67 +189,86 @@ async function tryToGuessExtFromMimeType(response: any, mediaPath: string) {
return newMediaPath;
}

export async function downloadMediaFile(url: string, fetchOptions?: FetchOptions) {
logger.info('Downloading media file', url);

const tempDir = Setting.value('tempDir');
const getFileExtension = (url: string, isDataUrl: boolean) => {
let fileExt = isDataUrl ? mimeUtils.toFileExtension(mimeUtils.fromDataUrl(url)) : safeFileExtension(fileExtension(url).toLowerCase());
if (!mimeUtils.fromFileExtension(fileExt)) fileExt = ''; // If the file extension is unknown - clear it.
if (fileExt) fileExt = `.${fileExt}`;

// The URL we get to download have been extracted from the Markdown document
url = markdownUtils.unescapeLinkUrl(url);
return fileExt;
};

const isDataUrl = url && url.toLowerCase().indexOf('data:') === 0;
const generateMediaPath = (url: string, isDataUrl: boolean, fileExt: string) => {
const tempDir = Setting.value('tempDir');
const name = isDataUrl ? md5(`${Math.random()}_${Date.now()}`) : filename(url);
// Append a UUID because simply checking if the file exists is not enough since
// multiple resources can be downloaded at the same time (race condition).
const mediaPath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`;
return mediaPath;
};

const isValidUrl = (url: string, isDataUrl: boolean, urlProtocol?: string, allowedProtocols?: string[]) => {
if (!urlProtocol) return false;

// PDFs and other heavy resoucres are often served as seperate files insted of data urls, its very unlikely to encounter a pdf as a data url
if (isDataUrl && !url.toLowerCase().startsWith('data:image/')) {
logger.warn(`Resources in data URL format is only supported for images ${url}`);
return '';
return false;
}

const invalidProtocols = ['cid:'];
const defaultAllowedProtocols = ['http:', 'https:', 'data:'];
const allowed = allowedProtocols ?? defaultAllowedProtocols;
const isAllowedProtocol = allowed.includes(urlProtocol);

return isAllowedProtocol;
};

export async function downloadMediaFile(url: string, fetchOptions?: FetchOptions, allowedProtocols?: string[]) {
logger.info('Downloading media file', url);

// The URL we get to download have been extracted from the Markdown document
url = markdownUtils.unescapeLinkUrl(url);

const isDataUrl = url && url.toLowerCase().indexOf('data:') === 0;
const urlProtocol = urlUtils.urlProtocol(url)?.toLowerCase();

if (!urlProtocol || invalidProtocols.includes(urlProtocol)) {
if (!isValidUrl(url, isDataUrl, urlProtocol, allowedProtocols)) {
return '';
}

const name = isDataUrl ? md5(`${Math.random()}_${Date.now()}`) : filename(url);
let fileExt = isDataUrl ? mimeUtils.toFileExtension(mimeUtils.fromDataUrl(url)) : safeFileExtension(fileExtension(url).toLowerCase());
if (!mimeUtils.fromFileExtension(fileExt)) fileExt = ''; // If the file extension is unknown - clear it.
if (fileExt) fileExt = `.${fileExt}`;

// Append a UUID because simply checking if the file exists is not enough since
// multiple resources can be downloaded at the same time (race condition).
let mediaPath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`;
const fileExt = getFileExtension(url, isDataUrl);
const mediaPath = generateMediaPath(url, isDataUrl, fileExt);
let newMediaPath = undefined;

try {
if (isDataUrl) {
await shim.imageFromDataUrl(url, mediaPath);
} else if (urlProtocol === 'file:') {
// Can't think of any reason to disallow this at this point
// if (!allowFileProtocolImages) throw new Error('For security reasons, this URL with file:// protocol cannot be downloaded');
const localPath = fileUriToPath(url);
await shim.fsDriver().copy(localPath, mediaPath);
} else {
const response = await shim.fetchBlob(url, { path: mediaPath, maxRetry: 1, ...fetchOptions });

// If we could not find the file extension from the URL, try to get it
// now based on the Content-Type header.
if (!fileExt) mediaPath = await tryToGuessExtFromMimeType(response, mediaPath);
if (!fileExt) {
// If we could not find the file extension from the URL, try to get it
// now based on the Content-Type header.
newMediaPath = await tryToGuessExtFromMimeType(response, mediaPath);
}
}
return mediaPath;
return newMediaPath ?? mediaPath;
} catch (error) {
logger.warn(`Cannot download image at ${url}`, error);
return '';
}
}

async function downloadMediaFiles(urls: string[], fetchOptions?: FetchOptions) {
async function downloadMediaFiles(urls: string[], fetchOptions?: FetchOptions, allowedProtocols?: string[]) {
const PromisePool = require('es6-promise-pool');

const output: any = {};

const downloadOne = async (url: string) => {
const mediaPath = await downloadMediaFile(url, fetchOptions); // , allowFileProtocolImages);
const mediaPath = await downloadMediaFile(url, fetchOptions, allowedProtocols);
if (mediaPath) output[url] = { path: mediaPath, originalUrl: url };
};

Expand Down Expand Up @@ -374,14 +393,20 @@ async function attachImageFromDataUrl(note: any, imageDataUrl: string, cropRect:
return await shim.attachFileToNote(note, tempFilePath);
}

export const extractNoteFromHTML = async (requestNote: RequestNote, requestId: number, imageSizes: any, fetchOptions?: FetchOptions) => {
export const extractNoteFromHTML = async (
requestNote: RequestNote,
requestId: number,
imageSizes: any,
fetchOptions?: FetchOptions,
allowedProtocols?: string[],
) => {
const note = await requestNoteToNote(requestNote);

const mediaUrls = extractMediaUrls(note.markup_language, note.body);

logger.info(`Request (${requestId}): Downloading media files: ${mediaUrls.length}`);

const mediaFiles = await downloadMediaFiles(mediaUrls, fetchOptions); // , allowFileProtocolImages);
const mediaFiles = await downloadMediaFiles(mediaUrls, fetchOptions, allowedProtocols);

logger.info(`Request (${requestId}): Creating resources from paths: ${Object.getOwnPropertyNames(mediaFiles).length}`);

Expand Down Expand Up @@ -433,7 +458,8 @@ export default async function(request: Request, id: string = null, link: string

logger.info('Images:', imageSizes);

const extracted = await extractNoteFromHTML(requestNote, requestId, imageSizes);
const allowedProtocolsForDownloadMediaFiles = ['http:', 'https:', 'file:', 'data:'];
const extracted = await extractNoteFromHTML(requestNote, requestId, imageSizes, undefined, allowedProtocolsForDownloadMediaFiles);

let note = await Note.save(extracted.note, extracted.saveOptions);

Expand Down

0 comments on commit d4d4002

Please sign in to comment.