Skip to content

Commit

Permalink
fix(Query): Parse html string error responses to avoid displaying raw…
Browse files Browse the repository at this point in the history
… HTML as error message (apache#29321)
  • Loading branch information
rtexelm committed Jun 26, 2024
1 parent b5a72e2 commit de6a518
Show file tree
Hide file tree
Showing 4 changed files with 345 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ import {
t,
SupersetError,
ErrorTypeEnum,
isProbablyHTML,
isJsonString,
} from '@superset-ui/core';

// The response always contains an error attribute, can contain anything from the
// SupersetClientResponse object, and can contain a spread JSON blob
// The response always contains an error attribute, can contain anything from
// the SupersetClientResponse object, and can contain a spread JSON blob
export type ClientErrorObject = {
error: string;
errors?: SupersetError[];
Expand All @@ -51,8 +53,74 @@ type ErrorType =

type ErrorTextSource = 'dashboard' | 'chart' | 'query' | 'dataset' | 'database';

export function parseErrorJson(responseObject: JsonObject): ClientErrorObject {
let error = { ...responseObject };
const ERROR_CODE_LOOKUP = {
400: 'Bad request',
401: 'Unauthorized',
402: 'Payment required',
403: 'Forbidden',
404: 'Not found',
405: 'Method not allowed',
406: 'Not acceptable',
407: 'Proxy authentication required',
408: 'Request timeout',
409: 'Conflict',
410: 'Gone',
411: 'Length required',
412: 'Precondition failed',
413: 'Payload too large',
414: 'URI too long',
415: 'Unsupported media type',
416: 'Range not satisfiable',
417: 'Expectation failed',
418: "I'm a teapot",
500: 'Server error',
501: 'Not implemented',
502: 'Bad gateway',
503: 'Service unavailable',
504: 'Gateway timeout',
505: 'HTTP version not supported',
506: 'Variant also negotiates',
507: 'Insufficient storage',
508: 'Loop detected',
510: 'Not extended',
511: 'Network authentication required',
599: 'Network error',
};

export function checkForHtml(str: string): boolean {
return !isJsonString(str) && isProbablyHTML(str);
}

export function parseStringResponse(str: string): string {
if (checkForHtml(str)) {
for (const [code, message] of Object.entries(ERROR_CODE_LOOKUP)) {
const regex = new RegExp(`${code}|${message}`, 'i');
if (regex.test(str)) {
return t(message);
}
}
return t('Unknown error');
}
return str;
}

export function getErrorFromStatusCode(status: number): string | null {
return ERROR_CODE_LOOKUP[status] || null;
}

export function retrieveErrorMessage(
str: string,
errorObject: JsonObject,
): string {
const statusError =
'status' in errorObject ? getErrorFromStatusCode(errorObject.status) : null;

// Prefer status code message over the response or HTML text
return statusError || parseStringResponse(str);
}

export function parseErrorJson(responseJson: JsonObject): ClientErrorObject {
let error = { ...responseJson };
// Backwards compatibility for old error renderers with the new error object
if (error.errors && error.errors.length > 0) {
error.error = error.description = error.errors[0].message;
Expand All @@ -67,7 +135,11 @@ export function parseErrorJson(responseObject: JsonObject): ClientErrorObject {
t('Invalid input');
}
if (typeof error.message === 'string') {
error.error = error.message;
if (checkForHtml(error.message)) {
error.error = retrieveErrorMessage(error.message, error);
} else {
error.error = error.message;
}
}
}
if (error.stack) {
Expand Down Expand Up @@ -95,11 +167,12 @@ export function getClientErrorObject(
| { response: Response }
| string,
): Promise<ClientErrorObject> {
// takes a SupersetClientResponse as input, attempts to read response as Json if possible,
// and returns a Promise that resolves to a plain object with error key and text value.
// takes a SupersetClientResponse as input, attempts to read response as Json
// if possible, and returns a Promise that resolves to a plain object with
// error key and text value.
return new Promise(resolve => {
if (typeof response === 'string') {
resolve({ error: response });
resolve({ error: parseStringResponse(response) });
return;
}

Expand Down Expand Up @@ -149,20 +222,32 @@ export function getClientErrorObject(

const responseObject =
response instanceof Response ? response : response.response;

if (responseObject && !responseObject.bodyUsed) {
// attempt to read the body as json, and fallback to text. we must clone the
// response in order to fallback to .text() because Response is single-read
// attempt to read the body as json, and fallback to text. we must clone
// the response in order to fallback to .text() because Response is
// single-read
responseObject
.clone()
.json()
.then(errorJson => {
const error = { ...responseObject, ...errorJson };
// Destructuring instead of spreading to avoid loss of sibling properties to the body
const { url, status, statusText, redirected, type } = responseObject;
const responseSummary = { url, status, statusText, redirected, type };
const error = {
...errorJson,
...responseSummary,
};
resolve(parseErrorJson(error));
})
.catch(() => {
// fall back to reading as text
responseObject.text().then((errorText: any) => {
resolve({ ...responseObject, error: errorText });
resolve({
// Destructuring not necessary here
...responseObject,
error: retrieveErrorMessage(errorText, responseObject),
});
});
});
return;
Expand All @@ -177,7 +262,7 @@ export function getClientErrorObject(
}
resolve({
...responseObject,
error,
error: parseStringResponse(error),
});
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import {
sanitizeHtmlIfNeeded,
safeHtmlSpan,
removeHTMLTags,
isJsonString,
getParagraphContents,
} from './html';

describe('sanitizeHtml', () => {
Expand Down Expand Up @@ -113,3 +115,77 @@ describe('removeHTMLTags', () => {
expect(output).toBe('Unclosed tag');
});
});

describe('isJsonString', () => {
test('valid JSON object', () => {
const jsonString = '{"name": "John", "age": 30, "city": "New York"}';
expect(isJsonString(jsonString)).toBe(true);
});

test('valid JSON array', () => {
const jsonString = '[1, 2, 3, 4, 5]';
expect(isJsonString(jsonString)).toBe(true);
});

test('valid JSON string', () => {
const jsonString = '"Hello, world!"';
expect(isJsonString(jsonString)).toBe(true);
});

test('invalid JSON with syntax error', () => {
const jsonString = '{"name": "John", "age": 30, "city": "New York"';
expect(isJsonString(jsonString)).toBe(false);
});

test('empty string', () => {
const jsonString = '';
expect(isJsonString(jsonString)).toBe(false);
});

test('non-JSON string', () => {
const jsonString = '<p>Hello, <strong>World!</strong></p>';
expect(isJsonString(jsonString)).toBe(false);
});

test('non-JSON formatted number', () => {
const jsonString = '12345abc';
expect(isJsonString(jsonString)).toBe(false);
});
});

describe('getParagraphContents', () => {
test('should return an object with keys for each paragraph tag', () => {
const htmlString =
'<div><p>First paragraph.</p><p>Second paragraph.</p></div>';
const result = getParagraphContents(htmlString);
expect(result).toEqual({
p1: 'First paragraph.',
p2: 'Second paragraph.',
});
});

test('should return null if the string is not HTML', () => {
const nonHtmlString = 'Just a plain text string.';
expect(getParagraphContents(nonHtmlString)).toBeNull();
});

test('should return null if there are no <p> tags in the HTML string', () => {
const htmlStringWithoutP = '<div><span>No paragraph here.</span></div>';
expect(getParagraphContents(htmlStringWithoutP)).toBeNull();
});

test('should return an object with empty string for empty <p> tag', () => {
const htmlStringWithEmptyP = '<div><p></p></div>';
const result = getParagraphContents(htmlStringWithEmptyP);
expect(result).toEqual({ p1: '' });
});

test('should handle HTML strings with nested <p> tags correctly', () => {
const htmlStringWithNestedP =
'<div><p>First paragraph <span>with nested</span> content.</p></div>';
const result = getParagraphContents(htmlStringWithNestedP);
expect(result).toEqual({
p1: 'First paragraph with nested content.',
});
});
});
55 changes: 52 additions & 3 deletions superset-frontend/packages/superset-ui-core/src/utils/html.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,26 @@ export function sanitizeHtml(htmlString: string) {
return xssFilter.process(htmlString);
}

export function hasHtmlTagPattern(str: string): boolean {
const htmlTagPattern =
/<(html|head|body|div|span|a|p|h[1-6]|title|meta|link|script|style)/i;

return htmlTagPattern.test(str);
}

export function isProbablyHTML(text: string) {
return Array.from(
new DOMParser().parseFromString(text, 'text/html').body.childNodes,
).some(({ nodeType }) => nodeType === 1);
const cleanedStr = text.trim().toLowerCase();

if (
cleanedStr.startsWith('<!doctype html>') &&
hasHtmlTagPattern(cleanedStr)
) {
return true;
}

const parser = new DOMParser();
const doc = parser.parseFromString(cleanedStr, 'text/html');
return Array.from(doc.body.childNodes).some(({ nodeType }) => nodeType === 1);
}

export function sanitizeHtmlIfNeeded(htmlString: string) {
Expand All @@ -70,3 +86,36 @@ export function safeHtmlSpan(possiblyHtmlString: string) {
export function removeHTMLTags(str: string): string {
return str.replace(/<[^>]*>/g, '');
}

export function isJsonString(str: string): boolean {
try {
JSON.parse(str);
return true;
} catch (e) {
return false;
}
}

export function getParagraphContents(
str: string,
): { [key: string]: string } | null {
if (!isProbablyHTML(str)) {
return null;
}

const parser = new DOMParser();
const doc = parser.parseFromString(str, 'text/html');
const pTags = doc.querySelectorAll('p');

if (pTags.length === 0) {
return null;
}

const paragraphContents: { [key: string]: string } = {};

pTags.forEach((pTag, index) => {
paragraphContents[`p${index + 1}`] = pTag.textContent || '';
});

return paragraphContents;
}
Loading

0 comments on commit de6a518

Please sign in to comment.