Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleaning non utf8 strings #22

Merged
merged 4 commits into from
Dec 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4,999 changes: 1,274 additions & 3,725 deletions package-lock.json

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "deep-cuts",
"version": "2.2.0",
"version": "2.3.1",
"description": "Useful utilities and rare b-sides.",
"author": "Trevor Ewen",
"license": "MIT",
Expand Down Expand Up @@ -59,9 +59,10 @@
"qs": "^6.5.2"
},
"devDependencies": {
"@size-limit/preset-small-lib": "^5.0.1",
"@size-limit/preset-small-lib": "^7.0.4",
"@types/jest": "^25.2.3",
"@types/qs": "^6.9.7",
"axios": "^0.24.0",
"data-mining-tools": "^1.1.1",
"husky": "^7.0.1",
"size-limit": "^5.0.1",
Expand Down
1 change: 1 addition & 0 deletions src/__tests__/stream.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ describe('stream', () => {
fs.createReadStream(path.join(__dirname, 'fixtures/bad.json'))
);
} catch (e) {
// @ts-ignore
expect(e.message).toEqual(
'Stream did not resolve to a JSON object, you may need to process it another way.'
);
Expand Down
75 changes: 75 additions & 0 deletions src/__tests__/string.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { stringToBoolean, escapeForRegExp } from '../';
import { cleanSpecialCharacters } from '../string';

describe('string', () => {
describe('stringToBoolean()', () => {
Expand Down Expand Up @@ -146,4 +147,78 @@ describe('string', () => {
);
});
});

// Behavioural spec for cleanSpecialCharacters(): nullish/empty handling,
// pass-through of plain ASCII, and removal/replacement of code units > 127.
describe('cleanSpecialCharacters()', () => {
  it('should return an empty string if passed undefined', () => {
    // @ts-ignore -- deliberately violating the `string` parameter type
    expect(cleanSpecialCharacters(undefined)).toEqual('');
  });

  it('should return an empty string if passed null', () => {
    // @ts-ignore -- deliberately violating the `string` parameter type
    expect(cleanSpecialCharacters(null)).toEqual('');
  });

  it('should return an empty string if passed an empty string', () => {
    expect(cleanSpecialCharacters('')).toEqual('');
  });

  it('should return a string with no special characters as-is', () => {
    expect(cleanSpecialCharacters('Koolaid, Punch, Results.xlsx')).toEqual(
      'Koolaid, Punch, Results.xlsx'
    );
  });

  it('should return a string with no special characters as-is, even if all spaces', () => {
    expect(cleanSpecialCharacters(' ')).toEqual(' ');
  });

  it('should play nice with numbers', () => {
    // @ts-ignore -- deliberately violating the `string` parameter type
    expect(cleanSpecialCharacters(25.624)).toEqual('25.624');
  });

  it('should play nice with numbers in a string', () => {
    expect(cleanSpecialCharacters('56 Signs.pdf')).toEqual('56 Signs.pdf');
  });

  it('should play nice with punctuation', () => {
    expect(
      cleanSpecialCharacters('Reginald. Sr. Officer of the war $5.00')
    ).toEqual('Reginald. Sr. Officer of the war $5.00');
  });

  it('should remove all characters with unicode code above 127', () => {
    expect(
      cleanSpecialCharacters(
        `Hello, ${String.fromCharCode(890)} my name is ${String.fromCharCode(
          129
        )}hill`
      )
      // Two spaces after the comma: the removed character sat between the
      // two spaces of the input, and the spaces themselves are preserved.
    ).toEqual('Hello,  my name is hill');
  });

  it('should be able to specify a replacement for unicode characters above 127', () => {
    expect(
      cleanSpecialCharacters(
        `Hello, ${String.fromCharCode(1200)} my name is ${String.fromCharCode(
          135
        )}hill`,
        'X'
      )
    ).toEqual('Hello, X my name is Xhill');
  });

  it('should play nice with characters right at the edge', () => {
    expect(
      cleanSpecialCharacters(
        [
          String.fromCharCode(50),
          String.fromCharCode(60),
          String.fromCharCode(126),
        ].join('')
      )
    ).toEqual('2<~');
  });

  it('should keep code 127 and replace code 128 (the actual boundary)', () => {
    // The implementation compares with `> 127`, so 127 is the last code
    // that is kept and 128 is the first one that is replaced.
    expect(
      cleanSpecialCharacters(
        [String.fromCharCode(127), String.fromCharCode(128)].join(''),
        'X'
      )
    ).toEqual(`${String.fromCharCode(127)}X`);
  });
});
});
11 changes: 3 additions & 8 deletions src/array.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,9 @@ export async function tailRecursion<T>(
payload: any[] = []
): Promise<any[]> {
if (collection?.length) {
const first: T = collection[0];
if (first) {
const response: any = await operation(first);
return tailRecursion(
collection.slice(1),
operation,
payload.concat(response)
);
for (const item of collection) {
const response: any = await operation(item);
payload.push(response);
}
}
return payload;
Expand Down
15 changes: 15 additions & 0 deletions src/string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,18 @@ export function escapeForRegExp(str: string): string {
: str;
/* eslint-enable no-useless-escape */
}

/**
 * Replaces every UTF-16 code unit whose value is above 127 (i.e. outside
 * 7-bit ASCII) with `replacement`, leaving all other characters untouched.
 *
 * The input is coerced with `String(...)`, so non-string values passed by
 * untyped callers (e.g. numbers) are stringified rather than rejected.
 * `null` and `undefined` yield an empty string.
 *
 * The string is processed one UTF-16 code unit at a time, so a character
 * encoded as a surrogate pair produces `replacement` twice.
 *
 * @param str - The value to clean.
 * @param replacement - Text substituted for each code unit above 127.
 *   Defaults to `''`, which removes those code units.
 * @returns The cleaned string.
 */
export function cleanSpecialCharacters(
  str: string,
  replacement: string = ''
): string {
  // `??` instead of `||`: only null/undefined collapse to '', so
  // falsy-but-meaningful inputs such as the number 0 become '0', not ''.
  return String(str ?? '')
    .split('')
    .map(char => (char.charCodeAt(0) > 127 ? replacement : char))
    .join('');
}