Skip to content

Commit b22a03a

Browse files
authored
feat(cli): add --group-size to group by a size limit (#2438)
* feat(cli): add --group-size to group by a size limit. --group-size and --group can be used together: "--group 5 --group-size 1Gi" will make groups that are at most 5 items and less than 1Gi * refactor: remove unused comment
1 parent 14deef8 commit b22a03a

File tree

4 files changed

+181
-46
lines changed

4 files changed

+181
-46
lines changed
Lines changed: 61 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,79 @@
11
import { fsa } from '@chunkd/fs';
22
import o from 'ospec';
3-
import { asyncFilter, chunkArray } from '../action.aws.list.js';
3+
import { asyncFilter, chunkFiles, FileSizeInfo } from '../action.aws.list.js';
44

5-
o.spec('chunkArray', () => {
5+
o.spec('chunkFiles', () => {
66
o('should chunk a array', () => {
7-
o(chunkArray([1, 2, 3, 4], 2)).deepEquals([
8-
[1, 2],
9-
[3, 4],
10-
]);
11-
});
12-
13-
o('should chunk a set', () => {
14-
o(chunkArray(new Set([1, 2, 3, 4, 4]), 2)).deepEquals([
15-
[1, 2],
16-
[3, 4],
7+
o(chunkFiles([{ path: '1' }, { path: '2' }, { path: '3' }, { path: '4' }], 2, -1)).deepEquals([
8+
['1', '2'],
9+
['3', '4'],
1710
]);
1811
});
1912

2013
o('should chunk small set', () => {
21-
o(chunkArray(new Set([1]), 2)).deepEquals([[1]]);
14+
o(chunkFiles([{ path: '1' }], 2, -1)).deepEquals([['1']]);
2215
});
2316

2417
o('should chunk large set', () => {
25-
o(chunkArray(new Set([1, 2, 3, 4, 4]), 5)).deepEquals([[1, 2, 3, 4]]);
18+
o(chunkFiles([{ path: '1' }, { path: '2' }, { path: '3' }, { path: '4' }], 5, -1)).deepEquals([
19+
['1', '2', '3', '4'],
20+
]);
2621
});
2722

2823
o('should chunk into single sets', () => {
29-
o(chunkArray(new Set([1, 2, 3, 4, 4]), 1)).deepEquals([[1], [2], [3], [4]]);
24+
o(chunkFiles([{ path: '1' }, { path: '2' }, { path: '3' }, { path: '4' }], 1, -1)).deepEquals([
25+
['1'],
26+
['2'],
27+
['3'],
28+
['4'],
29+
]);
30+
});
31+
32+
o('should chunk by size', () => {
33+
o(
34+
chunkFiles(
35+
[
36+
{ path: '1', size: 100 },
37+
{ path: '2', size: 200 },
38+
{ path: '3', size: 300 },
39+
{ path: '4', size: 400 },
40+
],
41+
-1,
42+
300,
43+
),
44+
).deepEquals([['1', '2'], ['3'], ['4']]);
45+
});
46+
47+
o('should chunk by size or count which ever comes first', () => {
48+
o(
49+
chunkFiles(
50+
[
51+
{ path: '1', size: 100 },
52+
{ path: '2', size: 100 },
53+
{ path: '3', size: 100 },
54+
{ path: '4', size: 100 },
55+
{ path: '5', size: 600 },
56+
{ path: '6', size: 600 },
57+
],
58+
2,
59+
500,
60+
),
61+
).deepEquals([['1', '2'], ['3', '4'], ['5'], ['6']]);
3062
});
3163
});
3264

3365
o.spec('asyncFilter', () => {
34-
const fileList = [
35-
'a.tiff',
36-
'B.TIFF',
37-
'/foo/bar/baz.tiff',
38-
'/foo/xls.ts',
39-
'c:\\foo\\bar.txt',
40-
's3://foo/bar.tiff',
41-
's3://foo/bar.ts',
42-
's3://foo/bar/baz.tif',
66+
const fileList: FileSizeInfo[] = [
67+
{ path: 'a.tiff' },
68+
{ path: 'B.TIFF' },
69+
{ path: '/foo/bar/baz.tiff' },
70+
{ path: '/foo/xls.ts' },
71+
{ path: 'c:\\foo\\bar.txt' },
72+
{ path: 's3://foo/bar.tiff' },
73+
{ path: 's3://foo/bar.ts' },
74+
{ path: 's3://foo/bar/baz.tif' },
4375
];
44-
async function* generator(): AsyncGenerator<string> {
76+
async function* generator(): AsyncGenerator<FileSizeInfo> {
4577
for (const file of fileList) yield file;
4678
}
4779
o('should filter all', async () => {
@@ -50,20 +82,20 @@ o.spec('asyncFilter', () => {
5082

5183
o('should filter exact', async () => {
5284
for (const file of fileList) {
53-
if (file.startsWith('c:\\')) continue; // not a valid regexp
54-
o(await fsa.toArray(asyncFilter(generator(), file))).deepEquals([file]);
85+
if (file.path.startsWith('c:\\')) continue; // not a valid regexp
86+
o(await fsa.toArray(asyncFilter(generator(), file.path))).deepEquals([file]);
5587
}
5688
});
5789

5890
o('should filter suffix', async () => {
5991
o(await fsa.toArray(asyncFilter(generator(), '.tiff$'))).deepEquals(
60-
fileList.filter((f) => f.toLowerCase().endsWith('.tiff')),
92+
fileList.filter((f) => f.path.toLowerCase().endsWith('.tiff')),
6193
);
6294
});
6395

6496
o('should filter tif or tiff', async () => {
6597
o(await fsa.toArray(asyncFilter(generator(), '.tiff?$'))).deepEquals(
66-
fileList.filter((f) => f.toLowerCase().endsWith('.tiff') || f.toLowerCase().endsWith('.tif')),
98+
fileList.filter((f) => f.path.toLowerCase().endsWith('.tiff') || f.path.toLowerCase().endsWith('.tif')),
6799
);
68100
});
69101
});
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import o from 'ospec';
2+
import { parseSize } from '../sizes.js';
3+
4+
// Unit tests for parseSize: unit suffixes, fractional and negative values, and rejection of malformed input.
o.spec('parseSize', () => {
5+
// A bare number is taken as bytes; the "B" suffixes are expected to be 1024-based
o('should parse bytes', () => {
6+
o(parseSize('1')).equals(1);
7+
o(parseSize('1KB')).equals(1024);
8+
o(parseSize('1MB')).equals(1024 * 1024);
9+
o(parseSize('1GB')).equals(1024 * 1024 * 1024);
10+
o(parseSize('1TB')).equals(1024 * 1024 * 1024 * 1024);
11+
});
12+
13+
o('should parse negative bytes', () => {
14+
o(parseSize('-1.2 ')).equals(-1);
15+
o(parseSize('-1.2 KB')).equals(Math.round(-1.2 * 1024));
16+
o(parseSize('-1.2 MB')).equals(Math.round(-1.2 * 1024 * 1024));
17+
o(parseSize('-1.2 GB')).equals(Math.round(-1.2 * 1024 * 1024 * 1024));
18+
o(parseSize('-1.2 TB')).equals(Math.round(-1.2 * 1024 * 1024 * 1024 * 1024));
19+
});
20+
21+
// Fractional inputs (with a space before the unit) are expected to round to a whole number of bytes
o('should parse partial bytes', () => {
22+
o(parseSize('1.2 ')).equals(1);
23+
o(parseSize('1.2 KB')).equals(Math.round(1.2 * 1024));
24+
o(parseSize('1.2 MB')).equals(Math.round(1.2 * 1024 * 1024));
25+
o(parseSize('1.2 GB')).equals(Math.round(1.2 * 1024 * 1024 * 1024));
26+
o(parseSize('1.2 TB')).equals(Math.round(1.2 * 1024 * 1024 * 1024 * 1024));
27+
});
28+
29+
// The "i" suffixes are expected to be 1000-based
o('should parse metric', () => {
30+
o(parseSize('1Ki')).equals(1000);
31+
o(parseSize('1Mi')).equals(1000 * 1000);
32+
o(parseSize('1Gi')).equals(1000 * 1000 * 1000);
33+
o(parseSize('1Ti')).equals(1000 * 1000 * 1000 * 1000);
34+
});
35+
36+
o('should parse partial metric', () => {
37+
o(parseSize('1.2 Ki')).equals(1.2 * 1000);
38+
o(parseSize('1.2 Mi')).equals(1.2 * 1000 * 1000);
39+
o(parseSize('1.2 Gi')).equals(1.2 * 1000 * 1000 * 1000);
40+
o(parseSize('1.2 Ti')).equals(1.2 * 1000 * 1000 * 1000 * 1000);
41+
});
42+
o('should parse negative metric', () => {
43+
o(parseSize('-1.2 ')).equals(-1);
44+
o(parseSize('-1.2 Ki')).equals(-1.2 * 1000);
45+
o(parseSize('-1.2 Mi')).equals(-1.2 * 1000 * 1000);
46+
o(parseSize('-1.2 Gi')).equals(-1.2 * 1000 * 1000 * 1000);
47+
o(parseSize('-1.2 Ti')).equals(-1.2 * 1000 * 1000 * 1000 * 1000);
48+
});
49+
50+
// Repeated or unknown unit suffixes and non-numeric text are expected to throw
o('should fail on invalid test', () => {
51+
o(() => parseSize('1 B B')).throws(Error);
52+
o(() => parseSize('1 ZB')).throws(Error);
53+
o(() => parseSize('1 Zi')).throws(Error);
54+
o(() => parseSize('a')).throws(Error);
55+
});
56+
});

packages/cli/src/cli/aws/action.aws.list.ts

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11
import { Env, fsa, LogConfig, RoleRegister } from '@basemaps/shared';
22
import { CommandLineAction, CommandLineIntegerParameter, CommandLineStringParameter } from '@rushstack/ts-command-line';
3+
import { parseSize } from './sizes.js';
4+
5+
export interface FileSizeInfo {
6+
path: string;
7+
size?: number;
8+
}
39

410
export class CommandList extends CommandLineAction {
511
private filter: CommandLineStringParameter;
612
private output: CommandLineStringParameter;
713
private group: CommandLineIntegerParameter;
14+
private groupSize: CommandLineStringParameter;
815
private limit: CommandLineIntegerParameter;
916
private config: CommandLineStringParameter;
1017

@@ -27,6 +34,11 @@ export class CommandList extends CommandLineAction {
2734
parameterLongName: '--group',
2835
description: 'Group files into this number per group',
2936
});
37+
this.groupSize = this.defineStringParameter({
38+
argumentName: 'GROUP_SIZE',
39+
parameterLongName: '--group-size',
40+
description: 'Group files into this size per group, eg "5Gi" or "3TB"',
41+
});
3042
this.limit = this.defineIntegerParameter({
3143
argumentName: 'LIMIT',
3244
parameterLongName: '--limit',
@@ -63,48 +75,55 @@ export class CommandList extends CommandLineAction {
6375
const limit = this.limit.value ?? -1; // no limit by default
6476
const filter = this.filter.value ?? '*'; // Filter everything by default
6577

66-
const outputFiles = new Set<string>();
78+
const outputFiles: FileSizeInfo[] = [];
6779
for (const targetPath of paths) {
6880
logger.debug({ path: targetPath }, 'List');
6981
const assumedRole = await RoleRegister.findRole(targetPath);
7082
if (assumedRole) logger.debug({ path: targetPath, roleArn: assumedRole?.roleArn }, 'List:Role');
7183

72-
const fileList = await fsa.toArray(asyncFilter(fsa.list(targetPath), filter));
84+
const fileList = await fsa.toArray(asyncFilter(fsa.details(targetPath), filter));
7385
logger.debug({ path: targetPath, fileCount: fileList.length }, 'List:Count');
7486

7587
for (const file of fileList) {
76-
outputFiles.add(file);
77-
if (limit > 0 && outputFiles.size >= limit) break;
88+
outputFiles.push(file);
89+
if (limit > 0 && outputFiles.length >= limit) break;
7890
}
79-
if (limit > 0 && outputFiles.size >= limit) break;
91+
if (limit > 0 && outputFiles.length >= limit) break;
8092
}
8193

82-
if (this.group.value == null || this.group.value < 1) {
83-
await fsa.write(outputPath, JSON.stringify([...outputFiles.values()]));
84-
} else {
85-
await fsa.write(outputPath, JSON.stringify(chunkArray(outputFiles, this.group.value)));
86-
}
94+
const maxSize = parseSize(this.groupSize.value ?? '-1');
95+
const maxLength = this.group.value ?? -1;
96+
await fsa.write(outputPath, JSON.stringify(chunkFiles(outputFiles, maxLength, maxSize)));
8797
}
8898
}
8999

90-
export async function* asyncFilter(source: AsyncGenerator<string>, filter: string): AsyncGenerator<string> {
100+
/**
 * Filter an async stream of file records, keeping those whose `path` matches a regular expression.
 *
 * @param source records to filter; only each record's `path` is inspected
 * @param filter regular expression source, matched case-insensitively; the literal `'*'` disables filtering
 * @returns an async generator yielding the matching records unchanged, in source order
 * @throws SyntaxError (from `RegExp`) when `filter` is not a valid regular expression
 */
export async function* asyncFilter<T extends { path: string }>(
  source: AsyncGenerator<T>,
  filter: string,
): AsyncGenerator<T> {
  // '*' is not a valid regular expression, it is the "match everything" marker
  if (filter === '*') return yield* source;

  // Case-insensitivity comes from the 'i' flag. The pattern must NOT be lower-cased:
  // doing so inverts escapes such as \S, \D, \W and \B into \s, \d, \w and \b.
  const re = new RegExp(filter, 'i');
  for await (const f of source) {
    if (re.test(f.path)) yield f;
  }
}
99112

100-
export function chunkArray<T>(values: Set<T> | T[], size: number): T[][] {
101-
const output: T[][] = [];
102-
let current: T[] = [];
113+
/** Chunk files into groups by a max total size (eg 1GB chunks) or a max count (eg 100 files), or whichever comes first when both are defined */
114+
export function chunkFiles(values: FileSizeInfo[], count: number, size: number): string[][] {
115+
if (count == null && size == null) return [values.map((c) => c.path)];
116+
117+
const output: string[][] = [];
118+
let current: string[] = [];
119+
let totalSize = 0;
103120
for (const v of values) {
104-
current.push(v);
105-
if (current.length >= size) {
121+
current.push(v.path);
122+
if (v.size) totalSize += v.size;
123+
if ((count > 0 && current.length >= count) || (size > 0 && totalSize >= size)) {
106124
output.push(current);
107125
current = [];
126+
totalSize = 0;
108127
}
109128
}
110129
if (current.length > 0) output.push(current);

packages/cli/src/cli/aws/sizes.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
 * Multipliers (in bytes) for the two-letter unit suffixes understood by `parseSize`, keyed by lower-cased suffix.
 *
 * NOTE(review): the `*b` suffixes are 1024-based and the `*i` suffixes are 1000-based, which is the reverse
 * of the usual IEC/SI naming (KiB = 1024, kB = 1000). Left unchanged because the unit tests pin these values.
 */
export const FileSizeMap = new Map<string, number>([
  ['kb', 1024],
  ['mb', 1024 * 1024],
  ['gb', 1024 * 1024 * 1024],
  ['tb', 1024 * 1024 * 1024 * 1024],
  ['ki', 1000],
  ['mi', 1000 * 1000],
  ['gi', 1000 * 1000 * 1000],
  ['ti', 1000 * 1000 * 1000 * 1000],
]);

/**
 * Convert a number eg "1KB" to size in bytes (1024)
 *
 * Rounded to the nearest byte. Case and spaces are ignored, so "1.2 kb" and "1.2KB" are equivalent.
 *
 * @param size text to parse: a number optionally followed by a two-letter unit from `FileSizeMap`
 * @returns the size in bytes, rounded with `Math.round`
 * @throws Error when the unit suffix is unknown or the numeric part is not a number
 */
export function parseSize(size: string): number {
  const textString = size.toLowerCase().replace(/ /g, '').trim();

  let multiplier = 1;
  let numberText = textString;
  if (textString.endsWith('i') || textString.endsWith('b')) {
    // Every supported unit is exactly two characters long
    const unit = textString.slice(textString.length - 2);
    const unitSize = FileSizeMap.get(unit);
    if (unitSize == null) throw new Error(`Failed to parse: ${size} as a file size`);
    multiplier = unitSize;
    numberText = textString.slice(0, textString.length - 2);
  }

  // Validate the numeric part for both the unit and unit-less forms so that
  // input such as "aKB" is rejected instead of silently producing NaN
  const value = Number(numberText);
  if (Number.isNaN(value)) throw new Error(`Failed to parse: ${size} as a file size`);
  return Math.round(multiplier * value);
}

0 commit comments

Comments
 (0)