Skip to content

Commit

Permalink
[js/webgpu] allows a ProgramInfo's RunData to use zero sized output (#…
Browse files Browse the repository at this point in the history
…19614)

### Description
This PR allows zero-sized output.

To keep the implementation simple, it does not support a mix of
zero-sized and non-zero-sized outputs: either all outputs are zero-sized,
or an error is reported.

Added 2 tests:
 - an op test of `Add` with inputs T[2,0] and T[2,1], and
 - `test_split_zero_size_splits` (now enabled in the suite test list)
  • Loading branch information
fs-eire committed Feb 23, 2024
1 parent efbe2b8 commit aec2389
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 9 deletions.
32 changes: 28 additions & 4 deletions js/web/lib/wasm/jsep/backend-webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -385,11 +385,16 @@ export class WebGpuBackend {
// create info for inputs
const inputDatas: GpuData[] = [];
for (let i = 0; i < inputTensorViews.length; ++i) {
const gpuData = this.gpuDataManager.get(inputTensorViews[i].data);
const data = inputTensorViews[i].data;
// if tensor view data is 0, it means the input is a zero-sized tensor, and there is no GPU data for it.
if (data === 0) {
continue;
}
const gpuData = this.gpuDataManager.get(data);
if (!gpuData) {
throw new Error(`no GPU data for input: ${inputTensorViews[i].data}`);
throw new Error(`no GPU data for input: ${data}`);
}
inputDatas[i] = gpuData;
inputDatas.push(gpuData);
}

const {outputs, dispatchGroup, programUniforms} = program.getRunData(inputTensorViews);
Expand Down Expand Up @@ -419,6 +424,11 @@ export class WebGpuBackend {
const tensorView = (isTemporary || isPersistent) ?
createIntermediateOutput(outputs[i].dataType, outputs[i].dims) :
createKernelOutput(validatedOutputIndices[i], outputs[i].dataType, outputs[i].dims);
outputTensorViews.push(tensorView);
// if tensor view data is 0, it means the output is zero-sized tensor, and there is no GPU data for it.
if (tensorView.data === 0) {
continue;
}
const gpuData = this.gpuDataManager.get(tensorView.data);
if (!gpuData) {
throw new Error(`no GPU data for output: ${tensorView.data}`);
Expand All @@ -434,10 +444,24 @@ export class WebGpuBackend {
}
persistentData.push(gpuData);
}
outputTensorViews.push(tensorView);
outputDatas.push(gpuData);
}

// when there is any zero-sized tensor in the inputs or outputs, we should report an error unless all outputs are
// zero-sized tensors.
if (inputDatas.length !== inputTensorViews.length || outputDatas.length !== outputTensorViews.length) {
// if all outputs are zero-sized tensors, there is no need to run the program.
if (outputDatas.length === 0) {
TRACE_FUNC_END(program.name);
return outputTensorViews;
}
// if some outputs are zero-sized tensors, report an error.
//
// TODO: so far we don't see any use case that outputs include both zero-sized tensors and non-zero-sized tensors.
// If we see such use case, we need to make a change here to support it.
throw new Error(
`Program ${program.name} has zero-sized tensor(s) in inputs or outputs. This is not supported now.`);
}

// load uniforms
// TODO: add cache for uniform (is it necessary?)
Expand Down
3 changes: 2 additions & 1 deletion js/web/lib/wasm/jsep/init.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ class ComputeContextImpl implements ComputeContext {
throw new Error(`Unsupported data type: ${dataType}`);
}
const bufferSize = elementSize * ShapeUtil.size(dims);
return new TensorViewImpl(this.module, dataType, this.backend.gpuDataManager.create(bufferSize).id, dims);
const gpuDataId = bufferSize > 0 ? this.backend.gpuDataManager.create(bufferSize).id : 0;
return new TensorViewImpl(this.module, dataType, gpuDataId, dims);
};
return this.backend.run(program, mappedInputs, outputIndices, createKernelOutput, createTemporaryOutput);
}
Expand Down
11 changes: 10 additions & 1 deletion js/web/lib/wasm/jsep/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,16 @@ export class BroadcastUtil {
if (aLen !== bLen && aLen > 1 && bLen > 1) {
return undefined;
}
cdims[crank - i] = Math.max(aLen, bLen);
const max = Math.max(aLen, bLen);
if (aLen && bLen) {
cdims[crank - i] = Math.max(aLen, bLen);
} else {
// when either aLen or bLen is 0, the other should be either 0 or 1, otherwise it is not broadcastable.
if (max > 1) {
return undefined;
}
cdims[crank - i] = 0;
}
}

return cdims;
Expand Down
22 changes: 22 additions & 0 deletions js/web/test/data/ops/add.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,28 @@
"type": "float32"
}
]
},
{
"name": "T[2,0] T[2,1]",
"inputs": [
{
"data": [],
"dims": [2, 0],
"type": "float32"
},
{
"data": [1, 2],
"dims": [2, 1],
"type": "float32"
}
],
"outputs": [
{
"data": [],
"dims": [2, 0],
"type": "float32"
}
]
}
]
}
Expand Down
2 changes: 1 addition & 1 deletion js/web/test/suite-test-list.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -1231,7 +1231,7 @@
"test_split_variable_parts_1d",
"test_split_variable_parts_2d",
"test_split_variable_parts_default_axis",
// // "test_split_zero_size_splits",
"test_split_zero_size_splits",
"test_sqrt_example",
"test_sqrt",
"test_squeeze_negative_axes",
Expand Down
10 changes: 8 additions & 2 deletions js/web/test/test-runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,9 @@ export async function sessionRun(options: {
// replace the CPU tensors in feeds into GPU tensors
for (const name in feeds) {
if (Object.hasOwnProperty.call(feeds, name)) {
feeds[name] = createGpuTensorForInput(feeds[name]);
if (feeds[name].size > 0) {
feeds[name] = createGpuTensorForInput(feeds[name]);
}
}
}
}
Expand All @@ -582,7 +584,11 @@ export async function sessionRun(options: {
for (const name in options.outputsMetaInfo) {
if (Object.hasOwnProperty.call(options.outputsMetaInfo, name)) {
const {type, dims} = options.outputsMetaInfo[name];
fetches[name] = createGpuTensorForOutput(type, dims);
if (dims.some(d => d === 0)) {
fetches[name] = new ort.Tensor(type, [], dims);
} else {
fetches[name] = createGpuTensorForOutput(type, dims);
}
}
}
}
Expand Down

0 comments on commit aec2389

Please sign in to comment.