Skip to content

Commit

Permalink
feat(simd): add more vector fns
Browse files Browse the repository at this point in the history
  • Loading branch information
postspectacular committed Oct 20, 2019
1 parent 4023a8f commit 4f4cea4
Show file tree
Hide file tree
Showing 18 changed files with 427 additions and 115 deletions.
20 changes: 16 additions & 4 deletions packages/simd/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,25 +32,37 @@ See
[/assembly](https://github.com/thi-ng/umbrella/tree/feature/simd/packages/simd/assembly)
for sources:

- `abs4_f32`
- `add4_f32`
- `addn4_f32`
- `clamp4_f32`
- `div4_f32` (*)
- `divn4_f32` (*)
- `dot2_f32_aos` (2x vec2 per iteration)
- `dot4_f32_aos`
- `dot4_f32_soa`
- `invsqrt4_f32` (*)
- `madd4_f32`
- `maddn4_f32`
- `max4_f32`
- `min4_f32`
- `msub4_f32`
- `msubn4_f32`
- `mul4_f32`
- `mul_m23v2_aos`
- `mul_m23v2_aos_single` (2x vec2 per iteration)
- `muln4_f32`
- `mul_m23v2_aos` (2x vec2 per iteration)
- `mul_m44v4_aos`
- `mul_m44v4_aos_single`
- `neg4_f32`
- `normalize4_f32_aos`
- `sqrt4_f32` (*)
- `sub4_f32`
- `subn4_f32`

(*) Missing native implementation, waiting on...

Also see [src/api.ts](https://github.com/thi-ng/umbrella/tree/feature/simd/packages/simd/src/api.ts) for documentation about the exposed TS/JS API...
Also see
[src/api.ts](https://github.com/thi-ng/umbrella/tree/feature/simd/packages/simd/src/api.ts)
for documentation about the exposed TS/JS API...

## Status

Expand Down
17 changes: 17 additions & 0 deletions packages/simd/assembly/abs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
 * Writes the componentwise absolute value of `num` vec4s read from `a`
 * into `out` and returns the unmodified `out` pointer.
 *
 * Strides are multiplied by 4 (`<< 2`) before being added to the
 * respective pointer after each vector.
 *
 * @param out destination pointer
 * @param a source pointer
 * @param num number of vec4
 * @param so out element stride
 * @param sa A element stride
 */
export function abs4_f32(
    out: usize,
    a: usize,
    num: usize,
    so: usize,
    sa: usize
): usize {
    const outStep = so << 2;
    const aStep = sa << 2;
    let dest = out;
    let src = a;
    for (let i: usize = 0; i < num; i++) {
        v128.store(dest, f32x4.abs(v128.load(src)));
        dest += outStep;
        src += aStep;
    }
    return out;
}
6 changes: 3 additions & 3 deletions packages/simd/assembly/add.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ export function add4_f32(
a: usize,
b: usize,
num: usize,
so: usize = 4,
sa: usize = 4,
sb: usize = 4
so: usize,
sa: usize,
sb: usize
): usize {
so <<= 2;
sa <<= 2;
Expand Down
19 changes: 19 additions & 0 deletions packages/simd/assembly/addn.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
 * Adds the scalar `n` to every component of `num` vec4s read from `a`,
 * writes the sums into `out` and returns the unmodified `out` pointer.
 *
 * Strides are multiplied by 4 (`<< 2`) before being added to the
 * respective pointer after each vector.
 *
 * @param out destination pointer
 * @param a source pointer
 * @param n scalar added to all components
 * @param num number of vec4
 * @param so out element stride
 * @param sa A element stride
 */
export function addn4_f32(
    out: usize,
    a: usize,
    n: f32,
    num: usize,
    so: usize,
    sa: usize
): usize {
    const outStep = so << 2;
    const aStep = sa << 2;
    // scalar is broadcast once, outside the loop
    const scalar = f32x4.splat(n);
    let dest = out;
    let src = a;
    for (let i: usize = 0; i < num; i++) {
        v128.store(dest, f32x4.add(v128.load(src), scalar));
        dest += outStep;
        src += aStep;
    }
    return out;
}
49 changes: 49 additions & 0 deletions packages/simd/assembly/clamp.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/**
 * Componentwise clamp of vec4 buffer `a` against the lower bounds in
 * `b` and upper bounds in `c`, i.e. `min(max(a, b), c)`. Results are
 * written to `out`. AOS and SOA layouts both work, provided all
 * buffers share the same layout.
 *
 * All strides must be multiples of 4. All pointers must be aligned to
 * multiples of 16. Returns `out` pointer.
 *
 * To clamp every `a` vector against the same bounds, pass 0 for both
 * `sb` and `sc`.
 *
 * @param out
 * @param a
 * @param b
 * @param c
 * @param num number of vec4
 * @param so out element stride
 * @param sa A element stride
 * @param sb B element stride
 * @param sc C element stride
 */
export function clamp4_f32(
    out: usize,
    a: usize,
    b: usize,
    c: usize,
    num: usize,
    so: usize,
    sa: usize,
    sb: usize,
    sc: usize
): usize {
    const outStep = so << 2;
    const aStep = sa << 2;
    const bStep = sb << 2;
    const cStep = sc << 2;
    let dest = out;
    let pa = a;
    let pb = b;
    let pc = c;
    for (let i: usize = 0; i < num; i++) {
        // apply lower bound first, then upper bound
        const lower = f32x4.max(v128.load(pa), v128.load(pb));
        v128.store(dest, f32x4.min(lower, v128.load(pc)));
        dest += outStep;
        pa += aStep;
        pb += bStep;
        pc += cStep;
    }
    return out;
}
19 changes: 19 additions & 0 deletions packages/simd/assembly/divn.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
 * Divides every component of `num` vec4s read from `a` by the scalar
 * `n`, writes the quotients into `out` and returns the unmodified
 * `out` pointer.
 *
 * Strides are multiplied by 4 (`<< 2`) before being added to the
 * respective pointer after each vector.
 *
 * @param out destination pointer
 * @param a source pointer
 * @param n scalar divisor
 * @param num number of vec4
 * @param so out element stride
 * @param sa A element stride
 */
export function divn4_f32(
    out: usize,
    a: usize,
    n: f32,
    num: usize,
    so: usize,
    sa: usize
): usize {
    const outStep = so << 2;
    const aStep = sa << 2;
    // divisor is broadcast once, outside the loop
    const divisor = f32x4.splat(n);
    let dest = out;
    let src = a;
    for (let i: usize = 0; i < num; i++) {
        v128.store(dest, f32x4.div(v128.load(src), divisor));
        dest += outStep;
        src += aStep;
    }
    return out;
}
13 changes: 12 additions & 1 deletion packages/simd/assembly/index.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,25 @@
export * from "./abs";
export * from "./add";
export * from "./addn";
export * from "./clamp";

// TODO waiting for native impl
// export * from "./div";
// export * from "./divn";

export * from "./dot";
export * from "./madd";
export * from "./maddn";
export * from "./max";
export * from "./min";
export * from "./mul";
export * from "./muln";
export * from "./mulv";
export * from "./sub";
export * from "./neg";
export * from "./normalize";

// TODO waiting for native impl
// export * from "./sqrt";

export * from "./sub";
export * from "./subn";
6 changes: 3 additions & 3 deletions packages/simd/assembly/maddn.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
export function maddn4_f32(
out: usize,
a: usize,
b: f32,
n: f32,
c: usize,
num: usize,
so: usize,
Expand All @@ -12,9 +12,9 @@ export function maddn4_f32(
so <<= 2;
sa <<= 2;
sc <<= 2;
const vb = v128.splat<f32>(b);
const vn = f32x4.splat(n);
for (; num-- > 0; ) {
v128.store(out, f32x4.add(f32x4.mul(v128.load(a), vb), v128.load(c)));
v128.store(out, f32x4.add(f32x4.mul(v128.load(a), vn), v128.load(c)));
out += so;
a += sa;
c += sc;
Expand Down
21 changes: 21 additions & 0 deletions packages/simd/assembly/max.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/**
 * Computes the componentwise maximum of `num` vec4 pairs read from
 * `a` and `b`, writes the results into `out` and returns the
 * unmodified `out` pointer.
 *
 * Strides are multiplied by 4 (`<< 2`) before being added to the
 * respective pointer after each vector.
 *
 * @param out destination pointer
 * @param a first source pointer
 * @param b second source pointer
 * @param num number of vec4
 * @param so out element stride
 * @param sa A element stride
 * @param sb B element stride
 */
export function max4_f32(
    out: usize,
    a: usize,
    b: usize,
    num: usize,
    so: usize,
    sa: usize,
    sb: usize
): usize {
    const outStep = so << 2;
    const aStep = sa << 2;
    const bStep = sb << 2;
    let dest = out;
    let pa = a;
    let pb = b;
    for (let i: usize = 0; i < num; i++) {
        v128.store(dest, f32x4.max(v128.load(pa), v128.load(pb)));
        dest += outStep;
        pa += aStep;
        pb += bStep;
    }
    return out;
}
21 changes: 21 additions & 0 deletions packages/simd/assembly/min.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/**
 * Computes the componentwise minimum of `num` vec4 pairs read from
 * `a` and `b`, writes the results into `out` and returns the
 * unmodified `out` pointer.
 *
 * Strides are multiplied by 4 (`<< 2`) before being added to the
 * respective pointer after each vector.
 *
 * @param out destination pointer
 * @param a first source pointer
 * @param b second source pointer
 * @param num number of vec4
 * @param so out element stride
 * @param sa A element stride
 * @param sb B element stride
 */
export function min4_f32(
    out: usize,
    a: usize,
    b: usize,
    num: usize,
    so: usize,
    sa: usize,
    sb: usize
): usize {
    const outStep = so << 2;
    const aStep = sa << 2;
    const bStep = sb << 2;
    let dest = out;
    let pa = a;
    let pb = b;
    for (let i: usize = 0; i < num; i++) {
        v128.store(dest, f32x4.min(v128.load(pa), v128.load(pb)));
        dest += outStep;
        pa += aStep;
        pb += bStep;
    }
    return out;
}
46 changes: 46 additions & 0 deletions packages/simd/assembly/msub.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/**
 * Componentwise multiply-subtract over three vec4 buffers: computes
 * `a * b - c` and writes the results to `out`. AOS and SOA layouts
 * both work, provided all buffers share the same layout.
 *
 * All strides must be multiples of 4. All pointers must be aligned to
 * multiples of 16. Returns `out` pointer.
 *
 * @param out
 * @param a
 * @param b
 * @param c
 * @param num number of vec4
 * @param so out element stride
 * @param sa A element stride
 * @param sb B element stride
 * @param sc C element stride
 */
export function msub4_f32(
    out: usize,
    a: usize,
    b: usize,
    c: usize,
    num: usize,
    so: usize,
    sa: usize,
    sb: usize,
    sc: usize
): usize {
    const outStep = so << 2;
    const aStep = sa << 2;
    const bStep = sb << 2;
    const cStep = sc << 2;
    let dest = out;
    let pa = a;
    let pb = b;
    let pc = c;
    for (let i: usize = 0; i < num; i++) {
        const product = f32x4.mul(v128.load(pa), v128.load(pb));
        v128.store(dest, f32x4.sub(product, v128.load(pc)));
        dest += outStep;
        pa += aStep;
        pb += bStep;
        pc += cStep;
    }
    return out;
}
23 changes: 23 additions & 0 deletions packages/simd/assembly/msubn.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/**
 * Scalar multiply-subtract: for each of `num` vec4s computes
 * `a * n - c` componentwise (with `n` applied to all components),
 * writes the results into `out` and returns the unmodified `out`
 * pointer.
 *
 * Strides are multiplied by 4 (`<< 2`) before being added to the
 * respective pointer after each vector.
 *
 * @param out destination pointer
 * @param a multiplicand pointer
 * @param n scalar multiplier
 * @param c subtrahend pointer
 * @param num number of vec4
 * @param so out element stride
 * @param sa A element stride
 * @param sc C element stride
 */
export function msubn4_f32(
    out: usize,
    a: usize,
    n: f32,
    c: usize,
    num: usize,
    so: usize,
    sa: usize,
    sc: usize
): usize {
    const outStep = so << 2;
    const aStep = sa << 2;
    const cStep = sc << 2;
    // multiplier is broadcast once, outside the loop
    const scalar = f32x4.splat(n);
    let dest = out;
    let pa = a;
    let pc = c;
    for (let i: usize = 0; i < num; i++) {
        const product = f32x4.mul(v128.load(pa), scalar);
        v128.store(dest, f32x4.sub(product, v128.load(pc)));
        dest += outStep;
        pa += aStep;
        pc += cStep;
    }
    return out;
}
19 changes: 19 additions & 0 deletions packages/simd/assembly/muln.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
 * Multiplies every component of `num` vec4s read from `a` with the
 * scalar `n`, writes the products into `out` and returns the
 * unmodified `out` pointer.
 *
 * Strides are multiplied by 4 (`<< 2`) before being added to the
 * respective pointer after each vector.
 *
 * @param out destination pointer
 * @param a source pointer
 * @param n scalar multiplier
 * @param num number of vec4
 * @param so out element stride
 * @param sa A element stride
 */
export function muln4_f32(
    out: usize,
    a: usize,
    n: f32,
    num: usize,
    so: usize,
    sa: usize
): usize {
    const outStep = so << 2;
    const aStep = sa << 2;
    // multiplier is broadcast once, outside the loop
    const scalar = f32x4.splat(n);
    let dest = out;
    let src = a;
    for (let i: usize = 0; i < num; i++) {
        v128.store(dest, f32x4.mul(v128.load(src), scalar));
        dest += outStep;
        src += aStep;
    }
    return out;
}

0 comments on commit 4f4cea4

Please sign in to comment.