diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/README.md b/lib/node_modules/@stdlib/blas/base/dgemm/README.md index b8bd1feb27e0..92e5a990b944 100644 --- a/lib/node_modules/@stdlib/blas/base/dgemm/README.md +++ b/lib/node_modules/@stdlib/blas/base/dgemm/README.md @@ -190,18 +190,79 @@ console.log( C ); #include "stdlib/blas/base/dgemm.h" ``` -#### TODO +#### c_dgemm( layout, transA, transB, M, N, K, alpha, \*A, LDA, \*B, LDB, beta, \*C, LDC ) -TODO. +Performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` where `op(X)` is either `op(X) = X` or `op(X) = X^T`, `α` and `β` are scalars, `A`, `B`, and `C` are matrices, with `op(A)` an `M` by `K` matrix, `op(B)` a `K` by `N` matrix, and `C` an `M` by `N` matrix. + +```c +#include "stdlib/blas/base/shared.h" + +const double A[] = { 1.0, 2.0, 3.0, 4.0 }; +const double B[] = { 1.0, 1.0, 0.0, 1.0 }; +double C[] = { 1.0, 2.0, 3.0, 4.0 }; + +c_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 2, 2, 2, 1.0, A, 2, B, 2, 1.0, C, 2 ); +``` + +The function accepts the following arguments: + +- **layout**: `[in] CBLAS_LAYOUT` storage layout. +- **transA**: `[in] CBLAS_TRANSPOSE` specifies whether `A` should be transposed, conjugate-transposed, or not transposed. +- **transB**: `[in] CBLAS_TRANSPOSE` specifies whether `B` should be transposed, conjugate-transposed, or not transposed. +- **M**: `[in] CBLAS_INT` number of rows in the matrix `op(A)` and in the matrix `C`. +- **N**: `[in] CBLAS_INT` number of columns in the matrix `op(B)` and in the matrix `C`. +- **K**: `[in] CBLAS_INT` number of columns in the matrix `op(A)` and number of rows in the matrix `op(B)`. +- **alpha**: `[in] double` scalar constant. +- **A**: `[in] double*` first input matrix. +- **LDA**: `[in] CBLAS_INT` stride of the first dimension of `A` (a.k.a., leading dimension of the matrix `A`). +- **B**: `[in] double*` second input matrix. +- **LDB**: `[in] CBLAS_INT` stride of the first dimension of `B` (a.k.a., leading dimension of the matrix `B`). 
+- **beta**: `[in] double` scalar constant. +- **C**: `[inout] double*` third input matrix. +- **LDC**: `[in] CBLAS_INT` stride of the first dimension of `C` (a.k.a., leading dimension of the matrix `C`). + +```c +void c_dgemm( const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const CBLAS_INT M, const CBLAS_INT N, const CBLAS_INT K, const double alpha, const double *A, const CBLAS_INT LDA, const double *B, const CBLAS_INT LDB, const double beta, double *C, const CBLAS_INT LDC ) +``` + +#### c_dgemm_ndarray( transA, transB, M, N, K, alpha, \*A, sa1, sa2, oa, \*B, sb1, sb2, ob, beta, \*C, sc1, sc2, oc ) + +Performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C`, using alternative indexing semantics and where `op(X)` is either `op(X) = X` or `op(X) = X^T`, `α` and `β` are scalars, `A`, `B`, and `C` are matrices, with `op(A)` an `M` by `K` matrix, `op(B)` a `K` by `N` matrix, and `C` an `M` by `N` matrix. ```c -TODO +#include "stdlib/blas/base/shared.h" + +const double A[] = { 1.0, 2.0, 3.0, 4.0 }; +const double B[] = { 1.0, 1.0, 0.0, 1.0 }; +double C[] = { 1.0, 2.0, 3.0, 4.0 }; + +c_dgemm_ndarray( CblasNoTrans, CblasNoTrans, 2, 2, 2, 1.0, A, 2, 1, 0, B, 2, 1, 0, 1.0, C, 2, 1, 0 ); ``` -TODO +The function accepts the following arguments: + +- **transA**: `[in] CBLAS_TRANSPOSE` specifies whether `A` should be transposed, conjugate-transposed, or not transposed. +- **transB**: `[in] CBLAS_TRANSPOSE` specifies whether `B` should be transposed, conjugate-transposed, or not transposed. +- **M**: `[in] CBLAS_INT` number of rows in the matrix `op(A)` and in the matrix `C`. +- **N**: `[in] CBLAS_INT` number of columns in the matrix `op(B)` and in the matrix `C`. +- **K**: `[in] CBLAS_INT` number of columns in the matrix `op(A)` and number of rows in the matrix `op(B)`. +- **alpha**: `[in] double` scalar constant. +- **A**: `[in] double*` first input matrix. +- **sa1**: `[in] CBLAS_INT` stride of the first dimension of `A`. 
+- **sa2**: `[in] CBLAS_INT` stride of the second dimension of `A`. +- **oa**: `[in] CBLAS_INT` starting index for `A`. +- **B**: `[in] double*` second input matrix. +- **sb1**: `[in] CBLAS_INT` stride of the first dimension of `B`. +- **sb2**: `[in] CBLAS_INT` stride of the second dimension of `B`. +- **ob**: `[in] CBLAS_INT` starting index for `B`. +- **beta**: `[in] double` scalar constant. +- **C**: `[inout] double*` third input matrix. +- **sc1**: `[in] CBLAS_INT` stride of the first dimension of `C`. +- **sc2**: `[in] CBLAS_INT` stride of the second dimension of `C`. +- **oc**: `[in] CBLAS_INT` starting index for `C`. ```c -TODO +void c_dgemm_ndarray( const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const CBLAS_INT M, const CBLAS_INT N, const CBLAS_INT K, const double alpha, const double *A, const CBLAS_INT strideA1, const CBLAS_INT strideA2, const CBLAS_INT offsetA, const double *B, const CBLAS_INT strideB1, const CBLAS_INT strideB2, const CBLAS_INT offsetB, const double beta, double *C, const CBLAS_INT strideC1, const CBLAS_INT strideC2, const CBLAS_INT offsetC ) ``` @@ -223,7 +284,52 @@ TODO ### Examples ```c -TODO +#include "stdlib/blas/base/dgemm.h" +#include "stdlib/blas/base/shared.h" +#include <stdio.h> + +int main( void ) { + // Define matrices stored in row-major order: + const double A[ 3*2 ] = { + 1.0, 2.0, + 3.0, 4.0, + 5.0, 6.0 + }; + const double B[ 2*4 ] = { + 1.0, 2.0, 3.0, 4.0, + 5.0, 6.0, 7.0, 8.0 + }; + double C[ 3*4 ] = { + 1.0, 2.0, 3.0, 4.0, + 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0 + }; + + // Specify matrix dimensions: + const int M = 3; + const int N = 4; + const int K = 2; + + // Perform the matrix-matrix operation `C = α*A*B + β*C`: + c_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1.0, A, K, B, N, 1.0, C, N ); + + // Print the result: + for ( int i = 0; i < M; i++ ) { + for ( int j = 0; j < N; j++ ) { + printf( "C[ %i ][ %i ] = %lf\n", i, j, C[ (i*N)+j ] ); + } + } + + // Perform the matrix-matrix operation `C = 
α*A*B + β*C` using alternative indexing semantics: + c_dgemm_ndarray( CblasNoTrans, CblasNoTrans, M, N, K, 1.0, A, K, 1, 0, B, N, 1, 0, 1.0, C, N, 1, 0 ); + + // Print the result: + for ( int i = 0; i < M; i++ ) { + for ( int j = 0; j < N; j++ ) { + printf( "C[ %i ][ %i ] = %lf\n", i, j, C[ (i*N)+j ] ); + } + } +} ``` diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/benchmark.native.js b/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/benchmark.native.js new file mode 100644 index 000000000000..2c9915d7405b --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/benchmark.native.js @@ -0,0 +1,111 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +'use strict'; + +// MODULES // + +var resolve = require( 'path' ).resolve; +var bench = require( '@stdlib/bench' ); +var uniform = require( '@stdlib/random/array/uniform' ); +var format = require( '@stdlib/string/format' ); +var isnan = require( '@stdlib/math/base/assert/is-nan' ); +var pow = require( '@stdlib/math/base/special/pow' ); +var floor = require( '@stdlib/math/base/special/floor' ); +var tryRequire = require( '@stdlib/utils/try-require' ); +var pkg = require( './../package.json' ).name; + + +// VARIABLES // + +var dgemm = tryRequire( resolve( __dirname, './../lib/dgemm.native.js' ) ); +var opts = { + 'skip': ( dgemm instanceof Error ) +}; +var options = { + 'dtype': 'float64' +}; + + +// FUNCTIONS // + +/** +* Creates a benchmark function. +* +* @private +* @param {PositiveInteger} N - array dimension size +* @returns {Function} benchmark function +*/ +function createBenchmark( N ) { + var A = uniform( N*N, -10.0, 10.0, options ); + var B = uniform( N*N, -10.0, 10.0, options ); + var C = uniform( N*N, -10.0, 10.0, options ); + return benchmark; + + /** + * Benchmark function. + * + * @private + * @param {Benchmark} b - benchmark instance + */ + function benchmark( b ) { + var z; + var i; + + b.tic(); + for ( i = 0; i < b.iterations; i++ ) { + z = dgemm( 'row-major', 'no-transpose', 'no-transpose', N, N, N, 1.0, A, N, B, N, 1.0, C, N ); + if ( isnan( z[ i%z.length ] ) ) { + b.fail( 'should not return NaN' ); + } + } + b.toc(); + if ( isnan( z[ i%z.length ] ) ) { + b.fail( 'should not return NaN' ); + } + b.pass( 'benchmark finished' ); + b.end(); + } +} + + +// MAIN // + +/** +* Main execution sequence. 
+* +* @private +*/ +function main() { + var min; + var max; + var N; + var f; + var i; + + min = 1; // 10^min + max = 5; // 10^max + + for ( i = min; i <= max; i++ ) { + N = floor( pow( pow( 10, i ), 1.0/2.0 ) ); + f = createBenchmark( N ); + bench( format( '%s::native:size=%d', pkg, N*N ), opts, f ); + } +} + +main(); diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/benchmark.ndarray.native.js b/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/benchmark.ndarray.native.js new file mode 100644 index 000000000000..b41ffc7bc402 --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/benchmark.ndarray.native.js @@ -0,0 +1,111 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +'use strict'; + +// MODULES // + +var resolve = require( 'path' ).resolve; +var bench = require( '@stdlib/bench' ); +var uniform = require( '@stdlib/random/array/uniform' ); +var format = require( '@stdlib/string/format' ); +var isnan = require( '@stdlib/math/base/assert/is-nan' ); +var pow = require( '@stdlib/math/base/special/pow' ); +var floor = require( '@stdlib/math/base/special/floor' ); +var tryRequire = require( '@stdlib/utils/try-require' ); +var pkg = require( './../package.json' ).name; + + +// VARIABLES // + +var dgemm = tryRequire( resolve( __dirname, './../lib/ndarray.native.js' ) ); +var opts = { + 'skip': ( dgemm instanceof Error ) +}; +var options = { + 'dtype': 'float64' +}; + + +// FUNCTIONS // + +/** +* Creates a benchmark function. +* +* @private +* @param {PositiveInteger} N - array dimension size +* @returns {Function} benchmark function +*/ +function createBenchmark( N ) { + var A = uniform( N*N, -10.0, 10.0, options ); + var B = uniform( N*N, -10.0, 10.0, options ); + var C = uniform( N*N, -10.0, 10.0, options ); + return benchmark; + + /** + * Benchmark function. + * + * @private + * @param {Benchmark} b - benchmark instance + */ + function benchmark( b ) { + var z; + var i; + + b.tic(); + for ( i = 0; i < b.iterations; i++ ) { + z = dgemm( 'no-transpose', 'no-transpose', N, N, N, 1.0, A, 1, N, 0, B, 1, N, 0, 1.0, C, 1, N, 0 ); + if ( isnan( z[ i%z.length ] ) ) { + b.fail( 'should not return NaN' ); + } + } + b.toc(); + if ( isnan( z[ i%z.length ] ) ) { + b.fail( 'should not return NaN' ); + } + b.pass( 'benchmark finished' ); + b.end(); + } +} + + +// MAIN // + +/** +* Main execution sequence. 
+* +* @private +*/ +function main() { + var min; + var max; + var N; + var f; + var i; + + min = 1; // 10^min + max = 5; // 10^max + + for ( i = min; i <= max; i++ ) { + N = floor( pow( pow( 10, i ), 1.0/2.0 ) ); + f = createBenchmark( N ); + bench( format( '%s::native:ndarray:size=%d', pkg, N*N ), opts, f ); + } +} + +main(); diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/c/Makefile b/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/c/Makefile new file mode 100644 index 000000000000..0756dc7da20a --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/c/Makefile @@ -0,0 +1,146 @@ +#/ +# @license Apache-2.0 +# +# Copyright (c) 2026 The Stdlib Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#/ + +# VARIABLES # + +ifndef VERBOSE + QUIET := @ +else + QUIET := +endif + +# Determine the OS ([1][1], [2][2]). 
+# +# [1]: https://en.wikipedia.org/wiki/Uname#Examples +# [2]: http://stackoverflow.com/a/27776822/2225624 +OS ?= $(shell uname) +ifneq (, $(findstring MINGW,$(OS))) + OS := WINNT +else +ifneq (, $(findstring MSYS,$(OS))) + OS := WINNT +else +ifneq (, $(findstring CYGWIN,$(OS))) + OS := WINNT +else +ifneq (, $(findstring Windows_NT,$(OS))) + OS := WINNT +endif +endif +endif +endif + +# Define the program used for compiling C source files: +ifdef C_COMPILER + CC := $(C_COMPILER) +else + CC := gcc +endif + +# Define the command-line options when compiling C files: +CFLAGS ?= \ + -std=c99 \ + -O3 \ + -Wall \ + -pedantic + +# Determine whether to generate position independent code ([1][1], [2][2]). +# +# [1]: https://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html#Code-Gen-Options +# [2]: http://stackoverflow.com/questions/5311515/gcc-fpic-option +ifeq ($(OS), WINNT) + fPIC ?= +else + fPIC ?= -fPIC +endif + +# List of includes (e.g., `-I /foo/bar -I /beep/boop/include`): +INCLUDE ?= + +# List of source files: +SOURCE_FILES ?= + +# List of libraries (e.g., `-lopenblas -lpthread`): +LIBRARIES ?= + +# List of library paths (e.g., `-L /foo/bar -L /beep/boop`): +LIBPATH ?= + +# List of C targets: +c_targets := benchmark.length.out + + +# RULES # + +#/ +# Compiles source files. +# +# @param {string} [C_COMPILER] - C compiler (e.g., `gcc`) +# @param {string} [CFLAGS] - C compiler options +# @param {(string|void)} [fPIC] - compiler flag determining whether to generate position independent code (e.g., `-fPIC`) +# @param {string} [INCLUDE] - list of includes (e.g., `-I /foo/bar -I /beep/boop/include`) +# @param {string} [SOURCE_FILES] - list of source files +# @param {string} [LIBPATH] - list of library paths (e.g., `-L /foo/bar -L /beep/boop`) +# @param {string} [LIBRARIES] - list of libraries (e.g., `-lopenblas -lpthread`) +# +# @example +# make +# +# @example +# make all +#/ +all: $(c_targets) + +.PHONY: all + +#/ +# Compiles C source files. 
+# +# @private +# @param {string} CC - C compiler (e.g., `gcc`) +# @param {string} CFLAGS - C compiler options +# @param {(string|void)} fPIC - compiler flag determining whether to generate position independent code (e.g., `-fPIC`) +# @param {string} INCLUDE - list of includes (e.g., `-I /foo/bar`) +# @param {string} SOURCE_FILES - list of source files +# @param {string} LIBPATH - list of library paths (e.g., `-L /foo/bar`) +# @param {string} LIBRARIES - list of libraries (e.g., `-lopenblas`) +#/ +$(c_targets): %.out: %.c + $(QUIET) $(CC) $(CFLAGS) $(fPIC) $(INCLUDE) -o $@ $(SOURCE_FILES) $< $(LIBPATH) -lm $(LIBRARIES) + +#/ +# Runs compiled benchmarks. +# +# @example +# make run +#/ +run: $(c_targets) + $(QUIET) ./$< + +.PHONY: run + +#/ +# Removes generated files. +# +# @example +# make clean +#/ +clean: + $(QUIET) -rm -f *.o *.out + +.PHONY: clean diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/c/benchmark.length.c b/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/c/benchmark.length.c new file mode 100644 index 000000000000..7a6404df48b9 --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/benchmark/c/benchmark.length.c @@ -0,0 +1,203 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +#include "stdlib/blas/base/dgemm.h" +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <time.h> +#include <sys/time.h> + +#define NAME "dgemm" +#define ITERATIONS 10000000 +#define REPEATS 3 +#define MIN 1 +#define MAX 5 + +/** +* Prints the TAP version. +*/ +static void print_version( void ) { + printf( "TAP version 13\n" ); +} + +/** +* Prints the TAP summary. +* +* @param total total number of tests +* @param passing total number of passing tests +*/ +static void print_summary( int total, int passing ) { + printf( "#\n" ); + printf( "1..%d\n", total ); // TAP plan + printf( "# total %d\n", total ); + printf( "# pass %d\n", passing ); + printf( "#\n" ); + printf( "# ok\n" ); +} + +/** +* Prints benchmarks results. +* +* @param iterations number of iterations +* @param elapsed elapsed time in seconds +*/ +static void print_results( int iterations, double elapsed ) { + double rate = (double)iterations / elapsed; + printf( " ---\n" ); + printf( " iterations: %d\n", iterations ); + printf( " elapsed: %0.9f\n", elapsed ); + printf( " rate: %0.9f\n", rate ); + printf( " ...\n" ); +} + +/** +* Returns a clock time. +* +* @return clock time +*/ +static double tic( void ) { + struct timeval now; + gettimeofday( &now, NULL ); + return (double)now.tv_sec + (double)now.tv_usec/1.0e6; +} + +/** +* Generates a random number on the interval [0,1). +* +* @return random number +*/ +static double rand_double( void ) { + int r = rand(); + return (double)r / ( (double)RAND_MAX + 1.0 ); +} + +/** +* Runs a benchmark. 
+* +* @param iterations number of iterations +* @param N array dimension size +* @return elapsed time in seconds +*/ +static double benchmark1( int iterations, int N ) { + double elapsed; + double A[ N*N ]; + double B[ N*N ]; + double C[ N*N ]; + double t; + int i; + int j; + + for ( i = 0; i < N; i++ ) { + for ( j = 0; j < N; j++ ) { + A[ (i*N)+j ] = ( rand_double()*20.0 ) - 10.0; + B[ (i*N)+j ] = ( rand_double()*20.0 ) - 10.0; + C[ (i*N)+j ] = ( rand_double()*20.0 ) - 10.0; + } + } + t = tic(); + for ( i = 0; i < iterations; i++ ) { + // cppcheck-suppress uninitvar + c_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, N, N, N, 1.0, A, N, B, N, 1.0, C, N ); + if ( C[ i%(N*N) ] != C[ i%(N*N) ] ) { + printf( "should not return NaN\n" ); + break; + } + } + elapsed = tic() - t; + if ( C[ i%(N*N) ] != C[ i%(N*N) ] ) { + printf( "should not return NaN\n" ); + } + return elapsed; +} + +/** +* Runs a benchmark. +* +* @param iterations number of iterations +* @param N array dimension size +* @return elapsed time in seconds +*/ +static double benchmark2( int iterations, int N ) { + double elapsed; + double A[ N*N ]; + double B[ N*N ]; + double C[ N*N ]; + double t; + int i; + int j; + + for ( i = 0; i < N; i++ ) { + for ( j = 0; j < N; j++ ) { + A[ (i*N)+j ] = ( rand_double()*20.0 ) - 10.0; + B[ (i*N)+j ] = ( rand_double()*20.0 ) - 10.0; + C[ (i*N)+j ] = ( rand_double()*20.0 ) - 10.0; + } + } + t = tic(); + for ( i = 0; i < iterations; i++ ) { + // cppcheck-suppress uninitvar + c_dgemm_ndarray( CblasNoTrans, CblasNoTrans, N, N, N, 1.0, A, N, 1, 0, B, N, 1, 0, 1.0, C, N, 1, 0 ); + if ( C[ i%(N*N) ] != C[ i%(N*N) ] ) { + printf( "should not return NaN\n" ); + break; + } + } + elapsed = tic() - t; + if ( C[ i%(N*N) ] != C[ i%(N*N) ] ) { + printf( "should not return NaN\n" ); + } + return elapsed; +} + +/** +* Main execution sequence. 
+*/ +int main( void ) { + double elapsed; + int count; + int iter; + int N; + int i; + int j; + + // Use the current time to seed the random number generator: + srand( time( NULL ) ); + + print_version(); + count = 0; + for ( i = MIN; i <= MAX; i++ ) { + N = floor( pow( pow( 10, i ), 1.0/2.0 ) ); + iter = ITERATIONS / pow( 10, i-1 ); + for ( j = 0; j < REPEATS; j++ ) { + count += 1; + printf( "# c::%s:size=%d\n", NAME, N*N ); + elapsed = benchmark1( iter, N ); + print_results( iter, elapsed ); + printf( "ok %d benchmark finished\n", count ); + } + for ( j = 0; j < REPEATS; j++ ) { + count += 1; + printf( "# c::%s:ndarray:size=%d\n", NAME, N*N ); + elapsed = benchmark2( iter, N ); + print_results( iter, elapsed ); + printf( "ok %d benchmark finished\n", count ); + } + } + print_summary( count, count ); +} diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/binding.gyp b/lib/node_modules/@stdlib/blas/base/dgemm/binding.gyp new file mode 100644 index 000000000000..60dce9d0b31a --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/binding.gyp @@ -0,0 +1,265 @@ +# @license Apache-2.0 +# +# Copyright (c) 2026 The Stdlib Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A `.gyp` file for building a Node.js native add-on. 
+# +# [1]: https://gyp.gsrc.io/docs/InputFormatReference.md +# [2]: https://gyp.gsrc.io/docs/UserDocumentation.md +{ + # List of files to include in this file: + 'includes': [ + './include.gypi', + ], + + # Define variables to be used throughout the configuration for all targets: + 'variables': { + # Target name should match the add-on export name: + 'addon_target_name%': 'addon', + + # Fortran compiler (to override -Dfortran_compiler=): + 'fortran_compiler%': 'gfortran', + + # Fortran compiler flags: + 'fflags': [ + # Specify the Fortran standard to which a program is expected to conform: + '-std=f95', + + # Indicate that the layout is free-form source code: + '-ffree-form', + + # Aggressive optimization: + '-O3', + + # Enable commonly used warning options: + '-Wall', + + # Warn if source code contains problematic language features: + '-Wextra', + + # Warn if a procedure is called without an explicit interface: + '-Wimplicit-interface', + + # Do not transform names of entities specified in Fortran source files by appending underscores (i.e., don't mangle names, thus allowing easier usage in C wrappers): + '-fno-underscoring', + + # Warn if source code contains Fortran 95 extensions and C-language constructs: + '-pedantic', + + # Compile but do not link (output is an object file): + '-c', + ], + + # Set variables based on the host OS: + 'conditions': [ + [ + 'OS=="win"', + { + # Define the object file suffix: + 'obj': 'obj', + }, + { + # Define the object file suffix: + 'obj': 'o', + } + ], # end condition (OS=="win") + ], # end conditions + }, # end variables + + # Define compile targets: + 'targets': [ + + # Target to generate an add-on: + { + # The target name should match the add-on export name: + 'target_name': '<(addon_target_name)', + + # Define dependencies: + 'dependencies': [], + + # Define directories which contain relevant include headers: + 'include_dirs': [ + # Local include directory: + '<@(include_dirs)', + ], + + # List of source files: + 
'sources': [ + '<@(src_files)', + ], + + # Settings which should be applied when a target's object files are used as linker input: + 'link_settings': { + # Define libraries: + 'libraries': [ + '<@(libraries)', + ], + + # Define library directories: + 'library_dirs': [ + '<@(library_dirs)', + ], + }, + + # C/C++ compiler flags: + 'cflags': [ + # Enable commonly used warning options: + '-Wall', + + # Aggressive optimization: + '-O3', + ], + + # C specific compiler flags: + 'cflags_c': [ + # Specify the C standard to which a program is expected to conform: + '-std=c99', + ], + + # C++ specific compiler flags: + 'cflags_cpp': [ + # Specify the C++ standard to which a program is expected to conform: + '-std=c++11', + ], + + # Linker flags: + 'ldflags': [], + + # Apply conditions based on the host OS: + 'conditions': [ + [ + 'OS=="mac"', + { + # Linker flags: + 'ldflags': [ + '-undefined dynamic_lookup', + '-Wl,-no-pie', + '-Wl,-search_paths_first', + ], + }, + ], # end condition (OS=="mac") + [ + 'OS!="win"', + { + # C/C++ flags: + 'cflags': [ + # Generate platform-independent code: + '-fPIC', + ], + }, + ], # end condition (OS!="win") + ], # end conditions + + # Define custom build actions for particular inputs: + 'rules': [ + { + # Define a rule for processing Fortran files: + 'extension': 'f', + + # Define the pathnames to be used as inputs when performing processing: + 'inputs': [ + # Full path of the current input: + '<(RULE_INPUT_PATH)' + ], + + # Define the outputs produced during processing: + 'outputs': [ + # Store an output object file in a directory for placing intermediate results (only accessible within a single target): + '<(INTERMEDIATE_DIR)/<(RULE_INPUT_ROOT).<(obj)' + ], + + # Define the rule for compiling Fortran based on the host OS: + 'conditions': [ + [ + 'OS=="win"', + + # Rule to compile Fortran on Windows: + { + 'rule_name': 'compile_fortran_windows', + 'message': 'Compiling Fortran file <(RULE_INPUT_PATH) on Windows...', + + 
'process_outputs_as_sources': 0, + + # Define the command-line invocation: + 'action': [ + '<(fortran_compiler)', + '<@(fflags)', + '<@(_inputs)', + '-o', + '<@(_outputs)', + ], + }, + + # Rule to compile Fortran on non-Windows: + { + 'rule_name': 'compile_fortran_linux', + 'message': 'Compiling Fortran file <(RULE_INPUT_PATH) on Linux...', + + 'process_outputs_as_sources': 1, + + # Define the command-line invocation: + 'action': [ + '<(fortran_compiler)', + '<@(fflags)', + '-fPIC', # generate platform-independent code + '<@(_inputs)', + '-o', + '<@(_outputs)', + ], + } + ], # end condition (OS=="win") + ], # end conditions + }, # end rule (extension=="f") + ], # end rules + }, # end target <(addon_target_name) + + # Target to copy a generated add-on to a standard location: + { + 'target_name': 'copy_addon', + + # Declare that the output of this target is not linked: + 'type': 'none', + + # Define dependencies: + 'dependencies': [ + # Require that the add-on be generated before building this target: + '<(addon_target_name)', + ], + + # Define a list of actions: + 'actions': [ + { + 'action_name': 'copy_addon', + 'message': 'Copying addon...', + + # Explicitly list the inputs in the command-line invocation below: + 'inputs': [], + + # Declare the expected outputs: + 'outputs': [ + '<(addon_output_dir)/<(addon_target_name).node', + ], + + # Define the command-line invocation: + 'action': [ + 'cp', + '<(PRODUCT_DIR)/<(addon_target_name).node', + '<(addon_output_dir)/<(addon_target_name).node', + ], + }, + ], # end actions + }, # end target copy_addon + ], # end targets +} diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/examples/c/Makefile b/lib/node_modules/@stdlib/blas/base/dgemm/examples/c/Makefile new file mode 100644 index 000000000000..c8f8e9a1517b --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/examples/c/Makefile @@ -0,0 +1,146 @@ +#/ +# @license Apache-2.0 +# +# Copyright (c) 2026 The Stdlib Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#/ + +# VARIABLES # + +ifndef VERBOSE + QUIET := @ +else + QUIET := +endif + +# Determine the OS ([1][1], [2][2]). +# +# [1]: https://en.wikipedia.org/wiki/Uname#Examples +# [2]: http://stackoverflow.com/a/27776822/2225624 +OS ?= $(shell uname) +ifneq (, $(findstring MINGW,$(OS))) + OS := WINNT +else +ifneq (, $(findstring MSYS,$(OS))) + OS := WINNT +else +ifneq (, $(findstring CYGWIN,$(OS))) + OS := WINNT +else +ifneq (, $(findstring Windows_NT,$(OS))) + OS := WINNT +endif +endif +endif +endif + +# Define the program used for compiling C source files: +ifdef C_COMPILER + CC := $(C_COMPILER) +else + CC := gcc +endif + +# Define the command-line options when compiling C files: +CFLAGS ?= \ + -std=c99 \ + -O3 \ + -Wall \ + -pedantic + +# Determine whether to generate position independent code ([1][1], [2][2]). +# +# [1]: https://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html#Code-Gen-Options +# [2]: http://stackoverflow.com/questions/5311515/gcc-fpic-option +ifeq ($(OS), WINNT) + fPIC ?= +else + fPIC ?= -fPIC +endif + +# List of includes (e.g., `-I /foo/bar -I /beep/boop/include`): +INCLUDE ?= + +# List of source files: +SOURCE_FILES ?= + +# List of libraries (e.g., `-lopenblas -lpthread`): +LIBRARIES ?= + +# List of library paths (e.g., `-L /foo/bar -L /beep/boop`): +LIBPATH ?= + +# List of C targets: +c_targets := example.out + + +# RULES # + +#/ +# Compiles source files. 
+# +# @param {string} [C_COMPILER] - C compiler (e.g., `gcc`) +# @param {string} [CFLAGS] - C compiler options +# @param {(string|void)} [fPIC] - compiler flag determining whether to generate position independent code (e.g., `-fPIC`) +# @param {string} [INCLUDE] - list of includes (e.g., `-I /foo/bar -I /beep/boop/include`) +# @param {string} [SOURCE_FILES] - list of source files +# @param {string} [LIBPATH] - list of library paths (e.g., `-L /foo/bar -L /beep/boop`) +# @param {string} [LIBRARIES] - list of libraries (e.g., `-lopenblas -lpthread`) +# +# @example +# make +# +# @example +# make all +#/ +all: $(c_targets) + +.PHONY: all + +#/ +# Compiles C source files. +# +# @private +# @param {string} CC - C compiler (e.g., `gcc`) +# @param {string} CFLAGS - C compiler options +# @param {(string|void)} fPIC - compiler flag determining whether to generate position independent code (e.g., `-fPIC`) +# @param {string} INCLUDE - list of includes (e.g., `-I /foo/bar`) +# @param {string} SOURCE_FILES - list of source files +# @param {string} LIBPATH - list of library paths (e.g., `-L /foo/bar`) +# @param {string} LIBRARIES - list of libraries (e.g., `-lopenblas`) +#/ +$(c_targets): %.out: %.c + $(QUIET) $(CC) $(CFLAGS) $(fPIC) $(INCLUDE) -o $@ $(SOURCE_FILES) $< $(LIBPATH) -lm $(LIBRARIES) + +#/ +# Runs compiled examples. +# +# @example +# make run +#/ +run: $(c_targets) + $(QUIET) ./$< + +.PHONY: run + +#/ +# Removes generated files. +# +# @example +# make clean +#/ +clean: + $(QUIET) -rm -f *.o *.out + +.PHONY: clean diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/examples/c/example.c b/lib/node_modules/@stdlib/blas/base/dgemm/examples/c/example.c new file mode 100644 index 000000000000..6c97829aa64e --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/examples/c/example.c @@ -0,0 +1,72 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. 
+* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#include "stdlib/blas/base/dgemm.h" +#include "stdlib/blas/base/shared.h" +#include + +int main( void ) { + // Define a 3x2 matrix A stored in row-major order: + const double A[ 3*2 ] = { + 1.0, 2.0, + 3.0, 4.0, + 5.0, 6.0 + }; + + // Define a 2x4 matrix B stored in row-major order: + const double B[ 2*4 ] = { + 1.0, 2.0, 3.0, 4.0, + 5.0, 6.0, 7.0, 8.0 + }; + + // Define a 3x4 output matrix C: + double C[ 3*4 ] = { + 1.0, 2.0, 3.0, 4.0, + 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0 + }; + + // Define dimensions: + const int M = 3; // rows of op(A) + const int N = 4; // columns of op(B) + const int K = 2; // columns of op(A) + + // Define scalars: + const double alpha = 1.0; + const double beta = 1.0; + + // Perform the matrix-matrix operation `C = α*op(A)*op(B) + β*C`: + c_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, A, K, B, N, beta, C, N ); + + // Print the result: + for ( int i = 0; i < M; i++ ) { + for ( int j = 0; j < N; j++ ) { + printf( "C[ %i, %i ] = %lf\n", i, j, C[ i*N + j ] ); + } + } + + // Perform the matrix-matrix operation `C = α*op(A)*op(B) + β*C` using alternative indexing semantics: + c_dgemm_ndarray( CblasNoTrans, CblasNoTrans, M, N, K, alpha, A, K, 1, 0, B, N, 1, 0, beta, C, N, 1, 0 ); + + // Print the result: + for ( int i = 0; i < M; i++ ) { + for ( int j = 0; j < N; j++ ) { + printf( "C[ %i, %i ] = %lf\n", i, j, C[ i*N + j ] ); + } + } +} diff --git 
a/lib/node_modules/@stdlib/blas/base/dgemm/include.gypi b/lib/node_modules/@stdlib/blas/base/dgemm/include.gypi new file mode 100644 index 000000000000..dcb556d250e8 --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/include.gypi @@ -0,0 +1,70 @@ +# @license Apache-2.0 +# +# Copyright (c) 2026 The Stdlib Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A GYP include file for building a Node.js native add-on. +# +# Note that nesting variables is required due to how GYP processes a configuration. Any variables defined within a nested 'variables' section is defined in the outer scope. Thus, conditions in the outer variable scope are free to use these variables without running into "variable undefined" errors. 
+#
+# Main documentation:
+#
+# [1]: https://gyp.gsrc.io/docs/InputFormatReference.md
+# [2]: https://gyp.gsrc.io/docs/UserDocumentation.md
+#
+# Variable nesting hacks:
+#
+# [3]: https://chromium.googlesource.com/external/skia/gyp/+/master/common_variables.gypi
+# [4]: https://src.chromium.org/viewvc/chrome/trunk/src/build/common.gypi?revision=127004
+{
+  # Define variables to be used throughout the configuration for all targets:
+  'variables': {
+    'variables': {
+      # Host BLAS library (to override -Dblas=):
+      'blas%': '',
+
+      # Path to BLAS library (to override -Dblas_dir=):
+      'blas_dir%': '',
+    }, # end variables
+
+    # Source directory:
+    'src_dir': './src',
+
+    # Include directories:
+    'include_dirs': [
+      '<@(blas_dir)',
+      ' [ 2.0, 5.0, 6.0, 11.0 ]
+*/
+function dgemm( order, transA, transB, M, N, K, alpha, A, LDA, B, LDB, beta, C, LDC ) { // eslint-disable-line max-params
+	var nrowsa;
+	var nrowsb;
+	var valc;
+	var isrm;
+	var iscm;
+
+	if ( !isLayout( order ) ) {
+		throw new TypeError( format( 'invalid argument. First argument must be a valid order. Value: `%s`.', order ) );
+	}
+	if ( !isMatrixTranspose( transA ) ) {
+		throw new TypeError( format( 'invalid argument. Second argument must be a valid transpose operation. Value: `%s`.', transA ) );
+	}
+	if ( !isMatrixTranspose( transB ) ) {
+		throw new TypeError( format( 'invalid argument. Third argument must be a valid transpose operation. Value: `%s`.', transB ) );
+	}
+	if ( M < 0 ) {
+		throw new RangeError( format( 'invalid argument. Fourth argument must be a nonnegative integer. Value: `%d`.', M ) );
+	}
+	if ( N < 0 ) {
+		throw new RangeError( format( 'invalid argument. Fifth argument must be a nonnegative integer. Value: `%d`.', N ) );
+	}
+	if ( K < 0 ) {
+		throw new RangeError( format( 'invalid argument. Sixth argument must be a nonnegative integer. Value: `%d`.', K ) );
+	}
+	isrm = isRowMajor( order );
+	iscm = isColumnMajor( order );
+
+	// Resolve the lower bounds enforced on `LDA` and `LDB` (they depend on both the layout and the transpose operation):
+	if (
+		( isrm && transA === 'no-transpose' ) ||
+		( iscm && transA !== 'no-transpose' )
+	) {
+		nrowsa = K;
+	} else {
+		nrowsa = M;
+	}
+	if (
+		( isrm && transB === 'no-transpose' ) ||
+		( iscm && transB !== 'no-transpose' )
+	) {
+		nrowsb = N;
+	} else {
+		nrowsb = K;
+	}
+	if ( LDA < max( 1, nrowsa ) ) {
+		throw new RangeError( format( 'invalid argument. Ninth argument must be greater than or equal to max(1,%d). Value: `%d`.', nrowsa, LDA ) );
+	}
+	if ( LDB < max( 1, nrowsb ) ) {
+		throw new RangeError( format( 'invalid argument. Eleventh argument must be greater than or equal to max(1,%d). Value: `%d`.', nrowsb, LDB ) );
+	}
+	// The lower bound enforced on `LDC` depends only on the layout:
+	if ( isrm ) {
+		valc = N;
+	} else {
+		valc = M;
+	}
+	if ( LDC < max( 1, valc ) ) {
+		throw new RangeError( format( 'invalid argument. Fourteenth argument must be greater than or equal to max(1,%d). Value: `%d`.', valc, LDC ) );
+	}
+	addon( resolveOrder( order ), resolveTrans( transA ), resolveTrans( transB ), M, N, K, alpha, A, LDA, B, LDB, beta, C, LDC ); // eslint-disable-line max-len
+	return C;
+}
+
+
+// EXPORTS //
+
+module.exports = dgemm;
diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/lib/native.js b/lib/node_modules/@stdlib/blas/base/dgemm/lib/native.js
new file mode 100644
index 000000000000..19e46d42238e
--- /dev/null
+++ b/lib/node_modules/@stdlib/blas/base/dgemm/lib/native.js
@@ -0,0 +1,35 @@
+/**
+* @license Apache-2.0
+*
+* Copyright (c) 2026 The Stdlib Authors.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +// MODULES // + +var setReadOnly = require( '@stdlib/utils/define-nonenumerable-read-only-property' ); +var dgemm = require( './dgemm.native.js' ); +var ndarray = require( './ndarray.native.js' ); + + +// MAIN // + +setReadOnly( dgemm, 'ndarray', ndarray ); + + +// EXPORTS // + +module.exports = dgemm; diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/lib/ndarray.native.js b/lib/node_modules/@stdlib/blas/base/dgemm/lib/ndarray.native.js new file mode 100644 index 000000000000..b9463e829a54 --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/lib/ndarray.native.js @@ -0,0 +1,105 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +'use strict'; + +// MODULES // + +var isMatrixTranspose = require( '@stdlib/blas/base/assert/is-transpose-operation' ); +var resolveTrans = require( '@stdlib/blas/base/transpose-operation-resolve-enum' ); +var format = require( '@stdlib/string/format' ); +var addon = require( './../src/addon.node' ); + + +// MAIN // + +/** +* Performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` where `op(X)` is either `op(X) = X` or `op(X) = X^T`, `α` and `β` are scalars, `A`, `B`, and `C` are matrices, with `op(A)` an `M` by `K` matrix, `op(B)` a `K` by `N` matrix, and `C` an `M` by `N` matrix. +* +* @param {string} transA - specifies whether `A` should be transposed, conjugate-transposed, or not transposed +* @param {string} transB - specifies whether `B` should be transposed, conjugate-transposed, or not transposed +* @param {NonNegativeInteger} M - number of rows in the matrix `op(A)` and in the matrix `C` +* @param {NonNegativeInteger} N - number of columns in the matrix `op(B)` and in the matrix `C` +* @param {NonNegativeInteger} K - number of columns in the matrix `op(A)` and number of rows in the matrix `op(B)` +* @param {number} alpha - scalar constant +* @param {Float64Array} A - first matrix +* @param {integer} strideA1 - stride of the first dimension of `A` +* @param {integer} strideA2 - stride of the second dimension of `A` +* @param {NonNegativeInteger} offsetA - starting index for `A` +* @param {Float64Array} B - second matrix +* @param {integer} strideB1 - stride of the first dimension of `B` +* @param {integer} strideB2 - stride of the second dimension of `B` +* @param {NonNegativeInteger} offsetB - starting index for `B` +* @param {number} beta - scalar constant +* @param {Float64Array} C - third matrix +* @param {integer} strideC1 - stride of the first dimension of `C` +* @param {integer} strideC2 - stride of the second dimension of `C` +* @param {NonNegativeInteger} offsetC - starting index for `C` +* @throws {TypeError} first argument must be a 
valid transpose operation +* @throws {TypeError} second argument must be a valid transpose operation +* @throws {RangeError} third argument must be a nonnegative integer +* @throws {RangeError} fourth argument must be a nonnegative integer +* @throws {RangeError} fifth argument must be a nonnegative integer +* @throws {RangeError} seventeenth argument must be non-zero +* @throws {RangeError} eighteenth argument must be non-zero +* @returns {Float64Array} `C` +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* +* var A = new Float64Array( [ 1.0, 2.0, 3.0, 4.0 ] ); +* var B = new Float64Array( [ 1.0, 1.0, 0.0, 1.0 ] ); +* var C = new Float64Array( [ 1.0, 2.0, 3.0, 4.0 ] ); +* +* dgemm( 'no-transpose', 'no-transpose', 2, 2, 2, 1.0, A, 2, 1, 0, B, 2, 1, 0, 1.0, C, 2, 1, 0 ); +* // C => [ 2.0, 5.0, 6.0, 11.0 ] +*/ +function dgemm( transA, transB, M, N, K, alpha, A, strideA1, strideA2, offsetA, B, strideB1, strideB2, offsetB, beta, C, strideC1, strideC2, offsetC ) { // eslint-disable-line max-params, max-len + if ( !isMatrixTranspose( transA ) ) { + throw new TypeError( format( 'invalid argument. First argument must be a valid transpose operation. Value: `%s`.', transA ) ); + } + if ( !isMatrixTranspose( transB ) ) { + throw new TypeError( format( 'invalid argument. Second argument must be a valid transpose operation. Value: `%s`.', transB ) ); + } + if ( M < 0 ) { + throw new RangeError( format( 'invalid argument. Third argument must be a nonnegative integer. Value: `%d`.', M ) ); + } + if ( N < 0 ) { + throw new RangeError( format( 'invalid argument. Fourth argument must be a nonnegative integer. Value: `%d`.', N ) ); + } + if ( K < 0 ) { + throw new RangeError( format( 'invalid argument. Fifth argument must be a nonnegative integer. Value: `%d`.', K ) ); + } + if ( strideC1 === 0 ) { + throw new RangeError( format( 'invalid argument. Seventeenth argument must be non-zero. 
Value: `%d`.', strideC1 ) ); + } + if ( strideC2 === 0 ) { + throw new RangeError( format( 'invalid argument. Eighteenth argument must be non-zero. Value: `%d`.', strideC2 ) ); + } + // Check if we can early return... + if ( M === 0 || N === 0 || ( ( beta === 1.0 ) && ( ( alpha === 0.0 ) || ( K === 0 ) ) ) ) { + return C; + } + addon.ndarray( resolveTrans( transA ), resolveTrans( transB ), M, N, K, alpha, A, strideA1, strideA2, offsetA, B, strideB1, strideB2, offsetB, beta, C, strideC1, strideC2, offsetC ); // eslint-disable-line max-len + return C; +} + + +// EXPORTS // + +module.exports = dgemm; diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/manifest.json b/lib/node_modules/@stdlib/blas/base/dgemm/manifest.json new file mode 100644 index 000000000000..b7f2c55cd2ce --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/manifest.json @@ -0,0 +1,517 @@ +{ + "options": { + "task": "build", + "os": "linux", + "blas": "", + "wasm": false + }, + "fields": [ + { + "field": "src", + "resolve": true, + "relative": true + }, + { + "field": "include", + "resolve": true, + "relative": true + }, + { + "field": "libraries", + "resolve": false, + "relative": false + }, + { + "field": "libpath", + "resolve": true, + "relative": false + } + ], + "confs": [ + { + "task": "build", + "os": "linux", + "blas": "", + "wasm": false, + "src": [ + "./src/dgemm.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major", + "@stdlib/napi/export", + "@stdlib/napi/argv", + "@stdlib/napi/argv-int64", + "@stdlib/napi/argv-int32", + "@stdlib/napi/argv-double", + "@stdlib/napi/argv-strided-float64array2d" + ] + }, + { + "task": "benchmark", + "os": "linux", + "blas": "", + "wasm": false, + "src": [ + "./src/dgemm.c", + "./src/dgemm_ndarray.c" + ], + 
"include": [ + "./include" + ], + "libraries": [], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + { + "task": "examples", + "os": "linux", + "blas": "", + "wasm": false, + "src": [ + "./src/dgemm.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + + { + "task": "build", + "os": "linux", + "blas": "openblas", + "wasm": false, + "src": [ + "./src/dgemm_cblas.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [ + "-lopenblas", + "-lpthread" + ], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major", + "@stdlib/napi/export", + "@stdlib/napi/argv", + "@stdlib/napi/argv-int64", + "@stdlib/napi/argv-int32", + "@stdlib/napi/argv-double", + "@stdlib/napi/argv-strided-float64array2d" + ] + }, + { + "task": "benchmark", + "os": "linux", + "blas": "openblas", + "wasm": false, + "src": [ + "./src/dgemm_cblas.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [ + "-lopenblas", + "-lpthread" + ], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + { + "task": "examples", + "os": "linux", + "blas": "openblas", + "wasm": false, + "src": [ + "./src/dgemm_cblas.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [ + "-lopenblas", + "-lpthread" + 
], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + + { + "task": "build", + "os": "mac", + "blas": "", + "wasm": false, + "src": [ + "./src/dgemm.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major", + "@stdlib/napi/export", + "@stdlib/napi/argv", + "@stdlib/napi/argv-int64", + "@stdlib/napi/argv-int32", + "@stdlib/napi/argv-double", + "@stdlib/napi/argv-strided-float64array2d" + ] + }, + { + "task": "benchmark", + "os": "mac", + "blas": "", + "wasm": false, + "src": [ + "./src/dgemm.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + { + "task": "examples", + "os": "mac", + "blas": "", + "wasm": false, + "src": [ + "./src/dgemm.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + + { + "task": "build", + "os": "mac", + "blas": "apple_accelerate_framework", + "wasm": false, + "src": [ + "./src/dgemm_cblas.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [ + "-lblas" + ], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + 
"@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major", + "@stdlib/napi/export", + "@stdlib/napi/argv", + "@stdlib/napi/argv-int64", + "@stdlib/napi/argv-int32", + "@stdlib/napi/argv-double", + "@stdlib/napi/argv-strided-float64array2d" + ] + }, + { + "task": "benchmark", + "os": "mac", + "blas": "apple_accelerate_framework", + "wasm": false, + "src": [ + "./src/dgemm_cblas.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [ + "-lblas" + ], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + { + "task": "examples", + "os": "mac", + "blas": "apple_accelerate_framework", + "wasm": false, + "src": [ + "./src/dgemm_cblas.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [ + "-lblas" + ], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + + { + "task": "build", + "os": "mac", + "blas": "openblas", + "wasm": false, + "src": [ + "./src/dgemm_cblas.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [ + "-lopenblas", + "-lpthread" + ], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major", + "@stdlib/napi/export", + "@stdlib/napi/argv", + "@stdlib/napi/argv-int64", + "@stdlib/napi/argv-int32", + "@stdlib/napi/argv-double", + "@stdlib/napi/argv-strided-float64array2d" + ] + }, + { + "task": "benchmark", + "os": "mac", + "blas": "openblas", + "wasm": false, + "src": [ + "./src/dgemm_cblas.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + 
"libraries": [ + "-lopenblas", + "-lpthread" + ], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + { + "task": "examples", + "os": "mac", + "blas": "openblas", + "wasm": false, + "src": [ + "./src/dgemm_cblas.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [ + "-lopenblas", + "-lpthread" + ], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + + { + "task": "build", + "os": "win", + "blas": "", + "wasm": false, + "src": [ + "./src/dgemm.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major", + "@stdlib/napi/export", + "@stdlib/napi/argv", + "@stdlib/napi/argv-int64", + "@stdlib/napi/argv-int32", + "@stdlib/napi/argv-double", + "@stdlib/napi/argv-strided-float64array2d" + ] + }, + { + "task": "benchmark", + "os": "win", + "blas": "", + "wasm": false, + "src": [ + "./src/dgemm.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + { + "task": "examples", + "os": "win", + "blas": "", + "wasm": false, + "src": [ + "./src/dgemm.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + 
"@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + }, + + { + "task": "build", + "os": "", + "blas": "", + "wasm": true, + "src": [ + "./src/dgemm.c", + "./src/dgemm_ndarray.c" + ], + "include": [ + "./include" + ], + "libraries": [], + "libpath": [], + "dependencies": [ + "@stdlib/blas/base/shared", + "@stdlib/blas/base/xerbla", + "@stdlib/blas/base/ddot", + "@stdlib/strided/base/stride2offset", + "@stdlib/ndarray/base/assert/is-row-major" + ] + } + ] +} + diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/package.json b/lib/node_modules/@stdlib/blas/base/dgemm/package.json index af13a574a7be..c67a657aff2e 100644 --- a/lib/node_modules/@stdlib/blas/base/dgemm/package.json +++ b/lib/node_modules/@stdlib/blas/base/dgemm/package.json @@ -14,11 +14,15 @@ } ], "main": "./lib", + "browser": "./lib/main.js", + "gypfile": true, "directories": { "benchmark": "./benchmark", "doc": "./docs", "example": "./examples", + "include": "./include", "lib": "./lib", + "src": "./src", "test": "./test" }, "types": "./docs/types", diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/src/Makefile b/lib/node_modules/@stdlib/blas/base/dgemm/src/Makefile new file mode 100644 index 000000000000..2caf905cedbe --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/src/Makefile @@ -0,0 +1,70 @@ +#/ +# @license Apache-2.0 +# +# Copyright (c) 2026 The Stdlib Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#/ + +# VARIABLES # + +ifndef VERBOSE + QUIET := @ +else + QUIET := +endif + +# Determine the OS ([1][1], [2][2]). +# +# [1]: https://en.wikipedia.org/wiki/Uname#Examples +# [2]: http://stackoverflow.com/a/27776822/2225624 +OS ?= $(shell uname) +ifneq (, $(findstring MINGW,$(OS))) + OS := WINNT +else +ifneq (, $(findstring MSYS,$(OS))) + OS := WINNT +else +ifneq (, $(findstring CYGWIN,$(OS))) + OS := WINNT +else +ifneq (, $(findstring Windows_NT,$(OS))) + OS := WINNT +endif +endif +endif +endif + + +# RULES # + +#/ +# Removes generated files for building an add-on. +# +# @example +# make clean-addon +#/ +clean-addon: + $(QUIET) -rm -f *.o *.node + +.PHONY: clean-addon + +#/ +# Removes generated files. +# +# @example +# make clean +#/ +clean: clean-addon + +.PHONY: clean diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/src/addon.c b/lib/node_modules/@stdlib/blas/base/dgemm/src/addon.c new file mode 100644 index 000000000000..9b8c0f7c93c9 --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/src/addon.c @@ -0,0 +1,156 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/
+
+#include "stdlib/blas/base/dgemm.h"
+#include "stdlib/blas/base/shared.h"
+#include "stdlib/napi/export.h"
+#include "stdlib/napi/argv.h"
+#include "stdlib/napi/argv_int64.h"
+#include "stdlib/napi/argv_int32.h"
+#include "stdlib/napi/argv_double.h"
+#include "stdlib/napi/argv_strided_float64array2d.h"
+#include <node_api.h>
+
+/**
+* Receives JavaScript callback invocation data and invokes `c_dgemm` (14 arguments, leading-dimension API).
+*
+* @param env    environment under which the function is invoked
+* @param info   callback data
+* @return       Node-API value (always `NULL`; `C` is updated in-place)
+*/
+static napi_value addon( napi_env env, napi_callback_info info ) {
+	CBLAS_INT sa1;
+	CBLAS_INT sa2;
+	CBLAS_INT sb1;
+	CBLAS_INT sb2;
+	CBLAS_INT sc1;
+	CBLAS_INT sc2;
+	CBLAS_INT dA1;
+	CBLAS_INT dA2;
+	CBLAS_INT dB1;
+	CBLAS_INT dB2;
+
+	STDLIB_NAPI_ARGV( env, info, argv, argc, 14 );
+
+	STDLIB_NAPI_ARGV_INT32( env, layout, argv, 0 );
+
+	STDLIB_NAPI_ARGV_INT32( env, transA, argv, 1 );
+	STDLIB_NAPI_ARGV_INT32( env, transB, argv, 2 );
+	STDLIB_NAPI_ARGV_INT64( env, M, argv, 3 );
+	STDLIB_NAPI_ARGV_INT64( env, N, argv, 4 );
+	STDLIB_NAPI_ARGV_INT64( env, K, argv, 5 );
+	STDLIB_NAPI_ARGV_INT64( env, LDA, argv, 8 );
+	STDLIB_NAPI_ARGV_INT64( env, LDB, argv, 10 );
+	STDLIB_NAPI_ARGV_INT64( env, LDC, argv, 13 );
+
+	STDLIB_NAPI_ARGV_DOUBLE( env, alpha, argv, 6 );
+	STDLIB_NAPI_ARGV_DOUBLE( env, beta, argv, 11 );
+
+	if ( layout == CblasColMajor ) {
+		sa1 = 1; sa2 = LDA;
+		sb1 = 1; sb2 = LDB;
+		sc1 = 1; sc2 = LDC;
+	} else { // layout == CblasRowMajor
+		sa1 = LDA; sa2 = 1;
+		sb1 = LDB; sb2 = 1;
+		sc1 = LDC; sc2 = 1;
+	}
+
+	if ( transA != CblasNoTrans ) {
+		dA1 = K;
+		dA2 = M;
+	} else {
+		dA1 = M;
+		dA2 = K;
+	}
+	if ( transB != CblasNoTrans ) {
+		dB1 = N;
+		dB2 = K;
+	} else {
+		dB1 = K;
+		dB2 = N;
+	}
+
+	STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY2D( env, A, dA1, dA2, sa1, sa2, argv, 7 );
+	STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY2D( env, B, dB1, dB2, sb1, sb2, argv, 9 );
+	STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY2D( env, C, M, N, sc1, sc2, argv, 12 );
+
+	API_SUFFIX(c_dgemm)( layout, transA, transB, M, N, K, alpha, A, LDA, B, LDB, beta, C, LDC );
+
+	return NULL;
+}
+
+/**
+* Receives JavaScript callback invocation data and invokes `c_dgemm_ndarray` (19 arguments, stride/offset API).
+*
+* @param env    environment under which the function is invoked
+* @param info   callback data
+* @return       Node-API value (always `NULL`; `C` is updated in-place)
+*/
+static napi_value addon_method( napi_env env, napi_callback_info info ) {
+	CBLAS_INT dA1;
+	CBLAS_INT dA2;
+	CBLAS_INT dB1;
+	CBLAS_INT dB2;
+
+	STDLIB_NAPI_ARGV( env, info, argv, argc, 19 );
+
+	STDLIB_NAPI_ARGV_INT32( env, transA, argv, 0 );
+	STDLIB_NAPI_ARGV_INT32( env, transB, argv, 1 );
+
+	STDLIB_NAPI_ARGV_INT64( env, M, argv, 2 );
+	STDLIB_NAPI_ARGV_INT64( env, N, argv, 3 );
+	STDLIB_NAPI_ARGV_INT64( env, K, argv, 4 );
+	STDLIB_NAPI_ARGV_INT64( env, strideA1, argv, 7 );
+	STDLIB_NAPI_ARGV_INT64( env, strideA2, argv, 8 );
+	STDLIB_NAPI_ARGV_INT64( env, offsetA, argv, 9 );
+	STDLIB_NAPI_ARGV_INT64( env, strideB1, argv, 11 );
+	STDLIB_NAPI_ARGV_INT64( env, strideB2, argv, 12 );
+	STDLIB_NAPI_ARGV_INT64( env, offsetB, argv, 13 );
+	STDLIB_NAPI_ARGV_INT64( env, strideC1, argv, 16 );
+	STDLIB_NAPI_ARGV_INT64( env, strideC2, argv, 17 );
+	STDLIB_NAPI_ARGV_INT64( env, offsetC, argv, 18 );
+
+	STDLIB_NAPI_ARGV_DOUBLE( env, alpha, argv, 5 );
+	STDLIB_NAPI_ARGV_DOUBLE( env, beta, argv, 14 );
+
+	if ( transA != CblasNoTrans ) {
+		dA1 = K;
+		dA2 = M;
+	} else {
+		dA1 = M;
+		dA2 = K;
+	}
+	if ( transB != CblasNoTrans ) {
+		dB1 = N;
+		dB2 = K;
+	} else {
+		dB1 = K;
+		dB2 = N;
+	}
+
+	STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY2D( env, A, dA1, dA2, strideA1, strideA2, argv, 6 );
+	STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY2D( env, B, dB1, dB2, strideB1, strideB2, argv, 10 );
+	STDLIB_NAPI_ARGV_STRIDED_FLOAT64ARRAY2D( env, C, M, N, strideC1, strideC2, argv, 15 );
+
+	API_SUFFIX(c_dgemm_ndarray)( transA, transB, M, N, K, alpha, A, strideA1, strideA2, offsetA, B, strideB1, strideB2, offsetB, beta, C, strideC1, strideC2, offsetC );
+
+	return NULL;
+}
+
+STDLIB_NAPI_MODULE_EXPORT_FCN_WITH_METHOD( addon, "ndarray", addon_method
)
diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/src/dgemm.c b/lib/node_modules/@stdlib/blas/base/dgemm/src/dgemm.c
new file mode 100644
index 000000000000..daf2164e877e
--- /dev/null
+++ b/lib/node_modules/@stdlib/blas/base/dgemm/src/dgemm.c
@@ -0,0 +1,150 @@
+/**
+* @license Apache-2.0
+*
+* Copyright (c) 2026 The Stdlib Authors.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include "stdlib/blas/base/dgemm.h"
+#include "stdlib/blas/base/shared.h"
+#include "stdlib/blas/base/xerbla.h"
+#include "stdlib/strided/base/stride2offset.h"
+
+/**
+* Performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` where `op(X)` is either `op(X) = X` or `op(X) = X^T`, `α` and `β` are scalars, `A`, `B`, and `C` are matrices, with `op(A)` an `M` by `K` matrix, `op(B)` a `K` by `N` matrix, and `C` an `M` by `N` matrix.
+*
+* @param layout  storage layout
+* @param transA  specifies whether `A` should be transposed, conjugate-transposed, or not transposed
+* @param transB  specifies whether `B` should be transposed, conjugate-transposed, or not transposed
+* @param M       number of rows in the matrix `op(A)` and number of rows in the matrix `C`
+* @param N       number of columns in the matrix `op(B)` and number of columns in the matrix `C`
+* @param K       number of columns in the matrix `op(A)` and number of rows in the matrix `op(B)`
+* @param alpha   scalar constant
+* @param A       first matrix
+* @param LDA     stride of the first dimension of `A` (a.k.a., leading dimension of the matrix `A`)
+* @param B       second matrix
+* @param LDB     stride of the first dimension of `B` (a.k.a., leading dimension of the matrix `B`)
+* @param beta    scalar constant
+* @param C       third matrix
+* @param LDC     stride of the first dimension of `C` (a.k.a., leading dimension of the matrix `C`)
+*/
+void API_SUFFIX(c_dgemm)( const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const CBLAS_INT M, const CBLAS_INT N, const CBLAS_INT K, const double alpha, const double *A, const CBLAS_INT LDA, const double *B, const CBLAS_INT LDB, const double beta, double *C, const CBLAS_INT LDC ) {
+	CBLAS_INT vala;
+	CBLAS_INT valb;
+	CBLAS_INT valc;
+	CBLAS_INT sa1;
+	CBLAS_INT sa2;
+	CBLAS_INT sb1;
+	CBLAS_INT sb2;
+	CBLAS_INT sc1;
+	CBLAS_INT sc2;
+	CBLAS_INT va;
+	CBLAS_INT vb;
+	CBLAS_INT vc;
+	// Perform input argument validation (return after each report so invalid arguments never reach the computation; assumes `c_xerbla` may return to the caller)...
+	if ( layout != CblasRowMajor && layout != CblasColMajor ) {
+		c_xerbla( 1, "c_dgemm", "Error: invalid argument. First argument must be a valid storage layout. Value: `%d`.", layout );
+		return;
+	}
+	if ( transA != CblasTrans && transA != CblasConjTrans && transA != CblasNoTrans ) {
+		c_xerbla( 2, "c_dgemm", "Error: invalid argument. Second argument must be a valid transpose operation. Value: `%d`.", transA );
+		return;
+	}
+	if ( transB != CblasTrans && transB != CblasConjTrans && transB != CblasNoTrans ) {
+		c_xerbla( 3, "c_dgemm", "Error: invalid argument. Third argument must be a valid transpose operation. Value: `%d`.", transB );
+		return;
+	}
+	if ( M < 0 ) {
+		c_xerbla( 4, "c_dgemm", "Error: invalid argument. Fourth argument must be a nonnegative integer. Value: `%d`.", M );
+		return;
+	}
+	if ( N < 0 ) {
+		c_xerbla( 5, "c_dgemm", "Error: invalid argument. Fifth argument must be a nonnegative integer. Value: `%d`.", N );
+		return;
+	}
+	if ( K < 0 ) {
+		c_xerbla( 6, "c_dgemm", "Error: invalid argument. Sixth argument must be a nonnegative integer. Value: `%d`.", K );
+		return;
+	}
+	// Resolve the lower bound enforced on `LDA` (depends on both the layout and the transpose operation):
+	if (
+		( layout == CblasRowMajor && transA == CblasNoTrans ) ||
+		( layout == CblasColMajor && transA != CblasNoTrans )
+	) {
+		va = K;
+	} else {
+		va = M;
+	}
+	// Compute max( 1, va ):
+	if ( va < 1 ) {
+		vala = 1;
+	} else {
+		vala = va;
+	}
+	if ( LDA < vala ) {
+		c_xerbla( 9, "c_dgemm", "Error: invalid argument. Ninth argument must be greater than or equal to max(1,%d). Value: `%d`.", vala, LDA );
+		return;
+	}
+	// Resolve the lower bound enforced on `LDB` (depends on both the layout and the transpose operation):
+	if (
+		( layout == CblasRowMajor && transB == CblasNoTrans ) ||
+		( layout == CblasColMajor && transB != CblasNoTrans )
+	) {
+		vb = N;
+	} else {
+		vb = K;
+	}
+	// Compute max( 1, vb ):
+	if ( vb < 1 ) {
+		valb = 1;
+	} else {
+		valb = vb;
+	}
+	if ( LDB < valb ) {
+		c_xerbla( 11, "c_dgemm", "Error: invalid argument. Eleventh argument must be greater than or equal to max(1,%d). Value: `%d`.", valb, LDB );
+		return;
+	}
+	// Resolve the lower bound enforced on `LDC` (depends only on the layout):
+	if ( layout == CblasRowMajor ) {
+		vc = N;
+	} else { // layout == CblasColMajor
+		vc = M;
+	}
+	// Compute max( 1, vc ):
+	if ( vc < 1 ) {
+		valc = 1;
+	} else {
+		valc = vc;
+	}
+	if ( LDC < valc ) {
+		c_xerbla( 14, "c_dgemm", "Error: invalid argument. Fourteenth argument must be greater than or equal to max(1,%d). Value: `%d`.", valc, LDC );
+		return;
+	}
+	// Check if we can early return (nothing to write, or the update would leave `C` unchanged)...
+	if ( M == 0 || N == 0 || ( ( alpha == 0.0 || K == 0 ) && beta == 1.0 ) ) {
+		// TODO: Check accuracy with ndarray.js
+		return;
+	}
+	if ( layout == CblasColMajor ) {
+		sa1 = 1; sa2 = LDA;
+		sb1 = 1; sb2 = LDB;
+		sc1 = 1; sc2 = LDC;
+	} else { // layout == CblasRowMajor
+		sa1 = LDA; sa2 = 1;
+		sb1 = LDB; sb2 = 1;
+		sc1 = LDC; sc2 = 1;
+	}
+	API_SUFFIX(c_dgemm_ndarray)( transA, transB, M, N, K, alpha, A, sa1, sa2, 0, B, sb1, sb2, 0, beta, C, sc1, sc2, 0 );
+	return;
+}
diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/src/dgemm_cblas.c b/lib/node_modules/@stdlib/blas/base/dgemm/src/dgemm_cblas.c
new file mode 100644
index 000000000000..8260be721887
--- /dev/null
+++ b/lib/node_modules/@stdlib/blas/base/dgemm/src/dgemm_cblas.c
@@ -0,0 +1,43 @@
+/**
+* @license Apache-2.0
+*
+* Copyright (c) 2026 The Stdlib Authors.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include "stdlib/blas/base/dgemm.h"
+#include "stdlib/blas/base/dgemm_cblas.h"
+#include "stdlib/blas/base/shared.h"
+
+/**
+* Performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` where `op(X)` is either `op(X) = X` or `op(X) = X^T`, `α` and `β` are scalars, `A`, `B`, and `C` are matrices, with `op(A)` an `M` by `K` matrix, `op(B)` a `K` by `N` matrix, and `C` an `M` by `N` matrix.
+* +* @param layout storage layout +* @param transA specifies whether `A` should be transposed, conjugate-transposed, or not transposed +* @param transB specifies whether `B` should be transposed, conjugate-transposed, or not transposed +* @param M number of rows in the matrix `op(A)` and number of rows in the matrix `C` +* @param N number of columns in the matrix `op(B)` and number of columns in the matrix `C` +* @param K number of columns in the matrix `op(A)` and number of rows in the matrix `op(B)` +* @param alpha scalar constant +* @param A first matrix +* @param LDA stride of the first dimension of `A` (a.k.a., leading dimension of the matrix `A`) +* @param B second matrix +* @param LDB stride of the first dimension of `B` (a.k.a., leading dimension of the matrix `B`) +* @param beta scalar constant +* @param C third matrix +* @param LDC stride of the first dimension of `C` (a.k.a., leading dimension of the matrix `C`) +*/ +void API_SUFFIX(c_dgemm)( const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const CBLAS_INT M, const CBLAS_INT N, const CBLAS_INT K, const double alpha, const double *A, const CBLAS_INT LDA, const double *B, const CBLAS_INT LDB, const double beta, double *C, const CBLAS_INT LDC) { + API_SUFFIX(cblas_dgemm)( layout, transA, transB, M, N, K, alpha, A, LDA, B, LDB, beta, C, LDC ); +} diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/src/dgemm_ndarray.c b/lib/node_modules/@stdlib/blas/base/dgemm/src/dgemm_ndarray.c new file mode 100644 index 000000000000..9cf214811470 --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/src/dgemm_ndarray.c @@ -0,0 +1,405 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#include "stdlib/blas/base/dgemm.h" +#include "stdlib/blas/base/shared.h" +#include "stdlib/blas/base/xerbla.h" +#include "stdlib/blas/base/ddot.h" +#include "stdlib/ndarray/base/assert/is_row_major.h" +#include +#include + +/** + * Fills a matrix with zeros. + * + * @private + * @param M number of rows in the matrix + * @param N number of columns in the matrix + * @param X matrix to fill with zeros + * @param strideX1 stride of the first dimension of `X` + * @param strideX2 stride of the second dimension of `X` + * @param offsetX starting index for `X` + */ +static void zeros(const CBLAS_INT M, const CBLAS_INT N, double *X, const CBLAS_INT strideX1, const CBLAS_INT strideX2, const CBLAS_INT offsetX) { + int64_t s[ 2 ]; + CBLAS_INT dx0; + CBLAS_INT dx1; + CBLAS_INT S0; + CBLAS_INT S1; + CBLAS_INT i0; + CBLAS_INT i1; + CBLAS_INT ix; + bool isrm; + + s[0] = strideX1; + s[1] = strideX2; + isrm = stdlib_ndarray_is_row_major( 2, s ); + if ( isrm ) { + // For row-major matrices, the last dimension has the fastest changing index... + S0 = N; + S1 = M; + dx0 = strideX2; + dx1 = strideX1 - ( S0*strideX2 ); + } else { // CblasColMajor + // For column-major matrices, the first dimension has the fastest changing index... + S0 = M; + S1 = N; + dx0 = strideX1; + dx1 = strideX2 - ( S0*strideX1 ); + } + ix = offsetX; + for ( i1 = 0; i1 < S1; i1++ ) { + for ( i0 = 0; i0 < S0; i0++ ) { + X[ ix ] = 0.0; + ix += dx0; + } + ix += dx1; + } +} + +/** + * Scales each element in a matrix by a scalar `β`. 
+ * + * @private + * @param M number of rows in the matrix + * @param N number of columns in the matrix + * @param beta scalar constant + * @param X matrix to scale + * @param strideX1 stride of the first dimension of `X` + * @param strideX2 stride of the second dimension of `X` + * @param offsetX starting index for `X` + */ +static void scale( const CBLAS_INT M, const CBLAS_INT N, const double beta, double *X, const CBLAS_INT strideX1, const CBLAS_INT strideX2, const CBLAS_INT offsetX ) { + int64_t s[ 2 ]; + CBLAS_INT dx0; + CBLAS_INT dx1; + CBLAS_INT S0; + CBLAS_INT S1; + CBLAS_INT i0; + CBLAS_INT i1; + CBLAS_INT ix; + bool isrm; + + s[0] = strideX1; + s[1] = strideX2; + isrm = stdlib_ndarray_is_row_major( 2, s ); + if ( isrm ) { + // For row-major matrices, the last dimension has the fastest changing index... + S0 = N; + S1 = M; + dx0 = strideX2; + dx1 = strideX1 - ( S0*strideX2 ); + } else { // CblasColMajor + // For column-major matrices, the first dimension has the fastest changing index... + S0 = M; + S1 = N; + dx0 = strideX1; + dx1 = strideX2 - ( S0*strideX1 ); + } + ix = offsetX; + for ( i1 = 0; i1 < S1; i1++ ) { + for ( i0 = 0; i0 < S0; i0++ ) { + X[ ix ] *= beta; + ix += dx0; + } + ix += dx1; + } +} + +/** + * Performs matrix multiplication using a naive algorithm which is cache-optimal when `A` is row-major and `B` is column-major. 
+ * + * @private + * @param M number of rows in the matrix `op(A)` and number of rows in the matrix `C` + * @param N number of columns in the matrix `op(B)` and number of columns in the matrix `C` + * @param K number of columns in the matrix `op(A)` and number of rows in the matrix `op(B)` + * @param alpha scalar constant + * @param A first matrix + * @param strideA1 stride of the first dimension of `A` + * @param strideA2 stride of the second dimension of `A` + * @param offsetA starting index for `A` + * @param B second matrix + * @param strideB1 stride of the first dimension of `B` + * @param strideB2 stride of the second dimension of `B` + * @param offsetB starting index for `B` + * @param C third matrix + * @param strideC1 stride of the first dimension of `C` + * @param strideC2 stride of the second dimension of `C` + * @param offsetC starting index for `C` + */ +static void naive( const CBLAS_INT M, const CBLAS_INT N, const CBLAS_INT K, const double alpha, const double *A, const CBLAS_INT strideA1, const CBLAS_INT strideA2, const CBLAS_INT offsetA, const double *B, const CBLAS_INT strideB1, const CBLAS_INT strideB2, const CBLAS_INT offsetB, double *C, const CBLAS_INT strideC1, const CBLAS_INT strideC2, const CBLAS_INT offsetC ) { + CBLAS_INT da0; + CBLAS_INT db0; + CBLAS_INT dc0; + CBLAS_INT dc1; + CBLAS_INT S0; + CBLAS_INT S1; + CBLAS_INT i0; + CBLAS_INT i1; + CBLAS_INT ia; + CBLAS_INT ib; + CBLAS_INT ic; + + S0 = N; + S1 = M; + da0 = strideA2; + db0 = strideB1; + dc0 = strideC2; + dc1 = strideC1 - ( S0*strideC2 ); + + ic = offsetC; + for ( i1 = 0; i1 < S1; i1++ ) { + ia = offsetA + ( i1*strideA1 ); + for ( i0 = 0; i0 < S0; i0++ ) { + ib = offsetB + ( i0*strideB2 ); + C[ ic ] += alpha * API_SUFFIX(c_ddot_ndarray)( K, A, da0, ia, B, db0, ib ); + ic += dc0; + } + ic += dc1; + } +} + +/** + * Performs matrix multiplication using loop tiling. 
+ * + * @private + * @param M number of rows in the matrix `op(A)` and number of rows in the matrix `C` + * @param N number of columns in the matrix `op(B)` and number of columns in the matrix `C` + * @param K number of columns in the matrix `op(A)` and number of rows in the matrix `op(B)` + * @param alpha scalar constant + * @param A first matrix + * @param strideA1 stride of the first dimension of `A` + * @param strideA2 stride of the second dimension of `A` + * @param offsetA starting index for `A` + * @param B second matrix + * @param strideB1 stride of the first dimension of `B` + * @param strideB2 stride of the second dimension of `B` + * @param offsetB starting index for `B` + * @param C third matrix + * @param strideC1 stride of the first dimension of `C` + * @param strideC2 stride of the second dimension of `C` + * @param offsetC starting index for `C` + */ +static void blocked( const CBLAS_INT M, const CBLAS_INT N, const CBLAS_INT K, const double alpha, const double *A, const CBLAS_INT strideA1, const CBLAS_INT strideA2, const CBLAS_INT offsetA, const double *B, const CBLAS_INT strideB1, const CBLAS_INT strideB2, const CBLAS_INT offsetB, double *C, const CBLAS_INT strideC1, const CBLAS_INT strideC2, const CBLAS_INT offsetC ) { + const CBLAS_INT bsize = 32; // TODO: move this to a header file + CBLAS_INT da0; + CBLAS_INT db0; + CBLAS_INT dc0; + CBLAS_INT dc1; + CBLAS_INT oa1; + CBLAS_INT ob0; + CBLAS_INT oc0; + CBLAS_INT oc1; + CBLAS_INT S0; + CBLAS_INT S1; + CBLAS_INT s0; + CBLAS_INT s1; + CBLAS_INT sk; + CBLAS_INT i0; + CBLAS_INT i1; + CBLAS_INT j0; + CBLAS_INT j1; + CBLAS_INT ia; + CBLAS_INT ib; + CBLAS_INT ic; + CBLAS_INT oa; + CBLAS_INT ob; + CBLAS_INT k; + + // Note on variable naming convention: S#, da#, db#, dc#, i#, j# where # corresponds to the loop number, with `0` being the innermost loop... + + S0 = N; + S1 = M; + + // Define increments for the innermost loop: + da0 = strideA2; + db0 = strideB1; + dc0 = strideC2; + + // Iterate over blocks... 
+ for ( j1 = S1; j1 > 0; ) { + if ( j1 < bsize ) { + s1 = j1; + j1 = 0; + } else { + s1 = bsize; + j1 -= bsize; + } + oa1 = offsetA + ( j1*strideA1 ); + oc1 = offsetC + ( j1*strideC1 ); + for ( j0 = S0; j0 > 0; ) { + if ( j0 < bsize ) { + s0 = j0; + j0 = 0; + } else { + s0 = bsize; + j0 -= bsize; + } + ob0 = offsetB + ( j0*strideB2 ); + oc0 = oc1 + ( j0*strideC2 ); // index offset for `C` for the current block + dc1 = strideC1 - ( s0*strideC2 ); // loop offset increment for `C` + for ( k = K; k > 0; ) { + if ( k < bsize ) { + sk = k; + k = 0; + } else { + sk = bsize; + k -= bsize; + } + oa = oa1 + ( k*strideA2 ); + ob = ob0 + ( k*strideB1 ); + ic = oc0; + for ( i1 = 0; i1 < s1; i1++ ) { + ia = oa + ( i1*strideA1 ); + for ( i0 = 0; i0 < s0; i0++ ) { + ib = ob + ( i0*strideB2 ); + C[ ic ] += alpha * API_SUFFIX(c_ddot_ndarray)( sk, A, da0, ia, B, db0, ib ); + ic += dc0; + } + ic += dc1; + } + } + } + } +} + +/** +* Performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` where `op(X)` is either `op(X) = X` or `op(X) = X^T`, `α` and `β` are scalars, `A`, `B`, and `C` are matrices, with `op(A)` an `M` by `K` matrix, `op(B)` a `K` by `N` matrix, and `C` an `M` by `N` matrix. 
+* +* @param transA specifies whether `A` should be transposed, conjugate-transposed, or not transposed +* @param transB specifies whether `B` should be transposed, conjugate-transposed, or not transposed +* @param M number of rows in the matrix `op(A)` and number of rows in the matrix `C` +* @param N number of columns in the matrix `op(B)` and number of columns in the matrix `C` +* @param K number of columns in the matrix `op(A)` and number of rows in the matrix `op(B)` +* @param alpha scalar constant +* @param A first matrix +* @param strideA1 stride of the first dimension of `A` +* @param strideA2 stride of the second dimension of `A` +* @param offsetA starting index for `A` +* @param B second matrix +* @param strideB1 stride of the first dimension of `B` +* @param strideB2 stride of the second dimension of `B` +* @param offsetB starting index for `B` +* @param beta scalar constant +* @param C third matrix +* @param strideC1 stride of the first dimension of `C` +* @param strideC2 stride of the second dimension of `C` +* @param offsetC starting index for `C` +*/ +void API_SUFFIX(c_dgemm_ndarray)( const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, const CBLAS_INT M, const CBLAS_INT N, const CBLAS_INT K, const double alpha, const double *A, const CBLAS_INT strideA1, const CBLAS_INT strideA2, const CBLAS_INT offsetA, const double *B, const CBLAS_INT strideB1, const CBLAS_INT strideB2, const CBLAS_INT offsetB, const double beta, double *C, const CBLAS_INT strideC1, const CBLAS_INT strideC2, const CBLAS_INT offsetC ) { + int64_t sa[ 2 ]; + int64_t sb[ 2 ]; + CBLAS_INT sa1; + CBLAS_INT sa2; + CBLAS_INT sb1; + CBLAS_INT sb2; + bool isrma; + bool isrmb; + + // Note on variable naming convention: da#, i# where # corresponds to the loop number, with `0` being the innermost loop... + + // Perform input argument validation... 
+ if ( transA != CblasTrans && transA != CblasConjTrans && transA != CblasNoTrans ) { + c_xerbla( 1, "c_dgemm_ndarray", "Error: invalid argument. First argument must be a valid transpose operation. Value: `%d`.", transA ); + return; + } + if ( transB != CblasTrans && transB != CblasConjTrans && transB != CblasNoTrans ) { + c_xerbla( 2, "c_dgemm_ndarray", "Error: invalid argument. Second argument must be a valid transpose operation. Value: `%d`.", transB ); + return; + } + if ( M < 0 ) { + c_xerbla( 3, "c_dgemm_ndarray", "Error: invalid argument. Third argument must be a nonnegative integer. Value: `%d`.", M ); + return; + } + if( N < 0 ) { + c_xerbla( 4, "c_dgemm_ndarray", "Error: invalid argument. Fourth argument must be a nonnegative integer. Value: `%d`.", N ); + return; + } + if ( K < 0 ) { + c_xerbla( 5, "c_dgemm_ndarray", "Error: invalid argument. Fifth argument must be a nonnegative integer. Value: `%d`.", K ); + return; + } + + // Check whether we can avoid computation altogether... +if ( M == 0 || N == 0 || ( ( ( alpha == 0.0 ) || ( K == 0 ) ) && ( beta == 1.0) ) ) { + return; + } + + // Form: C = β⋅C + if ( beta == 0.0 ) { + zeros( M, N, C, strideC1, strideC2, offsetC ); + } else if ( beta != 1.0 ) { + scale( M, N, beta, C, strideC1, strideC2, offsetC ); + } + // Check whether we can early return + if ( alpha == 0.0 ) { + return; + } + + sa[ 0 ] = strideA1; + sa[ 1 ] = strideA2; + sb[ 0 ] = strideB1; + sb[ 1 ] = strideB2; + isrma = stdlib_ndarray_is_row_major( 2, sa ); + isrmb = stdlib_ndarray_is_row_major( 2, sb ); + + // Check whether we can avoid loop tiling and simply use the "naive" (cache-optimal) algorithm for performing matrix multiplication... 
+ if ( isrma ) { + if ( transA == CblasNoTrans ) { + if ( !isrmb && transB == CblasNoTrans ) { + // Form: C = α⋅A⋅B + C + naive( M, N, K, alpha, A, strideA1, strideA2, offsetA, B, strideB1, strideB2, offsetB, C, strideC1, strideC2, offsetC ); + return; + } + if ( isrmb && transB != CblasNoTrans ) { + // Form: C = α⋅A⋅B^T + C + naive( M, N, K, alpha, A, strideA1, strideA2, offsetA, B, strideB2, strideB1, offsetB, C, strideC1, strideC2, offsetC ); + return; + } + } + } else if ( transA != CblasNoTrans ) { + if ( isrmb && transB != CblasNoTrans ) { + // Form: C = α⋅A^T⋅B^T + C + naive( M, N, K, alpha, A, strideA2, strideA1, offsetA, B, strideB2, strideB1, offsetB, C, strideC1, strideC2, offsetC ); + return; + } + if ( !isrmb && transB == CblasNoTrans ) { + // Form: C = α⋅A^T⋅B + C + naive( M, N, K, alpha, A, strideA2, strideA1, offsetA, B, strideB1, strideB2, offsetB, C, strideC1, strideC2, offsetC ); + return; + } + } + // Swap strides to perform transposes... + if ( transA != CblasNoTrans ) { + sa1 = strideA2; + sa2 = strideA1; + } else { + sa1 = strideA1; + sa2 = strideA2; + } + if ( transB != CblasNoTrans ) { + sb1 = strideB2; + sb2 = strideB1; + } else { + sb1 = strideB1; + sb2 = strideB2; + } + // Perform loop tiling to promote cache locality + blocked( M, N, K, alpha, A, sa1, sa2, offsetA, B, sb1, sb2, offsetB, C, strideC1, strideC2, offsetC ); + return; +} diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/test/test.dgemm.native.js b/lib/node_modules/@stdlib/blas/base/dgemm/test/test.dgemm.native.js new file mode 100644 index 000000000000..363c1e39baa4 --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/test/test.dgemm.native.js @@ -0,0 +1,714 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +/* eslint-disable max-len */ + +'use strict'; + +// MODULES // + +var resolve = require( 'path' ).resolve; +var tape = require( 'tape' ); +var Float64Array = require( '@stdlib/array/float64' ); +var dscal = require( '@stdlib/blas/base/dscal' ); +var tryRequire = require( '@stdlib/utils/try-require' ); + + +// FIXTURES // + +var cntantb = require( './fixtures/column_major_nta_ntb.json' ); +var ctantb = require( './fixtures/column_major_ta_ntb.json' ); +var cntatb = require( './fixtures/column_major_nta_tb.json' ); +var ctatb = require( './fixtures/column_major_ta_tb.json' ); +var rntantb = require( './fixtures/row_major_nta_ntb.json' ); +var rtantb = require( './fixtures/row_major_ta_ntb.json' ); +var rntatb = require( './fixtures/row_major_nta_tb.json' ); +var rtatb = require( './fixtures/row_major_ta_tb.json' ); + + +// VARIABLES // + +var dgemm = tryRequire( resolve( __dirname, './../lib/dgemm.native.js' ) ); +var opts = { + 'skip': ( dgemm instanceof Error ) +}; + + +// TESTS // + +tape( 'main export is a function', opts, function test( t ) { + t.ok( true, __filename ); + t.strictEqual( typeof dgemm, 'function', 'main export is a function' ); + t.end(); +}); + +tape( 'the function has an arity of 14', opts, function test( t ) { + t.strictEqual( dgemm.length, 14, 'returns expected value' ); + t.end(); +}); + +tape( 'the function throws an error if provided an invalid first argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rntantb; + + values = [ + 'foo', + 'bar', + 'beep', + 'boop' + ]; + + for ( i = 0; i < 
values.length; i++ ) { + t.throws( badValue( values[ i ] ), TypeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( value, data.transA, data.transB, data.M, data.N, data.K, data.alpha, new Float64Array( data.A ), data.lda, new Float64Array( data.B ), data.ldb, data.beta, new Float64Array( data.C ), data.ldc ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid second argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rntantb; + + values = [ + 'foo', + 'bar', + 'beep', + 'boop' + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), TypeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.order, value, data.transB, data.M, data.N, data.K, data.alpha, new Float64Array( data.A ), data.lda, new Float64Array( data.B ), data.ldb, data.beta, new Float64Array( data.C ), data.ldc ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid third argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rntantb; + + values = [ + 'foo', + 'bar', + 'beep', + 'boop' + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), TypeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.order, data.transA, value, data.M, data.N, data.K, data.alpha, new Float64Array( data.A ), data.lda, new Float64Array( data.B ), data.ldb, data.beta, new Float64Array( data.C ), data.ldc ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid fourth argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rntantb; + + values = [ + -1, + -2, + -3 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( 
values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.order, data.transA, data.transB, value, data.N, data.K, data.alpha, new Float64Array( data.A ), data.lda, new Float64Array( data.B ), data.ldb, data.beta, new Float64Array( data.C ), data.ldc ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid fifth argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rntantb; + + values = [ + -1, + -2, + -3 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.order, data.transA, data.transB, data.M, value, data.K, data.alpha, new Float64Array( data.A ), data.lda, new Float64Array( data.B ), data.ldb, data.beta, new Float64Array( data.C ), data.ldc ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid sixth argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rntantb; + + values = [ + -1, + -2, + -3 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.order, data.transA, data.transB, data.M, data.N, value, data.alpha, new Float64Array( data.A ), data.lda, new Float64Array( data.B ), data.ldb, data.beta, new Float64Array( data.C ), data.ldc ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid ninth argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rntantb; + + values = [ + 2, + 1, + 0, + -1, + -2, + -3 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when 
provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, new Float64Array( data.A ), value, new Float64Array( data.B ), data.ldb, data.beta, new Float64Array( data.C ), data.ldc ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid eleventh argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rntantb; + + values = [ + 3, + 2, + 1, + 0, + -1, + -2, + -3 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, new Float64Array( data.A ), data.lda, new Float64Array( data.B ), value, data.beta, new Float64Array( data.C ), data.ldc ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid fourteenth argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rntantb; + + values = [ + 3, + 2, + 1, + 0, + -1, + -2, + -3 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, new Float64Array( data.A ), data.lda, new Float64Array( data.B ), data.ldb, data.beta, new Float64Array( data.C ), value ); + }; + } +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row-major, no-transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = rntantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C 
); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column-major, no-transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = cntantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row-major, transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = rtantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column-major, transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = ctantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected 
= new Float64Array( data.C_out ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row-major, no-transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = rntatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column-major, no-transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = cntatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row-major, transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = rtatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( 
data.C_out ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column-major, transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = ctatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function returns a reference to the third input matrix (row-major)', opts, function test( t ) { + var data; + var out; + var a; + var b; + var c; + + data = rtatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.end(); +}); + +tape( 'the function returns a reference to the third input matrix (column-major)', opts, function test( t ) { + var data; + var out; + var a; + var b; + var c; + + data = ctatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.end(); +}); + +tape( 'if either `M` or `N` is `0`, the function returns the 
third input matrix unchanged (row-major)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = rtatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C ); + + out = dgemm( data.order, data.transA, data.transB, 0, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + + out = dgemm( data.order, data.transA, data.transB, data.M, 0, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + + t.end(); +}); + +tape( 'if either `M` or `N` is `0`, the function returns the third input matrix unchanged (column-major)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = ctatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C ); + + out = dgemm( data.order, data.transA, data.transB, 0, data.N, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + + out = dgemm( data.order, data.transA, data.transB, data.M, 0, data.K, data.alpha, a, data.lda, b, data.ldb, data.beta, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + + t.end(); +}); + +tape( 'if `α` or `K` is `0` and `β` is `1`, the function returns the third input matrix unchanged (row-major)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = rtatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + 
c = new Float64Array( data.C ); + + expected = new Float64Array( data.C ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, 0.0, a, data.lda, b, data.ldb, 1.0, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, 0, data.alpha, a, data.lda, b, data.ldb, 1.0, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + + t.end(); +}); + +tape( 'if `α` or `K` is `0` and `β` is `1`, the function returns the third input matrix unchanged (column-major)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = ctatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, 0.0, a, data.lda, b, data.ldb, 1.0, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, 0, data.alpha, a, data.lda, b, data.ldb, 1.0, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + + t.end(); +}); + +tape( 'if `α` is `0` and `β` is `0`, the function returns the third input matrix filled with zeros (row-major)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = rtatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( c.length ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, 0.0, a, data.lda, b, data.ldb, 0.0, c, data.ldc ); + t.strictEqual( out, c, 'returns 
expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + + t.end(); +}); + +tape( 'if `α` is `0` and `β` is `0`, the function returns the third input matrix filled with zeros (column-major)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = ctatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( c.length ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, 0.0, a, data.lda, b, data.ldb, 0.0, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + + t.end(); +}); + +tape( 'if `α` is `0` and `β` is neither `0` nor `1`, the function returns the third input matrix scaled by `β` (row-major)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = rtatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = dscal( c.length, 10.0, new Float64Array( c ), 1 ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, 0.0, a, data.lda, b, data.ldb, 10.0, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + + t.end(); +}); + +tape( 'if `α` is `0` and `β` is neither `0` nor `1`, the function returns the third input matrix scaled by `β` (column-major)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = ctatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = dscal( c.length, 10.0, new Float64Array( c ), 1 ); + + out = dgemm( data.order, data.transA, data.transB, data.M, data.N, data.K, 0.0, a, data.lda, b, data.ldb, 10.0, c, data.ldc ); + t.strictEqual( out, c, 'returns expected value' ); + 
t.deepEqual( out, expected, 'returns expected value' ); + + t.end(); +}); diff --git a/lib/node_modules/@stdlib/blas/base/dgemm/test/test.ndarray.native.js b/lib/node_modules/@stdlib/blas/base/dgemm/test/test.ndarray.native.js new file mode 100644 index 000000000000..d1872c2b11ca --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dgemm/test/test.ndarray.native.js @@ -0,0 +1,1496 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +/* eslint-disable max-len */ + +'use strict'; + +// MODULES // + +var resolve = require( 'path' ).resolve; +var tape = require( 'tape' ); +var Float64Array = require( '@stdlib/array/float64' ); +var ones = require( '@stdlib/array/ones' ); +var filled = require( '@stdlib/array/filled' ); +var dscal = require( '@stdlib/blas/base/dscal' ); +var tryRequire = require( '@stdlib/utils/try-require' ); + + +// FIXTURES // + +var cacbccntantb = require( './fixtures/ca_cb_cc_nta_ntb.json' ); +var cacbccntatb = require( './fixtures/ca_cb_cc_nta_tb.json' ); +var cacbcctantb = require( './fixtures/ca_cb_cc_ta_ntb.json' ); +var cacbcctatb = require( './fixtures/ca_cb_cc_ta_tb.json' ); +var cacbrcntantb = require( './fixtures/ca_cb_rc_nta_ntb.json' ); +var cacbrcntatb = require( './fixtures/ca_cb_rc_nta_tb.json' ); +var cacbrctantb = require( './fixtures/ca_cb_rc_ta_ntb.json' ); +var cacbrctatb = require( './fixtures/ca_cb_rc_ta_tb.json' ); +var carbccntantb = require( './fixtures/ca_rb_cc_nta_ntb.json' ); +var carbccntatb = require( './fixtures/ca_rb_cc_nta_tb.json' ); +var carbcctantb = require( './fixtures/ca_rb_cc_ta_ntb.json' ); +var carbcctatb = require( './fixtures/ca_rb_cc_ta_tb.json' ); +var carbrcntantb = require( './fixtures/ca_rb_rc_nta_ntb.json' ); +var carbrcntatb = require( './fixtures/ca_rb_rc_nta_tb.json' ); +var carbrctantb = require( './fixtures/ca_rb_rc_ta_ntb.json' ); +var carbrctatb = require( './fixtures/ca_rb_rc_ta_tb.json' ); +var racbccntantb = require( './fixtures/ra_cb_cc_nta_ntb.json' ); +var racbccntatb = require( './fixtures/ra_cb_cc_nta_tb.json' ); +var racbcctantb = require( './fixtures/ra_cb_cc_ta_ntb.json' ); +var racbcctatb = require( './fixtures/ra_cb_cc_ta_tb.json' ); +var racbrcntantb = require( './fixtures/ra_cb_rc_nta_ntb.json' ); +var racbrcntatb = require( './fixtures/ra_cb_rc_nta_tb.json' ); +var racbrctantb = require( './fixtures/ra_cb_rc_ta_ntb.json' ); +var racbrctatb = require( './fixtures/ra_cb_rc_ta_tb.json' ); +var 
rarbccntantb = require( './fixtures/ra_rb_cc_nta_ntb.json' ); +var rarbccntatb = require( './fixtures/ra_rb_cc_nta_tb.json' ); +var rarbcctantb = require( './fixtures/ra_rb_cc_ta_ntb.json' ); +var rarbcctatb = require( './fixtures/ra_rb_cc_ta_tb.json' ); +var rarbrcntantb = require( './fixtures/ra_rb_rc_nta_ntb.json' ); +var rarbrcntatb = require( './fixtures/ra_rb_rc_nta_tb.json' ); +var rarbrctantb = require( './fixtures/ra_rb_rc_ta_ntb.json' ); +var rarbrctatb = require( './fixtures/ra_rb_rc_ta_tb.json' ); +var carbcctantbsa1sa2 = require( './fixtures/ca_rb_cc_ta_ntb_sa1_sa2.json' ); +var carbcctantbsa1nsa2 = require( './fixtures/ca_rb_cc_ta_ntb_sa1n_sa2.json' ); +var carbcctantbsa1sa2n = require( './fixtures/ca_rb_cc_ta_ntb_sa1_sa2n.json' ); +var carbcctantbsa1nsa2n = require( './fixtures/ca_rb_cc_ta_ntb_sa1n_sa2n.json' ); +var rarbcctantbsb1sb2 = require( './fixtures/ra_rb_cc_ta_ntb_sb1_sb2.json' ); +var rarbcctantbsb1nsb2 = require( './fixtures/ra_rb_cc_ta_ntb_sb1n_sb2.json' ); +var rarbcctantbsb1sb2n = require( './fixtures/ra_rb_cc_ta_ntb_sb1_sb2n.json' ); +var rarbcctantbsb1nsb2n = require( './fixtures/ra_rb_cc_ta_ntb_sb1n_sb2n.json' ); +var racbrcntatbsc1sc2 = require( './fixtures/ra_cb_rc_nta_tb_sc1_sc2.json' ); +var racbrcntatbsc1nsc2 = require( './fixtures/ra_cb_rc_nta_tb_sc1n_sc2.json' ); +var racbrcntatbsc1sc2n = require( './fixtures/ra_cb_rc_nta_tb_sc1_sc2n.json' ); +var racbrcntatbsc1nsc2n = require( './fixtures/ra_cb_rc_nta_tb_sc1n_sc2n.json' ); +var rarbrcntantboa = require( './fixtures/ra_rb_rc_nta_ntb_oa.json' ); +var rarbrcntantbob = require( './fixtures/ra_rb_rc_nta_ntb_ob.json' ); +var rarbrcntantboc = require( './fixtures/ra_rb_rc_nta_ntb_oc.json' ); +var cap = require( './fixtures/ra_rb_rc_nta_ntb_complex_access_pattern.json' ); + + +// VARIABLES // + +var dgemm = tryRequire( resolve( __dirname, './../lib/ndarray.native.js' ) ); +var opts = { + 'skip': ( dgemm instanceof Error ) +}; + + +// TESTS // + +tape( 'main export is a function', 
opts, function test( t ) { + t.ok( true, __filename ); + t.strictEqual( typeof dgemm, 'function', 'main export is a function' ); + t.end(); +}); + +tape( 'the function has an arity of 19', opts, function test( t ) { + t.strictEqual( dgemm.length, 19, 'returns expected value' ); + t.end(); +}); + +tape( 'the function throws an error if provided an invalid first argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rarbrcntantb; + + values = [ + 'foo', + 'bar', + 'beep', + 'boop' + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), TypeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( value, data.transB, data.M, data.N, data.K, data.alpha, new Float64Array( data.A ), data.strideA1, data.strideA2, data.offsetA, new Float64Array( data.B ), data.strideB1, data.strideB2, data.offsetB, data.beta, new Float64Array( data.C ), data.strideC1, data.strideC2, data.offsetC ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid second argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rarbrcntantb; + + values = [ + 'foo', + 'bar', + 'beep', + 'boop' + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), TypeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.transA, value, data.M, data.N, data.K, data.alpha, new Float64Array( data.A ), data.strideA1, data.strideA2, data.offsetA, new Float64Array( data.B ), data.strideB1, data.strideB2, data.offsetB, data.beta, new Float64Array( data.C ), data.strideC1, data.strideC2, data.offsetC ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid third argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rarbrcntantb; + + values = [ + -1, + -2, + -3 + 
]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.transA, data.transB, value, data.N, data.K, data.alpha, new Float64Array( data.A ), data.strideA1, data.strideA2, data.offsetA, new Float64Array( data.B ), data.strideB1, data.strideB2, data.offsetB, data.beta, new Float64Array( data.C ), data.strideC1, data.strideC2, data.offsetC ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid fourth argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rarbrcntantb; + + values = [ + -1, + -2, + -3 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.transA, data.transB, data.M, value, data.K, data.alpha, new Float64Array( data.A ), data.strideA1, data.strideA2, data.offsetA, new Float64Array( data.B ), data.strideB1, data.strideB2, data.offsetB, data.beta, new Float64Array( data.C ), data.strideC1, data.strideC2, data.offsetC ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid fifth argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rarbrcntantb; + + values = [ + -1, + -2, + -3 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.transA, data.transB, data.M, data.N, value, data.alpha, new Float64Array( data.A ), data.strideA1, data.strideA2, data.offsetA, new Float64Array( data.B ), data.strideB1, data.strideB2, data.offsetB, data.beta, new Float64Array( data.C ), data.strideC1, data.strideC2, data.offsetC 
); + }; + } +}); + +tape( 'the function throws an error if provided an invalid seventeenth argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rarbrcntantb; + + values = [ + 0 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, new Float64Array( data.A ), data.strideA1, data.strideA2, data.offsetA, new Float64Array( data.B ), data.strideB1, data.strideB2, data.offsetB, data.beta, new Float64Array( data.C ), value, data.strideC2, data.offsetC ); + }; + } +}); + +tape( 'the function throws an error if provided an invalid eighteenth argument', opts, function test( t ) { + var values; + var data; + var i; + + data = rarbrcntantb; + + values = [ + 0 + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[ i ] ), RangeError, 'throws an error when provided ' + values[ i ] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, new Float64Array( data.A ), data.strideA1, data.strideA2, data.offsetA, new Float64Array( data.B ), data.strideB1, data.strideB2, data.offsetB, data.beta, new Float64Array( data.C ), data.strideC1, value, data.offsetC ); + }; + } +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, column_major, column_major, no-transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = cacbccntantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, 
data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, column_major, column_major, transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = cacbcctantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, column_major, column_major, no-transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = cacbccntatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, column_major, column_major, transpose, transpose)', opts, function 
test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = cacbcctatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, column_major, row_major, no-transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = cacbrcntantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, column_major, row_major, transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = cacbrctantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, 
data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, column_major, row_major, no-transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = cacbrcntatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, column_major, row_major, transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = cacbrctatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, row_major, column_major, no-transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + 
data = carbccntantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, row_major, column_major, transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = carbcctantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, row_major, column_major, no-transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = carbccntatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 
'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, row_major, column_major, transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = carbcctatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, row_major, row_major, no-transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = carbrcntantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, row_major, row_major, transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = carbrctantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + 
c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, row_major, row_major, no-transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = carbrcntatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (column_major, row_major, row_major, transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = carbrctatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 
'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, column_major, column_major, no-transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = racbccntantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, column_major, column_major, transpose, no-transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = racbcctantb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); + t.strictEqual( out, c, 'returns expected value' ); + t.deepEqual( out, expected, 'returns expected value' ); + t.end(); +}); + +tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, column_major, column_major, no-transpose, transpose)', opts, function test( t ) { + var expected; + var data; + var out; + var a; + var b; + var c; + + data = racbccntatb; + + a = new Float64Array( data.A ); + b = new Float64Array( data.B ); + c = new Float64Array( data.C ); + + expected = new Float64Array( data.C_out ); + + out = dgemm( 
data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, column_major, column_major, transpose, transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = racbcctatb; // fixture name mirrors the title tuple: row-major `A`, column-major `B`, column-major `C`, `A` transposed, `B` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C ); // typed array built from the initial `C` values of the fixture; this exact object is handed to `dgemm`
+
+	expected = new Float64Array( data.C_out ); // reference result stored in the fixture
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' ); // identity check: the returned array must be `c` itself
+	t.deepEqual( out, expected, 'returns expected value' ); // contents after the call must match the reference result
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, column_major, row_major, no-transpose, no-transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = racbrcntantb; // per the title: row-major `A`, column-major `B`, row-major `C`; neither `A` nor `B` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, column_major, row_major, transpose, no-transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = racbrctantb; // per the title: row-major `A`, column-major `B`, row-major `C`; only `A` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, column_major, row_major, no-transpose, transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = racbrcntatb; // per the title: row-major `A`, column-major `B`, row-major `C`; only `B` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, column_major, row_major, transpose, transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = racbrctatb; // per the title: row-major `A`, column-major `B`, row-major `C`; both `A` and `B` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, row_major, column_major, no-transpose, no-transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbccntantb; // per the title: row-major `A`, row-major `B`, column-major `C`; neither `A` nor `B` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, row_major, column_major, transpose, no-transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbcctantb; // per the title: row-major `A`, row-major `B`, column-major `C`; only `A` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, row_major, column_major, no-transpose, transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbccntatb; // per the title: row-major `A`, row-major `B`, column-major `C`; only `B` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, row_major, column_major, transpose, transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbcctatb; // per the title: row-major `A`, row-major `B`, column-major `C`; both `A` and `B` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, row_major, row_major, no-transpose, no-transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrcntantb; // per the title: `A`, `B` and `C` all row-major; neither `A` nor `B` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, row_major, row_major, transpose, no-transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrctantb; // per the title: `A`, `B` and `C` all row-major; only `A` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, row_major, row_major, no-transpose, transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrcntatb; // per the title: `A`, `B` and `C` all row-major; only `B` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function performs the matrix-matrix operation `C = α*op(A)*op(B) + β*C` (row_major, row_major, row_major, transpose, transpose)', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrctatb; // per the title: `A`, `B` and `C` all row-major; both `A` and `B` transposed
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function returns a reference to the third input matrix', opts, function test( t ) {
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrcntantb; // reuses the all-row-major, no-transpose fixture; only the identity of the return value is checked here
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' ); // `out` and `c` must be the same object
+	t.end();
+});
+
+tape( 'if either `M` or `N` is `0`, the function returns the third input matrix unchanged', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrctatb;
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C ); // the initial `C` values, not `C_out`: both calls below must leave `c` equal to this
+
+	out = dgemm( data.transA, data.transB, 0, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); // M = 0
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+
+	out = dgemm( data.transA, data.transB, data.M, 0, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC ); // N = 0
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+
+	t.end();
+});
+
+tape( 'if `α` or `K` is `0` and `β` is `1`, the function returns the third input matrix unchanged', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrctatb;
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C ); // the initial `C` values: both calls below must leave `c` equal to this
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, 0.0, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, 1.0, c, data.strideC1, data.strideC2, data.offsetC ); // alpha = 0, beta = 1
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, 0, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, 1.0, c, data.strideC1, data.strideC2, data.offsetC ); // K = 0, beta = 1
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+
+	t.end();
+});
+
+tape( 'if `α` is `0` and `β` is `0`, the function returns the third input matrix filled with zeros', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrctatb;
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( c.length ); // a length-constructed typed array is zero-filled, so this is the all-zero reference
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, 0.0, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, 0.0, c, data.strideC1, data.strideC2, data.offsetC ); // alpha = 0, beta = 0
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+
+	t.end();
+});
+
+tape( 'if `α` is `0` and `β` is neither `0` nor `1`, the function returns the third input matrix scaled by `β`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrctatb;
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = dscal( c.length, 10.0, new Float64Array( c ), 1 ); // reference built by passing a separate copy of `c` through `dscal` with factor 10.0, the same value used for beta below
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, 0.0, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, 10.0, c, data.strideC1, data.strideC2, data.offsetC ); // alpha = 0, beta = 10
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+
+	t.end();
+});
+
+tape( 'the function supports specifying the strides of the first and second dimensions of `A`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = carbcctantbsa1sa2; // NOTE(review): name suggests column-major `A`, row-major `B`, column-major `C`, `A` transposed, with explicit `strideA1`/`strideA2` - confirm against the fixture
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports a negative stride for the first dimension of `A`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = carbcctantbsa1nsa2; // suffix `sa1n`: fixture with a negative `strideA1`, per the title
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports a negative stride for the second dimension of `A`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = carbcctantbsa1sa2n; // suffix `sa2n`: fixture with a negative `strideA2`, per the title
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports negative strides for `A`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = carbcctantbsa1nsa2n; // suffix `sa1nsa2n`: presumably both strides of `A` are negative - confirm against the fixture
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports specifying an offset parameter for `A`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrcntantboa; // suffix `oa`: fixture exercising `offsetA`, per the title
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports specifying the strides of the first and second dimensions of `B`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbcctantbsb1sb2; // NOTE(review): name suggests row-major `A`, row-major `B`, column-major `C`, `A` transposed, with explicit `strideB1`/`strideB2` - confirm against the fixture
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports a negative stride for the first dimension of `B`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbcctantbsb1nsb2; // suffix `sb1n`: fixture with a negative `strideB1`, per the title
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports a negative stride for the second dimension of `B`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbcctantbsb1sb2n; // suffix `sb2n`: fixture with a negative `strideB2`, per the title
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports negative strides for `B`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbcctantbsb1nsb2n; // suffix `sb1nsb2n`: presumably both strides of `B` are negative - confirm against the fixture
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports specifying an offset parameter for `B`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrcntantbob; // suffix `ob`: fixture exercising `offsetB`, per the title
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports specifying the strides of the first and second dimensions of `C`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = racbrcntatbsc1sc2; // NOTE(review): name suggests row-major `A`, column-major `B`, row-major `C`, `B` transposed, with explicit `strideC1`/`strideC2` - confirm against the fixture
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports a negative stride for the first dimension of `C`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = racbrcntatbsc1nsc2; // suffix `sc1n`: fixture with a negative `strideC1`, per the title
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports a negative stride for the second dimension of `C`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = racbrcntatbsc1sc2n; // suffix `sc2n`: fixture with a negative `strideC2`, per the title
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports negative strides for `C`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = racbrcntatbsc1nsc2n; // suffix `sc1nsc2n`: presumably both strides of `C` are negative - confirm against the fixture
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports specifying an offset parameter for `C`', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = rarbrcntantboc; // suffix `oc`: fixture exercising `offsetC`, per the title
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports complex access patterns', opts, function test( t ) {
+	var expected;
+	var data;
+	var out;
+	var a;
+	var b;
+	var c;
+
+	data = cap; // NOTE(review): `cap` presumably abbreviates the complex access patterns of the title; the actual stride/offset mix lives in the fixture - confirm there
+
+	a = new Float64Array( data.A );
+	b = new Float64Array( data.B );
+	c = new Float64Array( data.C );
+
+	expected = new Float64Array( data.C_out );
+
+	out = dgemm( data.transA, data.transB, data.M, data.N, data.K, data.alpha, a, data.strideA1, data.strideA2, data.offsetA, b, data.strideB1, data.strideB2, data.offsetB, data.beta, c, data.strideC1, data.strideC2, data.offsetC );
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports computation over large arrays (row-major, row-major, row-major)', opts, function test( t ) {
+	var expected;
+	var out;
+	var N;
+	var a;
+	var b;
+	var c;
+
+	N = 100; // used below as M, N and K, so all three matrices are N-by-N
+
+	a = ones( N*N, 'float64' );
+	b = ones( a.length, 'float64' );
+	c = new Float64Array( a.length ); // zero-filled initial `C`
+
+	expected = filled( N, a.length, 'float64' ); // assuming `ones` yields all-1.0 arrays, each entry of A*B is a sum of N ones; with alpha = beta = 1 and `C` starting at zero every output entry is N
+
+	out = dgemm( 'no-transpose', 'no-transpose', N, N, N, 1.0, a, N, 1, 0, b, N, 1, 0, 1.0, c, N, 1, 0 ); // strides ( N, 1 ) and offset 0 for all three matrices, i.e. the row-major case of the title
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});
+
+tape( 'the function supports computation over large arrays (column-major, column-major, column-major)', opts, function test( t ) {
+	var expected;
+	var out;
+	var N;
+	var a;
+	var b;
+	var c;
+
+	N = 100; // used below as M, N and K, so all three matrices are N-by-N
+
+	a = ones( N*N, 'float64' );
+	b = ones( a.length, 'float64' );
+	c = new Float64Array( a.length ); // zero-filled initial `C`
+
+	expected = filled( N, a.length, 'float64' ); // same reasoning as the row-major case: every output entry is expected to equal N
+
+	out = dgemm( 'no-transpose', 'no-transpose', N, N, N, 1.0, a, 1, N, 0, b, 1, N, 0, 1.0, c, 1, N, 0 ); // strides ( 1, N ) and offset 0 for all three matrices, i.e. the column-major case of the title
+	t.strictEqual( out, c, 'returns expected value' );
+	t.deepEqual( out, expected, 'returns expected value' );
+	t.end();
+});