Skip to content

feat: update JavaScript implementation and add C ndarray API for blas/base/snrm2 #2924

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 124 additions & 1 deletion lib/node_modules/@stdlib/blas/base/snrm2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ var z = snrm2( 4, x1, 2 );
// returns 5.0
```

If either `N` or `stride` is less than or equal to `0`, the function returns `0`.
If `N` is less than or equal to `0`, the function returns `0`.

#### snrm2.ndarray( N, x, stride, offset )

Expand Down Expand Up @@ -160,6 +160,129 @@ console.log( out );

<!-- /.examples -->

<!-- C interface documentation. -->

* * *

<section class="c">

## C APIs

<!-- Section to include introductory text. Make sure to keep an empty line after the intro `section` element and another before the `/section` close. -->

<section class="intro">

</section>

<!-- /.intro -->

<!-- C usage documentation. -->

<section class="usage">

### Usage

```c
#include "stdlib/blas/base/snrm2.h"
```

#### c_snrm2( N, \*X, stride )

Computes the L2-norm of a single-precision floating-point vector.

```c
const float x[] = { 1.0f, 2.0f, 2.0f, -7.0f, -2.0f, 3.0f, 4.0f, 2.0f };

float norm = c_snrm2( 4, x, 2 );
// returns 5.0f
```

The function accepts the following arguments:

- **N**: `[in] CBLAS_INT` number of indexed elements.
- **X**: `[in] float*` input array.
- **stride**: `[in] CBLAS_INT` index increment for `X`.

```c
float c_snrm2( const CBLAS_INT N, const float *X, const CBLAS_INT stride );
```

#### c_snrm2_ndarray( N, \*X, stride, offset )

Computes the L2-norm of a single-precision floating-point vector using alternative indexing semantics.

```c
const float x[] = { 1.0f, 2.0f, 2.0f, -7.0f, -2.0f, 3.0f, 4.0f, 2.0f };

float norm = c_snrm2_ndarray( 4, x, 2, 0 );
// returns 5.0f
```

The function accepts the following arguments:

- **N**: `[in] CBLAS_INT` number of indexed elements.
- **X**: `[in] float*` input array.
- **stride**: `[in] CBLAS_INT` index increment for `X`.
- **offset**: `[in] CBLAS_INT` starting index for `X`.

```c
float c_snrm2_ndarray( const CBLAS_INT N, const float *X, const CBLAS_INT stride, const CBLAS_INT offset );
```

</section>

<!-- /.usage -->

<!-- C API usage notes. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->

<section class="notes">

</section>

<!-- /.notes -->

<!-- C API usage examples. -->

<section class="examples">

### Examples

```c
#include "stdlib/blas/base/snrm2.h"
#include <stdio.h>

int main( void ) {
// Create a strided array:
const float x[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };

// Specify the number of indexed elements:
const int N = 8;

// Specify a stride:
const int strideX = 1;

// Compute the L2-norm:
float l2 = c_snrm2( N, x, strideX );

// Print the result:
printf( "L2-norm: %f\n", l2 );

// Compute the L2-norm using alternative indexing semantics (negative stride, starting at the last element, index 7):
l2 = c_snrm2_ndarray( N, x, -strideX, 7 );

// Print the result:
printf( "L2-norm: %f\n", l2 );
}
```

</section>

<!-- /.examples -->

</section>

<!-- /.c -->

<!-- Section for related `stdlib` packages. Do not manually edit this section, as it is automatically populated. -->

<section class="related">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ static float rand_float( void ) {
* @param len array length
* @return elapsed time in seconds
*/
static double benchmark( int iterations, int len ) {
static double benchmark1( int iterations, int len ) {
double elapsed;
float x[ len ];
float z;
Expand All @@ -120,6 +120,39 @@ static double benchmark( int iterations, int len ) {
return elapsed;
}

/**
* Runs a benchmark which times repeated calls to the ndarray interface (`c_snrm2_ndarray`).
*
* @param iterations number of iterations
* @param len array length
* @return elapsed time in seconds
*/
static double benchmark2( int iterations, int len ) {
	float x[ len ];
	float norm = 0.0f;
	double start;
	double stop;
	int k;

	// Fill the input array with scaled and shifted pseudorandom values:
	for ( k = 0; k < len; k++ ) {
		x[ k ] = ( rand_float() * 20000.0f ) - 10000.0f;
	}

	// Time the calls, bailing out early if a result is NaN (NaN is the only value which is not equal to itself):
	start = tic();
	k = 0;
	while ( k < iterations ) {
		norm = c_snrm2_ndarray( len, x, 1, 0 );
		if ( norm != norm ) {
			printf( "should not return NaN\n" );
			break;
		}
		k += 1;
	}
	stop = tic();

	// Check the last computed result once more outside of the timed region:
	if ( norm != norm ) {
		printf( "should not return NaN\n" );
	}
	return stop - start;
}

/**
* Main execution sequence.
*/
Expand All @@ -142,7 +175,14 @@ int main( void ) {
for ( j = 0; j < REPEATS; j++ ) {
count += 1;
printf( "# c::%s:len=%d\n", NAME, len );
elapsed = benchmark( iter, len );
elapsed = benchmark1( iter, len );
print_results( iter, elapsed );
printf( "ok %d benchmark finished\n", count );
}
for ( j = 0; j < REPEATS; j++ ) {
count += 1;
printf( "# c::%s:ndarray:len=%d\n", NAME, len );
elapsed = benchmark2( iter, len );
print_results( iter, elapsed );
printf( "ok %d benchmark finished\n", count );
}
Expand Down
2 changes: 1 addition & 1 deletion lib/node_modules/@stdlib/blas/base/snrm2/docs/repl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
Indexing is relative to the first index. To introduce an offset, use a typed
array view.

If `N <= 0` or `stride <= 0`, the function returns `0`.
If `N <= 0`, the function returns `0`.

Parameters
----------
Expand Down
6 changes: 6 additions & 0 deletions lib/node_modules/@stdlib/blas/base/snrm2/examples/c/example.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,10 @@ int main( void ) {

// Print the result:
printf( "L2-norm: %f\n", l2 );

// Compute the L2-norm using alternative indexing semantics:
l2 = c_snrm2_ndarray( N, x, -strideX, 7 );

// Print the result:
printf( "L2-norm: %f\n", l2 );
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#ifndef SNRM2_H
#define SNRM2_H

#include "stdlib/blas/base/shared.h"

/*
* If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler.
*/
Expand All @@ -32,7 +34,12 @@ extern "C" {
/**
* Computes the L2-norm of a single-precision floating-point vector.
*/
float c_snrm2( const int N, const float *X, const int stride );
float API_SUFFIX(c_snrm2)( const CBLAS_INT N, const float *X, const CBLAS_INT stride );

/**
* Computes the L2-norm of a single-precision floating-point vector using alternative indexing semantics.
*/
float API_SUFFIX(c_snrm2_ndarray)( const CBLAS_INT N, const float *X, const CBLAS_INT stride, const CBLAS_INT offset );

#ifdef __cplusplus
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#ifndef SNRM2_CBLAS_H
#define SNRM2_CBLAS_H

#include "stdlib/blas/base/shared.h"

/*
* If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler.
*/
Expand All @@ -32,7 +34,7 @@ extern "C" {
/**
* Computes the L2-norm of a single-precision floating-point vector.
*/
float cblas_snrm2( const int N, const float *X, const int stride );
float API_SUFFIX(cblas_snrm2)( const CBLAS_INT N, const float *X, const CBLAS_INT stride );

#ifdef __cplusplus
}
Expand Down
93 changes: 73 additions & 20 deletions lib/node_modules/@stdlib/blas/base/snrm2/lib/ndarray.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,20 @@

// MODULES //

var sqrtf = require( '@stdlib/math/base/special/sqrtf' );
var FLOAT32_MAX = require( '@stdlib/constants/float32/max' );
var f32 = require( '@stdlib/number/float64/base/to-float32' );
var absf = require( '@stdlib/math/base/special/absf' );
var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
var abs2f = require( '@stdlib/math/base/special/abs2f' );
var sqrtf = require( '@stdlib/math/base/special/sqrtf' );


// VARIABLES //

// Blue's scaling constants:
var tsml = 1.08420217E-19;
var tbig = 4.50359963E+15;
var ssml = 3.77789319E+22;
var sbig = 1.32348898E-23;


// MAIN //
Expand All @@ -45,37 +56,79 @@ var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
* // returns 5.0
*/
function snrm2( N, x, stride, offset ) {
var scale;
var ssq;
var notbig;
var sumsq;
var abig;
var amed;
var asml;
var ymax;
var ymin;
var scl;
var ax;
var ix;
var v;
var i;

if ( N <= 0 ) {
return 0.0;
}
if ( N === 1 ) {
return absf( x[ offset ] );
}
ix = offset;
scale = 0.0;
ssq = 1.0;

// Initialize loop values for accumulation:
notbig = true;

sumsq = 0.0;
abig = 0.0;
amed = 0.0;
asml = 0.0;
scl = 1.0;

// Compute the sum of squares using 3 accumulators--`abig` (sum of squares scaled down to avoid overflow), `asml` (sum of squares scaled up to avoid underflow), `amed` (sum of squares that do not require scaling)--and thresholds and multipliers--`tbig` (values bigger than this are scaled down by `sbig`) and `tsml` (values smaller than this are scaled up by `ssml`)...
for ( i = 0; i < N; i++ ) {
if ( x[ ix ] !== 0.0 ) {
ax = absf( x[ ix ] );
if ( scale < ax ) {
v = float64ToFloat32( scale/ax );
ssq = float64ToFloat32( 1.0 + float64ToFloat32( ssq * float64ToFloat32( v*v ) ) ); // eslint-disable-line max-len
scale = ax;
} else {
v = float64ToFloat32( ax/scale );
ssq = float64ToFloat32( ssq + float64ToFloat32( v*v ) );
ax = absf( x[ ix ] );
if ( ax > tbig ) {
abig = f32( abig + abs2f( ax * sbig ) );
notbig = false;
} else if ( ax < tsml ) {
if ( notbig ) {
asml = f32( asml + abs2f( ax * ssml ) );
}
} else {
amed = f32( amed + f32( ax * ax ) );
}
ix += stride;
}
return float64ToFloat32( scale * sqrtf( ssq ) );
// Combine `abig` and `amed` or `amed` and `asml` if more than one accumulator was used...
if ( abig > 0.0 ) {
// Combine `abig` and `amed` if `abig` > 0...
if ( amed > 0.0 || ( amed > FLOAT32_MAX ) || ( amed !== amed ) ) {
abig = f32( abig + f32( f32( amed * sbig ) * sbig ) );
}
scl = f32( 1.0 / sbig );
sumsq = abig;
} else if ( asml > 0.0 ) {
// Combine `amed` and `asml` if `asml` > 0...
if ( amed > 0.0 || amed > FLOAT32_MAX || ( amed !== amed ) ) {
amed = sqrtf( amed );
asml = f32( sqrtf( asml ) / ssml );
if ( asml > amed ) {
ymin = amed;
ymax = asml;
} else {
ymin = asml;
ymax = amed;
}
scl = 1.0;
sumsq = f32( f32( ymax * ymax ) * f32( 1.0 + abs2f( ymin / ymax ) ) ); // eslint-disable-line max-len
} else {
scl = f32( 1.0 / ssml );
sumsq = asml;
}
} else {
// All values are mid-range...
scl = 1.0;
sumsq = amed;
}
return f32( sqrtf( sumsq ) * scl );
}


Expand Down
Loading
Loading