Permalink
Fetching contributors…
Cannot retrieve contributors at this time
3689 lines (3219 sloc) 101 KB
/* -*- c -*- */
/*
*****************************************************************************
** INCLUDES **
*****************************************************************************
*/
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#include "Python.h"
#include "numpy/arrayobject.h"
#include "numpy/ufuncobject.h"
#include "npy_pycompat.h"
#include "npy_config.h"
#include <stddef.h>
#include <stdio.h>
#include <assert.h>
#include <math.h>
static const char* umath_linalg_version_string = "0.1.5";
/*
****************************************************************************
* Debugging support *
****************************************************************************
*/
#define TRACE_TXT(...) do { fprintf (stderr, __VA_ARGS__); } while (0)
#define STACK_TRACE do {} while (0)
#define TRACE\
do { \
fprintf (stderr, \
"%s:%d:%s\n", \
__FILE__, \
__LINE__, \
__FUNCTION__); \
STACK_TRACE; \
} while (0)
#if 0
#include <execinfo.h>
void
dbg_stack_trace()
{
void *trace[32];
size_t size;
size = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
backtrace_symbols_fd(trace, size, 1);
}
#undef STACK_TRACE
#define STACK_TRACE do { dbg_stack_trace(); } while (0)
#endif
/*
*****************************************************************************
* BLAS/LAPACK calling macros *
*****************************************************************************
*/
#ifdef NO_APPEND_FORTRAN
# define FNAME(x) x
#else
# define FNAME(x) x##_
#endif
typedef struct { float r, i; } f2c_complex;
typedef struct { double r, i; } f2c_doublecomplex;
/* typedef long int (*L_fp)(); */
extern int
FNAME(sgeev)(char *jobvl, char *jobvr, int *n,
float a[], int *lda, float wr[], float wi[],
float vl[], int *ldvl, float vr[], int *ldvr,
float work[], int lwork[],
int *info);
extern int
FNAME(dgeev)(char *jobvl, char *jobvr, int *n,
double a[], int *lda, double wr[], double wi[],
double vl[], int *ldvl, double vr[], int *ldvr,
double work[], int lwork[],
int *info);
extern int
FNAME(cgeev)(char *jobvl, char *jobvr, int *n,
f2c_doublecomplex a[], int *lda,
f2c_doublecomplex w[],
f2c_doublecomplex vl[], int *ldvl,
f2c_doublecomplex vr[], int *ldvr,
f2c_doublecomplex work[], int *lwork,
double rwork[],
int *info);
extern int
FNAME(zgeev)(char *jobvl, char *jobvr, int *n,
f2c_doublecomplex a[], int *lda,
f2c_doublecomplex w[],
f2c_doublecomplex vl[], int *ldvl,
f2c_doublecomplex vr[], int *ldvr,
f2c_doublecomplex work[], int *lwork,
double rwork[],
int *info);
extern int
FNAME(ssyevd)(char *jobz, char *uplo, int *n,
float a[], int *lda, float w[], float work[],
int *lwork, int iwork[], int *liwork,
int *info);
extern int
FNAME(dsyevd)(char *jobz, char *uplo, int *n,
double a[], int *lda, double w[], double work[],
int *lwork, int iwork[], int *liwork,
int *info);
extern int
FNAME(cheevd)(char *jobz, char *uplo, int *n,
f2c_complex a[], int *lda,
float w[], f2c_complex work[],
int *lwork, float rwork[], int *lrwork, int iwork[],
int *liwork,
int *info);
extern int
FNAME(zheevd)(char *jobz, char *uplo, int *n,
f2c_doublecomplex a[], int *lda,
double w[], f2c_doublecomplex work[],
int *lwork, double rwork[], int *lrwork, int iwork[],
int *liwork,
int *info);
extern int
FNAME(sgelsd)(int *m, int *n, int *nrhs,
float a[], int *lda, float b[], int *ldb,
float s[], float *rcond, int *rank,
float work[], int *lwork, int iwork[],
int *info);
extern int
FNAME(dgelsd)(int *m, int *n, int *nrhs,
double a[], int *lda, double b[], int *ldb,
double s[], double *rcond, int *rank,
double work[], int *lwork, int iwork[],
int *info);
extern int
FNAME(cgelsd)(int *m, int *n, int *nrhs,
f2c_complex a[], int *lda,
f2c_complex b[], int *ldb,
float s[], float *rcond, int *rank,
f2c_complex work[], int *lwork,
float rwork[], int iwork[],
int *info);
extern int
FNAME(zgelsd)(int *m, int *n, int *nrhs,
f2c_doublecomplex a[], int *lda,
f2c_doublecomplex b[], int *ldb,
double s[], double *rcond, int *rank,
f2c_doublecomplex work[], int *lwork,
double rwork[], int iwork[],
int *info);
extern int
FNAME(sgesv)(int *n, int *nrhs,
float a[], int *lda,
int ipiv[],
float b[], int *ldb,
int *info);
extern int
FNAME(dgesv)(int *n, int *nrhs,
double a[], int *lda,
int ipiv[],
double b[], int *ldb,
int *info);
extern int
FNAME(cgesv)(int *n, int *nrhs,
f2c_complex a[], int *lda,
int ipiv[],
f2c_complex b[], int *ldb,
int *info);
extern int
FNAME(zgesv)(int *n, int *nrhs,
f2c_doublecomplex a[], int *lda,
int ipiv[],
f2c_doublecomplex b[], int *ldb,
int *info);
extern int
FNAME(sgetrf)(int *m, int *n,
float a[], int *lda,
int ipiv[],
int *info);
extern int
FNAME(dgetrf)(int *m, int *n,
double a[], int *lda,
int ipiv[],
int *info);
extern int
FNAME(cgetrf)(int *m, int *n,
f2c_complex a[], int *lda,
int ipiv[],
int *info);
extern int
FNAME(zgetrf)(int *m, int *n,
f2c_doublecomplex a[], int *lda,
int ipiv[],
int *info);
extern int
FNAME(spotrf)(char *uplo, int *n,
float a[], int *lda,
int *info);
extern int
FNAME(dpotrf)(char *uplo, int *n,
double a[], int *lda,
int *info);
extern int
FNAME(cpotrf)(char *uplo, int *n,
f2c_complex a[], int *lda,
int *info);
extern int
FNAME(zpotrf)(char *uplo, int *n,
f2c_doublecomplex a[], int *lda,
int *info);
extern int
FNAME(sgesdd)(char *jobz, int *m, int *n,
float a[], int *lda, float s[], float u[],
int *ldu, float vt[], int *ldvt, float work[],
int *lwork, int iwork[], int *info);
extern int
FNAME(dgesdd)(char *jobz, int *m, int *n,
double a[], int *lda, double s[], double u[],
int *ldu, double vt[], int *ldvt, double work[],
int *lwork, int iwork[], int *info);
extern int
FNAME(cgesdd)(char *jobz, int *m, int *n,
f2c_complex a[], int *lda,
float s[], f2c_complex u[], int *ldu,
f2c_complex vt[], int *ldvt,
f2c_complex work[], int *lwork,
float rwork[], int iwork[], int *info);
extern int
FNAME(zgesdd)(char *jobz, int *m, int *n,
f2c_doublecomplex a[], int *lda,
double s[], f2c_doublecomplex u[], int *ldu,
f2c_doublecomplex vt[], int *ldvt,
f2c_doublecomplex work[], int *lwork,
double rwork[], int iwork[], int *info);
extern int
FNAME(spotrs)(char *uplo, int *n, int *nrhs,
float a[], int *lda,
float b[], int *ldb,
int *info);
extern int
FNAME(dpotrs)(char *uplo, int *n, int *nrhs,
double a[], int *lda,
double b[], int *ldb,
int *info);
extern int
FNAME(cpotrs)(char *uplo, int *n, int *nrhs,
f2c_complex a[], int *lda,
f2c_complex b[], int *ldb,
int *info);
extern int
FNAME(zpotrs)(char *uplo, int *n, int *nrhs,
f2c_doublecomplex a[], int *lda,
f2c_doublecomplex b[], int *ldb,
int *info);
extern int
FNAME(spotri)(char *uplo, int *n,
float a[], int *lda,
int *info);
extern int
FNAME(dpotri)(char *uplo, int *n,
double a[], int *lda,
int *info);
extern int
FNAME(cpotri)(char *uplo, int *n,
f2c_complex a[], int *lda,
int *info);
extern int
FNAME(zpotri)(char *uplo, int *n,
f2c_doublecomplex a[], int *lda,
int *info);
extern int
FNAME(scopy)(int *n,
float *sx, int *incx,
float *sy, int *incy);
extern int
FNAME(dcopy)(int *n,
double *sx, int *incx,
double *sy, int *incy);
extern int
FNAME(ccopy)(int *n,
f2c_complex *sx, int *incx,
f2c_complex *sy, int *incy);
extern int
FNAME(zcopy)(int *n,
f2c_doublecomplex *sx, int *incx,
f2c_doublecomplex *sy, int *incy);
extern float
FNAME(sdot)(int *n,
float *sx, int *incx,
float *sy, int *incy);
extern double
FNAME(ddot)(int *n,
double *sx, int *incx,
double *sy, int *incy);
extern void
FNAME(cdotu)(f2c_complex *ret, int *n,
f2c_complex *sx, int *incx,
f2c_complex *sy, int *incy);
extern void
FNAME(zdotu)(f2c_doublecomplex *ret, int *n,
f2c_doublecomplex *sx, int *incx,
f2c_doublecomplex *sy, int *incy);
extern void
FNAME(cdotc)(f2c_complex *ret, int *n,
f2c_complex *sx, int *incx,
f2c_complex *sy, int *incy);
extern void
FNAME(zdotc)(f2c_doublecomplex *ret, int *n,
f2c_doublecomplex *sx, int *incx,
f2c_doublecomplex *sy, int *incy);
extern int
FNAME(sgemm)(char *transa, char *transb,
int *m, int *n, int *k,
float *alpha,
float *a, int *lda,
float *b, int *ldb,
float *beta,
float *c, int *ldc);
extern int
FNAME(dgemm)(char *transa, char *transb,
int *m, int *n, int *k,
double *alpha,
double *a, int *lda,
double *b, int *ldb,
double *beta,
double *c, int *ldc);
extern int
FNAME(cgemm)(char *transa, char *transb,
int *m, int *n, int *k,
f2c_complex *alpha,
f2c_complex *a, int *lda,
f2c_complex *b, int *ldb,
f2c_complex *beta,
f2c_complex *c, int *ldc);
extern int
FNAME(zgemm)(char *transa, char *transb,
int *m, int *n, int *k,
f2c_doublecomplex *alpha,
f2c_doublecomplex *a, int *lda,
f2c_doublecomplex *b, int *ldb,
f2c_doublecomplex *beta,
f2c_doublecomplex *c, int *ldc);
#define LAPACK_T(FUNC) \
TRACE_TXT("Calling LAPACK ( " # FUNC " )\n"); \
FNAME(FUNC)
#define BLAS(FUNC) \
FNAME(FUNC)
#define LAPACK(FUNC) \
FNAME(FUNC)
typedef int fortran_int;
typedef float fortran_real;
typedef double fortran_doublereal;
typedef f2c_complex fortran_complex;
typedef f2c_doublecomplex fortran_doublecomplex;
/*
*****************************************************************************
** Some handy functions **
*****************************************************************************
*/
static NPY_INLINE int
get_fp_invalid_and_clear(void)
{
int status;
status = npy_clear_floatstatus_barrier((char*)&status);
return !!(status & NPY_FPE_INVALID);
}
static NPY_INLINE void
set_fp_invalid_or_clear(int error_occurred)
{
if (error_occurred) {
npy_set_floatstatus_invalid();
}
else {
npy_clear_floatstatus_barrier((char*)&error_occurred);
}
}
/*
*****************************************************************************
** Some handy constants **
*****************************************************************************
*/
#define UMATH_LINALG_MODULE_NAME "_umath_linalg"
typedef union {
fortran_complex f;
npy_cfloat npy;
float array[2];
} COMPLEX_t;
typedef union {
fortran_doublecomplex f;
npy_cdouble npy;
double array[2];
} DOUBLECOMPLEX_t;
static float s_one;
static float s_zero;
static float s_minus_one;
static float s_ninf;
static float s_nan;
static double d_one;
static double d_zero;
static double d_minus_one;
static double d_ninf;
static double d_nan;
static COMPLEX_t c_one;
static COMPLEX_t c_zero;
static COMPLEX_t c_minus_one;
static COMPLEX_t c_ninf;
static COMPLEX_t c_nan;
static DOUBLECOMPLEX_t z_one;
static DOUBLECOMPLEX_t z_zero;
static DOUBLECOMPLEX_t z_minus_one;
static DOUBLECOMPLEX_t z_ninf;
static DOUBLECOMPLEX_t z_nan;
static void init_constants(void)
{
/*
this is needed as NPY_INFINITY and NPY_NAN macros
can't be used as initializers. I prefer to just set
all the constants the same way.
*/
s_one = 1.0f;
s_zero = 0.0f;
s_minus_one = -1.0f;
s_ninf = -NPY_INFINITYF;
s_nan = NPY_NANF;
d_one = 1.0;
d_zero = 0.0;
d_minus_one = -1.0;
d_ninf = -NPY_INFINITY;
d_nan = NPY_NAN;
c_one.array[0] = 1.0f;
c_one.array[1] = 0.0f;
c_zero.array[0] = 0.0f;
c_zero.array[1] = 0.0f;
c_minus_one.array[0] = -1.0f;
c_minus_one.array[1] = 0.0f;
c_ninf.array[0] = -NPY_INFINITYF;
c_ninf.array[1] = 0.0f;
c_nan.array[0] = NPY_NANF;
c_nan.array[1] = NPY_NANF;
z_one.array[0] = 1.0;
z_one.array[1] = 0.0;
z_zero.array[0] = 0.0;
z_zero.array[1] = 0.0;
z_minus_one.array[0] = -1.0;
z_minus_one.array[1] = 0.0;
z_ninf.array[0] = -NPY_INFINITY;
z_ninf.array[1] = 0.0;
z_nan.array[0] = NPY_NAN;
z_nan.array[1] = NPY_NAN;
}
/*
*****************************************************************************
** Structs used for data rearrangement **
*****************************************************************************
*/
/*
* this struct contains information about how to linearize a matrix in a local
* buffer so that it can be used by blas functions. All strides are specified
* in bytes and are converted to elements later in type specific functions.
*
* rows: number of rows in the matrix
* columns: number of columns in the matrix
* row_strides: the number bytes between consecutive rows.
* column_strides: the number of bytes between consecutive columns.
* output_lead_dim: BLAS/LAPACK-side leading dimension, in elements
*/
typedef struct linearize_data_struct
{
npy_intp rows;
npy_intp columns;
npy_intp row_strides;
npy_intp column_strides;
npy_intp output_lead_dim;
} LINEARIZE_DATA_t;
static NPY_INLINE void
init_linearize_data_ex(LINEARIZE_DATA_t *lin_data,
npy_intp rows,
npy_intp columns,
npy_intp row_strides,
npy_intp column_strides,
npy_intp output_lead_dim)
{
lin_data->rows = rows;
lin_data->columns = columns;
lin_data->row_strides = row_strides;
lin_data->column_strides = column_strides;
lin_data->output_lead_dim = output_lead_dim;
}
static NPY_INLINE void
init_linearize_data(LINEARIZE_DATA_t *lin_data,
npy_intp rows,
npy_intp columns,
npy_intp row_strides,
npy_intp column_strides)
{
init_linearize_data_ex(
lin_data, rows, columns, row_strides, column_strides, columns);
}
static NPY_INLINE void
dump_ufunc_object(PyUFuncObject* ufunc)
{
TRACE_TXT("\n\n%s '%s' (%d input(s), %d output(s), %d specialization(s).\n",
ufunc->core_enabled? "generalized ufunc" : "scalar ufunc",
ufunc->name, ufunc->nin, ufunc->nout, ufunc->ntypes);
if (ufunc->core_enabled) {
int arg;
int dim;
TRACE_TXT("\t%s (%d dimension(s) detected).\n",
ufunc->core_signature, ufunc->core_num_dim_ix);
for (arg = 0; arg < ufunc->nargs; arg++){
int * arg_dim_ix = ufunc->core_dim_ixs + ufunc->core_offsets[arg];
TRACE_TXT("\t\targ %d (%s) has %d dimension(s): (",
arg, arg < ufunc->nin? "INPUT" : "OUTPUT",
ufunc->core_num_dims[arg]);
for (dim = 0; dim < ufunc->core_num_dims[arg]; dim ++) {
TRACE_TXT(" %d", arg_dim_ix[dim]);
}
TRACE_TXT(" )\n");
}
}
}
static NPY_INLINE void
dump_linearize_data(const char* name, const LINEARIZE_DATA_t* params)
{
TRACE_TXT("\n\t%s rows: %zd columns: %zd"\
"\n\t\trow_strides: %td column_strides: %td"\
"\n", name, params->rows, params->columns,
params->row_strides, params->column_strides);
}
static NPY_INLINE void
print_FLOAT(npy_float s)
{
TRACE_TXT(" %8.4f", s);
}
static NPY_INLINE void
print_DOUBLE(npy_double d)
{
TRACE_TXT(" %10.6f", d);
}
static NPY_INLINE void
print_CFLOAT(npy_cfloat c)
{
float* c_parts = (float*)&c;
TRACE_TXT("(%8.4f, %8.4fj)", c_parts[0], c_parts[1]);
}
static NPY_INLINE void
print_CDOUBLE(npy_cdouble z)
{
double* z_parts = (double*)&z;
TRACE_TXT("(%8.4f, %8.4fj)", z_parts[0], z_parts[1]);
}
/**begin repeat
#TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
#typ = npy_float, npy_double, npy_cfloat, npy_cdouble#
*/
static NPY_INLINE void
dump_@TYPE@_matrix(const char* name,
size_t rows, size_t columns,
const @typ@* ptr)
{
size_t i, j;
TRACE_TXT("\n%s %p (%zd, %zd)\n", name, ptr, rows, columns);
for (i = 0; i < rows; i++)
{
TRACE_TXT("| ");
for (j = 0; j < columns; j++)
{
print_@TYPE@(ptr[j*rows + i]);
TRACE_TXT(", ");
}
TRACE_TXT(" |\n");
}
}
/**end repeat**/
/*
*****************************************************************************
** Basics **
*****************************************************************************
*/
static NPY_INLINE fortran_int
fortran_int_min(fortran_int x, fortran_int y) {
return x < y ? x : y;
}
static NPY_INLINE fortran_int
fortran_int_max(fortran_int x, fortran_int y) {
return x > y ? x : y;
}
#define INIT_OUTER_LOOP_1 \
npy_intp dN = *dimensions++;\
npy_intp N_;\
npy_intp s0 = *steps++;
#define INIT_OUTER_LOOP_2 \
INIT_OUTER_LOOP_1\
npy_intp s1 = *steps++;
#define INIT_OUTER_LOOP_3 \
INIT_OUTER_LOOP_2\
npy_intp s2 = *steps++;
#define INIT_OUTER_LOOP_4 \
INIT_OUTER_LOOP_3\
npy_intp s3 = *steps++;
#define INIT_OUTER_LOOP_5 \
INIT_OUTER_LOOP_4\
npy_intp s4 = *steps++;
#define INIT_OUTER_LOOP_6 \
INIT_OUTER_LOOP_5\
npy_intp s5 = *steps++;
#define INIT_OUTER_LOOP_7 \
INIT_OUTER_LOOP_6\
npy_intp s6 = *steps++;
#define BEGIN_OUTER_LOOP_2 \
for (N_ = 0;\
N_ < dN;\
N_++, args[0] += s0,\
args[1] += s1) {
#define BEGIN_OUTER_LOOP_3 \
for (N_ = 0;\
N_ < dN;\
N_++, args[0] += s0,\
args[1] += s1,\
args[2] += s2) {
#define BEGIN_OUTER_LOOP_4 \
for (N_ = 0;\
N_ < dN;\
N_++, args[0] += s0,\
args[1] += s1,\
args[2] += s2,\
args[3] += s3) {
#define BEGIN_OUTER_LOOP_5 \
for (N_ = 0;\
N_ < dN;\
N_++, args[0] += s0,\
args[1] += s1,\
args[2] += s2,\
args[3] += s3,\
args[4] += s4) {
#define BEGIN_OUTER_LOOP_6 \
for (N_ = 0;\
N_ < dN;\
N_++, args[0] += s0,\
args[1] += s1,\
args[2] += s2,\
args[3] += s3,\
args[4] += s4,\
args[5] += s5) {
#define BEGIN_OUTER_LOOP_7 \
for (N_ = 0;\
N_ < dN;\
N_++, args[0] += s0,\
args[1] += s1,\
args[2] += s2,\
args[3] += s3,\
args[4] += s4,\
args[5] += s5,\
args[6] += s6) {
#define END_OUTER_LOOP }
static NPY_INLINE void
update_pointers(npy_uint8** bases, ptrdiff_t* offsets, size_t count)
{
size_t i;
for (i = 0; i < count; ++i) {
bases[i] += offsets[i];
}
}
/* disable -Wmaybe-uninitialized as there is some code that generate false
positives with this warning
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
/*
*****************************************************************************
** HELPER FUNCS **
*****************************************************************************
*/
/* rearranging of 2D matrices using blas */
/**begin repeat
#TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
#typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t#
#copy = scopy, dcopy, ccopy, zcopy#
#nan = s_nan, d_nan, c_nan, z_nan#
#zero = s_zero, d_zero, c_zero, z_zero#
*/
static NPY_INLINE void *
linearize_@TYPE@_matrix(void *dst_in,
void *src_in,
const LINEARIZE_DATA_t* data)
{
@typ@ *src = (@typ@ *) src_in;
@typ@ *dst = (@typ@ *) dst_in;
if (dst) {
int i, j;
@typ@* rv = dst;
fortran_int columns = (fortran_int)data->columns;
fortran_int column_strides =
(fortran_int)(data->column_strides/sizeof(@typ@));
fortran_int one = 1;
for (i = 0; i < data->rows; i++) {
if (column_strides > 0) {
FNAME(@copy@)(&columns,
(void*)src, &column_strides,
(void*)dst, &one);
}
else if (column_strides < 0) {
FNAME(@copy@)(&columns,
(void*)((@typ@*)src + (columns-1)*column_strides),
&column_strides,
(void*)dst, &one);
}
else {
/*
* Zero stride has undefined behavior in some BLAS
* implementations (e.g. OSX Accelerate), so do it
* manually
*/
for (j = 0; j < columns; ++j) {
memcpy((@typ@*)dst + j, (@typ@*)src, sizeof(@typ@));
}
}
src += data->row_strides/sizeof(@typ@);
dst += data->output_lead_dim;
}
return rv;
} else {
return src;
}
}
static NPY_INLINE void *
delinearize_@TYPE@_matrix(void *dst_in,
void *src_in,
const LINEARIZE_DATA_t* data)
{
@typ@ *src = (@typ@ *) src_in;
@typ@ *dst = (@typ@ *) dst_in;
if (src) {
int i;
@typ@ *rv = src;
fortran_int columns = (fortran_int)data->columns;
fortran_int column_strides =
(fortran_int)(data->column_strides/sizeof(@typ@));
fortran_int one = 1;
for (i = 0; i < data->rows; i++) {
if (column_strides > 0) {
FNAME(@copy@)(&columns,
(void*)src, &one,
(void*)dst, &column_strides);
}
else if (column_strides < 0) {
FNAME(@copy@)(&columns,
(void*)src, &one,
(void*)((@typ@*)dst + (columns-1)*column_strides),
&column_strides);
}
else {
/*
* Zero stride has undefined behavior in some BLAS
* implementations (e.g. OSX Accelerate), so do it
* manually
*/
if (columns > 0) {
memcpy((@typ@*)dst,
(@typ@*)src + (columns-1),
sizeof(@typ@));
}
}
src += data->output_lead_dim;
dst += data->row_strides/sizeof(@typ@);
}
return rv;
} else {
return src;
}
}
static NPY_INLINE void
nan_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data)
{
@typ@ *dst = (@typ@ *) dst_in;
int i, j;
for (i = 0; i < data->rows; i++) {
@typ@ *cp = dst;
ptrdiff_t cs = data->column_strides/sizeof(@typ@);
for (j = 0; j < data->columns; ++j) {
*cp = @nan@;
cp += cs;
}
dst += data->row_strides/sizeof(@typ@);
}
}
static NPY_INLINE void
zero_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data)
{
@typ@ *dst = (@typ@ *) dst_in;
int i, j;
for (i = 0; i < data->rows; i++) {
@typ@ *cp = dst;
ptrdiff_t cs = data->column_strides/sizeof(@typ@);
for (j = 0; j < data->columns; ++j) {
*cp = @zero@;
cp += cs;
}
dst += data->row_strides/sizeof(@typ@);
}
}
/**end repeat**/
/* identity square matrix generation */
/**begin repeat
#TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
#typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t#
#cblas_type = s, d, c, z#
*/
static NPY_INLINE void
identity_@TYPE@_matrix(void *ptr, size_t n)
{
size_t i;
@typ@ *matrix = (@typ@*) ptr;
/* in IEEE floating point, zeroes are represented as bitwise 0 */
memset(matrix, 0, n*n*sizeof(@typ@));
for (i = 0; i < n; ++i)
{
*matrix = @cblas_type@_one;
matrix += n+1;
}
}
/**end repeat**/
/* lower/upper triangular matrix using blas (in place) */
/**begin repeat
#TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
#typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t#
#cblas_type = s, d, c, z#
*/
static NPY_INLINE void
triu_@TYPE@_matrix(void *ptr, size_t n)
{
size_t i, j;
@typ@ *matrix = (@typ@*)ptr;
matrix += n;
for (i = 1; i < n; ++i) {
for (j = 0; j < i; ++j) {
matrix[j] = @cblas_type@_zero;
}
matrix += n;
}
}
/**end repeat**/
/* -------------------------------------------------------------------------- */
/* Determinants */
/**begin repeat
#TYPE = FLOAT, DOUBLE#
#typ = npy_float, npy_double#
#log_func = npy_logf, npy_log#
#exp_func = npy_expf, npy_exp#
#zero = 0.0f, 0.0#
*/
static NPY_INLINE void
@TYPE@_slogdet_from_factored_diagonal(@typ@* src,
fortran_int m,
@typ@ *sign,
@typ@ *logdet)
{
@typ@ acc_sign = *sign;
@typ@ acc_logdet = @zero@;
int i;
for (i = 0; i < m; i++) {
@typ@ abs_element = *src;
if (abs_element < @zero@) {
acc_sign = -acc_sign;
abs_element = -abs_element;
}
acc_logdet += @log_func@(abs_element);
src += m+1;
}
*sign = acc_sign;
*logdet = acc_logdet;
}
static NPY_INLINE @typ@
@TYPE@_det_from_slogdet(@typ@ sign, @typ@ logdet)
{
@typ@ result = sign * @exp_func@(logdet);
return result;
}
/**end repeat**/
/**begin repeat
#TYPE = CFLOAT, CDOUBLE#
#typ = npy_cfloat, npy_cdouble#
#basetyp = npy_float, npy_double#
#abs_func = npy_cabsf, npy_cabs#
#log_func = npy_logf, npy_log#
#exp_func = npy_expf, npy_exp#
#zero = 0.0f, 0.0#
*/
#define RE(COMPLEX) (((@basetyp@*)(&COMPLEX))[0])
#define IM(COMPLEX) (((@basetyp@*)(&COMPLEX))[1])
static NPY_INLINE @typ@
@TYPE@_mult(@typ@ op1, @typ@ op2)
{
@typ@ rv;
RE(rv) = RE(op1)*RE(op2) - IM(op1)*IM(op2);
IM(rv) = RE(op1)*IM(op2) + IM(op1)*RE(op2);
return rv;
}
static NPY_INLINE void
@TYPE@_slogdet_from_factored_diagonal(@typ@* src,
fortran_int m,
@typ@ *sign,
@basetyp@ *logdet)
{
int i;
@typ@ sign_acc = *sign;
@basetyp@ logdet_acc = @zero@;
for (i = 0; i < m; i++)
{
@basetyp@ abs_element = @abs_func@(*src);
@typ@ sign_element;
RE(sign_element) = RE(*src) / abs_element;
IM(sign_element) = IM(*src) / abs_element;
sign_acc = @TYPE@_mult(sign_acc, sign_element);
logdet_acc += @log_func@(abs_element);
src += m + 1;
}
*sign = sign_acc;
*logdet = logdet_acc;
}
static NPY_INLINE @typ@
@TYPE@_det_from_slogdet(@typ@ sign, @basetyp@ logdet)
{
@typ@ tmp;
RE(tmp) = @exp_func@(logdet);
IM(tmp) = @zero@;
return @TYPE@_mult(sign, tmp);
}
#undef RE
#undef IM
/**end repeat**/
/* As in the linalg package, the determinant is computed via LU factorization
* using LAPACK.
* slogdet computes sign + log(determinant).
* det computes sign * exp(slogdet).
*/
/**begin repeat
#TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
#typ = npy_float, npy_double, npy_cfloat, npy_cdouble#
#basetyp = npy_float, npy_double, npy_float, npy_double#
#cblas_type = s, d, c, z#
*/
static NPY_INLINE void
@TYPE@_slogdet_single_element(fortran_int m,
void* src,
fortran_int* pivots,
@typ@ *sign,
@basetyp@ *logdet)
{
fortran_int info = 0;
fortran_int lda = fortran_int_max(m, 1);
int i;
/* note: done in place */
LAPACK(@cblas_type@getrf)(&m, &m, (void *)src, &lda, pivots, &info);
if (info == 0) {
int change_sign = 0;
/* note: fortran uses 1 based indexing */
for (i = 0; i < m; i++)
{
change_sign += (pivots[i] != (i+1));
}
memcpy(sign,
(change_sign % 2)?
&@cblas_type@_minus_one :
&@cblas_type@_one
, sizeof(*sign));
@TYPE@_slogdet_from_factored_diagonal(src, m, sign, logdet);
} else {
/*
if getrf fails, use 0 as sign and -inf as logdet
*/
memcpy(sign, &@cblas_type@_zero, sizeof(*sign));
memcpy(logdet, &@cblas_type@_ninf, sizeof(*logdet));
}
}
static void
@TYPE@_slogdet(char **args,
npy_intp *dimensions,
npy_intp *steps,
void *NPY_UNUSED(func))
{
fortran_int m;
npy_uint8 *tmp_buff = NULL;
size_t matrix_size;
size_t pivot_size;
size_t safe_m;
/* notes:
* matrix will need to be copied always, as factorization in lapack is
* made inplace
* matrix will need to be in column-major order, as expected by lapack
* code (fortran)
* always a square matrix
* need to allocate memory for both, matrix_buffer and pivot buffer
*/
INIT_OUTER_LOOP_3
m = (fortran_int) dimensions[0];
safe_m = m;
matrix_size = safe_m * safe_m * sizeof(@typ@);
pivot_size = safe_m * sizeof(fortran_int);
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size);
if (tmp_buff) {
LINEARIZE_DATA_t lin_data;
/* swapped steps to get matrix in FORTRAN order */
init_linearize_data(&lin_data, m, m, steps[1], steps[0]);
BEGIN_OUTER_LOOP_3
linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data);
@TYPE@_slogdet_single_element(m,
(void*)tmp_buff,
(fortran_int*)(tmp_buff+matrix_size),
(@typ@*)args[1],
(@basetyp@*)args[2]);
END_OUTER_LOOP
free(tmp_buff);
}
}
static void
@TYPE@_det(char **args,
npy_intp *dimensions,
npy_intp *steps,
void *NPY_UNUSED(func))
{
fortran_int m;
npy_uint8 *tmp_buff;
size_t matrix_size;
size_t pivot_size;
size_t safe_m;
/* notes:
* matrix will need to be copied always, as factorization in lapack is
* made inplace
* matrix will need to be in column-major order, as expected by lapack
* code (fortran)
* always a square matrix
* need to allocate memory for both, matrix_buffer and pivot buffer
*/
INIT_OUTER_LOOP_2
m = (fortran_int) dimensions[0];
safe_m = m;
matrix_size = safe_m * safe_m * sizeof(@typ@);
pivot_size = safe_m * sizeof(fortran_int);
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size);
if (tmp_buff) {
LINEARIZE_DATA_t lin_data;
@typ@ sign;
@basetyp@ logdet;
/* swapped steps to get matrix in FORTRAN order */
init_linearize_data(&lin_data, m, m, steps[1], steps[0]);
BEGIN_OUTER_LOOP_2
linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data);
@TYPE@_slogdet_single_element(m,
(void*)tmp_buff,
(fortran_int*)(tmp_buff + matrix_size),
&sign,
&logdet);
*(@typ@ *)args[1] = @TYPE@_det_from_slogdet(sign, logdet);
END_OUTER_LOOP
free(tmp_buff);
}
}
/**end repeat**/
/* -------------------------------------------------------------------------- */
/* Eigh family */
typedef struct eigh_params_struct {
void *A; /* matrix */
void *W; /* eigenvalue vector */
void *WORK; /* main work buffer */
void *RWORK; /* secondary work buffer (for complex versions) */
void *IWORK;
fortran_int N;
fortran_int LWORK;
fortran_int LRWORK;
fortran_int LIWORK;
char JOBZ;
char UPLO;
fortran_int LDA;
} EIGH_PARAMS_t;
/**begin repeat
#TYPE = FLOAT, DOUBLE#
#typ = npy_float, npy_double#
#ftyp = fortran_real, fortran_doublereal#
#lapack_func = ssyevd, dsyevd#
*/
static NPY_INLINE fortran_int
call_@lapack_func@(EIGH_PARAMS_t *params)
{
fortran_int rv;
LAPACK(@lapack_func@)(&params->JOBZ, &params->UPLO, &params->N,
params->A, &params->LDA, params->W,
params->WORK, &params->LWORK,
params->IWORK, &params->LIWORK,
&rv);
return rv;
}
/*
* Initialize the parameters to use in for the lapack function _syevd
* Handles buffer allocation
*/
static NPY_INLINE int
init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO,
fortran_int N)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
fortran_int lwork;
fortran_int liwork;
npy_uint8 *a, *w, *work, *iwork;
size_t safe_N = N;
size_t alloc_size = safe_N * (safe_N + 1) * sizeof(@typ@);
fortran_int lda = fortran_int_max(N, 1);
mem_buff = malloc(alloc_size);
if (!mem_buff) {
goto error;
}
a = mem_buff;
w = mem_buff + safe_N * safe_N * sizeof(@typ@);
params->A = a;
params->W = w;
params->RWORK = NULL; /* unused */
params->N = N;
params->LRWORK = 0; /* unused */
params->JOBZ = JOBZ;
params->UPLO = UPLO;
params->LDA = lda;
/* Work size query */
{
@typ@ query_work_size;
fortran_int query_iwork_size;
params->LWORK = -1;
params->LIWORK = -1;
params->WORK = &query_work_size;
params->IWORK = &query_iwork_size;
if (call_@lapack_func@(params) != 0) {
goto error;
}
lwork = (fortran_int)query_work_size;
liwork = query_iwork_size;
}
mem_buff2 = malloc(lwork*sizeof(@typ@) + liwork*sizeof(fortran_int));
if (!mem_buff2) {
goto error;
}
work = mem_buff2;
iwork = mem_buff2 + lwork*sizeof(@typ@);
params->LWORK = lwork;
params->WORK = work;
params->LIWORK = liwork;
params->IWORK = iwork;
return 1;
error:
/* something failed */
memset(params, 0, sizeof(*params));
free(mem_buff2);
free(mem_buff);
return 0;
}
/**end repeat**/
/**begin repeat
#TYPE = CFLOAT, CDOUBLE#
#typ = npy_cfloat, npy_cdouble#
#basetyp = npy_float, npy_double#
#ftyp = fortran_complex, fortran_doublecomplex#
#fbasetyp = fortran_real, fortran_doublereal#
#lapack_func = cheevd, zheevd#
*/
static NPY_INLINE fortran_int
call_@lapack_func@(EIGH_PARAMS_t *params)
{
fortran_int rv;
LAPACK(@lapack_func@)(&params->JOBZ, &params->UPLO, &params->N,
params->A, &params->LDA, params->W,
params->WORK, &params->LWORK,
params->RWORK, &params->LRWORK,
params->IWORK, &params->LIWORK,
&rv);
return rv;
}
/*
* Initialize the parameters to use in for the lapack function _heev
* Handles buffer allocation
*/
static NPY_INLINE int
init_@lapack_func@(EIGH_PARAMS_t *params,
char JOBZ,
char UPLO,
fortran_int N)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
fortran_int lwork;
fortran_int lrwork;
fortran_int liwork;
npy_uint8 *a, *w, *work, *rwork, *iwork;
size_t safe_N = N;
fortran_int lda = fortran_int_max(N, 1);
mem_buff = malloc(safe_N * safe_N * sizeof(@typ@) +
safe_N * sizeof(@basetyp@));
if (!mem_buff) {
goto error;
}
a = mem_buff;
w = mem_buff + safe_N * safe_N * sizeof(@typ@);
params->A = a;
params->W = w;
params->N = N;
params->JOBZ = JOBZ;
params->UPLO = UPLO;
params->LDA = lda;
/* Work size query */
{
@ftyp@ query_work_size;
@fbasetyp@ query_rwork_size;
fortran_int query_iwork_size;
params->LWORK = -1;
params->LRWORK = -1;
params->LIWORK = -1;
params->WORK = &query_work_size;
params->RWORK = &query_rwork_size;
params->IWORK = &query_iwork_size;
if (call_@lapack_func@(params) != 0) {
goto error;
}
lwork = (fortran_int)*(@fbasetyp@*)&query_work_size;
lrwork = (fortran_int)query_rwork_size;
liwork = query_iwork_size;
}
mem_buff2 = malloc(lwork*sizeof(@typ@) +
lrwork*sizeof(@basetyp@) +
liwork*sizeof(fortran_int));
if (!mem_buff2) {
goto error;
}
work = mem_buff2;
rwork = work + lwork*sizeof(@typ@);
iwork = rwork + lrwork*sizeof(@basetyp@);
params->WORK = work;
params->RWORK = rwork;
params->IWORK = iwork;
params->LWORK = lwork;
params->LRWORK = lrwork;
params->LIWORK = liwork;
return 1;
/* something failed */
error:
memset(params, 0, sizeof(*params));
free(mem_buff2);
free(mem_buff);
return 0;
}
/**end repeat**/
/**begin repeat
#TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
#BASETYPE = FLOAT, DOUBLE, FLOAT, DOUBLE#
#typ = npy_float, npy_double, npy_cfloat, npy_cdouble#
#basetyp = npy_float, npy_double, npy_float, npy_double#
#lapack_func = ssyevd, dsyevd, cheevd, zheevd#
**/
/*
* (M, M)->(M,)(M, M)
* dimensions[1] -> M
* args[0] -> A[in]
* args[1] -> W
* args[2] -> A[out]
*/
static NPY_INLINE void
release_@lapack_func@(EIGH_PARAMS_t *params)
{
/* allocated memory in A and WORK */
free(params->A);
free(params->WORK);
memset(params, 0, sizeof(*params));
}
static NPY_INLINE void
@TYPE@_eigh_wrapper(char JOBZ,
char UPLO,
char**args,
npy_intp* dimensions,
npy_intp* steps)
{
ptrdiff_t outer_steps[3];
size_t iter;
size_t outer_dim = *dimensions++;
size_t op_count = (JOBZ=='N')?2:3;
EIGH_PARAMS_t eigh_params;
int error_occurred = get_fp_invalid_and_clear();
for (iter = 0; iter < op_count; ++iter) {
outer_steps[iter] = (ptrdiff_t) steps[iter];
}
steps += op_count;
if (init_@lapack_func@(&eigh_params,
JOBZ,
UPLO,
(fortran_int)dimensions[0])) {
LINEARIZE_DATA_t matrix_in_ld;
LINEARIZE_DATA_t eigenvectors_out_ld;
LINEARIZE_DATA_t eigenvalues_out_ld;
init_linearize_data(&matrix_in_ld,
eigh_params.N, eigh_params.N,
steps[1], steps[0]);
init_linearize_data(&eigenvalues_out_ld,
1, eigh_params.N,
0, steps[2]);
if ('V' == eigh_params.JOBZ) {
init_linearize_data(&eigenvectors_out_ld,
eigh_params.N, eigh_params.N,
steps[4], steps[3]);
}
for (iter = 0; iter < outer_dim; ++iter) {
int not_ok;
/* copy the matrix in */
linearize_@TYPE@_matrix(eigh_params.A, args[0], &matrix_in_ld);
not_ok = call_@lapack_func@(&eigh_params);
if (!not_ok) {
/* lapack ok, copy result out */
delinearize_@BASETYPE@_matrix(args[1],
eigh_params.W,
&eigenvalues_out_ld);
if ('V' == eigh_params.JOBZ) {
delinearize_@TYPE@_matrix(args[2],
eigh_params.A,
&eigenvectors_out_ld);
}
} else {
/* lapack fail, set result to nan */
error_occurred = 1;
nan_@BASETYPE@_matrix(args[1], &eigenvalues_out_ld);
if ('V' == eigh_params.JOBZ) {
nan_@TYPE@_matrix(args[2], &eigenvectors_out_ld);
}
}
update_pointers((npy_uint8**)args, outer_steps, op_count);
}
release_@lapack_func@(&eigh_params);
}
set_fp_invalid_or_clear(error_occurred);
}
/**end repeat**/
/**begin repeat
#TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
*/
static void
@TYPE@_eighlo(char **args,
npy_intp *dimensions,
npy_intp *steps,
void *NPY_UNUSED(func))
{
@TYPE@_eigh_wrapper('V', 'L', args, dimensions, steps);
}
static void
@TYPE@_eighup(char **args,
npy_intp *dimensions,
npy_intp *steps,
void* NPY_UNUSED(func))
{
@TYPE@_eigh_wrapper('V', 'U', args, dimensions, steps);
}
static void
@TYPE@_eigvalshlo(char **args,
npy_intp *dimensions,
npy_intp *steps,
void* NPY_UNUSED(func))
{
@TYPE@_eigh_wrapper('N', 'L', args, dimensions, steps);
}
static void
@TYPE@_eigvalshup(char **args,
npy_intp *dimensions,
npy_intp *steps,
void* NPY_UNUSED(func))
{
@TYPE@_eigh_wrapper('N', 'U', args, dimensions, steps);
}
/**end repeat**/
/* -------------------------------------------------------------------------- */
/* Solve family (includes inv) */
typedef struct gesv_params_struct
{
void *A; /* A is (N, N) of base type */
void *B; /* B is (N, NRHS) of base type */
fortran_int * IPIV; /* IPIV is (N) */
fortran_int N;
fortran_int NRHS;
fortran_int LDA;
fortran_int LDB;
} GESV_PARAMS_t;
/**begin repeat
#TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
#typ = npy_float, npy_double, npy_cfloat, npy_cdouble#
#ftyp = fortran_real, fortran_doublereal,
fortran_complex, fortran_doublecomplex#
#lapack_func = sgesv, dgesv, cgesv, zgesv#
*/
static NPY_INLINE fortran_int
call_@lapack_func@(GESV_PARAMS_t *params)
{
fortran_int rv;
LAPACK(@lapack_func@)(&params->N, &params->NRHS,
params->A, &params->LDA,
params->IPIV,
params->B, &params->LDB,
&rv);
return rv;
}
/*
* Initialize the parameters to use in for the lapack function _heev
* Handles buffer allocation
*/
static NPY_INLINE int
init_@lapack_func@(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *a, *b, *ipiv;
size_t safe_N = N;
size_t safe_NRHS = NRHS;
fortran_int ld = fortran_int_max(N, 1);
mem_buff = malloc(safe_N * safe_N * sizeof(@ftyp@) +
safe_N * safe_NRHS*sizeof(@ftyp@) +
safe_N * sizeof(fortran_int));
if (!mem_buff) {
goto error;
}
a = mem_buff;
b = a + safe_N * safe_N * sizeof(@ftyp@);
ipiv = b + safe_N * safe_NRHS * sizeof(@ftyp@);
params->A = a;
params->B = b;
params->IPIV = (fortran_int*)ipiv;
params->N = N;
params->NRHS = NRHS;
params->LDA = ld;
params->LDB = ld;
return 1;
error:
free(mem_buff);
memset(params, 0, sizeof(*params));
return 0;
}
static NPY_INLINE void
release_@lapack_func@(GESV_PARAMS_t *params)
{
/* memory block base is in A */
free(params->A);
memset(params, 0, sizeof(*params));
}
static void
@TYPE@_solve(char **args, npy_intp *dimensions, npy_intp *steps,
void *NPY_UNUSED(func))
{
GESV_PARAMS_t params;
fortran_int n, nrhs;
int error_occurred = get_fp_invalid_and_clear();
INIT_OUTER_LOOP_3
n = (fortran_int)dimensions[0];
nrhs = (fortran_int)dimensions[1];
if (init_@lapack_func@(&params, n, nrhs)) {
LINEARIZE_DATA_t a_in, b_in, r_out;
init_linearize_data(&a_in, n, n, steps[1], steps[0]);
init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]);
init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]);
BEGIN_OUTER_LOOP_3
int not_ok;
linearize_@TYPE@_matrix(params.A, args[0], &a_in);
linearize_@TYPE@_matrix(params.B, args[1], &b_in);
not_ok =call_@lapack_func@(&params);
if (!not_ok) {
delinearize_@TYPE@_matrix(args[2], params.B, &r_out);
} else {
error_occurred = 1;
nan_@TYPE@_matrix(args[2], &r_out);
}
END_OUTER_LOOP
release_@lapack_func@(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
static void
@TYPE@_solve1(char **args, npy_intp *dimensions, npy_intp *steps,
void *NPY_UNUSED(func))
{
GESV_PARAMS_t params;
int error_occurred = get_fp_invalid_and_clear();
fortran_int n;
INIT_OUTER_LOOP_3
n = (fortran_int)dimensions[0];
if (init_@lapack_func@(&params, n, 1)) {
LINEARIZE_DATA_t a_in, b_in, r_out;
init_linearize_data(&a_in, n, n, steps[1], steps[0]);
init_linearize_data(&b_in, 1, n, 1, steps[2]);
init_linearize_data(&r_out, 1, n, 1, steps[3]);
BEGIN_OUTER_LOOP_3
int not_ok;
linearize_@TYPE@_matrix(params.A, args[0], &a_in);
linearize_@TYPE@_matrix(params.B, args[1], &b_in);
not_ok = call_@lapack_func@(&params);
if (!not_ok) {
delinearize_@TYPE@_matrix(args[2], params.B, &r_out);
} else {
error_occurred = 1;
nan_@TYPE@_matrix(args[2], &r_out);
}
END_OUTER_LOOP
release_@lapack_func@(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
static void
@TYPE@_inv(char **args, npy_intp *dimensions, npy_intp *steps,
void *NPY_UNUSED(func))
{
GESV_PARAMS_t params;
fortran_int n;
int error_occurred = get_fp_invalid_and_clear();
INIT_OUTER_LOOP_2
n = (fortran_int)dimensions[0];
if (init_@lapack_func@(&params, n, n)) {
LINEARIZE_DATA_t a_in, r_out;
init_linearize_data(&a_in, n, n, steps[1], steps[0]);
init_linearize_data(&r_out, n, n, steps[3], steps[2]);
BEGIN_OUTER_LOOP_2
int not_ok;
linearize_@TYPE@_matrix(params.A, args[0], &a_in);
identity_@TYPE@_matrix(params.B, n);
not_ok = call_@lapack_func@(&params);
if (!not_ok) {
delinearize_@TYPE@_matrix(args[1], params.B, &r_out);
} else {
error_occurred = 1;
nan_@TYPE@_matrix(args[1], &r_out);
}
END_OUTER_LOOP
release_@lapack_func@(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
/**end repeat**/
/* -------------------------------------------------------------------------- */
/* Cholesky decomposition */
typedef struct potr_params_struct
{
void *A;
fortran_int N;
fortran_int LDA;
char UPLO;
} POTR_PARAMS_t;
/**begin repeat
#TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
#ftyp = fortran_real, fortran_doublereal,
fortran_complex, fortran_doublecomplex#
#lapack_func = spotrf, dpotrf, cpotrf, zpotrf#
*/
static NPY_INLINE fortran_int
call_@lapack_func@(POTR_PARAMS_t *params)
{
fortran_int rv;
LAPACK(@lapack_func@)(&params->UPLO,
&params->N, params->A, &params->LDA,
&rv);
return rv;
}
static NPY_INLINE int
init_@lapack_func@(POTR_PARAMS_t *params, char UPLO, fortran_int N)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *a;
size_t safe_N = N;
fortran_int lda = fortran_int_max(N, 1);
mem_buff = malloc(safe_N * safe_N * sizeof(@ftyp@));
if (!mem_buff) {
goto error;
}
a = mem_buff;
params->A = a;
params->N = N;
params->LDA = lda;
params->UPLO = UPLO;
return 1;
error:
free(mem_buff);
memset(params, 0, sizeof(*params));
return 0;
}
static NPY_INLINE void
release_@lapack_func@(POTR_PARAMS_t *params)
{
/* memory block base in A */
free(params->A);
memset(params, 0, sizeof(*params));
}
static void
@TYPE@_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps)
{
POTR_PARAMS_t params;
int error_occurred = get_fp_invalid_and_clear();
fortran_int n;
INIT_OUTER_LOOP_2
assert(uplo == 'L');
n = (fortran_int)dimensions[0];
if (init_@lapack_func@(&params, uplo, n)) {
LINEARIZE_DATA_t a_in, r_out;
init_linearize_data(&a_in, n, n, steps[1], steps[0]);
init_linearize_data(&r_out, n, n, steps[3], steps[2]);
BEGIN_OUTER_LOOP_2
int not_ok;
linearize_@TYPE@_matrix(params.A, args[0], &a_in);
not_ok = call_@lapack_func@(&params);
if (!not_ok) {
triu_@TYPE@_matrix(params.A, params.N);
delinearize_@TYPE@_matrix(args[1], params.A, &r_out);
} else {
error_occurred = 1;
nan_@TYPE@_matrix(args[1], &r_out);
}
END_OUTER_LOOP
release_@lapack_func@(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
static void
@TYPE@_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps,
void *NPY_UNUSED(func))
{
@TYPE@_cholesky('L', args, dimensions, steps);
}
/**end repeat**/
/* -------------------------------------------------------------------------- */
/* eig family */
typedef struct geev_params_struct {
void *A;
void *WR; /* RWORK in complex versions, REAL W buffer for (sd)geev*/
void *WI;
void *VLR; /* REAL VL buffers for _geev where _ is s, d */
void *VRR; /* REAL VR buffers for _geev hwere _ is s, d */
void *WORK;
void *W; /* final w */
void *VL; /* final vl */
void *VR; /* final vr */
fortran_int N;
fortran_int LDA;
fortran_int LDVL;
fortran_int LDVR;
fortran_int LWORK;
char JOBVL;
char JOBVR;
} GEEV_PARAMS_t;
static NPY_INLINE void
dump_geev_params(const char *name, GEEV_PARAMS_t* params)
{
TRACE_TXT("\n%s\n"
"\t%10s: %p\n"\
"\t%10s: %p\n"\
"\t%10s: %p\n"\
"\t%10s: %p\n"\
"\t%10s: %p\n"\
"\t%10s: %p\n"\
"\t%10s: %p\n"\
"\t%10s: %p\n"\
"\t%10s: %p\n"\
"\t%10s: %d\n"\
"\t%10s: %d\n"\
"\t%10s: %d\n"\
"\t%10s: %d\n"\
"\t%10s: %d\n"\
"\t%10s: %c\n"\
"\t%10s: %c\n",
name,
"A", params->A,
"WR", params->WR,
"WI", params->WI,
"VLR", params->VLR,
"VRR", params->VRR,
"WORK", params->WORK,
"W", params->W,
"VL", params->VL,
"VR", params->VR,
"N", (int)params->N,
"LDA", (int)params->LDA,
"LDVL", (int)params->LDVL,
"LDVR", (int)params->LDVR,
"LWORK", (int)params->LWORK,
"JOBVL", params->JOBVL,
"JOBVR", params->JOBVR);
}
/**begin repeat
#TYPE = FLOAT, DOUBLE#
#CTYPE = CFLOAT, CDOUBLE#
#typ = float, double#
#complextyp = COMPLEX_t, DOUBLECOMPLEX_t#
#lapack_func = sgeev, dgeev#
#zero = 0.0f, 0.0#
*/
static NPY_INLINE fortran_int
call_@lapack_func@(GEEV_PARAMS_t* params)
{
fortran_int rv;
LAPACK(@lapack_func@)(&params->JOBVL, &params->JOBVR,
&params->N, params->A, &params->LDA,
params->WR, params->WI,
params->VLR, &params->LDVL,
params->VRR, &params->LDVR,
params->WORK, &params->LWORK,
&rv);
return rv;
}
static NPY_INLINE int
init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *wr, *wi, *vlr, *vrr, *work, *w, *vl, *vr;
size_t safe_n = n;
size_t a_size = safe_n * safe_n * sizeof(@typ@);
size_t wr_size = safe_n * sizeof(@typ@);
size_t wi_size = safe_n * sizeof(@typ@);
size_t vlr_size = jobvl=='V' ? safe_n * safe_n * sizeof(@typ@) : 0;
size_t vrr_size = jobvr=='V' ? safe_n * safe_n * sizeof(@typ@) : 0;
size_t w_size = wr_size*2;
size_t vl_size = vlr_size*2;
size_t vr_size = vrr_size*2;
size_t work_count = 0;
fortran_int ld = fortran_int_max(n, 1);
/* allocate data for known sizes (all but work) */
mem_buff = malloc(a_size + wr_size + wi_size +
vlr_size + vrr_size +
w_size + vl_size + vr_size);
if (!mem_buff) {
goto error;
}
a = mem_buff;
wr = a + a_size;
wi = wr + wr_size;
vlr = wi + wi_size;
vrr = vlr + vlr_size;
w = vrr + vrr_size;
vl = w + w_size;
vr = vl + vl_size;
params->A = a;
params->WR = wr;
params->WI = wi;
params->VLR = vlr;
params->VRR = vrr;
params->W = w;
params->VL = vl;
params->VR = vr;
params->N = n;
params->LDA = ld;
params->LDVL = ld;
params->LDVR = ld;
params->JOBVL = jobvl;
params->JOBVR = jobvr;
/* Work size query */
{
@typ@ work_size_query;
params->LWORK = -1;
params->WORK = &work_size_query;
if (call_@lapack_func@(params) != 0) {
goto error;
}
work_count = (size_t)work_size_query;
}
mem_buff2 = malloc(work_count*sizeof(@typ@));
if (!mem_buff2) {
goto error;
}
work = mem_buff2;
params->LWORK = (fortran_int)work_count;
params->WORK = work;
return 1;
error:
free(mem_buff2);
free(mem_buff);
memset(params, 0, sizeof(*params));
return 0;
}
static NPY_INLINE void
mk_@TYPE@_complex_array_from_real(@complextyp@ *c, const @typ@ *re, size_t n)
{
size_t iter;
for (iter = 0; iter < n; ++iter) {
c[iter].array[0] = re[iter];
c[iter].array[1] = @zero@;
}
}
static NPY_INLINE void
mk_@TYPE@_complex_array(@complextyp@ *c,
const @typ@ *re,
const @typ@ *im,
size_t n)
{
size_t iter;
for (iter = 0; iter < n; ++iter) {
c[iter].array[0] = re[iter];
c[iter].array[1] = im[iter];
}
}
static NPY_INLINE void
mk_@TYPE@_complex_array_conjugate_pair(@complextyp@ *c,
const @typ@ *r,
size_t n)
{
size_t iter;
for (iter = 0; iter < n; ++iter) {
@typ@ re = r[iter];
@typ@ im = r[iter+n];
c[iter].array[0] = re;
c[iter].array[1] = im;
c[iter+n].array[0] = re;
c[iter+n].array[1] = -im;
}
}
/*
* make the complex eigenvectors from the real array produced by sgeev/zgeev.
* c is the array where the results will be left.
* r is the source array of reals produced by sgeev/zgeev
* i is the eigenvalue imaginary part produced by sgeev/zgeev
* n is so that the order of the matrix is n by n
*/
static NPY_INLINE void
mk_@lapack_func@_complex_eigenvectors(@complextyp@ *c,
const @typ@ *r,
const @typ@ *i,
size_t n)
{
size_t iter = 0;
while (iter < n)
{
if (i[iter] == @zero@) {
/* eigenvalue was real, eigenvectors as well... */
mk_@TYPE@_complex_array_from_real(c, r, n);
c += n;
r += n;
iter ++;
} else {
/* eigenvalue was complex, generate a pair of eigenvectors */
mk_@TYPE@_complex_array_conjugate_pair(c, r, n);
c += 2*n;
r += 2*n;
iter += 2;
}
}
}
static NPY_INLINE void
process_@lapack_func@_results(GEEV_PARAMS_t *params)
{
/* REAL versions of geev need the results to be translated
* into complex versions. This is the way to deal with imaginary
* results. In our gufuncs we will always return complex arrays!
*/
mk_@TYPE@_complex_array(params->W, params->WR, params->WI, params->N);
/* handle the eigenvectors */
if ('V' == params->JOBVL) {
mk_@lapack_func@_complex_eigenvectors(params->VL, params->VLR,
params->WI, params->N);
}
if ('V' == params->JOBVR) {
mk_@lapack_func@_complex_eigenvectors(params->VR, params->VRR,
params->WI, params->N);
}
}
/**end repeat**/
/**begin repeat
#TYPE = CFLOAT, CDOUBLE#
#typ = COMPLEX_t, DOUBLECOMPLEX_t#
#ftyp = fortran_complex, fortran_doublecomplex#
#realtyp = float, double#
#lapack_func = cgeev, zgeev#
*/
static NPY_INLINE fortran_int
call_@lapack_func@(GEEV_PARAMS_t* params)
{
fortran_int rv;
LAPACK(@lapack_func@)(&params->JOBVL, &params->JOBVR,
&params->N, params->A, &params->LDA,
params->W,
params->VL, &params->LDVL,
params->VR, &params->LDVR,
params->WORK, &params->LWORK,
params->WR, /* actually RWORK */
&rv);
return rv;
}
static NPY_INLINE int
init_@lapack_func@(GEEV_PARAMS_t* params,
char jobvl,
char jobvr,
fortran_int n)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *w, *vl, *vr, *work, *rwork;
size_t safe_n = n;
size_t a_size = safe_n * safe_n * sizeof(@ftyp@);
size_t w_size = safe_n * sizeof(@ftyp@);
size_t vl_size = jobvl=='V'? safe_n * safe_n * sizeof(@ftyp@) : 0;
size_t vr_size = jobvr=='V'? safe_n * safe_n * sizeof(@ftyp@) : 0;
size_t rwork_size = 2 * safe_n * sizeof(@realtyp@);
size_t work_count = 0;
size_t total_size = a_size + w_size + vl_size + vr_size + rwork_size;
fortran_int ld = fortran_int_max(n, 1);
mem_buff = malloc(total_size);
if (!mem_buff) {
goto error;
}
a = mem_buff;
w = a + a_size;
vl = w + w_size;
vr = vl + vl_size;
rwork = vr + vr_size;
params->A = a;
params->WR = rwork;
params->WI = NULL;
params->VLR = NULL;
params->VRR = NULL;
params->VL = vl;
params->VR = vr;
params->W = w;
params->N = n;
params->LDA = ld;
params->LDVL = ld;
params->LDVR = ld;
params->JOBVL = jobvl;
params->JOBVR = jobvr;
/* Work size query */
{
@typ@ work_size_query;
params->LWORK = -1;
params->WORK = &work_size_query;
if (call_@lapack_func@(params) != 0) {
goto error;
}
work_count = (size_t) work_size_query.array[0];
/* Fix a bug in lapack 3.0.0 */
if(work_count == 0) work_count = 1;
}
mem_buff2 = malloc(work_count*sizeof(@ftyp@));
if (!mem_buff2) {
goto error;
}
work = mem_buff2;
params->LWORK = (fortran_int)work_count;
params->WORK = work;
return 1;
error:
free(mem_buff2);
free(mem_buff);
memset(params, 0, sizeof(*params));
return 0;
}
static NPY_INLINE void
process_@lapack_func@_results(GEEV_PARAMS_t *NPY_UNUSED(params))
{
/* nothing to do here, complex versions are ready to copy out */
}
/**end repeat**/
/**begin repeat
#TYPE = FLOAT, DOUBLE, CDOUBLE#
#COMPLEXTYPE = CFLOAT, CDOUBLE, CDOUBLE#
#ftype = fortran_real, fortran_doublereal, fortran_doublecomplex#
#lapack_func = sgeev, dgeev, zgeev#
*/
static NPY_INLINE void
release_@lapack_func@(GEEV_PARAMS_t *params)
{
free(params->WORK);
free(params->A);
memset(params, 0, sizeof(*params));
}
static NPY_INLINE void
@TYPE@_eig_wrapper(char JOBVL,
char JOBVR,
char**args,
npy_intp* dimensions,
npy_intp* steps)
{
ptrdiff_t outer_steps[4];
size_t iter;
size_t outer_dim = *dimensions++;
size_t op_count = 2;
int error_occurred = get_fp_invalid_and_clear();
GEEV_PARAMS_t geev_params;
assert(JOBVL == 'N');
STACK_TRACE;
op_count += 'V'==JOBVL?1:0;
op_count += 'V'==JOBVR?1:0;
for (iter = 0; iter < op_count; ++iter) {
outer_steps[iter] = (ptrdiff_t) steps[iter];
}
steps += op_count;
if (init_@lapack_func@(&geev_params,
JOBVL, JOBVR,
(fortran_int)dimensions[0])) {
LINEARIZE_DATA_t a_in;
LINEARIZE_DATA_t w_out;
LINEARIZE_DATA_t vl_out;
LINEARIZE_DATA_t vr_out;
init_linearize_data(&a_in,
geev_params.N, geev_params.N,
steps[1], steps[0]);
steps += 2;
init_linearize_data(&w_out,
1, geev_params.N,
0, steps[0]);
steps += 1;
if ('V' == geev_params.JOBVL) {
init_linearize_data(&vl_out,
geev_params.N, geev_params.N,
steps[1], steps[0]);
steps += 2;
}
if ('V' == geev_params.JOBVR) {
init_linearize_data(&vr_out,
geev_params.N, geev_params.N,
steps[1], steps[0]);
}
for (iter = 0; iter < outer_dim; ++iter) {
int not_ok;
char **arg_iter = args;
/* copy the matrix in */
linearize_@TYPE@_matrix(geev_params.A, *arg_iter++, &a_in);
not_ok = call_@lapack_func@(&geev_params);
if (!not_ok) {
process_@lapack_func@_results(&geev_params);
delinearize_@COMPLEXTYPE@_matrix(*arg_iter++,
geev_params.W,
&w_out);
if ('V' == geev_params.JOBVL) {
delinearize_@COMPLEXTYPE@_matrix(*arg_iter++,
geev_params.VL,
&vl_out);
}
if ('V' == geev_params.JOBVR) {
delinearize_@COMPLEXTYPE@_matrix(*arg_iter++,
geev_params.VR,
&vr_out);
}
} else {
/* geev failed */
error_occurred = 1;
nan_@COMPLEXTYPE@_matrix(*arg_iter++, &w_out);
if ('V' == geev_params.JOBVL) {
nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vl_out);
}
if ('V' == geev_params.JOBVR) {
nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vr_out);
}
}
update_pointers((npy_uint8**)args, outer_steps, op_count);
}
release_@lapack_func@(&geev_params);
}
set_fp_invalid_or_clear(error_occurred);
}
static void
@TYPE@_eig(char **args,
npy_intp *dimensions,
npy_intp *steps,
void *NPY_UNUSED(func))
{
@TYPE@_eig_wrapper('N', 'V', args, dimensions, steps);
}
static void
@TYPE@_eigvals(char **args,
npy_intp *dimensions,
npy_intp *steps,
void *NPY_UNUSED(func))
{
@TYPE@_eig_wrapper('N', 'N', args, dimensions, steps);
}
/**end repeat**/
/* -------------------------------------------------------------------------- */
/* singular value decomposition */
typedef struct gessd_params_struct
{
void *A;
void *S;
void *U;
void *VT;
void *WORK;
void *RWORK;
void *IWORK;
fortran_int M;
fortran_int N;
fortran_int LDA;
fortran_int LDU;
fortran_int LDVT;
fortran_int LWORK;
char JOBZ;
} GESDD_PARAMS_t;
static NPY_INLINE void
dump_gesdd_params(const char *name,
GESDD_PARAMS_t *params)
{
TRACE_TXT("\n%s:\n"\
"%14s: %18p\n"\
"%14s: %18p\n"\
"%14s: %18p\n"\
"%14s: %18p\n"\
"%14s: %18p\n"\
"%14s: %18p\n"\
"%14s: %18p\n"\
"%14s: %18d\n"\
"%14s: %18d\n"\
"%14s: %18d\n"\
"%14s: %18d\n"\
"%14s: %18d\n"\
"%14s: %18d\n"\
"%14s: %15c'%c'\n",
name,
"A", params->A,
"S", params->S,
"U", params->U,
"VT", params->VT,
"WORK", params->WORK,
"RWORK", params->RWORK,
"IWORK", params->IWORK,
"M", (int)params->M,
"N", (int)params->N,
"LDA", (int)params->LDA,
"LDU", (int)params->LDU,
"LDVT", (int)params->LDVT,
"LWORK", (int)params->LWORK,
"JOBZ", ' ', params->JOBZ);
}
static NPY_INLINE int
compute_urows_vtcolumns(char jobz,
fortran_int m, fortran_int n,
fortran_int *urows, fortran_int *vtcolumns)
{
fortran_int min_m_n = fortran_int_min(m, n);
switch(jobz)
{
case 'N':
*urows = 0;
*vtcolumns = 0;
break;
case 'A':
*urows = m;
*vtcolumns = n;
break;
case 'S':
{
*urows = min_m_n;
*vtcolumns = min_m_n;
}
break;
default:
return 0;
}
return 1;
}
/**begin repeat
#TYPE = FLOAT, DOUBLE#
#lapack_func = sgesdd, dgesdd#
#ftyp = fortran_real, fortran_doublereal#
*/
static NPY_INLINE fortran_int
call_@lapack_func@(GESDD_PARAMS_t *params)
{
fortran_int rv;
LAPACK(@lapack_func@)(&params->JOBZ, &params->M, &params->N,
params->A, &params->LDA,
params->S,
params->U, &params->LDU,
params->VT, &params->LDVT,
params->WORK, &params->LWORK,
params->IWORK,
&rv);
return rv;
}
static NPY_INLINE int
init_@lapack_func@(GESDD_PARAMS_t *params,
char jobz,
fortran_int m,
fortran_int n)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *s, *u, *vt, *work, *iwork;
size_t safe_m = m;
size_t safe_n = n;
size_t a_size = safe_m * safe_n * sizeof(@ftyp@);
fortran_int min_m_n = fortran_int_min(m, n);
size_t safe_min_m_n = min_m_n;
size_t s_size = safe_min_m_n * sizeof(@ftyp@);
fortran_int u_row_count, vt_column_count;
size_t safe_u_row_count, safe_vt_column_count;
size_t u_size, vt_size;
fortran_int work_count;
size_t work_size;
size_t iwork_size = 8 * safe_min_m_n * sizeof(fortran_int);
fortran_int ld = fortran_int_max(m, 1);
if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) {
goto error;
}
safe_u_row_count = u_row_count;
safe_vt_column_count = vt_column_count;
u_size = safe_u_row_count * safe_m * sizeof(@ftyp@);
vt_size = safe_n * safe_vt_column_count * sizeof(@ftyp@);
mem_buff = malloc(a_size + s_size + u_size + vt_size + iwork_size);
if (!mem_buff) {
goto error;
}
a = mem_buff;
s = a + a_size;
u = s + s_size;
vt = u + u_size;
iwork = vt + vt_size;
/* fix vt_column_count so that it is a valid lapack parameter (0 is not) */
vt_column_count = fortran_int_max(1, vt_column_count);
params->M = m;
params->N = n;
params->A = a;
params->S = s;
params->U = u;
params->VT = vt;
params->RWORK = NULL;
params->IWORK = iwork;
params->M = m;
params->N = n;
params->LDA = ld;
params->LDU = ld;
params->LDVT = vt_column_count;
params->JOBZ = jobz;
/* Work size query */
{
@ftyp@ work_size_query;
params->LWORK = -1;
params->WORK = &work_size_query;
if (call_@lapack_func@(params) != 0) {
goto error;
}
work_count = (fortran_int)work_size_query;
/* Fix a bug in lapack 3.0.0 */
if(work_count == 0) work_count = 1;
work_size = (size_t)work_count * sizeof(@ftyp@);
}
mem_buff2 = malloc(work_size);
if (!mem_buff2) {
goto error;
}
work = mem_buff2;
params->LWORK = work_count;
params->WORK = work;
return 1;
error:
TRACE_TXT("%s failed init\n", __FUNCTION__);
free(mem_buff);
free(mem_buff2);
memset(params, 0, sizeof(*params));
return 0;
}
/**end repeat**/
/**begin repeat
#TYPE = CFLOAT, CDOUBLE#
#ftyp = fortran_complex, fortran_doublecomplex#
#frealtyp = fortran_real, fortran_doublereal#
#typ = COMPLEX_t, DOUBLECOMPLEX_t#
#lapack_func = cgesdd, zgesdd#
*/
static NPY_INLINE fortran_int
call_@lapack_func@(GESDD_PARAMS_t *params)
{
fortran_int rv;
LAPACK(@lapack_func@)(&params->JOBZ, &params->M, &params->N,
params->A, &params->LDA,
params->S,
params->U, &params->LDU,
params->VT, &params->LDVT,
params->WORK, &params->LWORK,
params->RWORK,
params->IWORK,
&rv);
return rv;
}
static NPY_INLINE int
init_@lapack_func@(GESDD_PARAMS_t *params,
char jobz,
fortran_int m,
fortran_int n)
{
npy_uint8 *mem_buff = NULL, *mem_buff2 = NULL;
npy_uint8 *a,*s, *u, *vt, *work, *rwork, *iwork;
size_t a_size, s_size, u_size, vt_size, work_size, rwork_size, iwork_size;
size_t safe_u_row_count, safe_vt_column_count;
fortran_int u_row_count, vt_column_count, work_count;
size_t safe_m = m;
size_t safe_n = n;
fortran_int min_m_n = fortran_int_min(m, n);
size_t safe_min_m_n = min_m_n;
fortran_int ld = fortran_int_max(m, 1);
if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) {
goto error;
}
safe_u_row_count = u_row_count;
safe_vt_column_count = vt_column_count;
a_size = safe_m * safe_n * sizeof(@ftyp@);
s_size = safe_min_m_n * sizeof(@frealtyp@);
u_size = safe_u_row_count * safe_m * sizeof(@ftyp@);
vt_size = safe_n * safe_vt_column_count * sizeof(@ftyp@);
rwork_size = 'N'==jobz?
(7 * safe_min_m_n) :
(5*safe_min_m_n * safe_min_m_n + 5*safe_min_m_n);
rwork_size *= sizeof(@ftyp@);
iwork_size = 8 * safe_min_m_n* sizeof(fortran_int);
mem_buff = malloc(a_size +
s_size +
u_size +
vt_size +
rwork_size +
iwork_size);
if (!mem_buff) {
goto error;
}
a = mem_buff;
s = a + a_size;
u = s + s_size;
vt = u + u_size;
rwork = vt + vt_size;
iwork = rwork + rwork_size;
/* fix vt_column_count so that it is a valid lapack parameter (0 is not) */
vt_column_count = fortran_int_max(1, vt_column_count);
params->A = a;
params->S = s;
params->U = u;
params->VT = vt;
params->RWORK = rwork;
params->IWORK = iwork;
params->M = m;
params->N = n;
params->LDA = ld;
params->LDU = ld;
params->LDVT = vt_column_count;
params->JOBZ = jobz;
/* Work size query */
{
@ftyp@ work_size_query;
params->LWORK = -1;
params->WORK = &work_size_query;
if (call_@lapack_func@(params) != 0) {
goto error;
}
work_count = (fortran_int)((@typ@*)&work_size_query)->array[0];
/* Fix a bug in lapack 3.0.0 */
if(work_count == 0) work_count = 1;
work_size = (size_t)work_count * sizeof(@ftyp@);
}
mem_buff2 = malloc(work_size);
if (!mem_buff2) {
goto error;
}
work = mem_buff2;
params->LWORK = work_count;
params->WORK = work;
return 1;
error:
TRACE_TXT("%s failed init\n", __FUNCTION__);
free(mem_buff2);
free(mem_buff);
memset(params, 0, sizeof(*params));
return 0;
}
/**end repeat**/
/**begin repeat
#TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
#REALTYPE = FLOAT, DOUBLE, FLOAT, DOUBLE#
#lapack_func = sgesdd, dgesdd, cgesdd, zgesdd#
*/
static NPY_INLINE void
release_@lapack_func@(GESDD_PARAMS_t* params)
{
/* A and WORK contain allocated blocks */
free(params->A);
free(params->WORK);
memset(params, 0, sizeof(*params));
}
static NPY_INLINE void
@TYPE@_svd_wrapper(char JOBZ,
char **args,
npy_intp* dimensions,
npy_intp* steps)
{
ptrdiff_t outer_steps[4];
int error_occurred = get_fp_invalid_and_clear();
size_t iter;
size_t outer_dim = *dimensions++;
size_t op_count = (JOBZ=='N')?2:4;
GESDD_PARAMS_t params;
for (iter = 0; iter < op_count; ++iter) {
outer_steps[iter] = (ptrdiff_t) steps[iter];
}
steps += op_count;
if (init_@lapack_func@(&params,
JOBZ,
(fortran_int)dimensions[0],
(fortran_int)dimensions[1])) {
LINEARIZE_DATA_t a_in, u_out, s_out, v_out;
fortran_int min_m_n = params.M < params.N ? params.M : params.N;
init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]);
if ('N' == params.JOBZ) {
/* only the singular values are wanted */
init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]);
} else {
fortran_int u_columns, v_rows;
if ('S' == params.JOBZ) {
u_columns = min_m_n;
v_rows = min_m_n;
} else { /* JOBZ == 'A' */
u_columns = params.M;
v_rows = params.N;
}
init_linearize_data(&u_out,
u_columns, params.M,
steps[3], steps[2]);
init_linearize_data(&s_out,
1, min_m_n,
0, steps[4]);
init_linearize_data(&v_out,
params.N, v_rows,
steps[6], steps[5]);
}
for (iter = 0; iter < outer_dim; ++iter) {
int not_ok;
/* copy the matrix in */
linearize_@TYPE@_matrix(params.A, args[0], &a_in);
not_ok = call_@lapack_func@(&params);
if (!not_ok) {
if ('N' == params.JOBZ) {
delinearize_@REALTYPE@_matrix(args[1], params.S, &s_out);
} else {
if ('A' == params.JOBZ && min_m_n == 0) {
/* Lapack has betrayed us and left these uninitialized,
* so produce an identity matrix for whichever of u
* and v is not empty.
*/
identity_@TYPE@_matrix(params.U, params.M);
identity_@TYPE@_matrix(params.VT, params.N);
}
delinearize_@TYPE@_matrix(args[1], params.U, &u_out);
delinearize_@REALTYPE@_matrix(args[2], params.S, &s_out);
delinearize_@TYPE@_matrix(args[3], params.VT, &v_out);
}
} else {
error_occurred = 1;
if ('N' == params.JOBZ) {
nan_@REALTYPE@_matrix(args[1], &s_out);
} else {
nan_@TYPE@_matrix(args[1], &u_out);
nan_@REALTYPE@_matrix(args[2], &s_out);
nan_@TYPE@_matrix(args[3], &v_out);
}
}
update_pointers((npy_uint8**)args, outer_steps, op_count);
}
release_@lapack_func@(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
/**end repeat*/
/* svd gufunc entry points */
/**begin repeat
#TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
*/
static void
@TYPE@_svd_N(char **args,
npy_intp *dimensions,
npy_intp *steps,
void *NPY_UNUSED(func))
{
@TYPE@_svd_wrapper('N', args, dimensions, steps);
}
static void
@TYPE@_svd_S(char **args,
npy_intp *dimensions,
npy_intp *steps,
void *NPY_UNUSED(func))
{
@TYPE@_svd_wrapper('S', args, dimensions, steps);
}
static void
@TYPE@_svd_A(char **args,
npy_intp *dimensions,
npy_intp *steps,
void *NPY_UNUSED(func))
{
@TYPE@_svd_wrapper('A', args, dimensions, steps);
}
/**end repeat**/
/* -------------------------------------------------------------------------- */
/* least squares */
typedef struct gelsd_params_struct
{
fortran_int M;
fortran_int N;
fortran_int NRHS;
void *A;
fortran_int LDA;
void *B;
fortran_int LDB;
void *S;
void *RCOND;
fortran_int RANK;
void *WORK;
fortran_int LWORK;
void *RWORK;
void *IWORK;
} GELSD_PARAMS_t;
static inline void
dump_gelsd_params(const char *name,
GELSD_PARAMS_t *params)
{
TRACE_TXT("\n%s:\n"\
"%14s: %18p\n"\
"%14s: %18p\n"\
"%14s: %18p\n"\
"%14s: %18p\n"\
"%14s: %18p\n"\
"%14s: %18p\n"\
"%14s: %18d\n"\
"%14s: %18d\n"\
"%14s: %18d\n"\
"%14s: %18d\n"\
"%14s: %18d\n"\
"%14s: %18d\n"\
"%14s: %18d\n"\
"%14s: %18p\n",
name,
"A", params->A,
"B", params->B,
"S", params->S,
"WORK", params->WORK,
"RWORK", params->RWORK,
"IWORK", params->IWORK,
"M", (int)params->M,
"N", (int)params->N,
"NRHS", (int)params->NRHS,
"LDA", (int)params->LDA,
"LDB", (int)params->LDB,
"LWORK", (int)params->LWORK,
"RANK", (int)params->RANK,
"RCOND", params->RCOND);
}
/**begin repeat
#TYPE=FLOAT,DOUBLE#
#lapack_func=sgelsd,dgelsd#
#ftyp=fortran_real,fortran_doublereal#
*/
static inline fortran_int
call_@lapack_func@(GELSD_PARAMS_t *params)
{
fortran_int rv;
LAPACK(@lapack_func@)(&params->M, &params->N, &params->NRHS,
params->A, &params->LDA,
params->B, &params->LDB,
params->S,
params->RCOND, &params->RANK,
params->WORK, &params->LWORK,
params->IWORK,
&rv);
return rv;
}
static inline int
init_@lapack_func@(GELSD_PARAMS_t *params,
fortran_int m,
fortran_int n,
fortran_int nrhs)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *b, *s, *work, *iwork;
fortran_int min_m_n = fortran_int_min(m, n);
fortran_int max_m_n = fortran_int_max(m, n);
size_t safe_min_m_n = min_m_n;
size_t safe_max_m_n = max_m_n;
size_t safe_m = m;
size_t safe_n = n;
size_t safe_nrhs = nrhs;
size_t a_size = safe_m * safe_n * sizeof(@ftyp@);
size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@);
size_t s_size = safe_min_m_n * sizeof(@ftyp@);
fortran_int work_count;
size_t work_size;
size_t iwork_size;
fortran_int lda = fortran_int_max(1, m);
fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n));
mem_buff = malloc(a_size + b_size + s_size);
if (!mem_buff)
goto error;
a = mem_buff;
b = a + a_size;
s = b + b_size;
params->M = m;
params->N = n;
params->NRHS = nrhs;
params->A = a;
params->B = b;
params->S = s;
params->LDA = lda;
params->LDB = ldb;
{
/* compute optimal work size */
@ftyp@ work_size_query;
fortran_int iwork_size_query;
params->WORK = &work_size_query;
params->IWORK = &iwork_size_query;
params->RWORK = NULL;
params->LWORK = -1;
if (call_@lapack_func@(params) != 0)
goto error;
work_count = (fortran_int)work_size_query;
work_size = (size_t) work_size_query * sizeof(@ftyp@);
iwork_size = (size_t)iwork_size_query * sizeof(fortran_int);
}
mem_buff2 = malloc(work_size + iwork_size);
if (!mem_buff2)
goto error;
work = mem_buff2;
iwork = work + work_size;
params->WORK = work;
params->RWORK = NULL;
params->IWORK = iwork;
params->LWORK = work_count;
return 1;
error:
TRACE_TXT("%s failed init\n", __FUNCTION__);
free(mem_buff);
free(mem_buff2);
memset(params, 0, sizeof(*params));
return 0;
}
/**end repeat**/
/**begin repeat
#TYPE=CFLOAT,CDOUBLE#
#ftyp=fortran_complex,fortran_doublecomplex#
#frealtyp=fortran_real,fortran_doublereal#
#typ=COMPLEX_t,DOUBLECOMPLEX_t#
#lapack_func=cgelsd,zgelsd#
*/
static inline fortran_int
call_@lapack_func@(GELSD_PARAMS_t *params)
{
fortran_int rv;
LAPACK(@lapack_func@)(&params->M, &params->N, &params->NRHS,
params->A, &params->LDA,
params->B, &params->LDB,
params->S,
params->RCOND, &params->RANK,
params->WORK, &params->LWORK,
params->RWORK, params->IWORK,
&rv);
return rv;
}
static inline int
init_@lapack_func@(GELSD_PARAMS_t *params,
fortran_int m,
fortran_int n,
fortran_int nrhs)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *b, *s, *work, *iwork, *rwork;
fortran_int min_m_n = fortran_int_min(m, n);
fortran_int max_m_n = fortran_int_max(m, n);
size_t safe_min_m_n = min_m_n;
size_t safe_max_m_n = max_m_n;
size_t safe_m = m;
size_t safe_n = n;
size_t safe_nrhs = nrhs;
size_t a_size = safe_m * safe_n * sizeof(@ftyp@);
size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@);
size_t s_size = safe_min_m_n * sizeof(@frealtyp@);
fortran_int work_count;
size_t work_size, rwork_size, iwork_size;
fortran_int lda = fortran_int_max(1, m);
fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n));
mem_buff = malloc(a_size + b_size + s_size);
if (!mem_buff)
goto error;
a = mem_buff;
b = a + a_size;
s = b + b_size;
params->M = m;
params->N = n;
params->NRHS = nrhs;
params->A = a;
params->B = b;
params->S = s;
params->LDA = lda;
params->LDB = ldb;
{
/* compute optimal work size */
@ftyp@ work_size_query;
@frealtyp@ rwork_size_query;
fortran_int iwork_size_query;
params->WORK = &work_size_query;
params->IWORK = &iwork_size_query;
params->RWORK = &rwork_size_query;
params->LWORK = -1;
if (call_@lapack_func@(params) != 0)
goto error;
work_count = (fortran_int)work_size_query.r;
work_size = (size_t )work_size_query.r * sizeof(@ftyp@);
rwork_size = (size_t)rwork_size_query * sizeof(@frealtyp@);
iwork_size = (size_t)iwork_size_query * sizeof(fortran_int);
}
mem_buff2 = malloc(work_size + rwork_size + iwork_size);
if (!mem_buff2)
goto error;
work = mem_buff2;
rwork = work + work_size;
iwork = rwork + rwork_size;
params->WORK = work;
params->RWORK = rwork;
params->IWORK = iwork;
params->LWORK = work_count;
return 1;
error:
TRACE_TXT("%s failed init\n", __FUNCTION__);
free(mem_buff);
free(mem_buff2);
memset(params, 0, sizeof(*params));
return 0;
}
/**end repeat**/
/**begin repeat
#TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
#REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE#
#lapack_func=sgelsd,dgelsd,cgelsd,zgelsd#
#dot_func=sdot,ddot,cdotc,zdotc#
#typ = npy_float, npy_double, npy_cfloat, npy_cdouble#
#basetyp = npy_float, npy_double, npy_float, npy_double#
#ftyp = fortran_real, fortran_doublereal,
fortran_complex, fortran_doublecomplex#
#cmplx = 0, 0, 1, 1#
*/
static inline void
release_@lapack_func@(GELSD_PARAMS_t* params)
{
/* A and WORK contain allocated blocks */
free(params->A);
free(params->WORK);
memset(params, 0, sizeof(*params));
}
/** Compute the squared l2 norm of a contiguous vector */
static @basetyp@
@TYPE@_abs2(@typ@ *p, npy_intp n) {
npy_intp i;
@basetyp@ res = 0;
for (i = 0; i < n; i++) {
@typ@ el = p[i];
#if @cmplx@
res += el.real*el.real + el.imag*el.imag;
#else
res += el*el;
#endif
}
return res;
}
static void
@TYPE@_lstsq(char **args, npy_intp *dimensions, npy_intp *steps,
void *NPY_UNUSED(func))
{
GELSD_PARAMS_t params;
int error_occurred = get_fp_invalid_and_clear();
fortran_int n, m, nrhs;
fortran_int excess;
INIT_OUTER_LOOP_7
m = (fortran_int)dimensions[0];
n = (fortran_int)dimensions[1];
nrhs = (fortran_int)dimensions[2];
excess = m - n;
if (init_@lapack_func@(&params, m, n, nrhs)) {
LINEARIZE_DATA_t a_in, b_in, x_out, s_out, r_out;
init_linearize_data(&a_in, n, m, steps[1], steps[0]);
init_linearize_data_ex(&b_in, nrhs, m, steps[3], steps[2], fortran_int_max(n, m));
init_linearize_data_ex(&x_out, nrhs, n, steps[5], steps[4], fortran_int_max(n, m));
init_linearize_data(&r_out, 1, nrhs, 1, steps[6]);
init_linearize_data(&s_out, 1, fortran_int_min(n, m), 1, steps[7]);
BEGIN_OUTER_LOOP_7
int not_ok;
linearize_@TYPE@_matrix(params.A, args[0], &a_in);
linearize_@TYPE@_matrix(params.B, args[1], &b_in);
params.RCOND = args[2];
not_ok = call_@lapack_func@(&params);
if (!not_ok) {
delinearize_@TYPE@_matrix(args[3], params.B, &x_out);
*(npy_int*) args[5] = params.RANK;
delinearize_@REALTYPE@_matrix(args[6], params.S, &s_out);
/* Note that linalg.lstsq discards this when excess == 0 */
if (excess >= 0 && params.RANK == n) {
/* Compute the residuals as the square sum of each column */
int i;
char *resid = args[4];
@ftyp@ *components = (@ftyp@ *)params.B + n;
for (i = 0; i < nrhs; i++) {
@ftyp@ *vector = components + i*m;
/* Numpy and fortran floating types are the same size,
* so this cast is safe */
@basetyp@ abs2 = @TYPE@_abs2((@typ@ *)vector, excess);
memcpy(
resid + i*r_out.column_strides,
&abs2, sizeof(abs2));
}
}
else {
/* Note that this is always discarded by linalg.lstsq */
nan_@REALTYPE@_matrix(args[4], &r_out);
}
} else {
error_occurred = 1;
nan_@TYPE@_matrix(args[3], &x_out);
nan_@REALTYPE@_matrix(args[4], &r_out);
*(npy_int*) args[5] = -1;
nan_@REALTYPE@_matrix(args[6], &s_out);
}
END_OUTER_LOOP
release_@lapack_func@(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
/**end repeat**/
#pragma GCC diagnostic pop
/* -------------------------------------------------------------------------- */
/* gufunc registration */
static void *array_of_nulls[] = {
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL,
(void *)NULL
};
#define FUNC_ARRAY_NAME(NAME) NAME ## _funcs
#define GUFUNC_FUNC_ARRAY_REAL(NAME) \
static PyUFuncGenericFunction \
FUNC_ARRAY_NAME(NAME)[] = { \
FLOAT_ ## NAME, \
DOUBLE_ ## NAME \
}
#define GUFUNC_FUNC_ARRAY_REAL_COMPLEX(NAME) \
static PyUFuncGenericFunction \
FUNC_ARRAY_NAME(NAME)[] = { \
FLOAT_ ## NAME, \
DOUBLE_ ## NAME, \
CFLOAT_ ## NAME, \
CDOUBLE_ ## NAME \
}
/* There are problems with eig in complex single precision.
* That kernel is disabled
*/
#define GUFUNC_FUNC_ARRAY_EIG(NAME) \
static PyUFuncGenericFunction \
FUNC_ARRAY_NAME(NAME)[] = { \
FLOAT_ ## NAME, \
DOUBLE_ ## NAME, \
CDOUBLE_ ## NAME \
}
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(slogdet);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(det);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighlo);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighup);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshlo);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshup);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve1);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inv);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_lo);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_N);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_S);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_A);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(lstsq);
GUFUNC_FUNC_ARRAY_EIG(eig);
GUFUNC_FUNC_ARRAY_EIG(eigvals);
static char equal_2_types[] = {
NPY_FLOAT, NPY_FLOAT,
NPY_DOUBLE, NPY_DOUBLE,
NPY_CFLOAT, NPY_CFLOAT,
NPY_CDOUBLE, NPY_CDOUBLE
};
static char equal_3_types[] = {
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE,
NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT,
NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE
};
/* second result is logdet, that will always be a REAL */
static char slogdet_types[] = {
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE,
NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT,
NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE
};
static char eigh_types[] = {
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE,
NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT,
NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE
};
static char eighvals_types[] = {
NPY_FLOAT, NPY_FLOAT,
NPY_DOUBLE, NPY_DOUBLE,
NPY_CFLOAT, NPY_FLOAT,
NPY_CDOUBLE, NPY_DOUBLE
};
static char eig_types[] = {
NPY_FLOAT, NPY_CFLOAT, NPY_CFLOAT,
NPY_DOUBLE, NPY_CDOUBLE, NPY_CDOUBLE,
NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE
};
static char eigvals_types[] = {
NPY_FLOAT, NPY_CFLOAT,
NPY_DOUBLE, NPY_CDOUBLE,
NPY_CDOUBLE, NPY_CDOUBLE
};
static char svd_1_1_types[] = {
NPY_FLOAT, NPY_FLOAT,
NPY_DOUBLE, NPY_DOUBLE,
NPY_CFLOAT, NPY_FLOAT,
NPY_CDOUBLE, NPY_DOUBLE
};
static char svd_1_3_types[] = {
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE,
NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT,
NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE
};
/* A, b, rcond, x, resid, rank, s, */
static char lstsq_types[] = {
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_INT, NPY_FLOAT,
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_INT, NPY_DOUBLE,
NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_INT, NPY_FLOAT,
NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_INT, NPY_DOUBLE,
};
typedef struct gufunc_descriptor_struct {
char *name;
char *signature;
char *doc;
int ntypes;
int nin;
int nout;
PyUFuncGenericFunction *funcs;
char *types;
} GUFUNC_DESCRIPTOR_t;
GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = {
{
"slogdet",
"(m,m)->(),()",
"slogdet on the last two dimensions and broadcast on the rest. \n"\
"Results in two arrays, one with sign and the other with log of the"\
" determinants. \n"\
" \"(m,m)->(),()\" \n",
4, 1, 2,
FUNC_ARRAY_NAME(slogdet),
slogdet_types
},
{
"det",
"(m,m)->()",
"det of the last two dimensions and broadcast on the rest. \n"\
" \"(m,m)->()\" \n",
4, 1, 1,
FUNC_ARRAY_NAME(det),
equal_2_types
},
{
"eigh_lo",
"(m,m)->(m),(m,m)",
"eigh on the last two dimension and broadcast to the rest, using"\
" lower triangle \n"\
"Results in a vector of eigenvalues and a matrix with the"\
"eigenvectors. \n"\
" \"(m,m)->(m),(m,m)\" \n",
4, 1, 2,
FUNC_ARRAY_NAME(eighlo),
eigh_types
},
{
"eigh_up",
"(m,m)->(m),(m,m)",
"eigh on the last two dimension and broadcast to the rest, using"\
" upper triangle. \n"\
"Results in a vector of eigenvalues and a matrix with the"\
" eigenvectors. \n"\
" \"(m,m)->(m),(m,m)\" \n",
4, 1, 2,
FUNC_ARRAY_NAME(eighup),
eigh_types
},
{
"eigvalsh_lo",
"(m,m)->(m)",
"eigh on the last two dimension and broadcast to the rest, using"\
" lower triangle. \n"\
"Results in a vector of eigenvalues and a matrix with the"\
"eigenvectors. \n"\
" \"(m,m)->(m)\" \n",
4, 1, 1,
FUNC_ARRAY_NAME(eigvalshlo),
eighvals_types
},
{
"eigvalsh_up",
"(m,m)->(m)",
"eigvalsh on the last two dimension and broadcast to the rest,"\
" using upper triangle. \n"\
"Results in a vector of eigenvalues and a matrix with the"\
"eigenvectors.\n"\
" \"(m,m)->(m)\" \n",
4, 1, 1,
FUNC_ARRAY_NAME(eigvalshup),
eighvals_types
},
{
"solve",
"(m,m),(m,n)->(m,n)",
"solve the system a x = b, on the last two dimensions, broadcast"\
" to the rest. \n"\
"Results in a matrices with the solutions. \n"\
" \"(m,m),(m,n)->(m,n)\" \n",
4, 2, 1,
FUNC_ARRAY_NAME(solve),
equal_3_types
},
{
"solve1",
"(m,m),(m)->(m)",
"solve the system a x = b, for b being a vector, broadcast in"\
" the outer dimensions. \n"\
"Results in vectors with the solutions. \n"\
" \"(m,m),(m)->(m)\" \n",
4, 2, 1,
FUNC_ARRAY_NAME(solve1),
equal_3_types
},
{
"inv",
"(m, m)->(m, m)",
"compute the inverse of the last two dimensions and broadcast"\
" to the rest. \n"\
"Results in the inverse matrices. \n"\
" \"(m,m)->(m,m)\" \n",
4, 1, 1,
FUNC_ARRAY_NAME(inv),
equal_2_types
},
{
"cholesky_lo",
"(m,m)->(m,m)",
"cholesky decomposition of hermitian positive-definite matrices. \n"\
"Broadcast to all outer dimensions. \n"\
" \"(m,m)->(m,m)\" \n",
4, 1, 1,
FUNC_ARRAY_NAME(cholesky_lo),
equal_2_types
},
{
"svd_m",
"(m,n)->(m)",
"svd when n>=m. ",
4, 1, 1,
FUNC_ARRAY_NAME(svd_N),
svd_1_1_types
},
{
"svd_n",
"(m,n)->(n)",
"svd when n<=m",
4, 1, 1,
FUNC_ARRAY_NAME(svd_N),
svd_1_1_types
},
{
"svd_m_s",
"(m,n)->(m,m),(m),(m,n)",
"svd when m<=n",
4, 1, 3,
FUNC_ARRAY_NAME(svd_S),
svd_1_3_types
},
{
"svd_n_s",
"(m,n)->(m,n),(n),(n,n)",
"svd when m>=n",
4, 1, 3,
FUNC_ARRAY_NAME(svd_S),
svd_1_3_types
},
{
"svd_m_f",
"(m,n)->(m,m),(m),(n,n)",
"svd when m<=n",
4, 1, 3,
FUNC_ARRAY_NAME(svd_A),
svd_1_3_types
},
{
"svd_n_f",
"(m,n)->(m,m),(n),(n,n)",
"svd when m>=n",
4, 1, 3,
FUNC_ARRAY_NAME(svd_A),
svd_1_3_types
},
{
"eig",
"(m,m)->(m),(m,m)",
"eig on the last two dimension and broadcast to the rest. \n"\
"Results in a vector with the eigenvalues and a matrix with the"\
" eigenvectors. \n"\
" \"(m,m)->(m),(m,m)\" \n",
3, 1, 2,
FUNC_ARRAY_NAME(eig),
eig_types
},
{
"eigvals",
"(m,m)->(m)",
"eigvals on the last two dimension and broadcast to the rest. \n"\
"Results in a vector of eigenvalues. \n",
3, 1, 1,
FUNC_ARRAY_NAME(eigvals),
eigvals_types
},
{
"lstsq_m",
"(m,n),(m,nrhs),()->(n,nrhs),(nrhs),(),(m)",
"least squares on the last two dimensions and broadcast to the rest. \n"\
"For m <= n. \n",
4, 3, 4,
FUNC_ARRAY_NAME(lstsq),
lstsq_types
},
{
"lstsq_n",
"(m,n),(m,nrhs),()->(n,nrhs),(nrhs),(),(n)",
"least squares on the last two dimensions and broadcast to the rest. \n"\
"For m >= n, meaning that residuals are produced. \n",
4, 3, 4,
FUNC_ARRAY_NAME(lstsq),
lstsq_types
}
};
static void
addUfuncs(PyObject *dictionary) {
PyObject *f;
int i;
const int gufunc_count = sizeof(gufunc_descriptors)/
sizeof(gufunc_descriptors[0]);
for (i = 0; i < gufunc_count; i++) {
GUFUNC_DESCRIPTOR_t* d = &gufunc_descriptors[i];
f = PyUFunc_FromFuncAndDataAndSignature(d->funcs,
array_of_nulls,
d->types,
d->ntypes,
d->nin,
d->nout,
PyUFunc_None,
d->name,
d->doc,
0,
d->signature);
PyDict_SetItemString(dictionary, d->name, f);
#if 0
dump_ufunc_object((PyUFuncObject*) f);
#endif
Py_DECREF(f);
}
}
/* -------------------------------------------------------------------------- */
/* Module initialization stuff */
static PyMethodDef UMath_LinAlgMethods[] = {
{NULL, NULL, 0, NULL} /* Sentinel */
};
#if defined(NPY_PY3K)
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
UMATH_LINALG_MODULE_NAME,
NULL,
-1,
UMath_LinAlgMethods,
NULL,
NULL,
NULL,
NULL
};
#endif
#if defined(NPY_PY3K)
#define RETVAL(x) x
PyObject *PyInit__umath_linalg(void)
#else
#define RETVAL(x)
PyMODINIT_FUNC
init_umath_linalg(void)
#endif
{
PyObject *m;
PyObject *d;
PyObject *version;
init_constants();
#if defined(NPY_PY3K)
m = PyModule_Create(&moduledef);
#else
m = Py_InitModule(UMATH_LINALG_MODULE_NAME, UMath_LinAlgMethods);
#endif
if (m == NULL) {
return RETVAL(NULL);
}
import_array();
import_ufunc();
d = PyModule_GetDict(m);
version = PyString_FromString(umath_linalg_version_string);
PyDict_SetItemString(d, "__version__", version);
Py_DECREF(version);
/* Load the ufunc operators into the module's namespace */
addUfuncs(d);
if (PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"cannot load _umath_linalg module.");
return RETVAL(NULL);
}
return RETVAL(m);
}