Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

ENH: sparse/dsolve: upgrade to SuperLU 4.3

Merge branch 'enh/superlu-update'
  • Loading branch information...
commit be6c2b63ce2f838156733cf7ca2c62d132ca3017 2 parents 1983db6 + f414439
@pv pv authored
Showing with 1,861 additions and 1,464 deletions.
  1. +9 −6 scipy/sparse/linalg/dsolve/SuperLU/README
  2. +2 −2 scipy/sparse/linalg/dsolve/SuperLU/SRC/cdiagonal.c
  3. +3 −3 scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsequ.c
  4. +129 −95 scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsisx.c
  5. +21 −12 scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsitrf.c
  6. +17 −17 scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsrfs.c
  7. +28 −25 scipy/sparse/linalg/dsolve/SuperLU/SRC/cgssvx.c
  8. +4 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/clacon.c
  9. +3 −3 scipy/sparse/linalg/dsolve/SuperLU/SRC/clangs.c
  10. +0 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/claqgs.c
  11. +5 −5 scipy/sparse/linalg/dsolve/SuperLU/SRC/cldperm.c
  12. +3 −3 scipy/sparse/linalg/dsolve/SuperLU/SRC/cmemory.c
  13. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/cpanel_bmod.c
  14. +12 −14 scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c
  15. +3 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotgrowth.c
  16. +2 −2 scipy/sparse/linalg/dsolve/SuperLU/SRC/creadtriple.c
  17. +2 −2 scipy/sparse/linalg/dsolve/SuperLU/SRC/cutil.c
  18. +0 −58 scipy/sparse/linalg/dsolve/SuperLU/SRC/dGetDiagU.c
  19. +129 −95 scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsisx.c
  20. +25 −11 scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsitrf.c
  21. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsrfs.c
  22. +28 −25 scipy/sparse/linalg/dsolve/SuperLU/SRC/dgssvx.c
  23. +0 −234 scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsL.c
  24. +0 −224 scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsU.c
  25. +0 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/dlaqgs.c
  26. +4 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/dldperm.c
  27. +3 −3 scipy/sparse/linalg/dsolve/SuperLU/SRC/dmemory.c
  28. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/dpanel_bmod.c
  29. +9 −11 scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotL.c
  30. +0 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/dpivotgrowth.c
  31. +2 −2 scipy/sparse/linalg/dsolve/SuperLU/SRC/dreadtriple.c
  32. +24 −25 scipy/sparse/linalg/dsolve/SuperLU/SRC/get_perm_c.c
  33. +17 −5 scipy/sparse/linalg/dsolve/SuperLU/SRC/html_mainpage.h
  34. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccolumn_dfs.c
  35. +17 −8 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ccopy_to_ucol.c
  36. +33 −15 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cdrop_row.c
  37. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpanel_dfs.c
  38. +13 −13 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_cpivotL.c
  39. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dcolumn_dfs.c
  40. +12 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dcopy_to_ucol.c
  41. +30 −8 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_ddrop_row.c
  42. +4 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_dpivotL.c
  43. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_scolumn_dfs.c
  44. +12 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_scopy_to_ucol.c
  45. +30 −8 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_sdrop_row.c
  46. +4 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_spivotL.c
  47. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zcolumn_dfs.c
  48. +13 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zcopy_to_ucol.c
  49. +27 −9 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zdrop_row.c
  50. +4 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/ilu_zpivotL.c
  51. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/memory.c
  52. +74 −0 scipy/sparse/linalg/dsolve/SuperLU/SRC/qselect.c
  53. +3 −3 scipy/sparse/linalg/dsolve/SuperLU/SRC/scipy_slu_config.h
  54. +3 −3 scipy/sparse/linalg/dsolve/SuperLU/SRC/scomplex.c
  55. +3 −3 scipy/sparse/linalg/dsolve/SuperLU/SRC/scsum1.c
  56. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsequ.c
  57. +129 −95 scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsisx.c
  58. +25 −11 scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsitrf.c
  59. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/sgsrfs.c
  60. +28 −25 scipy/sparse/linalg/dsolve/SuperLU/SRC/sgssvx.c
  61. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/slamch.c
  62. +0 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/slaqgs.c
  63. +4 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/sldperm.c
  64. +109 −12 scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_Cnames.h
  65. +7 −5 scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_cdefs.h
  66. +6 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_ddefs.h
  67. +2 −2 scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_scomplex.h
  68. +7 −5 scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_sdefs.h
  69. +72 −72 scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_util.h
  70. +6 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/slu_zdefs.h
  71. +3 −3 scipy/sparse/linalg/dsolve/SuperLU/SRC/smemory.c
  72. +14 −5 scipy/sparse/linalg/dsolve/SuperLU/SRC/sp_ienv.c
  73. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/spanel_bmod.c
  74. +9 −11 scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotL.c
  75. +0 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/spivotgrowth.c
  76. +2 −2 scipy/sparse/linalg/dsolve/SuperLU/SRC/sreadtriple.c
  77. +71 −0 scipy/sparse/linalg/dsolve/SuperLU/SRC/superlu_enum_consts.h
  78. +8 −6 scipy/sparse/linalg/dsolve/SuperLU/SRC/superlu_timer.c
  79. +19 −8 scipy/sparse/linalg/dsolve/SuperLU/SRC/util.c
  80. +129 −95 scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsisx.c
  81. +20 −11 scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsitrf.c
  82. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/zgsrfs.c
  83. +28 −25 scipy/sparse/linalg/dsolve/SuperLU/SRC/zgssvx.c
  84. +0 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/zlaqgs.c
  85. +4 −4 scipy/sparse/linalg/dsolve/SuperLU/SRC/zldperm.c
  86. +3 −3 scipy/sparse/linalg/dsolve/SuperLU/SRC/zmemory.c
  87. +1 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/zpanel_bmod.c
  88. +9 −11 scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotL.c
  89. +0 −1  scipy/sparse/linalg/dsolve/SuperLU/SRC/zpivotgrowth.c
  90. +2 −2 scipy/sparse/linalg/dsolve/SuperLU/SRC/zreadtriple.c
  91. +347 −0 scipy/sparse/linalg/dsolve/SuperLU/scipychanges.patch
  92. +0 −23 scipy/sparse/linalg/dsolve/SuperLU/scipychanges.txt
  93. +10 −5 scipy/sparse/linalg/dsolve/_superluobject.c
  94. +7 −1 scipy/sparse/linalg/dsolve/_superluobject.h
View
15 scipy/sparse/linalg/dsolve/SuperLU/README
@@ -1,4 +1,4 @@
- SuperLU (Version 4.0)
+ SuperLU (Version 4.1)
=====================
Copyright (c) 2003, The Regents of the University of California, through
@@ -77,14 +77,14 @@ on your system setup:
machine specific has been defined in this include file.
Example machine-specific make.inc include files are provided
- in the MAKE_INC/ directory for several systems, such as
- IBM RS/6000, DEC Alpha, SunOS 4.x, SunOS 5.x (Solaris), HP-PA and
- SGI Iris 4.x. When you have selected the machine to which you wish
+ in the MAKE_INC/ directory for several systems, such as Linux,
+ IBM RS/6000, SunOS 5.x (Solaris), HP-PA and MacX.
+ When you have selected the machine to which you wish
to install SuperLU, copy the appropriate sample include file (if one
is present) into make.inc. For example, if you wish to run
- SuperLU on an IBM RS/6000, you can do
+ SuperLU on an linux, you can do
- cp MAKE_INC/make.rs6k make.inc
+ cp MAKE_INC/make.linux make.inc
For the systems other than listed above, slight modifications to the
make.inc file will need to be made.
@@ -154,3 +154,6 @@ The test results are in the files below:
October 15, 2003 Version 3.0
August 1, 2008 Version 3.1
June 30, 2009 Version 4.0
+ November 23, 2010 Version 4.1
+ August 25, 2011 Version 4.2
+
View
4 scipy/sparse/linalg/dsolve/SuperLU/SRC/cdiagonal.c
@@ -95,7 +95,7 @@ int cdominate(int n, NCformat *Astore)
{
if ((rowind_new[j + fill] = rowind[j]) == i) diag = j;
nzval_new[j + fill] = nzval[j];
- s += slu_c_abs1(&nzval_new[j + fill]);
+ s += c_abs1(&nzval_new[j + fill]);
}
if (diag >= 0) {
nzval_new[diag+fill].r = s * 3.0;
@@ -122,7 +122,7 @@ int cdominate(int n, NCformat *Astore)
for (j = colptr[i]; j < colptr[i + 1]; j++)
{
if (rowind[j] == i) diag = j;
- s += slu_c_abs1(&nzval[j]);
+ s += c_abs1(&nzval[j]);
}
nzval[diag].r = s * 3.0;
nzval[diag].i = 0.0;
View
6 scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsequ.c
@@ -92,7 +92,7 @@ cgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
int i, j, irow;
float rcmin, rcmax;
float bignum, smlnum;
- extern double slamch_(char *);
+ extern float slamch_(char *);
/* Test the input parameters. */
*info = 0;
@@ -127,7 +127,7 @@ cgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
for (j = 0; j < A->ncol; ++j)
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
irow = Astore->rowind[i];
- r[irow] = SUPERLU_MAX( r[irow], slu_c_abs1(&Aval[i]) );
+ r[irow] = SUPERLU_MAX( r[irow], c_abs1(&Aval[i]) );
}
/* Find the maximum and minimum scale factors. */
@@ -162,7 +162,7 @@ cgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
for (j = 0; j < A->ncol; ++j)
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
irow = Astore->rowind[i];
- c[j] = SUPERLU_MAX( c[j], slu_c_abs1(&Aval[i]) * r[irow] );
+ c[j] = SUPERLU_MAX( c[j], c_abs1(&Aval[i]) * r[irow] );
}
/* Find the maximum and minimum scale factors. */
View
224 scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsisx.c
@@ -1,11 +1,12 @@
/*! @file cgsisx.c
- * \brief Gives the approximate solutions of linear equations A*X=B or A'*X=B
+ * \brief Computes an approximate solutions of linear equations A*X=B or A'*X=B
*
* <pre>
- * -- SuperLU routine (version 4.0) --
+ * -- SuperLU routine (version 4.2) --
* Lawrence Berkeley National Laboratory.
- * June 30, 2009
+ * November, 2010
+ * August, 2011
* </pre>
*/
#include "slu_cdefs.h"
@@ -16,9 +17,10 @@
* Purpose
* =======
*
- * CGSISX gives the approximate solutions of linear equations A*X=B or A'*X=B,
- * using the ILU factorization from cgsitrf(). An estimation of
- * the condition number is provided. It performs the following steps:
+ * CGSISX computes an approximate solutions of linear equations
+ * A*X=B or A'*X=B, using the ILU factorization from cgsitrf().
+ * An estimation of the condition number is provided.
+ * The routine performs the following steps:
*
* 1. If A is stored column-wise (A->Stype = SLU_NC):
*
@@ -71,50 +73,50 @@
* entries of modulus 1 on the diagonal and off-diagonal entries
* of modulus at most 1. If MC64 fails, dgsequ() is used to
* equilibrate the system.
+ * ( Default: LargeDiag )
* 2) options->ILU_DropTol = tau is the threshold for dropping.
* For L, it is used directly (for the whole row in a supernode);
* For U, ||A(:,i)||_oo * tau is used as the threshold
* for the i-th column.
* If a secondary dropping rule is required, tau will
* also be used to compute the second threshold.
+ * ( Default: 1e-4 )
* 3) options->ILU_FillFactor = gamma, used as the initial guess
* of memory growth.
* If a secondary dropping rule is required, it will also
* be used as an upper bound of the memory.
+ * ( Default: 10 )
* 4) options->ILU_DropRule specifies the dropping rule.
- * Option Explanation
- * ====== ===========
- * DROP_BASIC: Basic dropping rule, supernodal based ILU.
- * DROP_PROWS: Supernodal based ILUTP, p = gamma * nnz(A) / n.
- * DROP_COLUMN: Variation of ILUTP, for j-th column,
- * p = gamma * nnz(A(:,j)).
- * DROP_AREA; Variation of ILUTP, for j-th column, use
- * nnz(F(:,1:j)) / nnz(A(:,1:j)) to control the
- * memory.
- * DROP_DYNAMIC: Modify the threshold tau during the
- * factorizaion.
- * If nnz(L(:,1:j)) / nnz(A(:,1:j)) < gamma
- * tau_L(j) := MIN(1, tau_L(j-1) * 2);
- * Otherwise
- * tau_L(j) := MIN(1, tau_L(j-1) * 2);
- * tau_U(j) uses the similar rule.
- * NOTE: the thresholds used by L and U are
- * indenpendent.
- * DROP_INTERP: Compute the second dropping threshold by
- * interpolation instead of sorting (default).
- * In this case, the actual fill ratio is not
- * guaranteed smaller than gamma.
+ * Option Meaning
+ * ====== ===========
+ * DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau).
+ * DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma*nnz(A)/n.
+ * DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column,
+ * p = gamma * nnz(A(:,j)).
+ * DROP_AREA: Variation of ILUTP, for j-th column, use
+ * nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
+ * DROP_DYNAMIC: Modify the threshold tau during factorizaion:
+ * If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
+ * tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
+ * Otherwise
+ * tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
+ * tau_U(j) uses the similar rule.
+ * NOTE: the thresholds used by L and U are separate.
+ * DROP_INTERP: Compute the second dropping threshold by
+ * interpolation instead of sorting (default).
+ * In this case, the actual fill ratio is not
+ * guaranteed smaller than gamma.
* DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
- * ( The default option is DROP_BASIC | DROP_AREA. )
- * 5) options->ILU_Norm is the criterion of computing the average
- * value of a row in L.
- * options->ILU_Norm average(x[1:n])
+ * ( Default: DROP_BASIC | DROP_AREA )
+ * 5) options->ILU_Norm is the criterion of measuring the magnitude
+ * of a row in a supernode of L. ( Default is INF_NORM )
+ * options->ILU_Norm RowSize(x[1:n])
* ================= ===============
* ONE_NORM ||x||_1 / n
* TWO_NORM ||x||_2 / sqrt(n)
* INF_NORM max{|x[i]|}
* 6) options->ILU_MILU specifies the type of MILU's variation.
- * = SILU (default): do not perform MILU;
+ * = SILU: do not perform Modified ILU;
* = SMILU_1 (not recommended):
* U(i,i) := U(i,i) + sum(dropped entries);
* = SMILU_2:
@@ -123,10 +125,12 @@
* U(i,i) := U(i,i) + SGN(U(i,i)) * sum(|dropped entries|);
* NOTE: Even SMILU_1 does not preserve the column sum because of
* late dropping.
+ * ( Default: SILU )
* 7) options->ILU_FillTol is used as the perturbation when
* encountering zero pivots. If some U(i,i) = 0, so that U is
* exactly singular, then
* U(i,i) := ||A(:,i)|| * options->ILU_FillTol ** (1 - i / n).
+ * ( Default: 1e-2 )
*
* 2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
* to the transpose of A:
@@ -194,8 +198,11 @@
* On entry, If options->Fact = FACTORED and equed is not 'N',
* then A must have been equilibrated by the scaling factors in
* R and/or C.
- * On exit, A is not modified if options->Equil = NO, or if
- * options->Equil = YES but equed = 'N' on exit.
+ * On exit, A is not modified
+ * if options->Equil = NO, or
+ * if options->Equil = YES but equed = 'N' on exit, or
+ * if options->RowPerm = NO.
+ *
* Otherwise, if options->Equil = YES and equed is not 'N',
* A is scaled as follows:
* If A->Stype = SLU_NC:
@@ -207,6 +214,16 @@
* equed = 'C': transpose(A) := transpose(A) * diag(C)
* equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C).
*
+ * If options->RowPerm = LargeDiag, MC64 is used to scale and permute
+ * the matrix to an I-matrix, that is A is modified as follows:
+ * P*Dr*A*Dc has entries of modulus 1 on the diagonal and
+ * off-diagonal entries of modulus at most 1. P is a permutation
+ * obtained from MC64.
+ * If MC64 fails, cgsequ() is used to equilibrate the system,
+ * and A is scaled as above, but no permutation is involved.
+ * On exit, A is restored to the orginal row numbering, so
+ * Dr*A*Dc is returned.
+ *
* perm_c (input/output) int*
* If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
* which defines the permutation matrix Pc; perm_c[i] = j means
@@ -223,8 +240,8 @@
* perm_r (input/output) int*
* If A->Stype = SLU_NC, row permutation vector of size A->nrow,
* which defines the permutation matrix Pr, and is determined
- * by partial pivoting. perm_r[i] = j means row i of A is in
- * position j in Pr*A.
+ * by MC64 first then followed by partial pivoting.
+ * perm_r[i] = j means row i of A is in position j in Pr*A.
*
* If A->Stype = SLU_NR, permutation vector of size A->ncol, which
* determines permutation of rows of transpose(A)
@@ -261,7 +278,7 @@
* If equed = 'N' or 'C', R is not accessed.
* If options->Fact = FACTORED, R is an input argument,
* otherwise, R is output.
- * If options->zFact = FACTORED and equed = 'R' or 'B', each element
+ * If options->Fact = FACTORED and equed = 'R' or 'B', each element
* of R must be positive.
*
* C (input/output) float*, dimension (A->ncol)
@@ -384,7 +401,7 @@ cgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
DNformat *Bstore, *Xstore;
complex *Bmat, *Xmat;
- int ldb, ldx, nrhs;
+ int ldb, ldx, nrhs, n;
SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
SuperMatrix AC; /* Matrix postmultiplied by Pc */
int colequ, equil, nofact, notran, rowequ, permc_spec, mc64;
@@ -397,7 +414,7 @@ cgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
double t0; /* temporary time */
double *utime;
- int *perm = NULL;
+ int *perm = NULL; /* permutation returned from MC64 */
/* External functions */
extern float clangs(char *, SuperMatrix *);
@@ -409,6 +426,7 @@ cgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
ldb = Bstore->lda;
ldx = Xstore->lda;
nrhs = B->ncol;
+ n = B->nrow;
*info = 0;
nofact = (options->Fact != FACTORED);
@@ -427,10 +445,12 @@ cgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
}
/* Test the input parameters */
- if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
+ if (options->Fact != DOFACT && options->Fact != SamePattern &&
options->Fact != SamePattern_SameRowPerm &&
- !notran && options->Trans != TRANS && options->Trans != CONJ &&
- !equil && options->Equil != NO)
+ options->Fact != FACTORED &&
+ options->Trans != NOTRANS && options->Trans != TRANS &&
+ options->Trans != CONJ &&
+ options->Equil != NO && options->Equil != YES)
*info = -1;
else if ( A->nrow != A->ncol || A->nrow < 0 ||
(A->Stype != SLU_NC && A->Stype != SLU_NR) ||
@@ -516,37 +536,46 @@ cgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
int *colptr = Astore->colptr;
int *rowind = Astore->rowind;
complex *nzval = (complex *)Astore->nzval;
- int n = AA->nrow;
if ( mc64 ) {
- *equed = 'B';
- rowequ = colequ = 1;
t0 = SuperLU_timer_();
if ((perm = intMalloc(n)) == NULL)
ABORT("SUPERLU_MALLOC fails for perm[]");
info1 = cldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C);
- if (info1 > 0) { /* MC64 fails, call cgsequ() later */
+ if (info1 != 0) { /* MC64 fails, call cgsequ() later */
mc64 = 0;
SUPERLU_FREE(perm);
perm = NULL;
} else {
- for (i = 0; i < n; i++) {
- R[i] = exp(R[i]);
- C[i] = exp(C[i]);
- }
- /* permute and scale the matrix */
+ if ( equil ) {
+ rowequ = colequ = 1;
+ for (i = 0; i < n; i++) {
+ R[i] = exp(R[i]);
+ C[i] = exp(C[i]);
+ }
+ /* scale the matrix */
+ for (j = 0; j < n; j++) {
+ for (i = colptr[j]; i < colptr[j + 1]; i++) {
+ cs_mult(&nzval[i], &nzval[i], R[rowind[i]] * C[j]);
+ }
+ }
+ *equed = 'B';
+ }
+
+ /* permute the matrix */
for (j = 0; j < n; j++) {
for (i = colptr[j]; i < colptr[j + 1]; i++) {
- cs_mult(&nzval[i], &nzval[i], R[rowind[i]] * C[j]);
+ /*nzval[i] *= R[rowind[i]] * C[j];*/
rowind[i] = perm[rowind[i]];
}
}
}
utime[EQUIL] = SuperLU_timer_() - t0;
}
- if ( !mc64 & equil ) {
+
+ if ( !mc64 & equil ) { /* Only perform equilibration, no row perm */
t0 = SuperLU_timer_();
/* Compute row and column scalings to equilibrate the matrix A. */
cgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
@@ -561,22 +590,6 @@ cgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
}
}
- if ( nrhs > 0 ) {
- /* Scale the right hand side if equilibration was performed. */
- if ( notran ) {
- if ( rowequ ) {
- for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
- cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]);
- }
- }
- } else if ( colequ ) {
- for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
- cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]);
- }
- }
- }
if ( nofact ) {
@@ -608,6 +621,26 @@ cgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
mem_usage->total_needed = *info - A->ncol;
return;
}
+
+ if ( mc64 ) { /* Fold MC64's perm[] into perm_r[]. */
+ NCformat *Astore = AA->Store;
+ int nnz = Astore->nnz, *rowind = Astore->rowind;
+ int *perm_tmp, *iperm;
+ if ((perm_tmp = intMalloc(2*n)) == NULL)
+ ABORT("SUPERLU_MALLOC fails for perm_tmp[]");
+ iperm = perm_tmp + n;
+ for (i = 0; i < n; ++i) perm_tmp[i] = perm_r[perm[i]];
+ for (i = 0; i < n; ++i) {
+ perm_r[i] = perm_tmp[i];
+ iperm[perm[i]] = i;
+ }
+
+ /* Restore A's original row indices. */
+ for (i = 0; i < nnz; ++i) rowind[i] = iperm[rowind[i]];
+
+ SUPERLU_FREE(perm); /* MC64 permutation */
+ SUPERLU_FREE(perm_tmp);
+ }
}
if ( options->PivotGrowth ) {
@@ -630,7 +663,24 @@ cgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
utime[RCOND] = SuperLU_timer_() - t0;
}
- if ( nrhs > 0 ) {
+ if ( nrhs > 0 ) { /* Solve the system */
+ complex *rhs_work;
+
+ /* Scale and permute the right-hand side if equilibration
+ and permutation from MC64 were performed. */
+ if ( notran ) {
+ if ( rowequ ) {
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < n; ++i)
+ cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]);
+ }
+ } else if ( colequ ) {
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < n; ++i) {
+ cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]);
+ }
+ }
+
/* Compute the solution matrix X. */
for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */
for (i = 0; i < B->nrow; i++)
@@ -645,42 +695,26 @@ cgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
if ( notran ) {
if ( colequ ) {
for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
+ for (i = 0; i < n; ++i) {
cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], C[i]);
}
}
- } else {
+ } else { /* transposed system */
if ( rowequ ) {
- if (perm) {
- complex *tmp;
- int n = A->nrow;
-
- if ((tmp = complexMalloc(n)) == NULL)
- ABORT("SUPERLU_MALLOC fails for tmp[]");
- for (j = 0; j < nrhs; j++) {
- for (i = 0; i < n; i++)
- tmp[i] = Xmat[i + j * ldx]; /*dcopy*/
- for (i = 0; i < n; i++)
- cs_mult(&Xmat[i+j*ldx], &tmp[perm[i]], R[i]);
- }
- SUPERLU_FREE(tmp);
- } else {
- for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
- cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]);
- }
- }
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < A->nrow; ++i) {
+ cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]);
+ }
}
}
+
} /* end if nrhs > 0 */
if ( options->ConditionNumber ) {
- /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */
+ /* The matrix is singular to working precision. */
if ( *rcond < slamch_("E") && *info == 0) *info = A->ncol + 1;
}
- if (perm) SUPERLU_FREE(perm);
-
if ( nofact ) {
ilu_cQuerySpace(L, U, mem_usage);
Destroy_CompCol_Permuted(&AC);
View
33 scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsitrf.c
@@ -1,11 +1,12 @@
-/*! @file cgsitf.c
+/*! @file cgsitrf.c
* \brief Computes an ILU factorization of a general sparse matrix
*
* <pre>
- * -- SuperLU routine (version 4.0) --
+ * -- SuperLU routine (version 4.1) --
* Lawrence Berkeley National Laboratory.
* June 30, 2009
+ *
* </pre>
*/
@@ -192,8 +193,9 @@ cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
int nzlumax;
float *amax;
complex drop_sum;
+ float alpha, omega; /* used in MILU, mimicing DRIC */
static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
- int *iwork2; /* used by the second dropping rule */
+ float *swork2; /* used by the second dropping rule */
/* Local scalars */
fact_t fact = options->Fact;
@@ -224,6 +226,7 @@ cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
int nnzLj, nnzUj;
double tol_L = drop_tol, tol_U = drop_tol;
complex zero = {0.0, 0.0};
+ float one = 1.0;
/* Executable */
iinfo = 0;
@@ -267,14 +270,15 @@ cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
for (k = 0; k < n; k++) iswap[k] = perm_c[k];
amax = (float *) floatMalloc(panel_size);
if (drop_rule & DROP_SECONDARY)
- iwork2 = (int *)intMalloc(n);
+ swork2 = (float *)floatMalloc(n);
else
- iwork2 = NULL;
+ swork2 = NULL;
nnzAj = 0;
nnzLj = 0;
nnzUj = 0;
- last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(3), (int)(min_mn * 0.95));
+ last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
+ alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim);
/* Identify relaxed snodes */
relax_end = (int *) intMalloc(n);
@@ -335,7 +339,7 @@ cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
/* Drop small rows */
stempv = (float *) tempv;
i = ilu_cdrop_row(options, first, last, tol_L, quota, &nnzLj,
- &fill_tol, &Glu, stempv, iwork2, 0);
+ &fill_tol, &Glu, stempv, swork2, 0);
/* Reset the parameters */
if (drop_rule & DROP_DYNAMIC) {
if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
@@ -375,7 +379,7 @@ cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
amax[0] = 0.0;
/* Scatter into SPA dense[*] */
for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
- register float tmp = slu_c_abs1 (&a[k]);
+ register float tmp = c_abs1 (&a[k]);
if (tmp > amax[0]) amax[0] = tmp;
dense[asub[k]] = a[k];
}
@@ -496,7 +500,7 @@ cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
perm_r, &dense[k], drop_rule,
milu, amax[jj - jcol] * tol_U,
quota, &drop_sum, &nnzUj, &Glu,
- iwork2)) != 0)
+ swork2)) != 0)
return;
/* Reset the dropping threshold if required */
@@ -507,7 +511,11 @@ cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
}
- cs_mult(&drop_sum, &drop_sum, MILU_ALPHA);
+ if (drop_sum.r != 0.0 && drop_sum.i != 0.0)
+ {
+ omega = SUPERLU_MIN(2.0*(1.0-alpha)/c_abs1(&drop_sum), 1.0);
+ cs_mult(&drop_sum, &drop_sum, omega);
+ }
if (usepr) pivrow = iperm_r[jj];
fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
if ( (*info = ilu_cpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
@@ -550,7 +558,7 @@ cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
/* Drop small rows */
stempv = (float *) tempv;
i = ilu_cdrop_row(options, first, last, tol_L, quota,
- &nnzLj, &fill_tol, &Glu, stempv, iwork2,
+ &nnzLj, &fill_tol, &Glu, stempv, swork2,
1);
/* Reset the parameters */
@@ -615,6 +623,7 @@ cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
}
ops[FACT] += ops[TRSV] + ops[GEMV];
+ stat->expansions = --(Glu.num_expansions);
if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
SUPERLU_FREE (iperm_c);
@@ -623,6 +632,6 @@ cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
SUPERLU_FREE (iswap);
SUPERLU_FREE (relax_fsupc);
SUPERLU_FREE (amax);
- if ( iwork2 ) SUPERLU_FREE (iwork2);
+ if ( swork2 ) SUPERLU_FREE (swork2);
}
View
34 scipy/sparse/linalg/dsolve/SuperLU/SRC/cgsrfs.c
@@ -157,7 +157,7 @@ cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
complex *work;
float *rwork;
int *iwork;
- extern double slamch_(char *);
+
extern int clacon_(int *, complex *, complex *, float *, int *);
#ifdef _CRAY
extern int CCOPY(int *, complex *, int *, complex *, int *);
@@ -287,21 +287,21 @@ cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
than SAFE2, then SAFE1 is added to the i-th component of the
numerator before dividing. */
- for (i = 0; i < A->nrow; ++i) rwork[i] = slu_c_abs1( &Bptr[i] );
+ for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
/* Compute abs(op(A))*abs(X) + abs(B). */
if (notran) {
for (k = 0; k < A->ncol; ++k) {
- xk = slu_c_abs1( &Xptr[k] );
+ xk = c_abs1( &Xptr[k] );
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
- rwork[Astore->rowind[i]] += slu_c_abs1(&Aval[i]) * xk;
+ rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
}
} else {
for (k = 0; k < A->ncol; ++k) {
s = 0.;
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
irow = Astore->rowind[i];
- s += slu_c_abs1(&Aval[i]) * slu_c_abs1(&Xptr[irow]);
+ s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]);
}
rwork[k] += s;
}
@@ -309,9 +309,9 @@ cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
s = 0.;
for (i = 0; i < A->nrow; ++i) {
if (rwork[i] > safe2) {
- s = SUPERLU_MAX( s, slu_c_abs1(&work[i]) / rwork[i] );
+ s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] );
} else if ( rwork[i] != 0.0 ) {
- s = SUPERLU_MAX( s, (slu_c_abs1(&work[i]) + safe1) / rwork[i] );
+ s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] );
}
/* If rwork[i] is exactly 0.0, then we know the true
residual also must be exactly 0.0. */
@@ -364,22 +364,22 @@ cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
inv(op(A)) * diag(W),
where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
- for (i = 0; i < A->nrow; ++i) rwork[i] = slu_c_abs1( &Bptr[i] );
+ for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
/* Compute abs(op(A))*abs(X) + abs(B). */
if ( notran ) {
for (k = 0; k < A->ncol; ++k) {
- xk = slu_c_abs1( &Xptr[k] );
+ xk = c_abs1( &Xptr[k] );
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
- rwork[Astore->rowind[i]] += slu_c_abs1(&Aval[i]) * xk;
+ rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
}
} else {
for (k = 0; k < A->ncol; ++k) {
s = 0.;
for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
irow = Astore->rowind[i];
- xk = slu_c_abs1( &Xptr[irow] );
- s += slu_c_abs1(&Aval[i]) * xk;
+ xk = c_abs1( &Xptr[irow] );
+ s += c_abs1(&Aval[i]) * xk;
}
rwork[k] += s;
}
@@ -387,9 +387,9 @@ cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
for (i = 0; i < A->nrow; ++i)
if (rwork[i] > safe2)
- rwork[i] = slu_c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
+ rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
else
- rwork[i] = slu_c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
+ rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
kase = 0;
do {
@@ -437,13 +437,13 @@ cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
lstres = 0.;
if ( notran && colequ ) {
for (i = 0; i < A->nrow; ++i)
- lstres = SUPERLU_MAX( lstres, C[i] * slu_c_abs1( &Xptr[i]) );
+ lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) );
} else if ( !notran && rowequ ) {
for (i = 0; i < A->nrow; ++i)
- lstres = SUPERLU_MAX( lstres, R[i] * slu_c_abs1( &Xptr[i]) );
+ lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) );
} else {
for (i = 0; i < A->nrow; ++i)
- lstres = SUPERLU_MAX( lstres, slu_c_abs1( &Xptr[i]) );
+ lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) );
}
if ( lstres != 0. )
ferr[j] /= lstres;
View
53 scipy/sparse/linalg/dsolve/SuperLU/SRC/cgssvx.c
@@ -389,10 +389,12 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
#endif
/* Test the input parameters */
- if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
+ if (options->Fact != DOFACT && options->Fact != SamePattern &&
options->Fact != SamePattern_SameRowPerm &&
- !notran && options->Trans != TRANS && options->Trans != CONJ &&
- !equil && options->Equil != NO)
+ options->Fact != FACTORED &&
+ options->Trans != NOTRANS && options->Trans != TRANS &&
+ options->Trans != CONJ &&
+ options->Equil != NO && options->Equil != YES)
*info = -1;
else if ( A->nrow != A->ncol || A->nrow < 0 ||
(A->Stype != SLU_NC && A->Stype != SLU_NR) ||
@@ -428,15 +430,21 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
}
if (*info == 0) {
if ( lwork < -1 ) *info = -12;
- else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
+ else if ( B->ncol < 0 ) *info = -13;
+ else if ( B->ncol > 0 ) { /* no checking if B->ncol=0 */
+ if ( Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
B->Stype != SLU_DN || B->Dtype != SLU_C ||
B->Mtype != SLU_GE )
*info = -13;
- else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
+ }
+ if ( X->ncol < 0 ) *info = -14;
+ else if ( X->ncol > 0 ) { /* no checking if X->ncol=0 */
+ if ( Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
(B->ncol != 0 && B->ncol != X->ncol) ||
X->Stype != SLU_DN ||
X->Dtype != SLU_C || X->Mtype != SLU_GE )
*info = -14;
+ }
}
}
if (*info != 0) {
@@ -485,22 +493,6 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
utime[EQUIL] = SuperLU_timer_() - t0;
}
- if ( nrhs > 0 ) {
- /* Scale the right hand side if equilibration was performed. */
- if ( notran ) {
- if ( rowequ ) {
- for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
- cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]);
- }
- }
- } else if ( colequ ) {
- for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
- cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]);
- }
- }
- }
if ( nofact ) {
@@ -566,6 +558,19 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
}
if ( nrhs > 0 ) {
+ /* Scale the right hand side if equilibration was performed. */
+ if ( notran ) {
+ if ( rowequ ) {
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < A->nrow; ++i)
+ cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]);
+ }
+ } else if ( colequ ) {
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < A->nrow; ++i)
+ cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]);
+ }
+
/* Compute the solution matrix X. */
for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */
for (i = 0; i < B->nrow; i++)
@@ -590,15 +595,13 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
if ( notran ) {
if ( colequ ) {
for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
+ for (i = 0; i < A->nrow; ++i)
cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], C[i]);
- }
}
} else if ( rowequ ) {
for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
+ for (i = 0; i < A->nrow; ++i)
cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]);
- }
}
} /* end if nrhs > 0 */
View
8 scipy/sparse/linalg/dsolve/SuperLU/SRC/clacon.c
@@ -85,7 +85,7 @@ clacon_(int *n, complex *v, complex *x, float *est, int *kase)
static int i, j;
float temp;
float safmin;
- extern double slamch_(char *);
+ extern float slamch_(char *);
extern int icmax1_(int *, complex *, int *);
extern double scsum1_(int *, complex *, int *);
@@ -113,14 +113,14 @@ clacon_(int *n, complex *v, complex *x, float *est, int *kase)
L20:
if (*n == 1) {
v[0] = x[0];
- *est = slu_c_abs(&v[0]);
+ *est = c_abs(&v[0]);
/* ... QUIT */
goto L150;
}
*est = scsum1_(n, x, &c__1);
for (i = 0; i < *n; ++i) {
- d__1 = slu_c_abs(&x[i]);
+ d__1 = c_abs(&x[i]);
if (d__1 > safmin) {
d__1 = 1 / d__1;
x[i].r *= d__1;
@@ -165,7 +165,7 @@ clacon_(int *n, complex *v, complex *x, float *est, int *kase)
if (*est <= estold) goto L120;
for (i = 0; i < *n; ++i) {
- d__1 = slu_c_abs(&x[i]);
+ d__1 = c_abs(&x[i]);
if (d__1 > safmin) {
d__1 = 1 / d__1;
x[i].r *= d__1;
View
6 scipy/sparse/linalg/dsolve/SuperLU/SRC/clangs.c
@@ -79,7 +79,7 @@ float clangs(char *norm, SuperMatrix *A)
value = 0.;
for (j = 0; j < A->ncol; ++j)
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
- value = SUPERLU_MAX( value, slu_c_abs( &Aval[i]) );
+ value = SUPERLU_MAX( value, c_abs( &Aval[i]) );
} else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') {
/* Find norm1(A). */
@@ -87,7 +87,7 @@ float clangs(char *norm, SuperMatrix *A)
for (j = 0; j < A->ncol; ++j) {
sum = 0.;
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
- sum += slu_c_abs( &Aval[i] );
+ sum += c_abs( &Aval[i] );
value = SUPERLU_MAX(value,sum);
}
@@ -99,7 +99,7 @@ float clangs(char *norm, SuperMatrix *A)
for (j = 0; j < A->ncol; ++j)
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) {
irow = Astore->rowind[i];
- rwork[irow] += slu_c_abs( &Aval[i] );
+ rwork[irow] += c_abs( &Aval[i] );
}
value = 0.;
for (i = 0; i < A->nrow; ++i)
View
1  scipy/sparse/linalg/dsolve/SuperLU/SRC/claqgs.c
@@ -91,7 +91,6 @@ claqgs(SuperMatrix *A, float *r, float *c,
complex *Aval;
int i, j, irow;
float large, small, cj;
- extern double slamch_(char *);
float temp;
View
10 scipy/sparse/linalg/dsolve/SuperLU/SRC/cldperm.c
@@ -11,8 +11,8 @@
#include "slu_cdefs.h"
-extern void mc64id_(int_t*);
-extern void mc64ad_(int_t*, int_t*, int_t*, int_t [], int_t [], double [],
+extern int_t mc64id_(int_t*);
+extern int_t mc64ad_(int_t*, int_t*, int_t*, int_t [], int_t [], double [],
int_t*, int_t [], int_t*, int_t[], int_t*, double [],
int_t [], int_t []);
@@ -91,7 +91,7 @@ cldperm(int_t job, int_t n, int_t nnz, int_t colptr[], int_t adjncy[],
double *nzval_d = (double *) SUPERLU_MALLOC(nnz * sizeof(double));
#if ( DEBUGlevel>=1 )
- CHECK_MALLOC(0, "Enter cldperm()");
+ CHECK_MALLOC("Enter cldperm()");
#endif
liw = 5*n;
if ( job == 3 ) liw = 10*n + nnz;
@@ -132,7 +132,7 @@ cldperm(int_t job, int_t n, int_t nnz, int_t colptr[], int_t adjncy[],
icntl[1] = -1;
#endif
- for (i = 0; i < nnz; ++i) nzval_d[i] = slu_c_abs1(&nzval[i]);
+ for (i = 0; i < nnz; ++i) nzval_d[i] = c_abs1(&nzval[i]);
mc64ad_(&job, &n, &nnz, colptr, adjncy, nzval_d, &num, perm,
&liw, iw, &ldw, dw, icntl, info);
@@ -161,7 +161,7 @@ cldperm(int_t job, int_t n, int_t nnz, int_t colptr[], int_t adjncy[],
SUPERLU_FREE(nzval_d);
#if ( DEBUGlevel>=1 )
- CHECK_MALLOC(0, "Exit cldperm()");
+ CHECK_MALLOC("Exit cldperm()");
#endif
return info[0];
View
6 scipy/sparse/linalg/dsolve/SuperLU/SRC/cmemory.c
@@ -215,7 +215,7 @@ cLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
}
#if ( PRNTlevel >= 1 )
- printf("cLUMemInit() called: fill_ratio %ld, nzlmax %ld, nzumax %ld\n",
+ printf("cLUMemInit() called: fill_ratio %.0f, nzlmax %ld, nzumax %ld\n",
fill_ratio, nzlmax, nzumax);
fflush(stdout);
#endif
@@ -332,7 +332,7 @@ cLUWorkInit(int m, int n, int panel_size, int **iworkptr,
{
int isize, dsize, extra;
complex *old_ptr;
- int maxsuper = sp_ienv(3),
+ int maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
rowblk = sp_ienv(4);
isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
@@ -381,7 +381,7 @@ cSetRWork(int m, int panel_size, complex *dworkptr,
{
complex zero = {0.0, 0.0};
- int maxsuper = sp_ienv(3),
+ int maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
rowblk = sp_ienv(4);
*dense = dworkptr;
*tempv = *dense + panel_size*m;
View
2  scipy/sparse/linalg/dsolve/SuperLU/SRC/cpanel_bmod.c
@@ -115,7 +115,7 @@ cpanel_bmod (
xlusup = Glu->xlusup;
if ( first ) {
- maxsuper = sp_ienv(3);
+ maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) );
rowblk = sp_ienv(4);
colblk = sp_ienv(5);
first = 0;
View
26 scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotL.c
@@ -108,16 +108,12 @@ if ( jcol == MIN_COL ) {
Also search for user-specified pivot, and diagonal element. */
if ( *usepr ) *pivrow = iperm_r[jcol];
diagind = iperm_c[jcol];
-#ifdef SCIPY_SPECIFIC_FIX
- pivmax = -1.0;
-#else
pivmax = 0.0;
-#endif
pivptr = nsupc;
diag = EMPTY;
old_pivptr = nsupc;
for (isub = nsupc; isub < nsupr; ++isub) {
- rtemp = slu_c_abs1 (&lu_col_ptr[isub]);
+ rtemp = c_abs1 (&lu_col_ptr[isub]);
if ( rtemp > pivmax ) {
pivmax = rtemp;
pivptr = isub;
@@ -127,16 +123,18 @@ if ( jcol == MIN_COL ) {
}
/* Test for singularity */
-#ifdef SCIPY_SPECIFIC_FIX
- if (pivmax < 0.0) {
- perm_r[diagind] = jcol;
- *usepr = 0;
- return (jcol+1);
- }
-#endif
if ( pivmax == 0.0 ) {
#if 1
+#if SCIPY_FIX
+ if (pivptr < nsupr) {
+ *pivrow = lsub_ptr[pivptr];
+ }
+ else {
+ *pivrow = diagind;
+ }
+#else
*pivrow = lsub_ptr[pivptr];
+#endif
perm_r[*pivrow] = jcol;
#else
perm_r[diagind] = jcol;
@@ -149,7 +147,7 @@ if ( jcol == MIN_COL ) {
/* Choose appropriate pivotal element by our policy. */
if ( *usepr ) {
- rtemp = slu_c_abs1 (&lu_col_ptr[old_pivptr]);
+ rtemp = c_abs1 (&lu_col_ptr[old_pivptr]);
if ( rtemp != 0.0 && rtemp >= thresh )
pivptr = old_pivptr;
else
@@ -158,7 +156,7 @@ if ( jcol == MIN_COL ) {
if ( *usepr == 0 ) {
/* Use diagonal pivot? */
if ( diag >= 0 ) { /* diagonal exists */
- rtemp = slu_c_abs1 (&lu_col_ptr[diag]);
+ rtemp = c_abs1 (&lu_col_ptr[diag]);
if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
}
*pivrow = lsub_ptr[pivptr];
View
7 scipy/sparse/linalg/dsolve/SuperLU/SRC/cpivotgrowth.c
@@ -58,7 +58,6 @@ cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c,
int i, j, k, oldcol;
int *inv_perm_c;
float rpg, maxaj, maxuj;
- extern double slamch_(char *);
float smlnum;
complex *luval;
complex temp_comp;
@@ -88,15 +87,15 @@ cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c,
maxaj = 0.;
oldcol = inv_perm_c[j];
for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
- maxaj = SUPERLU_MAX( maxaj, slu_c_abs1( &Aval[i]) );
+ maxaj = SUPERLU_MAX( maxaj, c_abs1( &Aval[i]) );
maxuj = 0.;
for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
- maxuj = SUPERLU_MAX( maxuj, slu_c_abs1( &Uval[i]) );
+ maxuj = SUPERLU_MAX( maxuj, c_abs1( &Uval[i]) );
/* Supernode */
for (i = 0; i < nz_in_U; ++i)
- maxuj = SUPERLU_MAX( maxuj, slu_c_abs1( &luval[i]) );
+ maxuj = SUPERLU_MAX( maxuj, c_abs1( &luval[i]) );
++nz_in_U;
luval += nsupr;
View
4 scipy/sparse/linalg/dsolve/SuperLU/SRC/creadtriple.c
@@ -1,6 +1,6 @@
-/*! @file creadrb.c
- * \brief Read a matrix stored in Rutherford-Boeing format
+/*! @file creadtriple.c
+ * \brief Read a matrix stored in triplet (coordinate) format
*
* <pre>
* -- SuperLU routine (version 4.0) --
View
4 scipy/sparse/linalg/dsolve/SuperLU/SRC/cutil.c
@@ -408,8 +408,8 @@ void cinf_norm_error(int nrhs, SuperMatrix *X, complex *xtrue)
err = xnorm = 0.0;
for (i = 0; i < X->nrow; i++) {
c_sub(&temp, &soln_work[i], &xtrue[i]);
- err = SUPERLU_MAX(err, slu_c_abs(&temp));
- xnorm = SUPERLU_MAX(xnorm, slu_c_abs(&soln_work[i]));
+ err = SUPERLU_MAX(err, c_abs(&temp));
+ xnorm = SUPERLU_MAX(xnorm, c_abs(&soln_work[i]));
}
err = err / xnorm;
printf("||X - Xtrue||/||X|| = %e\n", err);
View
58 scipy/sparse/linalg/dsolve/SuperLU/SRC/dGetDiagU.c
@@ -1,58 +0,0 @@
-/*! @file dGetDiagU.c
- * \brief Extracts main diagonal of matrix
- *
- * <pre>
- * -- Auxiliary routine in SuperLU (version 2.0) --
- * Lawrence Berkeley National Lab, Univ. of California Berkeley.
- * Xiaoye S. Li
- * September 11, 2003
- *
- * Purpose
- * =======
- *
- * GetDiagU extracts the main diagonal of matrix U of the LU factorization.
- *
- * Arguments
- * =========
- *
- * L (input) SuperMatrix*
- * The factor L from the factorization Pr*A*Pc=L*U as computed by
- * dgstrf(). Use compressed row subscripts storage for supernodes,
- * i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * diagU (output) double*, dimension (n)
- * The main diagonal of matrix U.
- *
- * Note
- * ====
- * The diagonal blocks of the L and U matrices are stored in the L
- * data structures.
- * </pre>
-*/
-#include "slu_ddefs.h"
-
-void dGetDiagU(SuperMatrix *L, double *diagU)
-{
- int_t i, k, nsupers;
- int_t fsupc, nsupr, nsupc, luptr;
- double *dblock, *Lval;
- SCformat *Lstore;
-
- Lstore = L->Store;
- Lval = Lstore->nzval;
- nsupers = Lstore->nsuper + 1;
-
- for (k = 0; k < nsupers; ++k) {
- fsupc = L_FST_SUPC(k);
- nsupc = L_FST_SUPC(k+1) - fsupc;
- nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
- luptr = L_NZ_START(fsupc);
-
- dblock = &diagU[fsupc];
- for (i = 0; i < nsupc; ++i) {
- dblock[i] = Lval[luptr];
- luptr += nsupr + 1;
- }
- }
-}
-
View
224 scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsisx.c
@@ -1,11 +1,12 @@
/*! @file dgsisx.c
- * \brief Gives the approximate solutions of linear equations A*X=B or A'*X=B
+ * \brief Computes an approximate solutions of linear equations A*X=B or A'*X=B
*
* <pre>
- * -- SuperLU routine (version 4.0) --
+ * -- SuperLU routine (version 4.2) --
* Lawrence Berkeley National Laboratory.
- * June 30, 2009
+ * November, 2010
+ * August, 2011
* </pre>
*/
#include "slu_ddefs.h"
@@ -16,9 +17,10 @@
* Purpose
* =======
*
- * DGSISX gives the approximate solutions of linear equations A*X=B or A'*X=B,
- * using the ILU factorization from dgsitrf(). An estimation of
- * the condition number is provided. It performs the following steps:
+ * DGSISX computes an approximate solutions of linear equations
+ * A*X=B or A'*X=B, using the ILU factorization from dgsitrf().
+ * An estimation of the condition number is provided.
+ * The routine performs the following steps:
*
* 1. If A is stored column-wise (A->Stype = SLU_NC):
*
@@ -71,50 +73,50 @@
* entries of modulus 1 on the diagonal and off-diagonal entries
* of modulus at most 1. If MC64 fails, dgsequ() is used to
* equilibrate the system.
+ * ( Default: LargeDiag )
* 2) options->ILU_DropTol = tau is the threshold for dropping.
* For L, it is used directly (for the whole row in a supernode);
* For U, ||A(:,i)||_oo * tau is used as the threshold
* for the i-th column.
* If a secondary dropping rule is required, tau will
* also be used to compute the second threshold.
+ * ( Default: 1e-4 )
* 3) options->ILU_FillFactor = gamma, used as the initial guess
* of memory growth.
* If a secondary dropping rule is required, it will also
* be used as an upper bound of the memory.
+ * ( Default: 10 )
* 4) options->ILU_DropRule specifies the dropping rule.
- * Option Explanation
- * ====== ===========
- * DROP_BASIC: Basic dropping rule, supernodal based ILU.
- * DROP_PROWS: Supernodal based ILUTP, p = gamma * nnz(A) / n.
- * DROP_COLUMN: Variation of ILUTP, for j-th column,
- * p = gamma * nnz(A(:,j)).
- * DROP_AREA; Variation of ILUTP, for j-th column, use
- * nnz(F(:,1:j)) / nnz(A(:,1:j)) to control the
- * memory.
- * DROP_DYNAMIC: Modify the threshold tau during the
- * factorizaion.
- * If nnz(L(:,1:j)) / nnz(A(:,1:j)) < gamma
- * tau_L(j) := MIN(1, tau_L(j-1) * 2);
- * Otherwise
- * tau_L(j) := MIN(1, tau_L(j-1) * 2);
- * tau_U(j) uses the similar rule.
- * NOTE: the thresholds used by L and U are
- * indenpendent.
- * DROP_INTERP: Compute the second dropping threshold by
- * interpolation instead of sorting (default).
- * In this case, the actual fill ratio is not
- * guaranteed smaller than gamma.
+ * Option Meaning
+ * ====== ===========
+ * DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau).
+ * DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma*nnz(A)/n.
+ * DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column,
+ * p = gamma * nnz(A(:,j)).
+ * DROP_AREA: Variation of ILUTP, for j-th column, use
+ * nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
+ * DROP_DYNAMIC: Modify the threshold tau during factorizaion:
+ * If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
+ * tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
+ * Otherwise
+ * tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
+ * tau_U(j) uses the similar rule.
+ * NOTE: the thresholds used by L and U are separate.
+ * DROP_INTERP: Compute the second dropping threshold by
+ * interpolation instead of sorting (default).
+ * In this case, the actual fill ratio is not
+ * guaranteed smaller than gamma.
* DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
- * ( The default option is DROP_BASIC | DROP_AREA. )
- * 5) options->ILU_Norm is the criterion of computing the average
- * value of a row in L.
- * options->ILU_Norm average(x[1:n])
+ * ( Default: DROP_BASIC | DROP_AREA )
+ * 5) options->ILU_Norm is the criterion of measuring the magnitude
+ * of a row in a supernode of L. ( Default is INF_NORM )
+ * options->ILU_Norm RowSize(x[1:n])
* ================= ===============
* ONE_NORM ||x||_1 / n
* TWO_NORM ||x||_2 / sqrt(n)
* INF_NORM max{|x[i]|}
* 6) options->ILU_MILU specifies the type of MILU's variation.
- * = SILU (default): do not perform MILU;
+ * = SILU: do not perform Modified ILU;
* = SMILU_1 (not recommended):
* U(i,i) := U(i,i) + sum(dropped entries);
* = SMILU_2:
@@ -123,10 +125,12 @@
* U(i,i) := U(i,i) + SGN(U(i,i)) * sum(|dropped entries|);
* NOTE: Even SMILU_1 does not preserve the column sum because of
* late dropping.
+ * ( Default: SILU )
* 7) options->ILU_FillTol is used as the perturbation when
* encountering zero pivots. If some U(i,i) = 0, so that U is
* exactly singular, then
* U(i,i) := ||A(:,i)|| * options->ILU_FillTol ** (1 - i / n).
+ * ( Default: 1e-2 )
*
* 2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
* to the transpose of A:
@@ -194,8 +198,11 @@
* On entry, If options->Fact = FACTORED and equed is not 'N',
* then A must have been equilibrated by the scaling factors in
* R and/or C.
- * On exit, A is not modified if options->Equil = NO, or if
- * options->Equil = YES but equed = 'N' on exit.
+ * On exit, A is not modified
+ * if options->Equil = NO, or
+ * if options->Equil = YES but equed = 'N' on exit, or
+ * if options->RowPerm = NO.
+ *
* Otherwise, if options->Equil = YES and equed is not 'N',
* A is scaled as follows:
* If A->Stype = SLU_NC:
@@ -207,6 +214,16 @@
* equed = 'C': transpose(A) := transpose(A) * diag(C)
* equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C).
*
+ * If options->RowPerm = LargeDiag, MC64 is used to scale and permute
+ * the matrix to an I-matrix, that is A is modified as follows:
+ * P*Dr*A*Dc has entries of modulus 1 on the diagonal and
+ * off-diagonal entries of modulus at most 1. P is a permutation
+ * obtained from MC64.
+ * If MC64 fails, dgsequ() is used to equilibrate the system,
+ * and A is scaled as above, but no permutation is involved.
+ * On exit, A is restored to the orginal row numbering, so
+ * Dr*A*Dc is returned.
+ *
* perm_c (input/output) int*
* If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
* which defines the permutation matrix Pc; perm_c[i] = j means
@@ -223,8 +240,8 @@
* perm_r (input/output) int*
* If A->Stype = SLU_NC, row permutation vector of size A->nrow,
* which defines the permutation matrix Pr, and is determined
- * by partial pivoting. perm_r[i] = j means row i of A is in
- * position j in Pr*A.
+ * by MC64 first then followed by partial pivoting.
+ * perm_r[i] = j means row i of A is in position j in Pr*A.
*
* If A->Stype = SLU_NR, permutation vector of size A->ncol, which
* determines permutation of rows of transpose(A)
@@ -261,7 +278,7 @@
* If equed = 'N' or 'C', R is not accessed.
* If options->Fact = FACTORED, R is an input argument,
* otherwise, R is output.
- * If options->zFact = FACTORED and equed = 'R' or 'B', each element
+ * If options->Fact = FACTORED and equed = 'R' or 'B', each element
* of R must be positive.
*
* C (input/output) double*, dimension (A->ncol)
@@ -384,7 +401,7 @@ dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
DNformat *Bstore, *Xstore;
double *Bmat, *Xmat;
- int ldb, ldx, nrhs;
+ int ldb, ldx, nrhs, n;
SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
SuperMatrix AC; /* Matrix postmultiplied by Pc */
int colequ, equil, nofact, notran, rowequ, permc_spec, mc64;
@@ -397,7 +414,7 @@ dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
double t0; /* temporary time */
double *utime;
- int *perm = NULL;
+ int *perm = NULL; /* permutation returned from MC64 */
/* External functions */
extern double dlangs(char *, SuperMatrix *);
@@ -409,6 +426,7 @@ dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
ldb = Bstore->lda;
ldx = Xstore->lda;
nrhs = B->ncol;
+ n = B->nrow;
*info = 0;
nofact = (options->Fact != FACTORED);
@@ -427,10 +445,12 @@ dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
}
/* Test the input parameters */
- if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
+ if (options->Fact != DOFACT && options->Fact != SamePattern &&
options->Fact != SamePattern_SameRowPerm &&
- !notran && options->Trans != TRANS && options->Trans != CONJ &&
- !equil && options->Equil != NO)
+ options->Fact != FACTORED &&
+ options->Trans != NOTRANS && options->Trans != TRANS &&
+ options->Trans != CONJ &&
+ options->Equil != NO && options->Equil != YES)
*info = -1;
else if ( A->nrow != A->ncol || A->nrow < 0 ||
(A->Stype != SLU_NC && A->Stype != SLU_NR) ||
@@ -516,37 +536,46 @@ dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
int *colptr = Astore->colptr;
int *rowind = Astore->rowind;
double *nzval = (double *)Astore->nzval;
- int n = AA->nrow;
if ( mc64 ) {
- *equed = 'B';
- rowequ = colequ = 1;
t0 = SuperLU_timer_();
if ((perm = intMalloc(n)) == NULL)
ABORT("SUPERLU_MALLOC fails for perm[]");
info1 = dldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C);
- if (info1 > 0) { /* MC64 fails, call dgsequ() later */
+ if (info1 != 0) { /* MC64 fails, call dgsequ() later */
mc64 = 0;
SUPERLU_FREE(perm);
perm = NULL;
} else {
- for (i = 0; i < n; i++) {
- R[i] = exp(R[i]);
- C[i] = exp(C[i]);
- }
- /* permute and scale the matrix */
+ if ( equil ) {
+ rowequ = colequ = 1;
+ for (i = 0; i < n; i++) {
+ R[i] = exp(R[i]);
+ C[i] = exp(C[i]);
+ }
+ /* scale the matrix */
+ for (j = 0; j < n; j++) {
+ for (i = colptr[j]; i < colptr[j + 1]; i++) {
+ nzval[i] *= R[rowind[i]] * C[j];
+ }
+ }
+ *equed = 'B';
+ }
+
+ /* permute the matrix */
for (j = 0; j < n; j++) {
for (i = colptr[j]; i < colptr[j + 1]; i++) {
- nzval[i] *= R[rowind[i]] * C[j];
+ /*nzval[i] *= R[rowind[i]] * C[j];*/
rowind[i] = perm[rowind[i]];
}
}
}
utime[EQUIL] = SuperLU_timer_() - t0;
}
- if ( !mc64 & equil ) {
+
+ if ( !mc64 & equil ) { /* Only perform equilibration, no row perm */
t0 = SuperLU_timer_();
/* Compute row and column scalings to equilibrate the matrix A. */
dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
@@ -561,22 +590,6 @@ dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
}
}
- if ( nrhs > 0 ) {
- /* Scale the right hand side if equilibration was performed. */
- if ( notran ) {
- if ( rowequ ) {
- for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
- Bmat[i + j*ldb] *= R[i];
- }
- }
- } else if ( colequ ) {
- for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
- Bmat[i + j*ldb] *= C[i];
- }
- }
- }
if ( nofact ) {
@@ -608,6 +621,26 @@ dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
mem_usage->total_needed = *info - A->ncol;
return;
}
+
+ if ( mc64 ) { /* Fold MC64's perm[] into perm_r[]. */
+ NCformat *Astore = AA->Store;
+ int nnz = Astore->nnz, *rowind = Astore->rowind;
+ int *perm_tmp, *iperm;
+ if ((perm_tmp = intMalloc(2*n)) == NULL)
+ ABORT("SUPERLU_MALLOC fails for perm_tmp[]");
+ iperm = perm_tmp + n;
+ for (i = 0; i < n; ++i) perm_tmp[i] = perm_r[perm[i]];
+ for (i = 0; i < n; ++i) {
+ perm_r[i] = perm_tmp[i];
+ iperm[perm[i]] = i;
+ }
+
+ /* Restore A's original row indices. */
+ for (i = 0; i < nnz; ++i) rowind[i] = iperm[rowind[i]];
+
+ SUPERLU_FREE(perm); /* MC64 permutation */
+ SUPERLU_FREE(perm_tmp);
+ }
}
if ( options->PivotGrowth ) {
@@ -630,7 +663,24 @@ dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
utime[RCOND] = SuperLU_timer_() - t0;
}
- if ( nrhs > 0 ) {
+ if ( nrhs > 0 ) { /* Solve the system */
+ double *rhs_work;
+
+ /* Scale and permute the right-hand side if equilibration
+ and permutation from MC64 were performed. */
+ if ( notran ) {
+ if ( rowequ ) {
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < n; ++i)
+ Bmat[i + j*ldb] *= R[i];
+ }
+ } else if ( colequ ) {
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < n; ++i) {
+ Bmat[i + j*ldb] *= C[i];
+ }
+ }
+
/* Compute the solution matrix X. */
for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */
for (i = 0; i < B->nrow; i++)
@@ -645,42 +695,26 @@ dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
if ( notran ) {
if ( colequ ) {
for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
+ for (i = 0; i < n; ++i) {
Xmat[i + j*ldx] *= C[i];
}
}
- } else {
+ } else { /* transposed system */
if ( rowequ ) {
- if (perm) {
- double *tmp;
- int n = A->nrow;
-
- if ((tmp = doubleMalloc(n)) == NULL)
- ABORT("SUPERLU_MALLOC fails for tmp[]");
- for (j = 0; j < nrhs; j++) {
- for (i = 0; i < n; i++)
- tmp[i] = Xmat[i + j * ldx]; /*dcopy*/
- for (i = 0; i < n; i++)
- Xmat[i + j * ldx] = R[i] * tmp[perm[i]];
- }
- SUPERLU_FREE(tmp);
- } else {
- for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
- Xmat[i + j*ldx] *= R[i];
- }
- }
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < A->nrow; ++i) {
+ Xmat[i + j*ldx] *= R[i];
+ }
}
}
+
} /* end if nrhs > 0 */
if ( options->ConditionNumber ) {
- /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */
+ /* The matrix is singular to working precision. */
if ( *rcond < dlamch_("E") && *info == 0) *info = A->ncol + 1;
}
- if (perm) SUPERLU_FREE(perm);
-
if ( nofact ) {
ilu_dQuerySpace(L, U, mem_usage);
Destroy_CompCol_Permuted(&AC);
View
36 scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsitrf.c
@@ -1,11 +1,12 @@
-/*! @file dgsitf.c
+/*! @file dgsitrf.c
* \brief Computes an ILU factorization of a general sparse matrix
*
* <pre>
- * -- SuperLU routine (version 4.0) --
+ * -- SuperLU routine (version 4.1) --
* Lawrence Berkeley National Laboratory.
* June 30, 2009
+ *
* </pre>
*/
@@ -191,8 +192,9 @@ dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
int nzlumax;
double *amax;
double drop_sum;
+ double alpha, omega; /* used in MILU, mimicing DRIC */
static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
- int *iwork2; /* used by the second dropping rule */
+ double *dwork2; /* used by the second dropping rule */
/* Local scalars */
fact_t fact = options->Fact;
@@ -223,6 +225,7 @@ dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
int nnzLj, nnzUj;
double tol_L = drop_tol, tol_U = drop_tol;
double zero = 0.0;
+ double one = 1.0;
/* Executable */
iinfo = 0;
@@ -266,14 +269,15 @@ dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
for (k = 0; k < n; k++) iswap[k] = perm_c[k];
amax = (double *) doubleMalloc(panel_size);
if (drop_rule & DROP_SECONDARY)
- iwork2 = (int *)intMalloc(n);
+ dwork2 = (double *)doubleMalloc(n);
else
- iwork2 = NULL;
+ dwork2 = NULL;
nnzAj = 0;
nnzLj = 0;
nnzUj = 0;
- last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(3), (int)(min_mn * 0.95));
+ last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
+ alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim);
/* Identify relaxed snodes */
relax_end = (int *) intMalloc(n);
@@ -333,7 +337,7 @@ dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
/* Drop small rows */
i = ilu_ddrop_row(options, first, last, tol_L, quota, &nnzLj,
- &fill_tol, &Glu, tempv, iwork2, 0);
+ &fill_tol, &Glu, tempv, dwork2, 0);
/* Reset the parameters */
if (drop_rule & DROP_DYNAMIC) {
if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
@@ -494,7 +498,7 @@ dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
perm_r, &dense[k], drop_rule,
milu, amax[jj - jcol] * tol_U,
quota, &drop_sum, &nnzUj, &Glu,
- iwork2)) != 0)
+ dwork2)) != 0)
return;
/* Reset the dropping threshold if required */
@@ -505,7 +509,16 @@ dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
}
- drop_sum *= MILU_ALPHA;
+ if (drop_sum != zero)
+ {
+ if (drop_sum > zero)
+ omega = SUPERLU_MIN(2.0 * (1.0 - alpha)
+ * amax[jj - jcol] / drop_sum, one);
+ else
+ omega = SUPERLU_MAX(2.0 * (1.0 - alpha)
+ * amax[jj - jcol] / drop_sum, -one);
+ drop_sum *= omega;
+ }
if (usepr) pivrow = iperm_r[jj];
fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
if ( (*info = ilu_dpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
@@ -547,7 +560,7 @@ dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
/* Drop small rows */
i = ilu_ddrop_row(options, first, last, tol_L, quota,
- &nnzLj, &fill_tol, &Glu, tempv, iwork2,
+ &nnzLj, &fill_tol, &Glu, tempv, dwork2,
1);
/* Reset the parameters */
@@ -612,6 +625,7 @@ dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
}
ops[FACT] += ops[TRSV] + ops[GEMV];
+ stat->expansions = --(Glu.num_expansions);
if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
SUPERLU_FREE (iperm_c);
@@ -620,6 +634,6 @@ dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
SUPERLU_FREE (iswap);
SUPERLU_FREE (relax_fsupc);
SUPERLU_FREE (amax);
- if ( iwork2 ) SUPERLU_FREE (iwork2);
+ if ( dwork2 ) SUPERLU_FREE (dwork2);
}
View
2  scipy/sparse/linalg/dsolve/SuperLU/SRC/dgsrfs.c
@@ -157,7 +157,7 @@ dgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
double *work;
double *rwork;
int *iwork;
- extern double dlamch_(char *);
+
extern int dlacon_(int *, double *, double *, int *, double *, int *);
#ifdef _CRAY
extern int SCOPY(int *, double *, int *, double *, int *);
View
53 scipy/sparse/linalg/dsolve/SuperLU/SRC/dgssvx.c
@@ -389,10 +389,12 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
#endif
/* Test the input parameters */
- if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
+ if (options->Fact != DOFACT && options->Fact != SamePattern &&
options->Fact != SamePattern_SameRowPerm &&
- !notran && options->Trans != TRANS && options->Trans != CONJ &&
- !equil && options->Equil != NO)
+ options->Fact != FACTORED &&
+ options->Trans != NOTRANS && options->Trans != TRANS &&
+ options->Trans != CONJ &&
+ options->Equil != NO && options->Equil != YES)
*info = -1;
else if ( A->nrow != A->ncol || A->nrow < 0 ||
(A->Stype != SLU_NC && A->Stype != SLU_NR) ||
@@ -428,15 +430,21 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
}
if (*info == 0) {
if ( lwork < -1 ) *info = -12;
- else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
+ else if ( B->ncol < 0 ) *info = -13;
+ else if ( B->ncol > 0 ) { /* no checking if B->ncol=0 */
+ if ( Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
B->Stype != SLU_DN || B->Dtype != SLU_D ||
B->Mtype != SLU_GE )
*info = -13;
- else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
+ }
+ if ( X->ncol < 0 ) *info = -14;
+ else if ( X->ncol > 0 ) { /* no checking if X->ncol=0 */
+ if ( Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
(B->ncol != 0 && B->ncol != X->ncol) ||
X->Stype != SLU_DN ||
X->Dtype != SLU_D || X->Mtype != SLU_GE )
*info = -14;
+ }
}
}
if (*info != 0) {
@@ -485,22 +493,6 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
utime[EQUIL] = SuperLU_timer_() - t0;
}
- if ( nrhs > 0 ) {
- /* Scale the right hand side if equilibration was performed. */
- if ( notran ) {
- if ( rowequ ) {
- for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
- Bmat[i + j*ldb] *= R[i];
- }
- }
- } else if ( colequ ) {
- for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
- Bmat[i + j*ldb] *= C[i];
- }
- }
- }
if ( nofact ) {
@@ -566,6 +558,19 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
}
if ( nrhs > 0 ) {
+ /* Scale the right hand side if equilibration was performed. */
+ if ( notran ) {
+ if ( rowequ ) {
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < A->nrow; ++i)
+ Bmat[i + j*ldb] *= R[i];
+ }
+ } else if ( colequ ) {
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < A->nrow; ++i)
+ Bmat[i + j*ldb] *= C[i];
+ }
+
/* Compute the solution matrix X. */
for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */
for (i = 0; i < B->nrow; i++)
@@ -590,15 +595,13 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
if ( notran ) {
if ( colequ ) {
for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
+ for (i = 0; i < A->nrow; ++i)
Xmat[i + j*ldx] *= C[i];
- }
}
} else if ( rowequ ) {
for (j = 0; j < nrhs; ++j)
- for (i = 0; i < A->nrow; ++i) {
+ for (i = 0; i < A->nrow; ++i)
Xmat[i + j*ldx] *= R[i];
- }
}
} /* end if nrhs > 0 */
View
234 scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsL.c
@@ -1,234 +0,0 @@
-/*! @file dgstrsL.c
- * \brief Performs the L-solve using the LU factorization computed by DGSTRF
- *
- * <pre>
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * September 15, 2003
- *
- * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
- *
- * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
- * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
- *
- * Permission is hereby granted to use or copy this program for any
- * purpose, provided the above notices are retained on all copies.
- * Permission to modify the code and to distribute modified code is
- * granted, provided the above notices are retained, and a notice that
- * the code was modified is included with the above copyright notice.
- * </pre>
- */
-
-#include "slu_ddefs.h"
-#include "slu_util.h"
-
-
-/*
- * Function prototypes
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-
-/*! \brief
- *
- * <pre>
- * Purpose
- * =======
- *
- * dgstrsL only performs the L-solve using the LU factorization computed
- * by DGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans (input) char*
- * Specifies the form of the system of equations:
- * = 'N': A * X = B (No transpose)
- * = 'T': A'* X = B (Transpose)
- * = 'C': A**H * X = B (Conjugate transpose)
- *
- * L (input) SuperMatrix*
- * The factor L from the factorization Pr*A*Pc=L*U as computed by
- * dgstrf(). Use compressed row subscripts storage for supernodes,
- * i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U (input) SuperMatrix*
- * The factor U from the factorization Pr*A*Pc=L*U as computed by
- * dgstrf(). Use column-wise storage scheme, i.e., U has types:
- * Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * perm_r (input) int*, dimension (L->nrow)
- * Row permutation vector, which defines the permutation matrix Pr;
- * perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B (input/output) SuperMatrix*
- * B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- * On entry, the right hand side matrix.
- * On exit, the solution matrix if info = 0;
- *
- * info (output) int*
- * = 0: successful exit
- * < 0: if info = -i, the i-th argument had an illegal value
- * </pre>
- */
-void
-dgstrsL(char *trans, SuperMatrix *L, int *perm_r, SuperMatrix *B, int *info)
-{
-#ifdef _CRAY
- _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
- int incx = 1, incy = 1;
- double alpha = 1.0, beta = 1.0;
- DNformat *Bstore;
- double *Bmat;
- SCformat *Lstore;
- double *Lval, *Uval;
- int nrow, notran;
- int fsupc, nsupr, nsupc, luptr, istart, irow;
- int i, j, k, iptr, jcol, n, ldb, nrhs;
- double *work, *work_col, *rhs_work, *soln;
- flops_t solve_ops;
- extern SuperLUStat_t SuperLUStat;
- void dprint_soln();
-
- /* Test input parameters ... */
- *info = 0;
- Bstore = B->Store;
- ldb = Bstore->lda;
- nrhs = B->ncol;
- notran = lsame_(trans, "N");
- if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C") ) *info = -1;
- else if ( L->nrow != L->ncol || L->nrow < 0 ||
- L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
- *info = -2;
- else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
- B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
- *info = -4;
- if ( *info ) {
- i = -(*info);
- xerbla_("dgstrsL", &i);
- return;
- }
-
- n = L->nrow;
- work = doubleCalloc(n * nrhs);
- if ( !work ) ABORT("Malloc fails for local work[].");
- soln = doubleMalloc(n);
- if ( !soln ) ABORT("Malloc fails for local soln[].");
-
- Bmat = Bstore->nzval;
- Lstore = L->Store;
- Lval = Lstore->nzval;
- solve_ops = 0;
-
- if ( notran ) {
- /* Permute right hand sides to form Pr*B */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- /* Forward solve PLy=Pb. */
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- nrow = nsupr - nsupc;
-
- solve_ops += nsupc * (nsupc - 1) * nrhs;
- solve_ops += 2 * nrow * nsupc * nrhs;
-
- if ( nsupc == 1 ) {
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- luptr = L_NZ_START(fsupc);
- for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
- irow = L_SUB(iptr);
- ++luptr;
- rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
- }
- }
- } else {
- luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("N", strlen("N"));
- ftcs3 = _cptofcd("U", strlen("U"));
- STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-
- SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
- &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
- &beta, &work[0], &n );
-#else
- dtrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-
- dgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
- &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
- &beta, &work[0], &n );
-#endif
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- work_col = &work[j*n];
- iptr = istart + nsupc;
- for (i = 0; i < nrow; i++) {
- irow = L_SUB(iptr);
- rhs_work[irow] -= work_col[i]; /* Scatter */
- work_col[i] = 0.0;
- iptr++;
- }
- }
-#else
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- dlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
- dmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
- &rhs_work[fsupc], &work[0] );
-
- iptr = istart + nsupc;
- for (i = 0; i < nrow; i++) {
- irow = L_SUB(iptr);
- rhs_work[irow] -= work[i];
- work[i] = 0.0;
- iptr++;
- }
- }
-#endif
- } /* else ... */
- } /* for L-solve */
-
-#ifdef DEBUG
- printf("After L-solve: y=\n");
- dprint_soln(n, nrhs, Bmat);
-#endif
-
- SuperLUStat.ops[SOLVE] = solve_ops;
-
- } else {
- printf("Transposed solve not implemented.\n");
- exit(0);
- }
-
- SUPERLU_FREE(work);
- SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector
- */
-void
-dprint_soln(int n, int nrhs, double *soln)
-{
- int i;
-
- for (i = 0; i < n; i++)
- printf("\t%d: %.4f\n", i, soln[i]);
-}
View
224 scipy/sparse/linalg/dsolve/SuperLU/SRC/dgstrsU.c
@@ -1,224 +0,0 @@
-/*! @file dgstrsU.c
- * \brief Performs the U-solve using the LU factorization computed by DGSTRF
- *
- * <pre>
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research