diff --git a/cscs-checks/libraries/magma/magma-2.2/flops.h b/cscs-checks/libraries/magma/magma-2.2/flops.h deleted file mode 100644 index 8ae4bec07f..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/flops.h +++ /dev/null @@ -1,400 +0,0 @@ -/** - * - * @file flops.h - * - * File provided by Univ. of Tennessee, - * - * @version 1.0.0 - * @author Mathieu Faverge - * @date November 2016 - * - **/ -/* - * This file provide the flops formula for all Level 3 BLAS and some - * Lapack routines. Each macro uses the same size parameters as the - * function associated and provide one formula for additions and one - * for multiplications. Example to use these macros: - * - * FLOPS_ZGEMM( m, n, k ) - * - * All the formula are reported in the LAPACK Lawn 41: - * http://www.netlib.org/lapack/lawns/lawn41.ps - */ -#ifndef MAGMA_FLOPS_H -#define MAGMA_FLOPS_H - -/***************************************************************************//** - Generic formula coming from LAWN 41 -*******************************************************************************/ -/* - * Level 1 BLAS - */ - #define FMULS_AXPY(n_) (n_) - #define FADDS_AXPY(n_) (n_) - -/* - * Level 2 BLAS - */ -#define FMULS_GEMV(m_, n_) ((m_) * (n_) + 2. * (m_)) -#define FADDS_GEMV(m_, n_) ((m_) * (n_) ) - -#define FMULS_SYMV(n_) FMULS_GEMV( (n_), (n_) ) -#define FADDS_SYMV(n_) FADDS_GEMV( (n_), (n_) ) -#define FMULS_HEMV FMULS_SYMV -#define FADDS_HEMV FADDS_SYMV - -/* - * Level 3 BLAS - */ -#define FMULS_GEMM(m_, n_, k_) ((m_) * (n_) * (k_)) -#define FADDS_GEMM(m_, n_, k_) ((m_) * (n_) * (k_)) - -#define FMULS_SYMM(side_, m_, n_) ( ( (side_) == MagmaLeft ) ? FMULS_GEMM((m_), (m_), (n_)) : FMULS_GEMM((m_), (n_), (n_)) ) -#define FADDS_SYMM(side_, m_, n_) ( ( (side_) == MagmaLeft ) ? FADDS_GEMM((m_), (m_), (n_)) : FADDS_GEMM((m_), (n_), (n_)) ) -#define FMULS_HEMM FMULS_SYMM -#define FADDS_HEMM FADDS_SYMM - -#define FMULS_SYRK(k_, n_) (0.5 * (k_) * (n_) * ((n_)+1)) -#define FADDS_SYRK(k_, n_) (0.5 * (k_) * (n_) * ((n_)+1)) -#define FMULS_HERK FMULS_SYRK -#define FADDS_HERK FADDS_SYRK - -#define FMULS_SYR2K(k_, n_) ((k_) * (n_) * (n_) ) -#define FADDS_SYR2K(k_, n_) ((k_) * (n_) * (n_) + (n_)) -#define FMULS_HER2K FMULS_SYR2K -#define FADDS_HER2K FADDS_SYR2K - -#define FMULS_TRMM_2(m_, n_) (0.5 * (n_) * (m_) * ((m_)+1)) -#define FADDS_TRMM_2(m_, n_) (0.5 * (n_) * (m_) * ((m_)-1)) - - -#define FMULS_TRMM(side_, m_, n_) ( ( (side_) == MagmaLeft ) ? FMULS_TRMM_2((m_), (n_)) : FMULS_TRMM_2((n_), (m_)) ) -#define FADDS_TRMM(side_, m_, n_) ( ( (side_) == MagmaLeft ) ? FADDS_TRMM_2((m_), (n_)) : FADDS_TRMM_2((n_), (m_)) ) - -#define FMULS_TRSM FMULS_TRMM -#define FADDS_TRSM FADDS_TRMM - -/* - * Lapack - */ -#define FMULS_GETRF(m_, n_) ( ((m_) < (n_)) \ - ? (0.5 * (m_) * ((m_) * ((n_) - (1./3.) * (m_) - 1. ) + (n_)) + (2. / 3.) * (m_)) \ - : (0.5 * (n_) * ((n_) * ((m_) - (1./3.) * (n_) - 1. ) + (m_)) + (2. / 3.) * (n_)) ) -#define FADDS_GETRF(m_, n_) ( ((m_) < (n_)) \ - ? (0.5 * (m_) * ((m_) * ((n_) - (1./3.) * (m_) ) - (n_)) + (1. / 6.) * (m_)) \ - : (0.5 * (n_) * ((n_) * ((m_) - (1./3.) * (n_) ) - (m_)) + (1. / 6.) * (n_)) ) - -#define FMULS_GETRI(n_) ( (n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) + 0.5)) ) -#define FADDS_GETRI(n_) ( (n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) - 1.5)) ) - -#define FMULS_GETRS(n_, nrhs_) ((nrhs_) * (n_) * (n_) ) -#define FADDS_GETRS(n_, nrhs_) ((nrhs_) * (n_) * ((n_) - 1 )) - -#define FMULS_POTRF(n_) ((n_) * (((1. / 6.) * (n_) + 0.5) * (n_) + (1. / 3.))) -#define FADDS_POTRF(n_) ((n_) * (((1. / 6.) * (n_) ) * (n_) - (1. / 6.))) - -#define FMULS_POTRI(n_) ( (n_) * ((2. / 3.) + (n_) * ((1. / 3.) * (n_) + 1. )) ) -#define FADDS_POTRI(n_) ( (n_) * ((1. / 6.) + (n_) * ((1. / 3.) * (n_) - 0.5)) ) - -#define FMULS_POTRS(n_, nrhs_) ((nrhs_) * (n_) * ((n_) + 1 )) -#define FADDS_POTRS(n_, nrhs_) ((nrhs_) * (n_) * ((n_) - 1 )) - -//SPBTRF -//SPBTRS -//SSYTRF -//SSYTRI -//SSYTRS - -#define FMULS_GEQRF(m_, n_) (((m_) > (n_)) \ - ? ((n_) * ((n_) * ( 0.5-(1./3.) * (n_) + (m_)) + (m_) + 23. / 6.)) \ - : ((m_) * ((m_) * ( -0.5-(1./3.) * (m_) + (n_)) + 2.*(n_) + 23. / 6.)) ) -#define FADDS_GEQRF(m_, n_) (((m_) > (n_)) \ - ? ((n_) * ((n_) * ( 0.5-(1./3.) * (n_) + (m_)) + 5. / 6.)) \ - : ((m_) * ((m_) * ( -0.5-(1./3.) * (m_) + (n_)) + (n_) + 5. / 6.)) ) - -#define FMULS_GEQRT(m_, n_) (0.5 * (m_)*(n_)) -#define FADDS_GEQRT(m_, n_) (0.5 * (m_)*(n_)) - -#define FMULS_GEQLF(m_, n_) FMULS_GEQRF(m_, n_) -#define FADDS_GEQLF(m_, n_) FADDS_GEQRF(m_, n_) - -#define FMULS_GERQF(m_, n_) (((m_) > (n_)) \ - ? ((n_) * ((n_) * ( 0.5-(1./3.) * (n_) + (m_)) + (m_) + 29. / 6.)) \ - : ((m_) * ((m_) * ( -0.5-(1./3.) * (m_) + (n_)) + 2.*(n_) + 29. / 6.)) ) -#define FADDS_GERQF(m_, n_) (((m_) > (n_)) \ - ? ((n_) * ((n_) * ( -0.5-(1./3.) * (n_) + (m_)) + (m_) + 5. / 6.)) \ - : ((m_) * ((m_) * ( 0.5-(1./3.) * (m_) + (n_)) + + 5. / 6.)) ) - -#define FMULS_GELQF(m_, n_) FMULS_GERQF(m_, n_) -#define FADDS_GELQF(m_, n_) FADDS_GERQF(m_, n_) - -#define FMULS_UNGQR(m_, n_, k_) ((k_) * (2.* (m_) * (n_) + 2. * (n_) - 5./3. + (k_) * ( 2./3. * (k_) - ((m_) + (n_)) - 1.))) -#define FADDS_UNGQR(m_, n_, k_) ((k_) * (2.* (m_) * (n_) + (n_) - (m_) + 1./3. + (k_) * ( 2./3. * (k_) - ((m_) + (n_)) ))) -#define FMULS_ORGQR FMULS_UNGQR -#define FADDS_ORGQR FADDS_UNGQR - -#define FMULS_UNGQL FMULS_UNGQR -#define FADDS_UNGQL FADDS_UNGQR -#define FMULS_ORGQL FMULS_UNGQR -#define FADDS_ORGQL FADDS_UNGQR - -#define FMULS_UNGRQ(m_, n_, k_) ((k_) * (2.* (m_) * (n_) + (m_) + (n_) - 2./3. + (k_) * ( 2./3. * (k_) - ((m_) + (n_)) - 1.))) -#define FADDS_UNGRQ(m_, n_, k_) ((k_) * (2.* (m_) * (n_) + (m_) - (n_) + 1./3. + (k_) * ( 2./3. * (k_) - ((m_) + (n_)) ))) -#define FMULS_ORGRQ FMULS_UNGRQ -#define FADDS_ORGRQ FADDS_UNGRQ - -#define FMULS_UNGLQ FMULS_UNGRQ -#define FADDS_UNGLQ FADDS_UNGRQ -#define FMULS_ORGLQ FMULS_UNGRQ -#define FADDS_ORGLQ FADDS_UNGRQ - -#define FMULS_GEQRS(m_, n_, nrhs_) ((nrhs_) * ((n_) * ( 2.* (m_) - 0.5 * (n_) + 2.5))) -#define FADDS_GEQRS(m_, n_, nrhs_) ((nrhs_) * ((n_) * ( 2.* (m_) - 0.5 * (n_) + 0.5))) - -#define FMULS_UNMQR(m_, n_, k_, side_) (( (side_) == MagmaLeft ) \ - ? (2.*(n_)*(m_)*(k_) - (n_)*(k_)*(k_) + 2.*(n_)*(k_)) \ - : (2.*(n_)*(m_)*(k_) - (m_)*(k_)*(k_) + (m_)*(k_) + (n_)*(k_) - 0.5*(k_)*(k_) + 0.5*(k_))) -#define FADDS_UNMQR(m_, n_, k_, side_) (( ((side_)) == MagmaLeft ) \ - ? (2.*(n_)*(m_)*(k_) - (n_)*(k_)*(k_) + (n_)*(k_)) \ - : (2.*(n_)*(m_)*(k_) - (m_)*(k_)*(k_) + (m_)*(k_))) -#define FMULS_ORMQR FMULS_UNMQR -#define FADDS_ORMQR FADDS_UNMQR - -#define FMULS_UNMQL FMULS_UNMQR -#define FADDS_UNMQL FADDS_UNMQR -#define FMULS_ORMQL FMULS_UNMQR -#define FADDS_ORMQL FADDS_UNMQR - -#define FMULS_UNMRQ FMULS_UNMQR -#define FADDS_UNMRQ FADDS_UNMQR -#define FMULS_ORMRQ FMULS_UNMQR -#define FADDS_ORMRQ FADDS_UNMQR - -#define FMULS_UNMLQ FMULS_UNMQR -#define FADDS_UNMLQ FADDS_UNMQR -#define FMULS_ORMLQ FMULS_UNMQR -#define FADDS_ORMLQ FADDS_UNMQR - -#define FMULS_TRTRI(n_) ((n_) * ((n_) * ( 1./6. * (n_) + 0.5 ) + 1./3.)) -#define FADDS_TRTRI(n_) ((n_) * ((n_) * ( 1./6. * (n_) - 0.5 ) + 1./3.)) - -#define FMULS_GEHRD(n_) ( (n_) * ((n_) * (5./3. *(n_) + 0.5) - 7./6.) - 13. ) -#define FADDS_GEHRD(n_) ( (n_) * ((n_) * (5./3. *(n_) - 1. ) - 2./3.) - 8. ) - -#define FMULS_SYTRD(n_) ( (n_) * ( (n_) * ( 2./3. * (n_) + 2.5 ) - 1./6. ) - 15.) -#define FADDS_SYTRD(n_) ( (n_) * ( (n_) * ( 2./3. * (n_) + 1. ) - 8./3. ) - 4.) -#define FMULS_HETRD FMULS_SYTRD -#define FADDS_HETRD FADDS_SYTRD - -#define FMULS_GEBRD(m_, n_) ( ((m_) >= (n_)) \ - ? ((n_) * ((n_) * (2. * (m_) - 2./3. * (n_) + 2. ) + 20./3.)) \ - : ((m_) * ((m_) * (2. * (n_) - 2./3. * (m_) + 2. ) + 20./3.)) ) -#define FADDS_GEBRD(m_, n_) ( ((m_) >= (n_)) \ - ? ((n_) * ((n_) * (2. * (m_) - 2./3. * (n_) + 1. ) - (m_) + 5./3.)) \ - : ((m_) * ((m_) * (2. * (n_) - 2./3. * (m_) + 1. ) - (n_) + 5./3.)) ) - -#define FMULS_LARFG(n_) (2*n_) -#define FADDS_LARFG(n_) ( n_) - - -/***************************************************************************//** - Users functions -*******************************************************************************/ -/* - * Level 1 BLAS - */ -#define FLOPS_ZAXPY(n_) (6. * FMULS_AXPY((double)(n_)) + 2.0 * FADDS_AXPY((double)(n_)) ) -#define FLOPS_CAXPY(n_) (6. * FMULS_AXPY((double)(n_)) + 2.0 * FADDS_AXPY((double)(n_)) ) -#define FLOPS_DAXPY(n_) ( FMULS_AXPY((double)(n_)) + FADDS_AXPY((double)(n_)) ) -#define FLOPS_SAXPY(n_) ( FMULS_AXPY((double)(n_)) + FADDS_AXPY((double)(n_)) ) - -/* - * Level 2 BLAS - */ -#define FLOPS_ZGEMV(m_, n_) (6. * FMULS_GEMV((double)(m_), (double)(n_)) + 2.0 * FADDS_GEMV((double)(m_), (double)(n_)) ) -#define FLOPS_CGEMV(m_, n_) (6. * FMULS_GEMV((double)(m_), (double)(n_)) + 2.0 * FADDS_GEMV((double)(m_), (double)(n_)) ) -#define FLOPS_DGEMV(m_, n_) ( FMULS_GEMV((double)(m_), (double)(n_)) + FADDS_GEMV((double)(m_), (double)(n_)) ) -#define FLOPS_SGEMV(m_, n_) ( FMULS_GEMV((double)(m_), (double)(n_)) + FADDS_GEMV((double)(m_), (double)(n_)) ) - -#define FLOPS_ZHEMV(n_) (6. * FMULS_HEMV((double)(n_)) + 2.0 * FADDS_HEMV((double)(n_)) ) -#define FLOPS_CHEMV(n_) (6. * FMULS_HEMV((double)(n_)) + 2.0 * FADDS_HEMV((double)(n_)) ) - -#define FLOPS_ZSYMV(n_) (6. * FMULS_SYMV((double)(n_)) + 2.0 * FADDS_SYMV((double)(n_)) ) -#define FLOPS_CSYMV(n_) (6. * FMULS_SYMV((double)(n_)) + 2.0 * FADDS_SYMV((double)(n_)) ) -#define FLOPS_DSYMV(n_) ( FMULS_SYMV((double)(n_)) + FADDS_SYMV((double)(n_)) ) -#define FLOPS_SSYMV(n_) ( FMULS_SYMV((double)(n_)) + FADDS_SYMV((double)(n_)) ) - -/* - * Level 3 BLAS - */ -#define FLOPS_ZGEMM(m_, n_, k_) (6. * FMULS_GEMM((double)(m_), (double)(n_), (double)(k_)) + 2.0 * FADDS_GEMM((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_CGEMM(m_, n_, k_) (6. * FMULS_GEMM((double)(m_), (double)(n_), (double)(k_)) + 2.0 * FADDS_GEMM((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_DGEMM(m_, n_, k_) ( FMULS_GEMM((double)(m_), (double)(n_), (double)(k_)) + FADDS_GEMM((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_SGEMM(m_, n_, k_) ( FMULS_GEMM((double)(m_), (double)(n_), (double)(k_)) + FADDS_GEMM((double)(m_), (double)(n_), (double)(k_)) ) - -#define FLOPS_ZHEMM(side_, m_, n_) (6. * FMULS_HEMM(side_, (double)(m_), (double)(n_)) + 2.0 * FADDS_HEMM(side_, (double)(m_), (double)(n_)) ) -#define FLOPS_CHEMM(side_, m_, n_) (6. * FMULS_HEMM(side_, (double)(m_), (double)(n_)) + 2.0 * FADDS_HEMM(side_, (double)(m_), (double)(n_)) ) - -#define FLOPS_ZSYMM(side_, m_, n_) (6. * FMULS_SYMM(side_, (double)(m_), (double)(n_)) + 2.0 * FADDS_SYMM(side_, (double)(m_), (double)(n_)) ) -#define FLOPS_CSYMM(side_, m_, n_) (6. * FMULS_SYMM(side_, (double)(m_), (double)(n_)) + 2.0 * FADDS_SYMM(side_, (double)(m_), (double)(n_)) ) -#define FLOPS_DSYMM(side_, m_, n_) ( FMULS_SYMM(side_, (double)(m_), (double)(n_)) + FADDS_SYMM(side_, (double)(m_), (double)(n_)) ) -#define FLOPS_SSYMM(side_, m_, n_) ( FMULS_SYMM(side_, (double)(m_), (double)(n_)) + FADDS_SYMM(side_, (double)(m_), (double)(n_)) ) - -#define FLOPS_ZHERK(k_, n_) (6. * FMULS_HERK((double)(k_), (double)(n_)) + 2.0 * FADDS_HERK((double)(k_), (double)(n_)) ) -#define FLOPS_CHERK(k_, n_) (6. * FMULS_HERK((double)(k_), (double)(n_)) + 2.0 * FADDS_HERK((double)(k_), (double)(n_)) ) - -#define FLOPS_ZSYRK(k_, n_) (6. * FMULS_SYRK((double)(k_), (double)(n_)) + 2.0 * FADDS_SYRK((double)(k_), (double)(n_)) ) -#define FLOPS_CSYRK(k_, n_) (6. * FMULS_SYRK((double)(k_), (double)(n_)) + 2.0 * FADDS_SYRK((double)(k_), (double)(n_)) ) -#define FLOPS_DSYRK(k_, n_) ( FMULS_SYRK((double)(k_), (double)(n_)) + FADDS_SYRK((double)(k_), (double)(n_)) ) -#define FLOPS_SSYRK(k_, n_) ( FMULS_SYRK((double)(k_), (double)(n_)) + FADDS_SYRK((double)(k_), (double)(n_)) ) - -#define FLOPS_ZHER2K(k_, n_) (6. * FMULS_HER2K((double)(k_), (double)(n_)) + 2.0 * FADDS_HER2K((double)(k_), (double)(n_)) ) -#define FLOPS_CHER2K(k_, n_) (6. * FMULS_HER2K((double)(k_), (double)(n_)) + 2.0 * FADDS_HER2K((double)(k_), (double)(n_)) ) - -#define FLOPS_ZSYR2K(k_, n_) (6. * FMULS_SYR2K((double)(k_), (double)(n_)) + 2.0 * FADDS_SYR2K((double)(k_), (double)(n_)) ) -#define FLOPS_CSYR2K(k_, n_) (6. * FMULS_SYR2K((double)(k_), (double)(n_)) + 2.0 * FADDS_SYR2K((double)(k_), (double)(n_)) ) -#define FLOPS_DSYR2K(k_, n_) ( FMULS_SYR2K((double)(k_), (double)(n_)) + FADDS_SYR2K((double)(k_), (double)(n_)) ) -#define FLOPS_SSYR2K(k_, n_) ( FMULS_SYR2K((double)(k_), (double)(n_)) + FADDS_SYR2K((double)(k_), (double)(n_)) ) - -#define FLOPS_ZTRMM(side_, m_, n_) (6. * FMULS_TRMM(side_, (double)(m_), (double)(n_)) + 2.0 * FADDS_TRMM(side_, (double)(m_), (double)(n_)) ) -#define FLOPS_CTRMM(side_, m_, n_) (6. * FMULS_TRMM(side_, (double)(m_), (double)(n_)) + 2.0 * FADDS_TRMM(side_, (double)(m_), (double)(n_)) ) -#define FLOPS_DTRMM(side_, m_, n_) ( FMULS_TRMM(side_, (double)(m_), (double)(n_)) + FADDS_TRMM(side_, (double)(m_), (double)(n_)) ) -#define FLOPS_STRMM(side_, m_, n_) ( FMULS_TRMM(side_, (double)(m_), (double)(n_)) + FADDS_TRMM(side_, (double)(m_), (double)(n_)) ) - -#define FLOPS_ZTRSM(side_, m_, n_) (6. * FMULS_TRSM(side_, (double)(m_), (double)(n_)) + 2.0 * FADDS_TRSM(side_, (double)(m_), (double)(n_)) ) -#define FLOPS_CTRSM(side_, m_, n_) (6. * FMULS_TRSM(side_, (double)(m_), (double)(n_)) + 2.0 * FADDS_TRSM(side_, (double)(m_), (double)(n_)) ) -#define FLOPS_DTRSM(side_, m_, n_) ( FMULS_TRSM(side_, (double)(m_), (double)(n_)) + FADDS_TRSM(side_, (double)(m_), (double)(n_)) ) -#define FLOPS_STRSM(side_, m_, n_) ( FMULS_TRSM(side_, (double)(m_), (double)(n_)) + FADDS_TRSM(side_, (double)(m_), (double)(n_)) ) - -/* - * Lapack - */ -#define FLOPS_ZGETRF(m_, n_) (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) ) -#define FLOPS_CGETRF(m_, n_) (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) ) -#define FLOPS_DGETRF(m_, n_) ( FMULS_GETRF((double)(m_), (double)(n_)) + FADDS_GETRF((double)(m_), (double)(n_)) ) -#define FLOPS_SGETRF(m_, n_) ( FMULS_GETRF((double)(m_), (double)(n_)) + FADDS_GETRF((double)(m_), (double)(n_)) ) - -#define FLOPS_ZGETRI(n_) (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)) ) -#define FLOPS_CGETRI(n_) (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)) ) -#define FLOPS_DGETRI(n_) ( FMULS_GETRI((double)(n_)) + FADDS_GETRI((double)(n_)) ) -#define FLOPS_SGETRI(n_) ( FMULS_GETRI((double)(n_)) + FADDS_GETRI((double)(n_)) ) - -#define FLOPS_ZGETRS(n_, nrhs_) (6. * FMULS_GETRS((double)(n_), (double)(nrhs_)) + 2.0 * FADDS_GETRS((double)(n_), (double)(nrhs_)) ) -#define FLOPS_CGETRS(n_, nrhs_) (6. * FMULS_GETRS((double)(n_), (double)(nrhs_)) + 2.0 * FADDS_GETRS((double)(n_), (double)(nrhs_)) ) -#define FLOPS_DGETRS(n_, nrhs_) ( FMULS_GETRS((double)(n_), (double)(nrhs_)) + FADDS_GETRS((double)(n_), (double)(nrhs_)) ) -#define FLOPS_SGETRS(n_, nrhs_) ( FMULS_GETRS((double)(n_), (double)(nrhs_)) + FADDS_GETRS((double)(n_), (double)(nrhs_)) ) - -#define FLOPS_ZPOTRF(n_) (6. * FMULS_POTRF((double)(n_)) + 2.0 * FADDS_POTRF((double)(n_)) ) -#define FLOPS_CPOTRF(n_) (6. * FMULS_POTRF((double)(n_)) + 2.0 * FADDS_POTRF((double)(n_)) ) -#define FLOPS_DPOTRF(n_) ( FMULS_POTRF((double)(n_)) + FADDS_POTRF((double)(n_)) ) -#define FLOPS_SPOTRF(n_) ( FMULS_POTRF((double)(n_)) + FADDS_POTRF((double)(n_)) ) - -#define FLOPS_ZPOTRI(n_) (6. * FMULS_POTRI((double)(n_)) + 2.0 * FADDS_POTRI((double)(n_)) ) -#define FLOPS_CPOTRI(n_) (6. * FMULS_POTRI((double)(n_)) + 2.0 * FADDS_POTRI((double)(n_)) ) -#define FLOPS_DPOTRI(n_) ( FMULS_POTRI((double)(n_)) + FADDS_POTRI((double)(n_)) ) -#define FLOPS_SPOTRI(n_) ( FMULS_POTRI((double)(n_)) + FADDS_POTRI((double)(n_)) ) - -#define FLOPS_ZPOTRS(n_, nrhs_) (6. * FMULS_POTRS((double)(n_), (double)(nrhs_)) + 2.0 * FADDS_POTRS((double)(n_), (double)(nrhs_)) ) -#define FLOPS_CPOTRS(n_, nrhs_) (6. * FMULS_POTRS((double)(n_), (double)(nrhs_)) + 2.0 * FADDS_POTRS((double)(n_), (double)(nrhs_)) ) -#define FLOPS_DPOTRS(n_, nrhs_) ( FMULS_POTRS((double)(n_), (double)(nrhs_)) + FADDS_POTRS((double)(n_), (double)(nrhs_)) ) -#define FLOPS_SPOTRS(n_, nrhs_) ( FMULS_POTRS((double)(n_), (double)(nrhs_)) + FADDS_POTRS((double)(n_), (double)(nrhs_)) ) - -#define FLOPS_ZGEQRF(m_, n_) (6. * FMULS_GEQRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GEQRF((double)(m_), (double)(n_)) ) -#define FLOPS_CGEQRF(m_, n_) (6. * FMULS_GEQRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GEQRF((double)(m_), (double)(n_)) ) -#define FLOPS_DGEQRF(m_, n_) ( FMULS_GEQRF((double)(m_), (double)(n_)) + FADDS_GEQRF((double)(m_), (double)(n_)) ) -#define FLOPS_SGEQRF(m_, n_) ( FMULS_GEQRF((double)(m_), (double)(n_)) + FADDS_GEQRF((double)(m_), (double)(n_)) ) - -#define FLOPS_ZGEQRT(m_, n_) (6. * FMULS_GEQRT((double)(m_), (double)(n_)) + 2.0 * FADDS_GEQRT((double)(m_), (double)(n_)) ) -#define FLOPS_CGEQRT(m_, n_) (6. * FMULS_GEQRT((double)(m_), (double)(n_)) + 2.0 * FADDS_GEQRT((double)(m_), (double)(n_)) ) -#define FLOPS_DGEQRT(m_, n_) ( FMULS_GEQRT((double)(m_), (double)(n_)) + FADDS_GEQRT((double)(m_), (double)(n_)) ) -#define FLOPS_SGEQRT(m_, n_) ( FMULS_GEQRT((double)(m_), (double)(n_)) + FADDS_GEQRT((double)(m_), (double)(n_)) ) - -#define FLOPS_ZGEQLF(m_, n_) (6. * FMULS_GEQLF((double)(m_), (double)(n_)) + 2.0 * FADDS_GEQLF((double)(m_), (double)(n_)) ) -#define FLOPS_CGEQLF(m_, n_) (6. * FMULS_GEQLF((double)(m_), (double)(n_)) + 2.0 * FADDS_GEQLF((double)(m_), (double)(n_)) ) -#define FLOPS_DGEQLF(m_, n_) ( FMULS_GEQLF((double)(m_), (double)(n_)) + FADDS_GEQLF((double)(m_), (double)(n_)) ) -#define FLOPS_SGEQLF(m_, n_) ( FMULS_GEQLF((double)(m_), (double)(n_)) + FADDS_GEQLF((double)(m_), (double)(n_)) ) - -#define FLOPS_ZGERQF(m_, n_) (6. * FMULS_GERQF((double)(m_), (double)(n_)) + 2.0 * FADDS_GERQF((double)(m_), (double)(n_)) ) -#define FLOPS_CGERQF(m_, n_) (6. * FMULS_GERQF((double)(m_), (double)(n_)) + 2.0 * FADDS_GERQF((double)(m_), (double)(n_)) ) -#define FLOPS_DGERQF(m_, n_) ( FMULS_GERQF((double)(m_), (double)(n_)) + FADDS_GERQF((double)(m_), (double)(n_)) ) -#define FLOPS_SGERQF(m_, n_) ( FMULS_GERQF((double)(m_), (double)(n_)) + FADDS_GERQF((double)(m_), (double)(n_)) ) - -#define FLOPS_ZGELQF(m_, n_) (6. * FMULS_GELQF((double)(m_), (double)(n_)) + 2.0 * FADDS_GELQF((double)(m_), (double)(n_)) ) -#define FLOPS_CGELQF(m_, n_) (6. * FMULS_GELQF((double)(m_), (double)(n_)) + 2.0 * FADDS_GELQF((double)(m_), (double)(n_)) ) -#define FLOPS_DGELQF(m_, n_) ( FMULS_GELQF((double)(m_), (double)(n_)) + FADDS_GELQF((double)(m_), (double)(n_)) ) -#define FLOPS_SGELQF(m_, n_) ( FMULS_GELQF((double)(m_), (double)(n_)) + FADDS_GELQF((double)(m_), (double)(n_)) ) - -#define FLOPS_ZUNGQR(m_, n_, k_) (6. * FMULS_UNGQR((double)(m_), (double)(n_), (double)(k_)) + 2.0 * FADDS_UNGQR((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_CUNGQR(m_, n_, k_) (6. * FMULS_UNGQR((double)(m_), (double)(n_), (double)(k_)) + 2.0 * FADDS_UNGQR((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_DORGQR(m_, n_, k_) ( FMULS_UNGQR((double)(m_), (double)(n_), (double)(k_)) + FADDS_UNGQR((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_SORGQR(m_, n_, k_) ( FMULS_UNGQR((double)(m_), (double)(n_), (double)(k_)) + FADDS_UNGQR((double)(m_), (double)(n_), (double)(k_)) ) - -#define FLOPS_ZUNGQL(m_, n_, k_) (6. * FMULS_UNGQL((double)(m_), (double)(n_), (double)(k_)) + 2.0 * FADDS_UNGQL((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_CUNGQL(m_, n_, k_) (6. * FMULS_UNGQL((double)(m_), (double)(n_), (double)(k_)) + 2.0 * FADDS_UNGQL((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_DORGQL(m_, n_, k_) ( FMULS_UNGQL((double)(m_), (double)(n_), (double)(k_)) + FADDS_UNGQL((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_SORGQL(m_, n_, k_) ( FMULS_UNGQL((double)(m_), (double)(n_), (double)(k_)) + FADDS_UNGQL((double)(m_), (double)(n_), (double)(k_)) ) - -#define FLOPS_ZUNGRQ(m_, n_, k_) (6. * FMULS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) + 2.0 * FADDS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_CUNGRQ(m_, n_, k_) (6. * FMULS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) + 2.0 * FADDS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_DORGRQ(m_, n_, k_) ( FMULS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) + FADDS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_SORGRQ(m_, n_, k_) ( FMULS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) + FADDS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) ) - -#define FLOPS_ZUNGLQ(m_, n_, k_) (6. * FMULS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) + 2.0 * FADDS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_CUNGLQ(m_, n_, k_) (6. * FMULS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) + 2.0 * FADDS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_DORGLQ(m_, n_, k_) ( FMULS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) + FADDS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) ) -#define FLOPS_SORGLQ(m_, n_, k_) ( FMULS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) + FADDS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) ) - -#define FLOPS_ZUNMQR(m_, n_, k_, side_) (6. * FMULS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) + 2.0 * FADDS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_CUNMQR(m_, n_, k_, side_) (6. * FMULS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) + 2.0 * FADDS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_DORMQR(m_, n_, k_, side_) ( FMULS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) + FADDS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_SORMQR(m_, n_, k_, side_) ( FMULS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) + FADDS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) ) - -#define FLOPS_ZUNMQL(m_, n_, k_, side_) (6. * FMULS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) + 2.0 * FADDS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_CUNMQL(m_, n_, k_, side_) (6. * FMULS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) + 2.0 * FADDS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_DORMQL(m_, n_, k_, side_) ( FMULS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) + FADDS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_SORMQL(m_, n_, k_, side_) ( FMULS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) + FADDS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) ) - -#define FLOPS_ZUNMRQ(m_, n_, k_, side_) (6. * FMULS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) + 2.0 * FADDS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_CUNMRQ(m_, n_, k_, side_) (6. * FMULS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) + 2.0 * FADDS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_DORMRQ(m_, n_, k_, side_) ( FMULS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) + FADDS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_SORMRQ(m_, n_, k_, side_) ( FMULS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) + FADDS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) ) - -#define FLOPS_ZUNMLQ(m_, n_, k_, side_) (6. * FMULS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) + 2.0 * FADDS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_CUNMLQ(m_, n_, k_, side_) (6. * FMULS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) + 2.0 * FADDS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_DORMLQ(m_, n_, k_, side_) ( FMULS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) + FADDS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) ) -#define FLOPS_SORMLQ(m_, n_, k_, side_) ( FMULS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) + FADDS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) ) - -#define FLOPS_ZGEQRS(m_, n_, nrhs_) (6. * FMULS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) + 2.0 * FADDS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) ) -#define FLOPS_CGEQRS(m_, n_, nrhs_) (6. * FMULS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) + 2.0 * FADDS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) ) -#define FLOPS_DGEQRS(m_, n_, nrhs_) ( FMULS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) + FADDS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) ) -#define FLOPS_SGEQRS(m_, n_, nrhs_) ( FMULS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) + FADDS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) ) - -#define FLOPS_ZTRTRI(n_) (6. * FMULS_TRTRI((double)(n_)) + 2.0 * FADDS_TRTRI((double)(n_)) ) -#define FLOPS_CTRTRI(n_) (6. * FMULS_TRTRI((double)(n_)) + 2.0 * FADDS_TRTRI((double)(n_)) ) -#define FLOPS_DTRTRI(n_) ( FMULS_TRTRI((double)(n_)) + FADDS_TRTRI((double)(n_)) ) -#define FLOPS_STRTRI(n_) ( FMULS_TRTRI((double)(n_)) + FADDS_TRTRI((double)(n_)) ) - -#define FLOPS_ZGEHRD(n_) (6. * FMULS_GEHRD((double)(n_)) + 2.0 * FADDS_GEHRD((double)(n_)) ) -#define FLOPS_CGEHRD(n_) (6. * FMULS_GEHRD((double)(n_)) + 2.0 * FADDS_GEHRD((double)(n_)) ) -#define FLOPS_DGEHRD(n_) ( FMULS_GEHRD((double)(n_)) + FADDS_GEHRD((double)(n_)) ) -#define FLOPS_SGEHRD(n_) ( FMULS_GEHRD((double)(n_)) + FADDS_GEHRD((double)(n_)) ) - -#define FLOPS_ZHETRD(n_) (6. * FMULS_HETRD((double)(n_)) + 2.0 * FADDS_HETRD((double)(n_)) ) -#define FLOPS_CHETRD(n_) (6. * FMULS_HETRD((double)(n_)) + 2.0 * FADDS_HETRD((double)(n_)) ) - -#define FLOPS_ZSYTRD(n_) (6. * FMULS_SYTRD((double)(n_)) + 2.0 * FADDS_SYTRD((double)(n_)) ) -#define FLOPS_CSYTRD(n_) (6. * FMULS_SYTRD((double)(n_)) + 2.0 * FADDS_SYTRD((double)(n_)) ) -#define FLOPS_DSYTRD(n_) ( FMULS_SYTRD((double)(n_)) + FADDS_SYTRD((double)(n_)) ) -#define FLOPS_SSYTRD(n_) ( FMULS_SYTRD((double)(n_)) + FADDS_SYTRD((double)(n_)) ) - -#define FLOPS_ZGEBRD(m_, n_) (6. * FMULS_GEBRD((double)(m_), (double)(n_)) + 2.0 * FADDS_GEBRD((double)(m_), (double)(n_)) ) -#define FLOPS_CGEBRD(m_, n_) (6. * FMULS_GEBRD((double)(m_), (double)(n_)) + 2.0 * FADDS_GEBRD((double)(m_), (double)(n_)) ) -#define FLOPS_DGEBRD(m_, n_) ( FMULS_GEBRD((double)(m_), (double)(n_)) + FADDS_GEBRD((double)(m_), (double)(n_)) ) -#define FLOPS_SGEBRD(m_, n_) ( FMULS_GEBRD((double)(m_), (double)(n_)) + FADDS_GEBRD((double)(m_), (double)(n_)) ) - -#define FLOPS_ZLARFG(n_) (6. * FMULS_LARFG((double)n_) + 2. * FADDS_LARFG((double)n_) ) -#define FLOPS_CLARFG(n_) (6. * FMULS_LARFG((double)n_) + 2. * FADDS_LARFG((double)n_) ) -#define FLOPS_DLARFG(n_) ( FMULS_LARFG((double)n_) + FADDS_LARFG((double)n_) ) -#define FLOPS_SLARFG(n_) ( FMULS_LARFG((double)n_) + FADDS_LARFG((double)n_) ) - -#endif /* MAGMA_FLOPS_H */ diff --git a/cscs-checks/libraries/magma/magma-2.2/magma_util.cpp b/cscs-checks/libraries/magma/magma-2.2/magma_util.cpp deleted file mode 100644 index 78d0893396..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/magma_util.cpp +++ /dev/null @@ -1,718 +0,0 @@ -/* - -- MAGMA (version 2.2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date November 2016 - - @author Mark Gates - - Utilities for testing. -*/ -#include -#include -#include -#include - -// flock exists only on Unix -#ifdef USE_FLOCK -#include // flock -#include // fchmod -#endif - -#include "magma_v2.h" -#include "testings.h" - -// -------------------- -// global variable -#if defined(HAVE_CUBLAS) - const char* g_platform_str = "cuBLAS"; - -#elif defined(HAVE_clBLAS) - const char* g_platform_str = "clBLAS"; - -#elif defined(HAVE_MIC) - const char* g_platform_str = "Xeon Phi"; - -#else - #error "unknown platform" -#endif - - -// -------------------- -// If condition is false, print error message and exit. -// Error message is formatted using printf, using any additional arguments. -extern "C" -void magma_assert( bool condition, const char* msg, ... ) -{ - if ( ! condition ) { - printf( "Assert failed: " ); - va_list va; - va_start( va, msg ); - vprintf( msg, va ); - printf( "\n" ); - exit(1); - } -} - -// -------------------- -// If condition is false, print warning message; does not exit. -// Warning message is formatted using printf, using any additional arguments. -extern "C" -void magma_assert_warn( bool condition, const char* msg, ... ) -{ - if ( ! condition ) { - printf( "Assert failed: " ); - va_list va; - va_start( va, msg ); - vprintf( msg, va ); - printf( "\n" ); - } -} - - -// -------------------- -// Acquire lock file. -// operation should be LOCK_SH (for shared access) or LOCK_EX (for exclusive access). -// Returns open file descriptor. -// Exits program on error. -// Lock is released by simply closing the file descriptor with close(), -// or when program exits or crashes. - -int open_lockfile( const char* file, int operation ) -{ - int fd = -1; -#ifdef USE_FLOCK - int err; - - if ( file == NULL ) - return -1; - else if ( operation != LOCK_SH && operation != LOCK_EX ) - return -2; - - fd = open( file, O_RDONLY|O_CREAT, 0666 ); - if ( fd < 0 ) { - fprintf( stderr, "Error: Can't read file %s: %s (%d)\n", - file, strerror(errno), errno ); - exit(1); - } - - // make it world-writable so anyone can rm the lockfile later on if needed - // Ignore error -- occurs when someone else created the file. - err = fchmod( fd, 0666 ); - //if ( err < 0 ) { - // fprintf( stderr, "Warning: Can't chmod file %s 0666: %s (%d)\n", - // file, strerror(errno), errno ); - //} - - // first try nonblocking lock; - // if that fails (e.g., someone has exclusive lock) let user know and try blocking lock. - err = flock( fd, operation|LOCK_NB ); - if ( err < 0 ) { - fprintf( stderr, "Waiting for lock on %s...\n", file ); - err = flock( fd, operation ); - if ( err < 0 ) { - fprintf( stderr, "Error: Can't lock file %s (operation %d): %s (%d)\n", - file, operation, strerror(errno), errno ); - exit(1); - } - } -#endif - return fd; -} - -// filename to use for lock file -const char* lockfile = "/tmp/icl-lock"; - - -// -------------------- -const char *usage_short = -"%% Usage: %s [options] [-h|--help]\n\n"; - -const char *usage = -"Options are:\n" -" -n m[,n[,k] Adds problem sizes. All of -n, -N, --range are now synonymous.\n" -" -N m[,n[,k] m, n, k can each be a single size or an inclusive range start:end:step.\n" -" --range m[,n[,k] If two ranges are given, the number of sizes is limited by the smaller range.\n" -" If only m,n are given, then k=n. If only m is given, then n=k=m.\n" -" Examples: -N 100 -N 100,200,300 -N 100,200:1000:100,300 -N 100:1000:100\n" -" Default test sizes are the range 1088 : 10304 : 1024, that is, 1K+64 : 10K+64 : 1K.\n" -" For batched, default sizes are 32 : 512 : 32.\n" -"\n" -" -c --[no]check Whether to check results. Some tests always check.\n" -" Also set with $MAGMA_TESTINGS_CHECK.\n" -" -c2 --check2 For getrf, check residual |Ax-b| instead of |PA-LU|.\n" -" -l --[no]lapack Whether to run lapack. Some tests always run lapack.\n" -" Also set with $MAGMA_RUN_LAPACK.\n" -" --[no]warmup Whether to warmup. Not yet implemented in most cases.\n" -" Also set with $MAGMA_WARMUP.\n" -" --dev x GPU device to use, default 0.\n" -" --align n Round up LDDA on GPU to multiple of align, default 32.\n" -" --verbose Verbose output.\n" -" -x --exclusive Lock file for exclusive use (internal ICL functionality).\n" -"\n" -"The following options apply to only some routines.\n" -" --batch x number of matrices for the batched routines, default 1000.\n" -" --nb x Block size, default set automatically.\n" -" --nrhs x Number of right hand sides, default 1.\n" -" --nqueue x Number of device queues, default 1.\n" -" --ngpu x Number of GPUs, default 1. Also set with $MAGMA_NUM_GPUS.\n" -" (Some testers take --ngpu -1 to run the multi-GPU code with 1 GPU.\n" -" --nsub x Number of submatrices, default 1.\n" -" --niter x Number of iterations to repeat each test, default 1.\n" -" --nthread x Number of CPU threads for some experimental codes, default 1.\n" -" (For most testers, set $OMP_NUM_THREADS or $MKL_NUM_THREADS\n" -" to control the number of CPU threads.)\n" -" --offset x Offset from beginning of matrix, default 0.\n" -" --itype [123] Generalized Hermitian-definite eigenproblem type, default 1.\n" -" --svd-work x SVD workspace size, one of:\n" -" query* queries LAPACK and MAGMA\n" -" doc is what LAPACK and MAGMA document as required\n" -" doc_old is what LAPACK <= 3.6 documents\n" -" min is minimum required, which may be smaller than doc\n" -" min_old is minimum required by LAPACK <= 3.6\n" -" min_fast is minimum to take fast path in gesvd\n" -" min-1 is (minimum - 1), to test error return\n" -" opt is optimal\n" -" opt_old is optimal as computed by LAPACK <= 3.6\n" -" opt_slow is optimal for slow path in gesvd\n" -" max is maximum that will be used\n" -"\n" -" --version x version of routine, e.g., during development, default 1.\n" -" --fraction x fraction of eigenvectors to compute, default 1.\n" -" If fraction == 0, computes eigenvalues il=0.1*N to iu=0.3*N.\n" -" --tolerance x accuracy tolerance, multiplied by machine epsilon, default 30.\n" -" --tol x same.\n" -" -L -U -F uplo = Lower*, Upper, or Full.\n" -" -[NTC][NTC] transA = NoTrans*, Trans, or ConjTrans (first letter) and\n" -" transB = NoTrans*, Trans, or ConjTrans (second letter).\n" -" -[TC] transA = Trans or ConjTrans. Default is NoTrans. Doesn't change transB.\n" -" -S[LR] side = Left*, Right.\n" -" -D[NU] diag = NonUnit*, Unit.\n" -" --jobu [nsoa] No*, Some, Overwrite, or All left singular vectors (U). gesdd uses this for jobz.\n" -" --jobv [nsoa] No*, Some, Overwrite, or All right singular vectors (V).\n" -" -J[NV] jobz = No* or Vectors; compute eigenvectors (symmetric).\n" -" -L[NV] jobvl = No* or Vectors; compute left eigenvectors (non-symmetric).\n" -" -R[NV] jobvr = No* or Vectors; compute right eigenvectors (non-symmetric).\n" -"\n" -" * default values\n"; - - -// constructor fills in default values -magma_opts::magma_opts( magma_opts_t flag ) -{ - // fill in default values - this->batchcount = 300; - this->device = 0; - this->align = 32; - this->nb = 0; // auto - this->nrhs = 1; - this->nqueue = 1; - this->ngpu = magma_num_gpus(); - this->nsub = 1; - this->niter = 1; - this->nthread = 1; - this->offset = 0; - this->itype = 1; - this->version = 1; - this->verbose = 0; - this->fraction = 1.; - this->tolerance = 30.; - this->check = (getenv("MAGMA_TESTINGS_CHECK") != NULL); - this->magma = true; - this->lapack = (getenv("MAGMA_RUN_LAPACK") != NULL); - this->warmup = (getenv("MAGMA_WARMUP") != NULL); - - this->uplo = MagmaLower; // potrf, etc. - this->transA = MagmaNoTrans; // gemm, etc. - this->transB = MagmaNoTrans; // gemm - this->side = MagmaLeft; // trsm, etc. - this->diag = MagmaNonUnit; // trsm, etc. - this->jobz = MagmaNoVec; // heev: no eigen vectors - this->jobvr = MagmaNoVec; // geev: no right eigen vectors - this->jobvl = MagmaNoVec; // geev: no left eigen vectors - - #ifdef USE_FLOCK - this->flock_op = LOCK_SH; // default shared lock - #endif - - if ( flag == MagmaOptsBatched ) { - // 32, 64, ..., 512 - this->default_nstart = 32; - this->default_nstep = 32; - this->default_nend = 512; - } - else { - // 1K + 64, 2K + 64, ..., 10K + 64 - this->default_nstart = 1024 + 64; - this->default_nstep = 1024; - this->default_nend = 10304; - } -} - - -// Given pointer to a string, scans the string for a comma, -// and advances the string to after the comma. -// Returns true if comma found, otherwise false. -bool scan_comma( char** handle ) -{ - char* ptr = *handle; - // scan past whitespace - while( *ptr == ' ' ) { - ptr += 1; - } - // scan comma - if ( *ptr == ',' ) { - *handle = ptr + 1; - return true; - } - else { - return false; - } -} - - -// Given pointer to a string, scans the string for a range "%d:%d:%d" or a number "%d". -// If range, then start, end, step are set accordingly. -// If number, then start = end and step = 0. -// Advances the string to after the range or number. -// Ensures start, end >= 0. -// If step >= 0, ensures start <= end; -// if step < 0, ensures start >= end. -// Returns true if found valid range or number, otherwise false. -bool scan_range( char** handle, int* start, int* end, int* step ) -{ - int bytes1, bytes3, cnt; - char* ptr = *handle; - cnt = sscanf( ptr, "%d%n:%d:%d%n", start, &bytes1, end, step, &bytes3 ); - if ( cnt == 3 ) { - *handle += bytes3; - return (*start >= 0 && *end >= 0 && (*step >= 0 ? *start <= *end : *start >= *end)); - } - else if ( cnt == 1 ) { - *handle += bytes1; - *end = *start; - *step = 0; - return (*start >= 0); - } - else { - return false; - } -} - - -// parse values from command line -void magma_opts::parse_opts( int argc, char** argv ) -{ - printf( usage_short, argv[0] ); - - magma_int_t ndevices; - magma_device_t devices[ MagmaMaxGPUs ]; - magma_getdevices( devices, MagmaMaxGPUs, &ndevices ); - - this->ntest = 0; - for( int i = 1; i < argc; ++i ) { - // ----- problem size - // -n or -N or --range fill in single size or range of sizes, and update ntest - if ( (strcmp("-n", argv[i]) == 0 || - strcmp("-N", argv[i]) == 0 || - strcmp("--range", argv[i]) == 0) && i+1 < argc ) - { - i++; - int m_start, m_end, m_step; - int n_start, n_end, n_step; - int k_start, k_end, k_step; - char* ptr = argv[i]; - bool valid = scan_range( &ptr, &m_start, &m_end, &m_step ); - if ( valid ) { - if ( *ptr == '\0' ) { - n_start = k_start = m_start; - n_end = k_end = m_end; - n_step = k_step = m_step; - } - else { - valid = scan_comma( &ptr ) && scan_range( &ptr, &n_start, &n_end, &n_step ); - if ( valid ) { - if ( *ptr == '\0' ) { - k_start = n_start; - k_end = n_end; - k_step = n_step; - } - else { - valid = scan_comma( &ptr ) && scan_range( &ptr, &k_start, &k_end, &k_step ); - valid = (valid && *ptr == '\0'); - } - } - } - } - - magma_assert( valid, "error: '%s %s' is not valid, expected (m|m_start:m_end:m_step)[,(n|n_start:n_end:n_step)[,(k|k_start:k_end:k_step)]]\n", - argv[i-1], argv[i] ); - // if all zero steps, just give start point - if ( m_step == 0 && n_step == 0 && k_step == 0 ) { - magma_assert( this->ntest < MAX_NTEST, "error: %s %s exceeded maximum number of tests (%d).\n", - argv[i-1], argv[i], MAX_NTEST ); - this->msize[ this->ntest ] = m_start; - this->nsize[ this->ntest ] = n_start; - this->ksize[ this->ntest ] = k_start; - this->ntest++; - } - else { - for( int m=m_start, n=n_start, k=k_start; - (m_step >= 0 ? m <= m_end : m >= m_end) && - (n_step >= 0 ? n <= n_end : n >= n_end) && - (k_step >= 0 ? k <= k_end : k >= k_end); - m += m_step, n += n_step, k += k_step ) - { - magma_assert( this->ntest < MAX_NTEST, "error: %s %s exceeded maximum number of tests (%d).\n", - argv[i-1], argv[i], MAX_NTEST ); - this->msize[ this->ntest ] = m; - this->nsize[ this->ntest ] = n; - this->ksize[ this->ntest ] = k; - this->ntest++; - } - } - } - - // ----- scalar arguments - else if ( strcmp("--dev", argv[i]) == 0 && i+1 < argc ) { - this->device = atoi( argv[++i] ); - magma_assert( this->device >= 0 && this->device < ndevices, - "error: --dev %s is invalid; ensure dev in [0,%d].\n", argv[i], ndevices-1 ); - } - else if ( strcmp("--align", argv[i]) == 0 && i+1 < argc ) { - this->align = atoi( argv[++i] ); - magma_assert( this->align >= 1 && this->align <= 4096, - "error: --align %s is invalid; ensure align in [1,4096].\n", argv[i] ); - } - else if ( strcmp("--nrhs", argv[i]) == 0 && i+1 < argc ) { - this->nrhs = atoi( argv[++i] ); - magma_assert( this->nrhs >= 0, - "error: --nrhs %s is invalid; ensure nrhs >= 0.\n", argv[i] ); - } - else if ( strcmp("--nb", argv[i]) == 0 && i+1 < argc ) { - this->nb = atoi( argv[++i] ); - magma_assert( this->nb > 0, - "error: --nb %s is invalid; ensure nb > 0.\n", argv[i] ); - } - else if ( strcmp("--ngpu", argv[i]) == 0 && i+1 < argc ) { - this->ngpu = atoi( argv[++i] ); - magma_assert( this->ngpu <= MagmaMaxGPUs, - "error: --ngpu %s exceeds MagmaMaxGPUs, %d.\n", argv[i], MagmaMaxGPUs ); - magma_assert( this->ngpu <= ndevices, - "error: --ngpu %s exceeds number of CUDA or OpenCL devices, %d.\n", argv[i], ndevices ); - // allow ngpu == -1, which forces multi-GPU code with 1 GPU. see testing_zhegvd, etc. - magma_assert( this->ngpu > 0 || this->ngpu == -1, - "error: --ngpu %s is invalid; ensure ngpu != 0.\n", argv[i] ); - // save in environment variable, so magma_num_gpus() picks it up - char env_num_gpus[20]; // space for "MAGMA_NUM_GPUS=", 4 digits, and nil - #if defined( _WIN32 ) || defined( _WIN64 ) - snprintf( env_num_gpus, sizeof(env_num_gpus), "MAGMA_NUM_GPUS=%lld", (long long) abs(this->ngpu) ); - putenv( env_num_gpus ); - #else - snprintf( env_num_gpus, sizeof(env_num_gpus), "%lld", (long long) abs(this->ngpu) ); - setenv( "MAGMA_NUM_GPUS", env_num_gpus, true ); - #endif - } - else if ( strcmp("--nsub", argv[i]) == 0 && i+1 < argc ) { - this->nsub = atoi( argv[++i] ); - magma_assert( this->nsub > 0, - "error: --nsub %s is invalid; ensure nsub > 0.\n", argv[i] ); - } - else if ( strcmp("--nqueue", argv[i]) == 0 && i+1 < argc ) { - this->nqueue = atoi( argv[++i] ); - magma_assert( this->nqueue > 0, - "error: --nqueue %s is invalid; ensure nqueue > 0.\n", argv[i] ); - } - else if ( strcmp("--niter", argv[i]) == 0 && i+1 < argc ) { - this->niter = atoi( argv[++i] ); - magma_assert( this->niter > 0, - "error: --niter %s is invalid; ensure niter > 0.\n", argv[i] ); - } - else if ( strcmp("--nthread", argv[i]) == 0 && i+1 < argc ) { - this->nthread = atoi( argv[++i] ); - magma_assert( this->nthread > 0, - "error: --nthread %s is invalid; ensure nthread > 0.\n", argv[i] ); - } - else if ( strcmp("--offset", argv[i]) == 0 && i+1 < argc ) { - this->offset = atoi( argv[++i] ); - magma_assert( this->offset >= 0, - "error: --offset %s is invalid; ensure offset >= 0.\n", argv[i] ); - } - else if ( strcmp("--itype", argv[i]) == 0 && i+1 < argc ) { - this->itype = atoi( argv[++i] ); - magma_assert( this->itype >= 1 && this->itype <= 3, - "error: --itype %s is invalid; ensure itype in [1,2,3].\n", argv[i] ); - } - else if ( strcmp("--version", argv[i]) == 0 && i+1 < argc ) { - this->version = atoi( argv[++i] ); - magma_assert( this->version >= 1, - "error: --version %s is invalid; ensure version > 0.\n", argv[i] ); - } - else if ( strcmp("--fraction", argv[i]) == 0 && i+1 < argc ) { - this->fraction = atof( argv[++i] ); - magma_assert( this->fraction >= 0 && this->fraction <= 1, - "error: --fraction %s is invalid; ensure fraction in [0,1].\n", argv[i] ); - } - else if ( (strcmp("--tol", argv[i]) == 0 || - strcmp("--tolerance", argv[i]) == 0) && i+1 < argc ) { - this->tolerance = atof( argv[++i] ); - magma_assert( this->tolerance >= 0 && this->tolerance <= 1000, - "error: --tolerance %s is invalid; ensure tolerance in [0,1000].\n", argv[i] ); - } - else if ( strcmp("--batch", argv[i]) == 0 && i+1 < argc ) { - this->batchcount = atoi( argv[++i] ); - magma_assert( this->batchcount > 0, - "error: --batch %s is invalid; ensure batch > 0.\n", argv[i] ); - } - // ----- boolean arguments - // check results - else if ( strcmp("-c", argv[i]) == 0 || - strcmp("--check", argv[i]) == 0 ) { this->check = 1; } - else if ( strcmp("-c2", argv[i]) == 0 || - strcmp("--check2", argv[i]) == 0 ) { this->check = 2; } - else if ( strcmp("--nocheck", argv[i]) == 0 ) { this->check = 0; } - - else if ( strcmp("-l", argv[i]) == 0 || - strcmp("--lapack", argv[i]) == 0 ) { this->lapack = true; } - else if ( strcmp("--nolapack", argv[i]) == 0 ) { this->lapack = false; } - - else if ( strcmp("--magma", argv[i]) == 0 ) { this->magma = true; } - else if ( strcmp("--nomagma", argv[i]) == 0 ) { this->magma = false; } - - else if ( strcmp("--warmup", argv[i]) == 0 ) { this->warmup = true; } - else if ( strcmp("--nowarmup", argv[i]) == 0 ) { this->warmup = false; } - - //else if ( strcmp("--all", argv[i]) == 0 ) { this->all = true; } - //else if ( strcmp("--notall", argv[i]) == 0 ) { this->all = false; } - - else if ( strcmp("-v", argv[i]) == 0 || - strcmp("--verbose", argv[i]) == 0 ) { this->verbose += 1; } - - // ----- lapack options - else if ( strcmp("-L", argv[i]) == 0 ) { this->uplo = MagmaLower; } - else if ( strcmp("-U", argv[i]) == 0 ) { this->uplo = MagmaUpper; } - else if ( strcmp("-F", argv[i]) == 0 ) { this->uplo = MagmaFull; } - - else if ( strcmp("-NN", argv[i]) == 0 ) { this->transA = MagmaNoTrans; this->transB = MagmaNoTrans; } - else if ( strcmp("-NT", argv[i]) == 0 ) { this->transA = MagmaNoTrans; this->transB = MagmaTrans; } - else if ( strcmp("-NC", argv[i]) == 0 ) { this->transA = MagmaNoTrans; this->transB = MagmaConjTrans; } - else if ( strcmp("-TN", argv[i]) == 0 ) { this->transA = MagmaTrans; this->transB = MagmaNoTrans; } - else if ( strcmp("-TT", argv[i]) == 0 ) { this->transA = MagmaTrans; this->transB = MagmaTrans; } - else if ( strcmp("-TC", argv[i]) == 0 ) { this->transA = MagmaTrans; this->transB = MagmaConjTrans; } - else if ( strcmp("-CN", argv[i]) == 0 ) { this->transA = MagmaConjTrans; this->transB = MagmaNoTrans; } - else if ( strcmp("-CT", argv[i]) == 0 ) { this->transA = MagmaConjTrans; this->transB = MagmaTrans; } - else if ( strcmp("-CC", argv[i]) == 0 ) { this->transA = MagmaConjTrans; this->transB = MagmaConjTrans; } - else if ( strcmp("-T", argv[i]) == 0 ) { this->transA = MagmaTrans; } - else if ( strcmp("-C", argv[i]) == 0 ) { this->transA = MagmaConjTrans; } - - else if ( strcmp("-SL", argv[i]) == 0 ) { this->side = MagmaLeft; } - else if ( strcmp("-SR", argv[i]) == 0 ) { this->side = MagmaRight; } - - else if ( strcmp("-DN", argv[i]) == 0 ) { this->diag = MagmaNonUnit; } - else if ( strcmp("-DU", argv[i]) == 0 ) { this->diag = MagmaUnit; } - - else if ( strcmp("-JN", argv[i]) == 0 ) { this->jobz = MagmaNoVec; } - else if ( strcmp("-JV", argv[i]) == 0 ) { this->jobz = MagmaVec; } - - else if ( strcmp("-LN", argv[i]) == 0 ) { this->jobvl = MagmaNoVec; } - else if ( strcmp("-LV", argv[i]) == 0 ) { this->jobvl = MagmaVec; } - - else if ( strcmp("-RN", argv[i]) == 0 ) { this->jobvr = MagmaNoVec; } - else if ( strcmp("-RV", argv[i]) == 0 ) { this->jobvr = MagmaVec; } - - // ----- vectors of options - else if ( strcmp("--svd-work", argv[i]) == 0 && i+1 < argc ) { - i += 1; - char *token; - char *arg = strdup( argv[i] ); - for (token = strtok( arg, ", " ); - token != NULL; - token = strtok( NULL, ", " )) - { - if ( *token == '\0' ) { /* ignore empty tokens */ } - else if ( strcmp( token, "all" ) == 0 ) { this->svd_work.push_back( MagmaSVD_all ); } - else if ( strcmp( token, "query" ) == 0 ) { this->svd_work.push_back( MagmaSVD_query ); } - else if ( strcmp( token, "doc" ) == 0 ) { this->svd_work.push_back( MagmaSVD_doc ); } - else if ( strcmp( token, "doc_old" ) == 0 ) { this->svd_work.push_back( MagmaSVD_doc_old ); } - else if ( strcmp( token, "min" ) == 0 ) { this->svd_work.push_back( MagmaSVD_min ); } - else if ( strcmp( token, "min-1" ) == 0 ) { this->svd_work.push_back( MagmaSVD_min_1 ); } - else if ( strcmp( token, "min_old" ) == 0 ) { this->svd_work.push_back( MagmaSVD_min_old ); } - else if ( strcmp( token, "min_old-1" ) == 0 ) { this->svd_work.push_back( MagmaSVD_min_old_1 ); } - else if ( strcmp( token, "min_fast" ) == 0 ) { this->svd_work.push_back( MagmaSVD_min_fast ); } - else if ( strcmp( token, "min_fast-1") == 0 ) { this->svd_work.push_back( MagmaSVD_min_fast_1 ); } - else if ( strcmp( token, "opt" ) == 0 ) { this->svd_work.push_back( MagmaSVD_opt ); } - else if ( strcmp( token, "opt_old" ) == 0 ) { this->svd_work.push_back( MagmaSVD_opt_old ); } - else if ( strcmp( token, "opt_slow" ) == 0 ) { this->svd_work.push_back( MagmaSVD_opt_slow ); } - else if ( strcmp( token, "max" ) == 0 ) { this->svd_work.push_back( MagmaSVD_max ); } - else { - magma_assert( false, "error: --svd-work '%s' is invalid\n", argv[i] ); - } - } - free( arg ); - } - - else if ( strcmp("--jobu", argv[i]) == 0 && i+1 < argc ) { - i += 1; - const char* arg = argv[i]; - while( *arg != '\0' ) { - this->jobu.push_back( magma_vec_const( *arg )); - ++arg; - if ( *arg == ',' ) - ++arg; - } - } - else if ( (strcmp("--jobv", argv[i]) == 0 || - strcmp("--jobvt", argv[i]) == 0) && i+1 < argc ) { - i += 1; - const char* arg = argv[i]; - while( *arg != '\0' ) { - this->jobv.push_back( magma_vec_const( *arg )); - ++arg; - if ( *arg == ',' ) - ++arg; - } - } - - // ----- misc - else if ( strcmp("-x", argv[i]) == 0 || - strcmp("--exclusive", argv[i]) == 0 ) { - #ifdef USE_FLOCK - this->flock_op = LOCK_EX; - #else - fprintf( stderr, "ignoring %s: USE_FLOCK not defined; flock not supported.\n", argv[i] ); - #endif - } - - // ----- usage - else if ( strcmp("-h", argv[i]) == 0 || - strcmp("--help", argv[i]) == 0 ) { - fprintf( stderr, usage, argv[0], MAX_NTEST ); - exit(0); - } - else { - fprintf( stderr, "error: unrecognized option %s\n", argv[i] ); - exit(1); - } - } - - // default values - if ( this->svd_work.size() == 0 ) { - this->svd_work.push_back( MagmaSVD_query ); - } - if ( this->jobu.size() == 0 ) { - this->jobu.push_back( MagmaNoVec ); - } - if ( this->jobv.size() == 0 ) { - this->jobv.push_back( MagmaNoVec ); - } - - // if -N or --range not given, use default range - if ( this->ntest == 0 ) { - magma_int_t n2 = this->default_nstart; //1024 + 64; - while( n2 <= this->default_nend && this->ntest < MAX_NTEST ) { - this->msize[ this->ntest ] = n2; - this->nsize[ this->ntest ] = n2; - this->ksize[ this->ntest ] = n2; - n2 += this->default_nstep; //1024; - this->ntest++; - } - } - assert( this->ntest <= MAX_NTEST ); - - // lock file - #ifdef USE_FLOCK - this->flock_fd = open_lockfile( lockfile, this->flock_op ); - #endif - - #ifdef HAVE_CUBLAS - magma_setdevice( this->device ); - #endif - - // create queues on this device - // 2 queues + 1 extra NULL entry to catch errors - magma_queue_create( devices[ this->device ], &this->queues2[ 0 ] ); - magma_queue_create( devices[ this->device ], &this->queues2[ 1 ] ); - this->queues2[ 2 ] = NULL; - - this->queue = this->queues2[ 0 ]; - - #ifdef HAVE_CUBLAS - // handle for directly calling cublas - this->handle = magma_queue_get_cublas_handle( this->queue ); - #endif -} -// end parse_opts - - -// ------------------------------------------------------------ -void magma_opts::cleanup() -{ - this->queue = NULL; - magma_queue_destroy( this->queues2[0] ); - magma_queue_destroy( this->queues2[1] ); - this->queues2[0] = NULL; - this->queues2[1] = NULL; - - #ifdef HAVE_CUBLAS - this->handle = NULL; - #endif -} - - -// ------------------------------------------------------------ -// Initialize PAPI events set to measure flops. -// Note flops counters are inaccurate on Sandy Bridge, and don't exist on Haswell. -// See http://icl.cs.utk.edu/projects/papi/wiki/PAPITopics:SandyFlops -#ifdef HAVE_PAPI -#include -#include // memset -#endif // HAVE_PAPI - -int gPAPI_flops_set = -1; // i.e., PAPI_NULL - -extern "C" -void flops_init() -{ - #ifdef HAVE_PAPI - int err = PAPI_library_init( PAPI_VER_CURRENT ); - if ( err != PAPI_VER_CURRENT ) { - fprintf( stderr, "Error: PAPI couldn't initialize: %s (%d)\n", - PAPI_strerror(err), err ); - } - - // read flops - err = PAPI_create_eventset( &gPAPI_flops_set ); - if ( err != PAPI_OK ) { - fprintf( stderr, "Error: PAPI_create_eventset failed\n" ); - } - - err = PAPI_assign_eventset_component( gPAPI_flops_set, 0 ); - if ( err != PAPI_OK ) { - fprintf( stderr, "Error: PAPI_assign_eventset_component failed: %s (%d)\n", - PAPI_strerror(err), err ); - } - - PAPI_option_t opt; - memset( &opt, 0, sizeof(PAPI_option_t) ); - opt.inherit.inherit = PAPI_INHERIT_ALL; - opt.inherit.eventset = gPAPI_flops_set; - err = PAPI_set_opt( PAPI_INHERIT, &opt ); - if ( err != PAPI_OK ) { - fprintf( stderr, "Error: PAPI_set_opt failed: %s (%d)\n", - PAPI_strerror(err), err ); - } - - err = PAPI_add_event( gPAPI_flops_set, PAPI_FP_OPS ); - if ( err != PAPI_OK ) { - fprintf( stderr, "Error: PAPI_add_event failed: %s (%d)\n", - PAPI_strerror(err), err ); - } - - err = PAPI_start( gPAPI_flops_set ); - if ( err != PAPI_OK ) { - fprintf( stderr, "Error: PAPI_start failed: %s (%d)\n", - PAPI_strerror(err), err ); - } - #endif // HAVE_PAPI -} diff --git a/cscs-checks/libraries/magma/magma-2.2/patch.txt b/cscs-checks/libraries/magma/magma-2.2/patch.txt deleted file mode 100644 index 6e8904b5de..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/patch.txt +++ /dev/null @@ -1,158 +0,0 @@ -diff -Naur src/testing_cblas_z.cpp src_patched/testing_cblas_z.cpp ---- ./testing_cblas_z.cpp 2018-09-12 17:02:42.000000000 +0200 -+++ ./testing_cblas_z.cpp 2018-09-13 18:44:49.000000000 +0200 -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - - #ifdef HAVE_CBLAS - #include -@@ -104,6 +105,8 @@ - // ---------------------------------------- - int main( int argc, char** argv ) - { -+ std::clock_t start; -+ double duration; - TESTING_CHECK( magma_init() ); - magma_print_environment(); - -@@ -162,7 +165,8 @@ - printf( "%% Error w.r.t. Error w.r.t. Error w.r.t.\n" - "%% M N K incx incy Function CBLAS Fortran BLAS inline\n" - "%%====================================================================================\n" ); -- for( int itest = 0; itest < opts.ntest; ++itest ) { -+//ajajajaj for( int itest = 0; itest < opts.ntest; ++itest ) { -+ for( int itest = 0; itest < opts.ntest; itest+=100 ) { - if ( itest > 0 ) { - printf( "%%----------------------------------------------------------------------\n" ); - } -@@ -185,6 +189,7 @@ - lapackf77_zlarnv( &ione, ISEED, &size, A ); - lapackf77_zlarnv( &ione, ISEED, &size, B ); - -+ start = std::clock(); - // ----- test DZASUM - for( int iincx = 0; iincx < ninc; ++iincx ) { - magma_int_t incx = inc[iincx]; -@@ -383,6 +388,7 @@ - } - } - -+ duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; - // cleanup - magma_free_cpu( A ); - magma_free_cpu( B ); -@@ -391,5 +397,11 @@ - - opts.cleanup(); - TESTING_CHECK( magma_finalize() ); -+ printf("Duration: %f\n", duration); -+ if (gStatus==0){ -+ printf("Result = PASS\n"); -+ }else{ -+ printf("Result = FAIL\n"); -+ } - return gStatus; - } -diff -Naur src/testing_zgemm.cpp src_patched/testing_zgemm.cpp ---- ./testing_zgemm.cpp 2018-09-12 17:03:47.000000000 +0200 -+++ ./testing_zgemm.cpp 2018-09-13 18:44:49.000000000 +0200 -@@ -62,6 +62,7 @@ - - magma_opts opts; - opts.parse_opts( argc, argv ); -+ opts.lapack=1; - - // Allow 3*eps; complex needs 2*sqrt(2) factor; see Higham, 2002, sec. 3.6. - double eps = lapackf77_dlamch("E"); -@@ -87,7 +88,8 @@ - g_platform_str, g_platform_str ); - #endif - printf("%%========================================================================================================\n"); -- for( int itest = 0; itest < opts.ntest; ++itest ) { -+//ajajajaj for( int itest = 0; itest < opts.ntest; ++itest ) { -+ for( int itest = 0; itest < opts.ntest; itest+=100 ) { - for( int iter = 0; iter < opts.niter; ++iter ) { - M = opts.msize[itest]; - N = opts.nsize[itest]; -@@ -220,6 +222,9 @@ - cpu_perf, 1000.*cpu_time, - magma_error, dev_error, - (okay ? "ok" : "failed")); -+ printf("MAGMA GFlops: %e\n", magma_perf); -+ printf("cuBLAS GFlops: %e\n", dev_perf); -+ printf("CPU GFlops: %e\n", cpu_perf); - #else - bool okay = (dev_error < tol); - status += ! okay; -@@ -271,5 +276,10 @@ - - opts.cleanup(); - TESTING_CHECK( magma_finalize() ); -+ if (status==0){ -+ printf("Result = PASS\n"); -+ }else{ -+ printf("Result = FAIL\n"); -+ } - return status; - } -diff -Naur src/testing_zsymmetrize.cpp src_patched/testing_zsymmetrize.cpp ---- ./testing_zsymmetrize.cpp 2018-09-12 17:04:01.000000000 +0200 -+++ ./testing_zsymmetrize.cpp 2018-09-13 18:44:49.000000000 +0200 -@@ -125,5 +125,12 @@ - - opts.cleanup(); - TESTING_CHECK( magma_finalize() ); -+ printf("CPU performance: %e\n", cpu_perf); -+ printf("GPU performance: %e\n", gpu_perf); -+ if (status==0){ -+ printf("Result = PASS\n"); -+ }else{ -+ printf("Result = FAIL\n"); -+ } - return status; - } -diff -Naur src/testing_ztranspose.cpp src_patched/testing_ztranspose.cpp ---- ./testing_ztranspose.cpp 2018-09-12 17:04:11.000000000 +0200 -+++ ./testing_ztranspose.cpp 2018-09-13 18:44:49.000000000 +0200 -@@ -212,5 +212,12 @@ - - opts.cleanup(); - TESTING_CHECK( magma_finalize() ); -+ printf("CPU performance: %e\n", cpu_perf); -+ printf("GPU performance: %e\n", gpu_perf); -+ if (status==0){ -+ printf("Result = PASS\n"); -+ }else{ -+ printf("Result = FAIL\n"); -+ } - return status; - } -diff -Naur src/testing_zunmbr.cpp src_patched/testing_zunmbr.cpp ---- ./testing_zunmbr.cpp 2018-09-12 17:04:26.000000000 +0200 -+++ ./testing_zunmbr.cpp 2018-09-13 18:44:49.000000000 +0200 -@@ -55,7 +55,8 @@ - - printf("%% M N K vect side trans CPU Gflop/s (sec) GPU Gflop/s (sec) ||R||_F / ||QC||_F\n"); - printf("%%==============================================================================================\n"); -- for( int itest = 0; itest < opts.ntest; ++itest ) { -+//ajajajaj for( int itest = 0; itest < opts.ntest; ++itest ) { -+ for( int itest = 0; itest < opts.ntest; itest+=100 ) { - for( int ivect = 0; ivect < 2; ++ivect ) { - for( int iside = 0; iside < 2; ++iside ) { - for( int itran = 0; itran < 2; ++itran ) { -@@ -224,5 +225,12 @@ - - opts.cleanup(); - TESTING_CHECK( magma_finalize() ); -+ printf("CPU performance: %e\n", cpu_perf); -+ printf("GPU performance: %e\n", gpu_perf); -+ if (status==0){ -+ printf("Result = PASS\n"); -+ }else{ -+ printf("Result = FAIL\n"); -+ } - return status; - } diff --git a/cscs-checks/libraries/magma/magma-2.2/testing_c.h b/cscs-checks/libraries/magma/magma-2.2/testing_c.h deleted file mode 100644 index d3f1751bac..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/testing_c.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - -- MAGMA (version 2.2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date November 2016 - - @generated from testing/testing_z.h, normal z -> c, Sun Nov 20 20:20:47 2016 - @author Mark Gates - - Utilities for testing. -*/ -#ifndef TESTING_MAGMA_C_H -#define TESTING_MAGMA_C_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define COMPLEX - -void magma_cmake_symmetric( magma_int_t N, magmaFloatComplex* A, magma_int_t lda ); -void magma_cmake_hermitian( magma_int_t N, magmaFloatComplex* A, magma_int_t lda ); - -void magma_cmake_spd( magma_int_t N, magmaFloatComplex* A, magma_int_t lda ); -void magma_cmake_hpd( magma_int_t N, magmaFloatComplex* A, magma_int_t lda ); - -// work around MKL bug in multi-threaded lanhe/lansy -float safe_lapackf77_clanhe( - const char *norm, const char *uplo, - const magma_int_t *n, - const magmaFloatComplex *A, const magma_int_t *lda, - float *work ); - -#ifdef COMPLEX -static inline float magma_clapy2( magmaFloatComplex x ) -{ - float xr = MAGMA_C_REAL( x ); - float xi = MAGMA_C_IMAG( x ); - return lapackf77_slapy2( &xr, &xi ); -} -#endif - -void check_cgesvd( - magma_int_t check, - magma_vec_t jobu, - magma_vec_t jobvt, - magma_int_t m, magma_int_t n, - magmaFloatComplex *A, magma_int_t lda, - float *S, - magmaFloatComplex *U, magma_int_t ldu, - magmaFloatComplex *VT, magma_int_t ldv, - float result[4] ); - -void check_cgeev( - magma_vec_t jobvl, - magma_vec_t jobvr, - magma_int_t n, - magmaFloatComplex *A, magma_int_t lda, - #ifdef COMPLEX - magmaFloatComplex *w, - #else - float *wr, float *wi, - #endif - magmaFloatComplex *VL, magma_int_t ldvl, - magmaFloatComplex *VR, magma_int_t ldvr, - magmaFloatComplex *work, magma_int_t lwork, - #ifdef COMPLEX - float *rwork, magma_int_t lrwork, - #endif - float result[4] ); - -#undef COMPLEX - -#ifdef __cplusplus -} -#endif - -#endif // #ifndef TESTING_MAGMA_C_H diff --git a/cscs-checks/libraries/magma/magma-2.2/testing_cblas_z.cpp b/cscs-checks/libraries/magma/magma-2.2/testing_cblas_z.cpp deleted file mode 100644 index bec3091fd2..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/testing_cblas_z.cpp +++ /dev/null @@ -1,395 +0,0 @@ -/* - -- MAGMA (version 2.2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - - @precisions normal z -> c d s - @author Mark Gates - - These tests ensure that the MAGMA implementations of CBLAS routines - are correct. (We no longer use wrappers.) -*/ -#include -#include -#include -#include - -#ifdef HAVE_CBLAS -#include -#endif - -// make sure that asserts are enabled -#undef NDEBUG -#include - -// includes, project -#include "magma_v2.h" -#include "magma_lapack.h" -#include "magma_operators.h" -#include "testings.h" - -#define COMPLEX - -#define A(i,j) &A[ (i) + (j)*ld ] -#define B(i,j) &B[ (i) + (j)*ld ] - - -// ---------------------------------------- -// These may not be portable to different Fortran implementations, -// hence why MAGMA does not rely on them. - -#define blasf77_dzasum FORTRAN_NAME( dzasum, DZASUM ) -#define blasf77_dznrm2 FORTRAN_NAME( dznrm2, DZNRM2 ) -#define blasf77_zdotc FORTRAN_NAME( zdotc, ZDOTC ) -#define blasf77_zdotu FORTRAN_NAME( zdotu, ZDOTU ) - -#ifdef __cplusplus -extern "C" { -#endif - -double blasf77_dzasum( const magma_int_t* n, - const magmaDoubleComplex* x, const magma_int_t* incx ); - -double blasf77_dznrm2( const magma_int_t* n, - const magmaDoubleComplex* x, const magma_int_t* incx ); - -magmaDoubleComplex blasf77_zdotc( const magma_int_t* n, - const magmaDoubleComplex* x, const magma_int_t* incx, - const magmaDoubleComplex* y, const magma_int_t* incy ); - -magmaDoubleComplex blasf77_zdotu( const magma_int_t* n, - const magmaDoubleComplex* x, const magma_int_t* incx, - const magmaDoubleComplex* y, const magma_int_t* incy ); - -#ifdef __cplusplus -} // extern "C" -#endif - - -// ---------------------------------------- -double gTol = 0; -magma_int_t gStatus = 0; - -const double SKIPPED_FLAG = -1; - -void output( - const char* routine, - magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t incx, magma_int_t incy, - double error_cblas, double error_fblas, double error_inline ) -{ - // SKIPPED_FLAG indicates skipped, e.g., zdotc with MKL -- it isn't an error - bool okay = (error_cblas == SKIPPED_FLAG || error_cblas < gTol) && - (error_fblas == SKIPPED_FLAG || error_fblas < gTol) && - (error_inline < gTol); - gStatus += ! okay; - - printf( "%5lld %5lld %5lld %5lld %5lld %-8s", - (long long) m, (long long) n, (long long) k, (long long) incx, (long long) incy, routine ); - - if ( error_cblas == SKIPPED_FLAG ) - printf( " %8s", "n/a" ); - else - printf( " %#8.3g", error_cblas ); - - if ( error_fblas == SKIPPED_FLAG ) - printf( " %8s", "n/a" ); - else - printf( " %#8.3g", error_fblas ); - - printf( " %#8.3g %s\n", error_inline, (okay ? "ok" : "failed") ); -} - - -// ---------------------------------------- -int main( int argc, char** argv ) -{ - TESTING_CHECK( magma_init() ); - magma_print_environment(); - - //real_Double_t t_m, t_c, t_f; - magma_int_t ione = 1; - - magmaDoubleComplex *A, *B; - double error_cblas, error_fblas, error_inline; - magma_int_t ISEED[4] = {0,0,0,1}; - magma_int_t i, j, k, m, n, size, maxn, ld; - - // complex x for magma, cblas, fortran, inline blas respectively - magmaDoubleComplex x2_m, x2_c, x2_f, x2_i; - - // real x for magma, cblas, fortran, inline blas respectively - double x_m, x_c, x_f, x_i; - - MAGMA_UNUSED( x_c ); - MAGMA_UNUSED( x_f ); - MAGMA_UNUSED( x2_c ); - MAGMA_UNUSED( x2_f ); - MAGMA_UNUSED( x2_m ); - - magma_opts opts; - opts.parse_opts( argc, argv ); - - opts.tolerance = max( 100., opts.tolerance ); - double tol = opts.tolerance * lapackf77_dlamch("E"); - gTol = tol; - - magma_int_t inc[] = { -2, -1, 1, 2 }; //{ 1 }; //{ -1, 1 }; - magma_int_t ninc = sizeof(inc)/sizeof(*inc); - magma_int_t maxinc = 0; - for( i=0; i < ninc; ++i ) { - maxinc = max( maxinc, abs(inc[i]) ); - } - - printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfaults), which !!\n" - "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA. !!\n" - "!! If MAGMA_WITH_MKL or __APPLE__ are defined, known failures are skipped. !!\n" - "\n" ); - - // tell user about disabled functions - #ifndef HAVE_CBLAS - printf( "n/a: HAVE_CBLAS not defined, so no cblas functions tested.\n\n" ); - #endif - - #if defined(MAGMA_WITH_MKL) - printf( "n/a: cblas_zdotc, cblas_zdotu, blasf77_zdotc, and blasf77_zdotu are disabled with MKL, due to segfaults.\n\n" ); - #endif - - #if defined(__APPLE__) - printf( "n/a: blasf77_zdotc and blasf77_zdotu are disabled on MacOS, due to segfaults.\n\n" ); - #endif - - printf( "%% Error w.r.t. Error w.r.t. Error w.r.t.\n" - "%% M N K incx incy Function CBLAS Fortran BLAS inline\n" - "%%====================================================================================\n" ); - for( int itest = 0; itest < opts.ntest; ++itest ) { - if ( itest > 0 ) { - printf( "%%----------------------------------------------------------------------\n" ); - } - - m = opts.msize[itest]; - n = opts.nsize[itest]; - k = opts.ksize[itest]; - - // allocate matrices - // over-allocate so they can be any combination of - // {m,n,k} * {abs(incx), abs(incy)} by - // {m,n,k} * {abs(incx), abs(incy)} - maxn = max( max( m, n ), k ) * maxinc; - ld = max( 1, maxn ); - size = ld*maxn; - TESTING_CHECK( magma_zmalloc_cpu( &A, size )); - TESTING_CHECK( magma_zmalloc_cpu( &B, size )); - - // initialize matrices - lapackf77_zlarnv( &ione, ISEED, &size, A ); - lapackf77_zlarnv( &ione, ISEED, &size, B ); - - // ----- test DZASUM - for( int iincx = 0; iincx < ninc; ++iincx ) { - magma_int_t incx = inc[iincx]; - - for( int iincy = 0; iincy < ninc; ++iincy ) { - magma_int_t incy = inc[iincy]; - - // get one-norm of column j of A - if ( incx > 0 && incx == incy ) { // positive, no incy - error_cblas = 0; - error_fblas = 0; - error_inline = 0; - for( j=0; j < k; ++j ) { - x_m = magma_cblas_dzasum( m, A(0,j), incx ); - - #ifdef HAVE_CBLAS - x_c = cblas_dzasum( m, A(0,j), incx ); - error_cblas = max( error_cblas, fabs(x_m - x_c) / fabs(m*x_c) ); - #else - x_c = 0; - error_cblas = SKIPPED_FLAG; - #endif - - x_f = blasf77_dzasum( &m, A(0,j), &incx ); - error_fblas = max( error_fblas, fabs(x_m - x_f) / fabs(m*x_f) ); - - // inline implementation - x_i = 0; - for( i=0; i < m; ++i ) { - x_i += MAGMA_Z_ABS1( *A(i*incx,j) ); // |real(Aij)| + |imag(Aij)| - } - error_inline = max( error_inline, fabs(x_m - x_i) / fabs(m*x_i) ); - - //printf( "dzasum xm %.8e, xc %.8e, xf %.8e, xi %.8e\n", x_m, x_c, x_f, x_i ); - } - output( "dzasum", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); - } - } - } - printf( "\n" ); - - // ----- test DZNRM2 - // get two-norm of column j of A - for( int iincx = 0; iincx < ninc; ++iincx ) { - magma_int_t incx = inc[iincx]; - - for( int iincy = 0; iincy < ninc; ++iincy ) { - magma_int_t incy = inc[iincy]; - - if ( incx > 0 && incx == incy ) { // positive, no incy - error_cblas = 0; - error_fblas = 0; - error_inline = 0; - for( j=0; j < k; ++j ) { - x_m = magma_cblas_dznrm2( m, A(0,j), incx ); - - #ifdef HAVE_CBLAS - x_c = cblas_dznrm2( m, A(0,j), incx ); - error_cblas = max( error_cblas, fabs(x_m - x_c) / fabs(m*x_c) ); - #else - x_c = 0; - error_cblas = SKIPPED_FLAG; - #endif - - x_f = blasf77_dznrm2( &m, A(0,j), &incx ); - error_fblas = max( error_fblas, fabs(x_m - x_f) / fabs(m*x_f) ); - - // inline implementation (poor -- doesn't scale) - x_i = 0; - for( i=0; i < m; ++i ) { - x_i += real( *A(i*incx,j) ) * real( *A(i*incx,j) ) - + imag( *A(i*incx,j) ) * imag( *A(i*incx,j) ); - // same: real( conj( *A(i*incx,j) ) * *A(i*incx,j) ); - } - x_i = sqrt( x_i ); - error_inline = max( error_inline, fabs(x_m - x_i) / fabs(m*x_i) ); - - //printf( "dznrm2 xm %.8e, xc %.8e, xf %.8e, xi %.8e\n", x_m, x_c, x_f, x_i ); - } - output( "dznrm2", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); - } - } - } - printf( "\n" ); - - // ----- test ZDOTC - // dot columns, Aj^H Bj - for( int iincx = 0; iincx < ninc; ++iincx ) { - magma_int_t incx = inc[iincx]; - - for( int iincy = 0; iincy < ninc; ++iincy ) { - magma_int_t incy = inc[iincy]; - - error_cblas = 0; - error_fblas = 0; - error_inline = 0; - for( j=0; j < k; ++j ) { - // MAGMA implementation, not just wrapper - x2_m = magma_cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); - - // crashes with MKL 11.1.2, ILP64 - #if defined(HAVE_CBLAS) && ! defined(MAGMA_WITH_MKL) - #ifdef COMPLEX - cblas_zdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); - #else - x2_c = cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); - #endif - error_cblas = max( error_cblas, fabs(x2_m - x2_c) / fabs(m*x2_c) ); - #else - x2_c = MAGMA_Z_ZERO; - error_cblas = SKIPPED_FLAG; - #endif - - // crashes with MKL 11.2.3 and MacOS 10.9 - #if (! defined(COMPLEX) || ! defined(MAGMA_WITH_MKL)) && ! defined(__APPLE__) - x2_f = blasf77_zdotc( &m, A(0,j), &incx, B(0,j), &incy ); - error_fblas = max( error_fblas, fabs(x2_m - x2_f) / fabs(m*x2_f) ); - #else - x2_f = MAGMA_Z_ZERO; - error_fblas = SKIPPED_FLAG; - #endif - - // inline implementation - x2_i = MAGMA_Z_ZERO; - magma_int_t A_offset = (incx > 0 ? 0 : (-n + 1)*incx); - magma_int_t B_offset = (incy > 0 ? 0 : (-n + 1)*incy); - for( i=0; i < m; ++i ) { - x2_i += conj( *A(A_offset + i*incx,j) ) * *B(B_offset + i*incy,j); - } - error_inline = max( error_inline, fabs(x2_m - x2_i) / fabs(m*x2_i) ); - - //printf( "zdotc xm %.8e + %.8ei, xc %.8e + %.8ei, xf %.8e + %.8ei, xi %.8e + %.8ei\n", - // real(x2_m), imag(x2_m), - // real(x2_c), imag(x2_c), - // real(x2_f), imag(x2_f), - // real(x2_i), imag(x2_i) ); - } - output( "zdotc", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); - } - } - printf( "\n" ); - - // ----- test ZDOTU - // dot columns, Aj^T * Bj - for( int iincx = 0; iincx < ninc; ++iincx ) { - magma_int_t incx = inc[iincx]; - - for( int iincy = 0; iincy < ninc; ++iincy ) { - magma_int_t incy = inc[iincy]; - - error_cblas = 0; - error_fblas = 0; - error_inline = 0; - for( j=0; j < k; ++j ) { - // MAGMA implementation, not just wrapper - x2_m = magma_cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); - - // crashes with MKL 11.1.2, ILP64 - #if defined(HAVE_CBLAS) && ! defined(MAGMA_WITH_MKL) - #ifdef COMPLEX - cblas_zdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); - #else - x2_c = cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); - #endif - error_cblas = max( error_cblas, fabs(x2_m - x2_c) / fabs(m*x2_c) ); - #else - x2_c = MAGMA_Z_ZERO; - error_cblas = SKIPPED_FLAG; - #endif - - // crashes with MKL 11.2.3 and MacOS 10.9 - #if (! defined(COMPLEX) || ! defined(MAGMA_WITH_MKL)) && ! defined(__APPLE__) - x2_f = blasf77_zdotu( &m, A(0,j), &incx, B(0,j), &incy ); - error_fblas = max( error_fblas, fabs(x2_m - x2_f) / fabs(m*x2_f) ); - #else - x2_f = MAGMA_Z_ZERO; - error_fblas = SKIPPED_FLAG; - #endif - - // inline implementation - x2_i = MAGMA_Z_ZERO; - magma_int_t A_offset = (incx > 0 ? 0 : (-n + 1)*incx); - magma_int_t B_offset = (incy > 0 ? 0 : (-n + 1)*incy); - for( i=0; i < m; ++i ) { - x2_i += *A(A_offset + i*incx,j) * *B(B_offset + i*incy,j); - } - error_inline = max( error_inline, fabs(x2_m - x2_i) / fabs(m*x2_i) ); - - //printf( "zdotu xm %.8e + %.8ei, xc %.8e + %.8ei, xf %.8e + %.8ei, xi %.8e + %.8ei\n", - // real(x2_m), imag(x2_m), - // real(x2_c), imag(x2_c), - // real(x2_f), imag(x2_f), - // real(x2_i), imag(x2_i) ); - } - output( "zdotu", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); - } - } - - // cleanup - magma_free_cpu( A ); - magma_free_cpu( B ); - fflush( stdout ); - } // itest, incx, incy - - opts.cleanup(); - TESTING_CHECK( magma_finalize() ); - return gStatus; -} diff --git a/cscs-checks/libraries/magma/magma-2.2/testing_d.h b/cscs-checks/libraries/magma/magma-2.2/testing_d.h deleted file mode 100644 index c350419c7a..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/testing_d.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - -- MAGMA (version 2.2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date November 2016 - - @generated from testing/testing_z.h, normal z -> d, Sun Nov 20 20:20:46 2016 - @author Mark Gates - - Utilities for testing. -*/ -#ifndef TESTING_MAGMA_D_H -#define TESTING_MAGMA_D_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define REAL - -void magma_dmake_symmetric( magma_int_t N, double* A, magma_int_t lda ); -void magma_dmake_symmetric( magma_int_t N, double* A, magma_int_t lda ); - -void magma_dmake_spd( magma_int_t N, double* A, magma_int_t lda ); -void magma_dmake_hpd( magma_int_t N, double* A, magma_int_t lda ); - -// work around MKL bug in multi-threaded lanhe/lansy -double safe_lapackf77_dlansy( - const char *norm, const char *uplo, - const magma_int_t *n, - const double *A, const magma_int_t *lda, - double *work ); - -#ifdef COMPLEX -static inline double magma_dlapy2( double x ) -{ - double xr = MAGMA_D_REAL( x ); - double xi = MAGMA_D_IMAG( x ); - return lapackf77_dlapy2( &xr, &xi ); -} -#endif - -void check_dgesvd( - magma_int_t check, - magma_vec_t jobu, - magma_vec_t jobvt, - magma_int_t m, magma_int_t n, - double *A, magma_int_t lda, - double *S, - double *U, magma_int_t ldu, - double *VT, magma_int_t ldv, - double result[4] ); - -void check_dgeev( - magma_vec_t jobvl, - magma_vec_t jobvr, - magma_int_t n, - double *A, magma_int_t lda, - #ifdef COMPLEX - double *w, - #else - double *wr, double *wi, - #endif - double *VL, magma_int_t ldvl, - double *VR, magma_int_t ldvr, - double *work, magma_int_t lwork, - #ifdef COMPLEX - double *rwork, magma_int_t lrwork, - #endif - double result[4] ); - -#undef REAL - -#ifdef __cplusplus -} -#endif - -#endif // #ifndef TESTING_MAGMA_D_H diff --git a/cscs-checks/libraries/magma/magma-2.2/testing_s.h b/cscs-checks/libraries/magma/magma-2.2/testing_s.h deleted file mode 100644 index 7a1b0752f3..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/testing_s.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - -- MAGMA (version 2.2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date November 2016 - - @generated from testing/testing_z.h, normal z -> s, Sun Nov 20 20:20:47 2016 - @author Mark Gates - - Utilities for testing. -*/ -#ifndef TESTING_MAGMA_S_H -#define TESTING_MAGMA_S_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define REAL - -void magma_smake_symmetric( magma_int_t N, float* A, magma_int_t lda ); -void magma_smake_symmetric( magma_int_t N, float* A, magma_int_t lda ); - -void magma_smake_spd( magma_int_t N, float* A, magma_int_t lda ); -void magma_smake_hpd( magma_int_t N, float* A, magma_int_t lda ); - -// work around MKL bug in multi-threaded lanhe/lansy -float safe_lapackf77_slansy( - const char *norm, const char *uplo, - const magma_int_t *n, - const float *A, const magma_int_t *lda, - float *work ); - -#ifdef COMPLEX -static inline float magma_slapy2( float x ) -{ - float xr = MAGMA_S_REAL( x ); - float xi = MAGMA_S_IMAG( x ); - return lapackf77_slapy2( &xr, &xi ); -} -#endif - -void check_sgesvd( - magma_int_t check, - magma_vec_t jobu, - magma_vec_t jobvt, - magma_int_t m, magma_int_t n, - float *A, magma_int_t lda, - float *S, - float *U, magma_int_t ldu, - float *VT, magma_int_t ldv, - float result[4] ); - -void check_sgeev( - magma_vec_t jobvl, - magma_vec_t jobvr, - magma_int_t n, - float *A, magma_int_t lda, - #ifdef COMPLEX - float *w, - #else - float *wr, float *wi, - #endif - float *VL, magma_int_t ldvl, - float *VR, magma_int_t ldvr, - float *work, magma_int_t lwork, - #ifdef COMPLEX - float *rwork, magma_int_t lrwork, - #endif - float result[4] ); - -#undef REAL - -#ifdef __cplusplus -} -#endif - -#endif // #ifndef TESTING_MAGMA_S_H diff --git a/cscs-checks/libraries/magma/magma-2.2/testing_z.h b/cscs-checks/libraries/magma/magma-2.2/testing_z.h deleted file mode 100644 index 54503185f4..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/testing_z.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - -- MAGMA (version 2.2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date November 2016 - - @precisions normal z -> s d c - @author Mark Gates - - Utilities for testing. -*/ -#ifndef TESTING_MAGMA_Z_H -#define TESTING_MAGMA_Z_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define COMPLEX - -void magma_zmake_symmetric( magma_int_t N, magmaDoubleComplex* A, magma_int_t lda ); -void magma_zmake_hermitian( magma_int_t N, magmaDoubleComplex* A, magma_int_t lda ); - -void magma_zmake_spd( magma_int_t N, magmaDoubleComplex* A, magma_int_t lda ); -void magma_zmake_hpd( magma_int_t N, magmaDoubleComplex* A, magma_int_t lda ); - -// work around MKL bug in multi-threaded lanhe/lansy -double safe_lapackf77_zlanhe( - const char *norm, const char *uplo, - const magma_int_t *n, - const magmaDoubleComplex *A, const magma_int_t *lda, - double *work ); - -#ifdef COMPLEX -static inline double magma_zlapy2( magmaDoubleComplex x ) -{ - double xr = MAGMA_Z_REAL( x ); - double xi = MAGMA_Z_IMAG( x ); - return lapackf77_dlapy2( &xr, &xi ); -} -#endif - -void check_zgesvd( - magma_int_t check, - magma_vec_t jobu, - magma_vec_t jobvt, - magma_int_t m, magma_int_t n, - magmaDoubleComplex *A, magma_int_t lda, - double *S, - magmaDoubleComplex *U, magma_int_t ldu, - magmaDoubleComplex *VT, magma_int_t ldv, - double result[4] ); - -void check_zgeev( - magma_vec_t jobvl, - magma_vec_t jobvr, - magma_int_t n, - magmaDoubleComplex *A, magma_int_t lda, - #ifdef COMPLEX - magmaDoubleComplex *w, - #else - double *wr, double *wi, - #endif - magmaDoubleComplex *VL, magma_int_t ldvl, - magmaDoubleComplex *VR, magma_int_t ldvr, - magmaDoubleComplex *work, magma_int_t lwork, - #ifdef COMPLEX - double *rwork, magma_int_t lrwork, - #endif - double result[4] ); - -#undef COMPLEX - -#ifdef __cplusplus -} -#endif - -#endif // #ifndef TESTING_MAGMA_Z_H diff --git a/cscs-checks/libraries/magma/magma-2.2/testing_zgemm.cpp b/cscs-checks/libraries/magma/magma-2.2/testing_zgemm.cpp deleted file mode 100644 index 3756949453..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/testing_zgemm.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* - -- MAGMA (version 2.2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date November 2016 - - @precisions normal z -> c d s - @author Mark Gates -*/ -// includes, system -#include -#include -#include -#include - -// includes, project -#include "flops.h" -#include "magma_v2.h" -#include "magma_lapack.h" -#include "magma_operators.h" -#include "testings.h" - -/* //////////////////////////////////////////////////////////////////////////// - -- Testing zgemm -*/ -int main( int argc, char** argv) -{ - #ifdef HAVE_clBLAS - #define dA(i_, j_) dA, ((i_) + (j_)*ldda) - #define dB(i_, j_) dB, ((i_) + (j_)*lddb) - #define dC(i_, j_) dC, ((i_) + (j_)*lddc) - #else - #define dA(i_, j_) (dA + (i_) + (j_)*ldda) - #define dB(i_, j_) (dB + (i_) + (j_)*lddb) - #define dC(i_, j_) (dC + (i_) + (j_)*lddc) - #endif - - TESTING_CHECK( magma_init() ); - magma_print_environment(); - - real_Double_t gflops, magma_perf, magma_time, dev_perf, dev_time, cpu_perf, cpu_time; - double magma_error, dev_error, work[1]; - magma_int_t M, N, K; - magma_int_t Am, An, Bm, Bn; - magma_int_t sizeA, sizeB, sizeC; - magma_int_t lda, ldb, ldc, ldda, lddb, lddc; - magma_int_t ione = 1; - magma_int_t ISEED[4] = {0,0,0,1}; - int status = 0; - - magmaDoubleComplex *hA, *hB, *hC, *hCmagma, *hCdev; - magmaDoubleComplex_ptr dA, dB, dC; - magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; - magmaDoubleComplex alpha = MAGMA_Z_MAKE( 0.29, -0.86 ); - magmaDoubleComplex beta = MAGMA_Z_MAKE( -0.48, 0.38 ); - - // used only with CUDA - MAGMA_UNUSED( magma_perf ); - MAGMA_UNUSED( magma_time ); - MAGMA_UNUSED( magma_error ); - - magma_opts opts; - opts.parse_opts( argc, argv ); - - // Allow 3*eps; complex needs 2*sqrt(2) factor; see Higham, 2002, sec. 3.6. - double eps = lapackf77_dlamch("E"); - double tol = 3*eps; - - #ifdef HAVE_CUBLAS - // for CUDA, we can check MAGMA vs. CUBLAS, without running LAPACK - printf("%% If running lapack (option --lapack), MAGMA and %s error are both computed\n" - "%% relative to CPU BLAS result. Else, MAGMA error is computed relative to %s result.\n\n", - g_platform_str, g_platform_str ); - printf("%% transA = %s, transB = %s\n", - lapack_trans_const(opts.transA), - lapack_trans_const(opts.transB) ); - printf("%% M N K MAGMA Gflop/s (ms) %s Gflop/s (ms) CPU Gflop/s (ms) MAGMA error %s error\n", - g_platform_str, g_platform_str ); - #else - // for others, we need LAPACK for check - opts.lapack |= opts.check; // check (-c) implies lapack (-l) - printf("%% transA = %s, transB = %s\n", - lapack_trans_const(opts.transA), - lapack_trans_const(opts.transB) ); - printf("%% M N K %s Gflop/s (ms) CPU Gflop/s (ms) %s error\n", - g_platform_str, g_platform_str ); - #endif - printf("%%========================================================================================================\n"); - for( int itest = 0; itest < opts.ntest; ++itest ) { - for( int iter = 0; iter < opts.niter; ++iter ) { - M = opts.msize[itest]; - N = opts.nsize[itest]; - K = opts.ksize[itest]; - gflops = FLOPS_ZGEMM( M, N, K ) / 1e9; - - if ( opts.transA == MagmaNoTrans ) { - lda = Am = M; - An = K; - } else { - lda = Am = K; - An = M; - } - - if ( opts.transB == MagmaNoTrans ) { - ldb = Bm = K; - Bn = N; - } else { - ldb = Bm = N; - Bn = K; - } - ldc = M; - - ldda = magma_roundup( lda, opts.align ); // multiple of 32 by default - lddb = magma_roundup( ldb, opts.align ); // multiple of 32 by default - lddc = magma_roundup( ldc, opts.align ); // multiple of 32 by default - - sizeA = lda*An; - sizeB = ldb*Bn; - sizeC = ldc*N; - - TESTING_CHECK( magma_zmalloc_cpu( &hA, lda*An )); - TESTING_CHECK( magma_zmalloc_cpu( &hB, ldb*Bn )); - TESTING_CHECK( magma_zmalloc_cpu( &hC, ldc*N )); - TESTING_CHECK( magma_zmalloc_cpu( &hCmagma, ldc*N )); - TESTING_CHECK( magma_zmalloc_cpu( &hCdev, ldc*N )); - - TESTING_CHECK( magma_zmalloc( &dA, ldda*An )); - TESTING_CHECK( magma_zmalloc( &dB, lddb*Bn )); - TESTING_CHECK( magma_zmalloc( &dC, lddc*N )); - - /* Initialize the matrices */ - lapackf77_zlarnv( &ione, ISEED, &sizeA, hA ); - lapackf77_zlarnv( &ione, ISEED, &sizeB, hB ); - lapackf77_zlarnv( &ione, ISEED, &sizeC, hC ); - - magma_zsetmatrix( Am, An, hA, lda, dA(0,0), ldda, opts.queue ); - magma_zsetmatrix( Bm, Bn, hB, ldb, dB(0,0), lddb, opts.queue ); - - // for error checks - double Anorm = lapackf77_zlange( "F", &Am, &An, hA, &lda, work ); - double Bnorm = lapackf77_zlange( "F", &Bm, &Bn, hB, &ldb, work ); - double Cnorm = lapackf77_zlange( "F", &M, &N, hC, &ldc, work ); - - /* ===================================================================== - Performs operation using MAGMABLAS (currently only with CUDA) - =================================================================== */ - #ifdef HAVE_CUBLAS - magma_zsetmatrix( M, N, hC, ldc, dC, lddc, opts.queue ); - - magma_time = magma_sync_wtime( opts.queue ); - magmablas_zgemm( opts.transA, opts.transB, M, N, K, - alpha, dA, ldda, - dB, lddb, - beta, dC, lddc, - opts.queue ); - magma_time = magma_sync_wtime( opts.queue ) - magma_time; - magma_perf = gflops / magma_time; - - magma_zgetmatrix( M, N, dC, lddc, hCmagma, ldc, opts.queue ); - #endif - - /* ===================================================================== - Performs operation using CUBLAS / clBLAS / Xeon Phi MKL - =================================================================== */ - magma_zsetmatrix( M, N, hC, ldc, dC(0,0), lddc, opts.queue ); - - dev_time = magma_sync_wtime( opts.queue ); - magma_zgemm( opts.transA, opts.transB, M, N, K, - alpha, dA(0,0), ldda, - dB(0,0), lddb, - beta, dC(0,0), lddc, opts.queue ); - dev_time = magma_sync_wtime( opts.queue ) - dev_time; - dev_perf = gflops / dev_time; - - magma_zgetmatrix( M, N, dC(0,0), lddc, hCdev, ldc, opts.queue ); - - /* ===================================================================== - Performs operation using CPU BLAS - =================================================================== */ - if ( opts.lapack ) { - cpu_time = magma_wtime(); - blasf77_zgemm( lapack_trans_const(opts.transA), lapack_trans_const(opts.transB), &M, &N, &K, - &alpha, hA, &lda, - hB, &ldb, - &beta, hC, &ldc ); - cpu_time = magma_wtime() - cpu_time; - cpu_perf = gflops / cpu_time; - } - - /* ===================================================================== - Check the result - =================================================================== */ - if ( opts.lapack ) { - // Compute forward error bound (see Higham, 2002, sec. 3.5), - // modified to include alpha, beta, and input C. - // ||R_magma - R_ref||_p / (gamma_{K+2} |alpha| ||A||_p ||B||_p + 2 |beta| ||C||_p ) < eps/2. - // This should work with p = 1, inf, fro, but numerical tests - // show p = 1, inf are very spiky and sometimes exceed eps. - // We use gamma_n = sqrt(n)*u instead of n*u/(1-n*u), since the - // former accurately represents statistical average rounding. - // We allow a slightly looser tolerance. - - // use LAPACK for R_ref - blasf77_zaxpy( &sizeC, &c_neg_one, hC, &ione, hCdev, &ione ); - dev_error = lapackf77_zlange( "F", &M, &N, hCdev, &ldc, work ) - / (sqrt(double(K+2))*fabs(alpha)*Anorm*Bnorm + 2*fabs(beta)*Cnorm); - - #ifdef HAVE_CUBLAS - blasf77_zaxpy( &sizeC, &c_neg_one, hC, &ione, hCmagma, &ione ); - magma_error = lapackf77_zlange( "F", &M, &N, hCmagma, &ldc, work ) - / (sqrt(double(K+2))*fabs(alpha)*Anorm*Bnorm + 2*fabs(beta)*Cnorm); - - bool okay = (magma_error < tol && dev_error < tol); - status += ! okay; - printf("%5lld %5lld %5lld %7.2f (%7.2f) %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %8.2e %s\n", - (long long) M, (long long) N, (long long) K, - magma_perf, 1000.*magma_time, - dev_perf, 1000.*dev_time, - cpu_perf, 1000.*cpu_time, - magma_error, dev_error, - (okay ? "ok" : "failed")); - #else - bool okay = (dev_error < tol); - status += ! okay; - printf("%5lld %5lld %5lld %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", - (long long) M, (long long) N, (long long) K, - dev_perf, 1000.*dev_time, - cpu_perf, 1000.*cpu_time, - dev_error, - (okay ? "ok" : "failed")); - #endif - } - else { - #ifdef HAVE_CUBLAS - // use cuBLAS for R_ref (currently only with CUDA) - blasf77_zaxpy( &sizeC, &c_neg_one, hCdev, &ione, hCmagma, &ione ); - magma_error = lapackf77_zlange( "F", &M, &N, hCmagma, &ldc, work ) - / (sqrt(double(K+2))*fabs(alpha)*Anorm*Bnorm + 2*fabs(beta)*Cnorm); - - bool okay = (magma_error < tol); - status += ! okay; - printf("%5lld %5lld %5lld %7.2f (%7.2f) %7.2f (%7.2f) --- ( --- ) %8.2e --- %s\n", - (long long) M, (long long) N, (long long) K, - magma_perf, 1000.*magma_time, - dev_perf, 1000.*dev_time, - magma_error, - (okay ? "ok" : "failed")); - #else - printf("%5lld %5lld %5lld %7.2f (%7.2f) --- ( --- ) ---\n", - (long long) M, (long long) N, (long long) K, - dev_perf, 1000.*dev_time ); - #endif - } - - magma_free_cpu( hA ); - magma_free_cpu( hB ); - magma_free_cpu( hC ); - magma_free_cpu( hCmagma ); - magma_free_cpu( hCdev ); - - magma_free( dA ); - magma_free( dB ); - magma_free( dC ); - fflush( stdout ); - } - if ( opts.niter > 1 ) { - printf( "\n" ); - } - } - - opts.cleanup(); - TESTING_CHECK( magma_finalize() ); - return status; -} diff --git a/cscs-checks/libraries/magma/magma-2.2/testing_zsymmetrize.cpp b/cscs-checks/libraries/magma/magma-2.2/testing_zsymmetrize.cpp deleted file mode 100644 index efc1281596..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/testing_zsymmetrize.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* - -- MAGMA (version 2.2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date November 2016 - - @precisions normal z -> s d c - @author Mark Gates - -*/ - -// includes, system -#include -#include -#include -#include - -// includes, project -#include "magma_v2.h" -#include "magma_lapack.h" -#include "testings.h" - -/* //////////////////////////////////////////////////////////////////////////// - -- Testing zsymmetrize - Code is very similar to testing_ztranspose.cpp -*/ -int main( int argc, char** argv) -{ - TESTING_CHECK( magma_init() ); - magma_print_environment(); - - real_Double_t gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time; - double error, work[1]; - magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; - magmaDoubleComplex *h_A, *h_R; - magmaDoubleComplex_ptr d_A; - magma_int_t N, size, lda, ldda; - magma_int_t ione = 1; - int status = 0; - - magma_opts opts; - opts.parse_opts( argc, argv ); - - printf("%% uplo = %s\n", lapack_uplo_const(opts.uplo) ); - printf("%% N CPU GByte/s (ms) GPU GByte/s (ms) check\n"); - printf("%%====================================================\n"); - for( int itest = 0; itest < opts.ntest; ++itest ) { - for( int iter = 0; iter < opts.niter; ++iter ) { - N = opts.nsize[itest]; - lda = N; - ldda = magma_roundup( N, opts.align ); // multiple of 32 by default - size = lda*N; - // load strictly lower triangle, save strictly upper triangle - gbytes = sizeof(magmaDoubleComplex) * 1.*N*(N-1) / 1e9; - - TESTING_CHECK( magma_zmalloc_cpu( &h_A, size )); - TESTING_CHECK( magma_zmalloc_cpu( &h_R, size )); - - TESTING_CHECK( magma_zmalloc( &d_A, ldda*N )); - - /* Initialize the matrix */ - for( int j = 0; j < N; ++j ) { - for( int i = 0; i < N; ++i ) { - h_A[i + j*lda] = MAGMA_Z_MAKE( i + j/10000., j ); - } - } - - /* ==================================================================== - Performs operation using MAGMA - =================================================================== */ - magma_zsetmatrix( N, N, h_A, lda, d_A, ldda, opts.queue ); - - gpu_time = magma_sync_wtime( opts.queue ); - //magmablas_zsymmetrize( opts.uplo, N-2, d_A+1+ldda, ldda, opts.queue ); // inset by 1 row & col - magmablas_zsymmetrize( opts.uplo, N, d_A, ldda, opts.queue ); - gpu_time = magma_sync_wtime( opts.queue ) - gpu_time; - gpu_perf = gbytes / gpu_time; - - /* ===================================================================== - Performs operation using naive in-place algorithm - (LAPACK doesn't implement symmetrize) - =================================================================== */ - cpu_time = magma_wtime(); - //for( int j = 1; j < N-1; ++j ) { // inset by 1 row & col - // for( int i = 1; i < j; ++i ) { - for( int j = 0; j < N; ++j ) { - for( int i = 0; i < j; ++i ) { - if ( opts.uplo == MagmaLower ) { - h_A[i + j*lda] = MAGMA_Z_CONJ( h_A[j + i*lda] ); - } - else { - h_A[j + i*lda] = MAGMA_Z_CONJ( h_A[i + j*lda] ); - } - } - // real diagonal - h_A[j + j*lda] = MAGMA_Z_MAKE( MAGMA_Z_REAL( h_A[j + j*lda] ), 0 ); - } - cpu_time = magma_wtime() - cpu_time; - cpu_perf = gbytes / cpu_time; - - /* ===================================================================== - Check the result - =================================================================== */ - magma_zgetmatrix( N, N, d_A, ldda, h_R, lda, opts.queue ); - - blasf77_zaxpy(&size, &c_neg_one, h_A, &ione, h_R, &ione); - error = lapackf77_zlange("f", &N, &N, h_R, &lda, work); - - printf("%5lld %7.2f (%7.2f) %7.2f (%7.2f) %s\n", - (long long) N, cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000., - (error == 0. ? "ok" : "failed") ); - status += ! (error == 0.); - - magma_free_cpu( h_A ); - magma_free_cpu( h_R ); - - magma_free( d_A ); - fflush( stdout ); - } - if ( opts.niter > 1 ) { - printf( "\n" ); - } - } - - opts.cleanup(); - TESTING_CHECK( magma_finalize() ); - return status; -} diff --git a/cscs-checks/libraries/magma/magma-2.2/testing_ztranspose.cpp b/cscs-checks/libraries/magma/magma-2.2/testing_ztranspose.cpp deleted file mode 100644 index 8357fc6373..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/testing_ztranspose.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - -- MAGMA (version 2.2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date November 2016 - - @precisions normal z -> s d c - @author Mark Gates - -*/ - -// includes, system -#include -#include -#include -#include - -// includes, project -#include "magma_v2.h" -#include "magma_lapack.h" -#include "magma_operators.h" // conj -#include "testings.h" - -#define COMPLEX - -/* //////////////////////////////////////////////////////////////////////////// - -- Testing ztranspose - Code is very similar to testing_zsymmetrize.cpp -*/ -int main( int argc, char** argv) -{ - TESTING_CHECK( magma_init() ); - magma_print_environment(); - - // OpenCL use: cl_mem , offset (two arguments); - // else use: pointer + offset (one argument). - #ifdef HAVE_clBLAS - #define d_A(i_, j_) d_A, ((i_) + (j_)*ldda) - #define d_B(i_, j_) d_B, ((i_) + (j_)*lddb) - #else - #define d_A(i_, j_) (d_A + (i_) + (j_)*ldda) - #define d_B(i_, j_) (d_B + (i_) + (j_)*lddb) - #endif - - real_Double_t gbytes, gpu_perf, gpu_time, gpu_perf2=0, gpu_time2=0, cpu_perf, cpu_time; - double error, error2, work[1]; - magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; - magmaDoubleComplex *h_A, *h_B, *h_R; - magmaDoubleComplex_ptr d_A, d_B; - magma_int_t M, N, size, lda, ldda, ldb, lddb; - magma_int_t ione = 1; - int status = 0; - - magma_opts opts; - opts.parse_opts( argc, argv ); - - #ifdef COMPLEX - magma_int_t ntrans = 2; - magma_trans_t trans[] = { Magma_ConjTrans, MagmaTrans }; - #else - magma_int_t ntrans = 1; - magma_trans_t trans[] = { MagmaTrans }; - #endif - - printf("%% Inplace transpose requires M == N.\n"); - printf("%% Trans M N CPU GByte/s (ms) GPU GByte/s (ms) check Inplace GB/s (ms) check\n"); - printf("%%=========================================================================================\n"); - for( int itest = 0; itest < opts.ntest; ++itest ) { - for( int itran = 0; itran < ntrans; ++itran ) { - for( int iter = 0; iter < opts.niter; ++iter ) { - M = opts.msize[itest]; - N = opts.nsize[itest]; - lda = M; - ldda = magma_roundup( M, opts.align ); // multiple of 32 by default - ldb = N; - lddb = magma_roundup( N, opts.align ); // multiple of 32 by default - // load entire matrix, save entire matrix - gbytes = sizeof(magmaDoubleComplex) * 2.*M*N / 1e9; - - TESTING_CHECK( magma_zmalloc_cpu( &h_A, lda*N )); // input: M x N - TESTING_CHECK( magma_zmalloc_cpu( &h_B, ldb*M )); // output: N x M - TESTING_CHECK( magma_zmalloc_cpu( &h_R, ldb*M )); // output: N x M - - TESTING_CHECK( magma_zmalloc( &d_A, ldda*N )); // input: M x N - TESTING_CHECK( magma_zmalloc( &d_B, lddb*M )); // output: N x M - - /* Initialize the matrix */ - for( int j = 0; j < N; ++j ) { - for( int i = 0; i < M; ++i ) { - h_A[i + j*lda] = MAGMA_Z_MAKE( i + j/10000., j ); - } - } - for( int j = 0; j < M; ++j ) { - for( int i = 0; i < N; ++i ) { - h_B[i + j*ldb] = MAGMA_Z_MAKE( i + j/10000., j ); - } - } - magma_zsetmatrix( N, M, h_B, ldb, d_B(0,0), lddb, opts.queue ); - - /* ===================================================================== - Performs operation using naive out-of-place algorithm - (LAPACK doesn't implement transpose) - =================================================================== */ - cpu_time = magma_wtime(); - //for( int j = 1; j < N-1; ++j ) { // inset by 1 row & col - // for( int i = 1; i < M-1; ++i ) { // inset by 1 row & col - if ( trans[itran] == MagmaTrans ) { - for( int j = 0; j < N; ++j ) { - for( int i = 0; i < M; ++i ) { - h_B[j + i*ldb] = h_A[i + j*lda]; - } - } - } - else { - for( int j = 0; j < N; ++j ) { - for( int i = 0; i < M; ++i ) { - h_B[j + i*ldb] = conj( h_A[i + j*lda] ); - } - } - } - cpu_time = magma_wtime() - cpu_time; - cpu_perf = gbytes / cpu_time; - - /* ==================================================================== - Performs operation using MAGMA, out-of-place - =================================================================== */ - magma_zsetmatrix( M, N, h_A, lda, d_A(0,0), ldda, opts.queue ); - magma_zsetmatrix( N, M, h_B, ldb, d_B(0,0), lddb, opts.queue ); - - gpu_time = magma_sync_wtime( opts.queue ); - if ( trans[itran] == MagmaTrans ) { - //magmablas_ztranspose( M-2, N-2, d_A(1,1), ldda, d_B(1,1), lddb, opts.queue ); // inset by 1 row & col - magmablas_ztranspose( M, N, d_A(0,0), ldda, d_B(0,0), lddb, opts.queue ); - } - #ifdef HAVE_CUBLAS - else { - //magmablas_ztranspose_conj( M-2, N-2, d_A(1,1), ldda, d_B(1,1), lddb, opts.queue ); // inset by 1 row & col - magmablas_ztranspose_conj( M, N, d_A(0,0), ldda, d_B(0,0), lddb, opts.queue ); - } - #endif - gpu_time = magma_sync_wtime( opts.queue ) - gpu_time; - gpu_perf = gbytes / gpu_time; - - /* ==================================================================== - Performs operation using MAGMA, in-place - =================================================================== */ - if ( M == N ) { - magma_zsetmatrix( M, N, h_A, lda, d_A(0,0), ldda, opts.queue ); - - gpu_time2 = magma_sync_wtime( opts.queue ); - if ( trans[itran] == MagmaTrans ) { - //magmablas_ztranspose_inplace( N-2, d_A(1,1), ldda, opts.queue ); // inset by 1 row & col - magmablas_ztranspose_inplace( N, d_A(0,0), ldda, opts.queue ); - } - #ifdef HAVE_CUBLAS - else { - //magmablas_ztranspose_conj_inplace( N-2, d_A(1,1), ldda, opts.queue ); // inset by 1 row & col - magmablas_ztranspose_conj_inplace( N, d_A(0,0), ldda, opts.queue ); - } - #endif - gpu_time2 = magma_sync_wtime( opts.queue ) - gpu_time2; - gpu_perf2 = gbytes / gpu_time2; - } - - /* ===================================================================== - Check the result - =================================================================== */ - // check out-of-place transpose (d_B) - size = ldb*M; - magma_zgetmatrix( N, M, d_B(0,0), lddb, h_R, ldb, opts.queue ); - blasf77_zaxpy( &size, &c_neg_one, h_B, &ione, h_R, &ione ); - error = lapackf77_zlange("f", &N, &M, h_R, &ldb, work ); - - if ( M == N ) { - // also check in-place tranpose (d_A) - magma_zgetmatrix( N, M, d_A(0,0), ldda, h_R, ldb, opts.queue ); - blasf77_zaxpy( &size, &c_neg_one, h_B, &ione, h_R, &ione ); - error2 = lapackf77_zlange("f", &N, &M, h_R, &ldb, work ); - - printf("%5c %5lld %5lld %7.2f (%7.2f) %7.2f (%7.2f) %6s %7.2f (%7.2f) %s\n", - lapacke_trans_const( trans[itran] ), - (long long) M, (long long) N, - cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000., - (error == 0. ? "ok" : "failed"), - gpu_perf2, gpu_time2, - (error2 == 0. ? "ok" : "failed") ); - status += ! (error == 0. && error2 == 0.); - } - else { - printf("%5c %5lld %5lld %7.2f (%7.2f) %7.2f (%7.2f) %6s --- ( --- )\n", - lapacke_trans_const( trans[itran] ), - (long long) M, (long long) N, - cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000., - (error == 0. ? "ok" : "failed") ); - status += ! (error == 0.); - } - - magma_free_cpu( h_A ); - magma_free_cpu( h_B ); - magma_free_cpu( h_R ); - - magma_free( d_A ); - magma_free( d_B ); - fflush( stdout ); - } - if ( opts.niter > 1 ) { - printf( "\n" ); - } - } - } - - opts.cleanup(); - TESTING_CHECK( magma_finalize() ); - return status; -} diff --git a/cscs-checks/libraries/magma/magma-2.2/testing_zunmbr.cpp b/cscs-checks/libraries/magma/magma-2.2/testing_zunmbr.cpp deleted file mode 100644 index 4236ad9cf7..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/testing_zunmbr.cpp +++ /dev/null @@ -1,228 +0,0 @@ -/* - -- MAGMA (version 2.2.0) -- - Univ. of Tennessee, Knoxville - Univ. of California, Berkeley - Univ. of Colorado, Denver - @date November 2016 - - @author Mark Gates - @precisions normal z -> c d s -*/ -// includes, system -#include -#include -#include -#include -#include - -// includes, project -#include "flops.h" -#include "magma_v2.h" -#include "magma_lapack.h" -#include "magma_operators.h" -#include "testings.h" - -/* //////////////////////////////////////////////////////////////////////////// - -- Testing zunmbr -*/ -int main( int argc, char** argv ) -{ - TESTING_CHECK( magma_init() ); - magma_print_environment(); - - real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time; - double Cnorm, error, dwork[1]; - magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; - magma_int_t ione = 1; - magma_int_t m, n, k, mi, ni, mm, nn, nq, size, info; - magma_int_t ISEED[4] = {0,0,0,1}; - magma_int_t nb, ldc, lda, lwork, lwork_max; - magmaDoubleComplex *C, *R, *A, *work, *tau, *tauq, *taup; - double *d, *e; - int status = 0; - - magma_opts opts; - opts.parse_opts( argc, argv ); - - // need slightly looser bound (60*eps instead of 30*eps) for some tests - opts.tolerance = max( 60., opts.tolerance ); - double tol = opts.tolerance * lapackf77_dlamch("E"); - - // test all combinations of input parameters - magma_vect_t vect [] = { MagmaQ, MagmaP }; - magma_side_t side [] = { MagmaLeft, MagmaRight }; - magma_trans_t trans[] = { Magma_ConjTrans, MagmaNoTrans }; - - printf("%% M N K vect side trans CPU Gflop/s (sec) GPU Gflop/s (sec) ||R||_F / ||QC||_F\n"); - printf("%%==============================================================================================\n"); - for( int itest = 0; itest < opts.ntest; ++itest ) { - for( int ivect = 0; ivect < 2; ++ivect ) { - for( int iside = 0; iside < 2; ++iside ) { - for( int itran = 0; itran < 2; ++itran ) { - for( int iter = 0; iter < opts.niter; ++iter ) { - m = opts.msize[itest]; - n = opts.nsize[itest]; - k = opts.ksize[itest]; - nb = magma_get_zgebrd_nb( m, n ); - ldc = m; - // A is nq x k (vect=Q) or k x nq (vect=P) - // where nq=m (left) or nq=n (right) - nq = (side[iside] == MagmaLeft ? m : n ); - mm = (vect[ivect] == MagmaQ ? nq : k ); - nn = (vect[ivect] == MagmaQ ? k : nq); - lda = mm; - - // MBR calls either MQR or MLQ in various ways - if ( vect[ivect] == MagmaQ ) { - if ( nq >= k ) { - gflops = FLOPS_ZUNMQR( m, n, k, side[iside] ) / 1e9; - } - else { - if ( side[iside] == MagmaLeft ) { - mi = m - 1; - ni = n; - } - else { - mi = m; - ni = n - 1; - } - gflops = FLOPS_ZUNMQR( mi, ni, nq-1, side[iside] ) / 1e9; - } - } - else { - if ( nq > k ) { - gflops = FLOPS_ZUNMLQ( m, n, k, side[iside] ) / 1e9; - } - else { - if ( side[iside] == MagmaLeft ) { - mi = m - 1; - ni = n; - } - else { - mi = m; - ni = n - 1; - } - gflops = FLOPS_ZUNMLQ( mi, ni, nq-1, side[iside] ) / 1e9; - } - } - - // workspace for gebrd is (mm + nn)*nb - // workspace for unmbr is m*nb or n*nb, depending on side - lwork_max = max( (mm + nn)*nb, max( m*nb, n*nb )); - // this rounds it up slightly if needed to agree with lwork query below - lwork_max = magma_int_t( real( magma_zmake_lwork( lwork_max ))); - - TESTING_CHECK( magma_zmalloc_cpu( &C, ldc*n )); - TESTING_CHECK( magma_zmalloc_cpu( &R, ldc*n )); - TESTING_CHECK( magma_zmalloc_cpu( &A, lda*nn )); - TESTING_CHECK( magma_zmalloc_cpu( &work, lwork_max )); - TESTING_CHECK( magma_dmalloc_cpu( &d, min(mm,nn) )); - TESTING_CHECK( magma_dmalloc_cpu( &e, min(mm,nn) )); - TESTING_CHECK( magma_zmalloc_cpu( &tauq, min(mm,nn) )); - TESTING_CHECK( magma_zmalloc_cpu( &taup, min(mm,nn) )); - - // C is full, m x n - size = ldc*n; - lapackf77_zlarnv( &ione, ISEED, &size, C ); - lapackf77_zlacpy( "Full", &m, &n, C, &ldc, R, &ldc ); - - size = lda*nn; - lapackf77_zlarnv( &ione, ISEED, &size, A ); - - // compute BRD factorization to get Householder vectors in A, tauq, taup - //lapackf77_zgebrd( &mm, &nn, A, &lda, d, e, tauq, taup, work, &lwork_max, &info ); - magma_zgebrd( mm, nn, A, lda, d, e, tauq, taup, work, lwork_max, &info ); - if (info != 0) { - printf("magma_zgebrd returned error %lld: %s.\n", - (long long) info, magma_strerror( info )); - } - - if ( vect[ivect] == MagmaQ ) { - tau = tauq; - } else { - tau = taup; - } - - /* ===================================================================== - Performs operation using LAPACK - =================================================================== */ - cpu_time = magma_wtime(); - lapackf77_zunmbr( lapack_vect_const( vect[ivect] ), - lapack_side_const( side[iside] ), - lapack_trans_const( trans[itran] ), - &m, &n, &k, - A, &lda, tau, C, &ldc, work, &lwork_max, &info ); - cpu_time = magma_wtime() - cpu_time; - cpu_perf = gflops / cpu_time; - if (info != 0) { - printf("lapackf77_zunmbr returned error %lld: %s.\n", - (long long) info, magma_strerror( info )); - } - - /* ==================================================================== - Performs operation using MAGMA - =================================================================== */ - // query for workspace size - lwork = -1; - magma_zunmbr( vect[ivect], side[iside], trans[itran], - m, n, k, - A, lda, tau, R, ldc, work, lwork, &info ); - if (info != 0) { - printf("magma_zunmbr (lwork query) returned error %lld: %s.\n", - (long long) info, magma_strerror( info )); - } - lwork = (magma_int_t) MAGMA_Z_REAL( work[0] ); - if ( lwork < 0 || lwork > lwork_max ) { - printf("Warning: optimal lwork %lld > allocated lwork_max %lld\n", (long long) lwork, (long long) lwork_max ); - lwork = lwork_max; - } - - gpu_time = magma_wtime(); - magma_zunmbr( vect[ivect], side[iside], trans[itran], - m, n, k, - A, lda, tau, R, ldc, work, lwork, &info ); - gpu_time = magma_wtime() - gpu_time; - gpu_perf = gflops / gpu_time; - if (info != 0) { - printf("magma_zunmbr returned error %lld: %s.\n", - (long long) info, magma_strerror( info )); - } - - /* ===================================================================== - compute relative error |QC_magma - QC_lapack| / |QC_lapack| - =================================================================== */ - size = ldc*n; - blasf77_zaxpy( &size, &c_neg_one, C, &ione, R, &ione ); - Cnorm = lapackf77_zlange( "Fro", &m, &n, C, &ldc, dwork ); - error = lapackf77_zlange( "Fro", &m, &n, R, &ldc, dwork ) / (magma_dsqrt(m*n) * Cnorm); - - printf( "%5lld %5lld %5lld %c %4c %5c %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", - (long long) m, (long long) n, (long long) k, - lapacke_vect_const( vect[ivect] ), - lapacke_side_const( side[iside] ), - lapacke_trans_const( trans[itran] ), - cpu_perf, cpu_time, gpu_perf, gpu_time, - error, (error < tol ? "ok" : "failed") ); - status += ! (error < tol); - - magma_free_cpu( C ); - magma_free_cpu( R ); - magma_free_cpu( A ); - magma_free_cpu( work ); - magma_free_cpu( d ); - magma_free_cpu( e ); - magma_free_cpu( taup ); - magma_free_cpu( tauq ); - fflush( stdout ); - } - if ( opts.niter > 1 ) { - printf( "\n" ); - } - }}} // end ivect, iside, itran - printf( "\n" ); - } - - opts.cleanup(); - TESTING_CHECK( magma_finalize() ); - return status; -} diff --git a/cscs-checks/libraries/magma/magma-2.2/testings.h b/cscs-checks/libraries/magma/magma-2.2/testings.h deleted file mode 100644 index efd80eae96..0000000000 --- a/cscs-checks/libraries/magma/magma-2.2/testings.h +++ /dev/null @@ -1,219 +0,0 @@ -#ifndef TESTINGS_H -#define TESTINGS_H - -#include -#include - -#if ! defined(MAGMA_H) && ! defined(MAGMA_V2_H) -#include "magma_v2.h" -#endif - -#include - -#include "magma_lapack.h" -#include "testing_s.h" -#include "testing_d.h" -#include "testing_c.h" -#include "testing_z.h" - - -/***************************************************************************//** - * For portability to Windows - */ -#if defined( _WIN32 ) || defined( _WIN64 ) - // functions where Microsoft fails to provide C99 or POSIX standard - // (only with Microsoft, not with nvcc on Windows) - // in both magma_internal.h and testings.h - #ifndef __NVCC__ - - #include - #define copysign(x,y) _copysign(x,y) - #define isnan(x) _isnan(x) - #define isinf(x) ( ! _finite(x) && ! _isnan(x) ) - #define isfinite(x) _finite(x) - // note _snprintf has slightly different semantics than snprintf - #define snprintf _snprintf - #define unlink _unlink - - #endif -#endif - - -#ifdef __cplusplus -extern "C" { -#endif - -void flops_init(); - - -/***************************************************************************//** - max that propogates nan consistently: - max_nan( 1, nan ) = nan - max_nan( nan, 1 ) = nan - - isnan and isinf are hard to call portably from both C and C++. - In Windows C, include float.h, use _isnan as above (before VS 2015) - In Unix C or C++, include math.h, use isnan - In std C++, include cmath, use std::isnan - Sometimes in C++, include cmath, use isnan is okay (on Linux but not MacOS) - This makes writing a header inline function a nightmare. For now, do it - here in testing to avoid potential issues in the MAGMA library itself. -*******************************************************************************/ -static inline double magma_max_nan( double x, double y ) -{ - #ifdef isnan - // with include macro - return (isnan(y) || x < y ? y : x); - #else - // with include function - return (std::isnan(y) || x < y ? y : x); - #endif -} - - -/***************************************************************************//** - * Global utilities - * in both magma_internal.h and testings.h - **/ -#ifndef max -#define max(a, b) ((a) > (b) ? (a) : (b)) -#endif - -#ifndef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#endif - -// suppress "warning: unused variable" in a portable fashion -#define MAGMA_UNUSED(var) ((void)var) - - -/***************************************************************************//** - * Macros to handle error checking. - */ - -#define TESTING_CHECK( err ) \ - do { \ - magma_int_t err_ = (err); \ - if ( err_ != 0 ) { \ - fprintf( stderr, "Error: %s\nfailed at %s:%d: error %lld: %s\n", \ - #err, __FILE__, __LINE__, \ - (long long) err_, magma_strerror(err_) ); \ - exit(1); \ - } \ - } while( 0 ) - - -/***************************************************************************//** - * Functions and data structures used for testing. - */ - -void magma_assert( bool condition, const char* msg, ... ); - -void magma_assert_warn( bool condition, const char* msg, ... ); - -#define MAX_NTEST 1050 - -typedef enum { - MagmaOptsDefault = 0, - MagmaOptsBatched = 1000 -} magma_opts_t; - -typedef enum { - MagmaSVD_all, - MagmaSVD_query, - MagmaSVD_doc, - MagmaSVD_doc_old, - MagmaSVD_min, - MagmaSVD_min_1, - MagmaSVD_min_old, - MagmaSVD_min_old_1, - MagmaSVD_min_fast, - MagmaSVD_min_fast_1, - MagmaSVD_opt, - MagmaSVD_opt_old, - MagmaSVD_opt_slow, - MagmaSVD_max -} magma_svd_work_t; - -class magma_opts -{ -public: - // constructor - magma_opts( magma_opts_t flag=MagmaOptsDefault ); - - // parse command line - void parse_opts( int argc, char** argv ); - - // deallocate queues, etc. - void cleanup(); - - // matrix size - magma_int_t ntest; - magma_int_t msize[ MAX_NTEST ]; - magma_int_t nsize[ MAX_NTEST ]; - magma_int_t ksize[ MAX_NTEST ]; - magma_int_t batchcount; - - magma_int_t default_nstart; - magma_int_t default_nend; - magma_int_t default_nstep; - - // scalars - magma_int_t device; - magma_int_t align; - magma_int_t nb; - magma_int_t nrhs; - magma_int_t nqueue; - magma_int_t ngpu; - magma_int_t nsub; - magma_int_t niter; - magma_int_t nthread; - magma_int_t offset; - magma_int_t itype; // hegvd: problem type - magma_int_t version; // hemm_mgpu, hetrd - magma_int_t check; - magma_int_t verbose; - double fraction; // hegvdx - double tolerance; - - // boolean arguments - bool magma; - bool lapack; - bool warmup; - - // lapack options - magma_uplo_t uplo; - magma_trans_t transA; - magma_trans_t transB; - magma_side_t side; - magma_diag_t diag; - magma_vec_t jobz; // heev: no eigen vectors - magma_vec_t jobvr; // geev: no right eigen vectors - magma_vec_t jobvl; // geev: no left eigen vectors - - // vectors of options - std::vector< magma_svd_work_t > svd_work; - std::vector< magma_vec_t > jobu; - std::vector< magma_vec_t > jobv; - - // queue for default device - magma_queue_t queue; - magma_queue_t queues2[3]; // 2 queues + 1 extra NULL entry to catch errors - - #ifdef HAVE_CUBLAS - // handle for directly calling cublas - cublasHandle_t handle; - #endif - - // misc - int flock_op; // shared or exclusive lock - int flock_fd; // lock file -}; - -extern const char* g_platform_str; - -#ifdef __cplusplus -} -#endif - -#endif /* TESTINGS_H */ diff --git a/cscs-checks/libraries/magma/magma-2.4/Makefile_cblas_z b/cscs-checks/libraries/magma/magma-2.4/Makefile_cblas_z deleted file mode 100644 index 4d072aedfe..0000000000 --- a/cscs-checks/libraries/magma/magma-2.4/Makefile_cblas_z +++ /dev/null @@ -1,14 +0,0 @@ -all: testing_cblas_z - -testing_cblas_z: testing_cblas_z.o magma_util.o -# CC testing_cblas_z.o magma_util.o -lcusparse -lcublas /apps/daint/UES/jenkins/5.2.UP04/easybuild/software/magma/2.0.0-CrayGNU-2016.03/lib/libmagma.a /apps/daint/UES/jenkins/5.2.UP04/easybuild/software/magma/2.0.0-CrayGNU-2016.03/lib/libmagma_sparse.a -o testing_cblas_z - $(CXX) $(LDFLAGS) testing_cblas_z.o magma_util.o -o testing_cblas_z - -testing_cblas_z.o: testing_cblas_z.cpp - $(CXX) -c $(CXXFLAGS) -DADD_ -DHAVE_CUBLAS -DHAVE_CBLAS -I. testing_cblas_z.cpp - -magma_util.o: magma_util.cpp - $(CXX) -c $(CXXFLAGS) -DADD_ -DHAVE_CUBLAS -DHAVE_CBLAS -I. magma_util.cpp - -clean: - rm *.o testing_cblas_z diff --git a/cscs-checks/libraries/magma/magma-2.4/Makefile_zgemm b/cscs-checks/libraries/magma/magma-2.4/Makefile_zgemm deleted file mode 100644 index b571766835..0000000000 --- a/cscs-checks/libraries/magma/magma-2.4/Makefile_zgemm +++ /dev/null @@ -1,13 +0,0 @@ -all: testing_zgemm - -testing_zgemm: testing_zgemm.o magma_util.o - $(CXX) $(LDFLAGS) testing_zgemm.o magma_util.o -o testing_zgemm - -testing_zgemm.o: testing_zgemm.cpp - $(CXX) -c $(CXXFLAGS) -DADD_ -DHAVE_CUBLAS -DHAVE_CBLAS -I. testing_zgemm.cpp - -magma_util.o: magma_util.cpp - $(CXX) -c $(CXXFLAGS) -DADD_ -DHAVE_CUBLAS -DHAVE_CBLAS -I. magma_util.cpp - -clean: - rm *.o testing_zgemm diff --git a/cscs-checks/libraries/magma/magma-2.4/Makefile_zsymmetrize b/cscs-checks/libraries/magma/magma-2.4/Makefile_zsymmetrize deleted file mode 100644 index 7630d5142a..0000000000 --- a/cscs-checks/libraries/magma/magma-2.4/Makefile_zsymmetrize +++ /dev/null @@ -1,13 +0,0 @@ -all: testing_zsymmetrize - -testing_zsymmetrize: testing_zsymmetrize.o magma_util.o - $(CXX) $(LDFLAGS) testing_zsymmetrize.o magma_util.o -o testing_zsymmetrize - -testing_zsymmetrize.o: testing_zsymmetrize.cpp - $(CXX) -c $(CXXFLAGS) -DADD_ -DHAVE_CUBLAS -DHAVE_CBLAS -I. testing_zsymmetrize.cpp - -magma_util.o: magma_util.cpp - $(CXX) -c $(CXXFLAGS) -DADD_ -DHAVE_CUBLAS -DHAVE_CBLAS -I. magma_util.cpp - -clean: - rm *.o testing_zsymmetrize diff --git a/cscs-checks/libraries/magma/magma-2.4/Makefile_ztranspose b/cscs-checks/libraries/magma/magma-2.4/Makefile_ztranspose deleted file mode 100644 index cd906f7e75..0000000000 --- a/cscs-checks/libraries/magma/magma-2.4/Makefile_ztranspose +++ /dev/null @@ -1,13 +0,0 @@ -all: testing_ztranspose - -testing_ztranspose: testing_ztranspose.o magma_util.o - $(CXX) $(LDFLAGS) testing_ztranspose.o magma_util.o -o testing_ztranspose - -testing_ztranspose.o: testing_ztranspose.cpp - $(CXX) -c $(CXXFLAGS) -DADD_ -DHAVE_CUBLAS -DHAVE_CBLAS -I. testing_ztranspose.cpp - -magma_util.o: magma_util.cpp - $(CXX) -c $(CXXFLAGS) -DADD_ -DHAVE_CUBLAS -DHAVE_CBLAS -I. magma_util.cpp - -clean: - rm *.o testing_ztranspose diff --git a/cscs-checks/libraries/magma/magma-2.4/Makefile_zunmbr b/cscs-checks/libraries/magma/magma-2.4/Makefile_zunmbr deleted file mode 100644 index c4167dc33f..0000000000 --- a/cscs-checks/libraries/magma/magma-2.4/Makefile_zunmbr +++ /dev/null @@ -1,13 +0,0 @@ -all: testing_zunmbr - -testing_zunmbr: testing_zunmbr.o magma_util.o - $(CXX) $(LDFLAGS) testing_zunmbr.o magma_util.o -o testing_zunmbr - -testing_zunmbr.o: testing_zunmbr.cpp - $(CXX) -c $(CXXFLAGS) -DADD_ -DHAVE_CUBLAS -DHAVE_CBLAS -I. testing_zunmbr.cpp - -magma_util.o: magma_util.cpp - $(CXX) -c $(CXXFLAGS) -DADD_ -DHAVE_CUBLAS -DHAVE_CBLAS -I. magma_util.cpp - -clean: - rm *.o testing_zunmbr diff --git a/cscs-checks/libraries/magma/magma-2.4/license.txt b/cscs-checks/libraries/magma/magma-2.4/license.txt deleted file mode 100644 index 5336bd4c45..0000000000 --- a/cscs-checks/libraries/magma/magma-2.4/license.txt +++ /dev/null @@ -1,10 +0,0 @@ -License - -Copyright © 2018 The University of Tennessee. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -· Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -· Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer listed in this license in the documentation and/or other materials provided with the distribution. -· Neither the name of the copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. - -This software is provided by the copyright holders and contributors "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. in no event shall the copyright owner or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. diff --git a/cscs-checks/libraries/magma/magma_checks.py b/cscs-checks/libraries/magma/magma_checks.py index 4abbba8efe..b6ca0a562a 100644 --- a/cscs-checks/libraries/magma/magma_checks.py +++ b/cscs-checks/libraries/magma/magma_checks.py @@ -2,68 +2,44 @@ import reframe.utility.sanity as sn -@rfm.parameterized_test(*([name, libversion, variant] - for name in ['cblas_z', 'zgemm', 'zsymmetrize', - 'ztranspose', 'zunmbr'] - for libversion in ['2.2', '2.4'] - for variant in ['prod', 'maint'] - if (name, libversion, variant)[:2] != ('zunmbr', '2.4'))) +@rfm.required_version('>=2.16') +@rfm.parameterized_test(['cblas_z'], ['zgemm'], + ['zsymmetrize'], ['ztranspose']) class MagmaCheck(rfm.RegressionTest): - def __init__(self, name, libversion, variant): + def __init__(self, subtest): super().__init__() self.valid_systems = ['daint:gpu', 'dom:gpu'] self.num_gpus_per_node = 1 - self.executable = 'testing_' + name self.sanity_patterns = sn.assert_found(r'Result = PASS', self.stdout) self.prebuild_cmd = ['patch < patch.txt'] self.build_system = 'Make' - self.build_system.makefile = 'Makefile_%s' % name + self.valid_prog_environs = ['PrgEnv-intel'] + self.build_system.makefile = 'Makefile_%s' % subtest # Compile with -O0 since with a higher level the compiler seems to # optimise something away self.build_system.cflags = ['-O0'] self.build_system.cxxflags = ['-O0', '-std=c++11'] self.build_system.ldflags = ['-lcusparse', '-lcublas', '-lmagma', '-lmagma_sparse'] - if libversion == '2.2': - self.valid_prog_environs = ['PrgEnv-gnu'] - self.modules = ['magma/2.2.0-CrayGNU-18.08-cuda-9.1'] - self.sourcesdir = 'magma-2.2' - elif libversion == '2.4': - self.valid_prog_environs = ['PrgEnv-intel'] - self.modules = ['magma/2.4.0-CrayIntel-18.08-cuda-9.1'] - self.sourcesdir = 'magma-2.4' - - if variant == 'prod': - self.tags |= {'production'} - elif variant == 'maint': - self.tags |= {'maintenance'} - - if name == 'cblas_z': + self.executable = './testing_' + subtest + self.modules = ['magma'] + self.maintainers = ['AJ'] + self.tags = {'scs', 'production', 'maintenance'} + if subtest == 'cblas_z': self.perf_patterns = { - 'duration': - sn.extractsingle(r'Duration: (?P\S+)', - self.stdout, "duration", float) + 'duration': sn.extractsingle(r'Duration: (\S+)', + self.stdout, 1, float) + } + self.reference = { + 'daint:gpu': { + 'duration': (0.10, None, 1.05, 's'), + }, + 'dom:gpu': { + 'duration': (0.10, None, 1.05, 's'), + }, } - if variant == 'prod': - self.reference = { - 'daint:gpu': { - 'duration': (2.25, None, 0.05), - }, - 'dom:gpu': { - 'duration': (2.02, None, 0.05), - }, - } - elif variant == 'maint': - self.reference = { - 'daint:gpu': { - 'duration': (2.25, None, 0.05), - }, - 'dom:gpu': { - 'duration': (2.02, None, 0.05), - }, - } - elif name == 'zgemm': + elif subtest == 'zgemm': self.perf_patterns = { 'magma': sn.extractsingle(r'MAGMA GFlops: (?P\S+)', self.stdout, 'magma_gflops', float), @@ -73,64 +49,36 @@ def __init__(self, name, libversion, variant): 'cpu': sn.extractsingle(r'CPU GFlops: (?P\S+)', self.stdout, 'cpu_gflops', float) } - if variant == 'prod': - self.reference = { - 'daint:gpu': { - 'magma': (3357.0, None, 0.2), - 'cublas': (3775.0, None, 0.45), - 'cpu': (47.01, None, 0.1), - }, - 'dom:gpu': { - 'magma': (3330.0, None, 0.1), - 'cublas': (3774.0, None, 0.05), - 'cpu': (47.32, None, 0.05), - }, - } - elif variant == 'maint': - self.reference = { - 'daint:gpu': { - 'magma': (3357.0, None, 0.2), - 'cublas': (3775.0, None, 0.45), - 'cpu': (47.01, None, 0.1), - }, - 'dom:gpu': { - 'magma': (3330.0, None, 0.1), - 'cublas': (3774.0, None, 0.05), - 'cpu': (47.32, None, 0.05), - }, - } - elif name == 'zsymmetrize': + self.reference = { + 'daint:gpu': { + 'magma': (3344, -0.05, None, 'Gflop/s'), + 'cublas': (3709, -0.05, None, 'Gflop/s'), + 'cpu': (42.8, -0.27, None, 'Gflop/s'), + }, + 'dom:gpu': { + 'magma': (3344, -0.05, None, 'Gflop/s'), + 'cublas': (3709, -0.05, None, 'Gflop/s'), + 'cpu': (42.8, -0.27, None, 'Gflop/s'), + }, + } + elif subtest == 'zsymmetrize': self.perf_patterns = { - 'cpu_perf': - sn.extractsingle(r'CPU performance: (?P\S+)', - self.stdout, 'cpu_performance', float), - 'gpu_perf': - sn.extractsingle(r'GPU performance: (?P\S+)', - self.stdout, 'gpu_performance', float), + 'cpu_perf': sn.extractsingle(r'CPU performance: (\S+)', + self.stdout, 1, float), + 'gpu_perf': sn.extractsingle(r'GPU performance: (\S+)', + self.stdout, 1, float), } - if variant == 'prod': - self.reference = { - 'daint:gpu': { - 'cpu_perf': (0.93, None, 0.05), - 'gpu_perf': (157.8, None, 0.05), - }, - 'dom:gpu': { - 'cpu_perf': (0.93, None, 0.05), - 'gpu_perf': (158.4, None, 0.05), - }, - } - elif variant == 'maint': - self.reference = { - 'daint:gpu': { - 'cpu_perf': (0.93, None, 0.05), - 'gpu_perf': (157.8, None, 0.05), - }, - 'dom:gpu': { - 'cpu_perf': (0.93, None, 0.05), - 'gpu_perf': (158.4, None, 0.05), - }, - } - elif name == 'ztranspose': + self.reference = { + 'daint:gpu': { + 'cpu_perf': (0.91, -0.05, None, 'GB/s'), + 'gpu_perf': (158.3, -0.05, None, 'GB/s'), + }, + 'dom:gpu': { + 'cpu_perf': (0.91, -0.05, None, 'GB/s'), + 'gpu_perf': (158.3, -0.05, None, 'GB/s'), + }, + } + elif subtest == 'ztranspose': self.perf_patterns = { 'cpu_perf': sn.extractsingle(r'CPU performance: (?P\S+)', @@ -139,29 +87,18 @@ def __init__(self, name, libversion, variant): sn.extractsingle(r'GPU performance: (?P\S+)', self.stdout, 'gpu_performance', float) } - if variant == 'prod': - self.reference = { - 'daint:gpu': { - 'cpu_perf': (1.52, None, 0.05), - 'gpu_perf': (499.0, None, 0.05), - }, - 'dom:gpu': { - 'cpu_perf': (1.57, None, 0.05), - 'gpu_perf': (499.1, None, 0.05), - }, - } - elif variant == 'maint': - self.reference = { - 'daint:gpu': { - 'cpu_perf': (1.52, None, 0.05), - 'gpu_perf': (499.0, None, 0.05), - }, - 'dom:gpu': { - 'cpu_perf': (1.57, None, 0.05), - 'gpu_perf': (499.1, None, 0.05), - }, - } - elif name == 'zunmbr': + self.reference = { + 'daint:gpu': { + 'cpu_perf': (1.51, -0.05, None, 'GB/s'), + 'gpu_perf': (498.2, -0.05, None, 'GB/s'), + }, + 'dom:gpu': { + 'cpu_perf': (1.51, -0.05, None, 'GB/s'), + 'gpu_perf': (498.2, -0.05, None, 'GB/s'), + }, + } + elif subtest == 'zunmbr': + # This test fails to compile with Magma 2.4 self.perf_patterns = { 'cpu_perf': sn.extractsingle(r'CPU performance: (?P\S+)', @@ -170,28 +107,13 @@ def __init__(self, name, libversion, variant): sn.extractsingle(r'GPU performance: (?P\S+)', self.stdout, 'gpu_performance', float) } - if variant == 'prod': - self.reference = { - 'daint:gpu': { - 'cpu_perf': (36.5, None, 0.05), - 'gpu_perf': (252.0, None, 0.05), - }, - 'dom:gpu': { - 'cpu_perf': (36.7, None, 0.05), - 'gpu_perf': (256.4, None, 0.05), - }, - } - elif variant == 'maint': - self.reference = { - 'daint:gpu': { - 'cpu_perf': (36.5, None, 0.05), - 'gpu_perf': (252.0, None, 0.05), - }, - 'dom:gpu': { - 'cpu_perf': (36.7, None, 0.05), - 'gpu_perf': (256.4, None, 0.05), - }, - } - - self.maintainers = ['AJ'] - self.tags |= {'scs'} + self.reference = { + 'daint:gpu': { + 'cpu_perf': (36.6, -0.05, None, 'Gflop/s'), + 'gpu_perf': (254.7, -0.05, None, 'Gflop/s'), + }, + 'dom:gpu': { + 'cpu_perf': (36.6, -0.05, None, 'Gflop/s'), + 'gpu_perf': (254.7, -0.05, None, 'Gflop/s'), + }, + } diff --git a/cscs-checks/libraries/magma/magma-2.2/Makefile_cblas_z b/cscs-checks/libraries/magma/src/Makefile_cblas_z similarity index 100% rename from cscs-checks/libraries/magma/magma-2.2/Makefile_cblas_z rename to cscs-checks/libraries/magma/src/Makefile_cblas_z diff --git a/cscs-checks/libraries/magma/magma-2.2/Makefile_zgemm b/cscs-checks/libraries/magma/src/Makefile_zgemm similarity index 100% rename from cscs-checks/libraries/magma/magma-2.2/Makefile_zgemm rename to cscs-checks/libraries/magma/src/Makefile_zgemm diff --git a/cscs-checks/libraries/magma/magma-2.2/Makefile_zsymmetrize b/cscs-checks/libraries/magma/src/Makefile_zsymmetrize similarity index 100% rename from cscs-checks/libraries/magma/magma-2.2/Makefile_zsymmetrize rename to cscs-checks/libraries/magma/src/Makefile_zsymmetrize diff --git a/cscs-checks/libraries/magma/magma-2.2/Makefile_ztranspose b/cscs-checks/libraries/magma/src/Makefile_ztranspose similarity index 100% rename from cscs-checks/libraries/magma/magma-2.2/Makefile_ztranspose rename to cscs-checks/libraries/magma/src/Makefile_ztranspose diff --git a/cscs-checks/libraries/magma/magma-2.2/Makefile_zunmbr b/cscs-checks/libraries/magma/src/Makefile_zunmbr similarity index 100% rename from cscs-checks/libraries/magma/magma-2.2/Makefile_zunmbr rename to cscs-checks/libraries/magma/src/Makefile_zunmbr diff --git a/cscs-checks/libraries/magma/magma-2.4/flops.h b/cscs-checks/libraries/magma/src/flops.h similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/flops.h rename to cscs-checks/libraries/magma/src/flops.h diff --git a/cscs-checks/libraries/magma/magma-2.2/license.txt b/cscs-checks/libraries/magma/src/license.txt similarity index 100% rename from cscs-checks/libraries/magma/magma-2.2/license.txt rename to cscs-checks/libraries/magma/src/license.txt diff --git a/cscs-checks/libraries/magma/magma-2.4/magma_lapack.hpp b/cscs-checks/libraries/magma/src/magma_lapack.hpp similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/magma_lapack.hpp rename to cscs-checks/libraries/magma/src/magma_lapack.hpp diff --git a/cscs-checks/libraries/magma/magma-2.4/magma_matrix.hpp b/cscs-checks/libraries/magma/src/magma_matrix.hpp similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/magma_matrix.hpp rename to cscs-checks/libraries/magma/src/magma_matrix.hpp diff --git a/cscs-checks/libraries/magma/magma-2.4/magma_util.cpp b/cscs-checks/libraries/magma/src/magma_util.cpp similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/magma_util.cpp rename to cscs-checks/libraries/magma/src/magma_util.cpp diff --git a/cscs-checks/libraries/magma/magma-2.4/patch.txt b/cscs-checks/libraries/magma/src/patch.txt similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/patch.txt rename to cscs-checks/libraries/magma/src/patch.txt diff --git a/cscs-checks/libraries/magma/magma-2.4/testing_c.h b/cscs-checks/libraries/magma/src/testing_c.h similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/testing_c.h rename to cscs-checks/libraries/magma/src/testing_c.h diff --git a/cscs-checks/libraries/magma/magma-2.4/testing_cblas_z.cpp b/cscs-checks/libraries/magma/src/testing_cblas_z.cpp similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/testing_cblas_z.cpp rename to cscs-checks/libraries/magma/src/testing_cblas_z.cpp diff --git a/cscs-checks/libraries/magma/magma-2.4/testing_d.h b/cscs-checks/libraries/magma/src/testing_d.h similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/testing_d.h rename to cscs-checks/libraries/magma/src/testing_d.h diff --git a/cscs-checks/libraries/magma/magma-2.4/testing_s.h b/cscs-checks/libraries/magma/src/testing_s.h similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/testing_s.h rename to cscs-checks/libraries/magma/src/testing_s.h diff --git a/cscs-checks/libraries/magma/magma-2.4/testing_z.h b/cscs-checks/libraries/magma/src/testing_z.h similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/testing_z.h rename to cscs-checks/libraries/magma/src/testing_z.h diff --git a/cscs-checks/libraries/magma/magma-2.4/testing_zgemm.cpp b/cscs-checks/libraries/magma/src/testing_zgemm.cpp similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/testing_zgemm.cpp rename to cscs-checks/libraries/magma/src/testing_zgemm.cpp diff --git a/cscs-checks/libraries/magma/magma-2.4/testing_zsymmetrize.cpp b/cscs-checks/libraries/magma/src/testing_zsymmetrize.cpp similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/testing_zsymmetrize.cpp rename to cscs-checks/libraries/magma/src/testing_zsymmetrize.cpp diff --git a/cscs-checks/libraries/magma/magma-2.4/testing_ztranspose.cpp b/cscs-checks/libraries/magma/src/testing_ztranspose.cpp similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/testing_ztranspose.cpp rename to cscs-checks/libraries/magma/src/testing_ztranspose.cpp diff --git a/cscs-checks/libraries/magma/magma-2.4/testing_zunmbr.cpp b/cscs-checks/libraries/magma/src/testing_zunmbr.cpp similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/testing_zunmbr.cpp rename to cscs-checks/libraries/magma/src/testing_zunmbr.cpp diff --git a/cscs-checks/libraries/magma/magma-2.4/testings.h b/cscs-checks/libraries/magma/src/testings.h similarity index 100% rename from cscs-checks/libraries/magma/magma-2.4/testings.h rename to cscs-checks/libraries/magma/src/testings.h