Merge pull request #3224 from pv/sparsetools-overflow

BUG: sparse: fix int32 overflows in sparsetools
scipy · Feb 1, 2014 · c35429e · c35429e
2 parents 2b1c323 + 8778612
commit c35429e
Show file tree

Hide file tree

Showing 9 changed files with 338 additions and 50 deletions.
diff --git a/HACKING.rst.txt b/HACKING.rst.txt
@@ -197,6 +197,9 @@ Using ``runtests.py`` is the recommended approach to running tests.
 There are also a number of alternatives to it, for example in-place
 build or installing to a virtualenv. See the FAQ below for details.
 
+Some of the tests in Scipy are very slow and need to be separately
+enabled. See the FAQ below for details.
+
 
 SciPy structure
 ===============
@@ -391,6 +394,22 @@ course use your favourite alternative debugger; run it on the
 ``python`` binary with arguments ``runtests.py -g --python mytest.py``.
 
 
+*How do I enable additional tests in Scipy?*
+
+Some of the tests in Scipy's test suite are very slow and not enabled
+by default. You can run the full suite via::
+
+    $ python runtests.py -g -m full
+
+This invokes the test suite ``import scipy; scipy.test("full")``,
+enabling also slow tests.
+
+There is an additional level of very slow tests (several minutes),
+which are disabled also in this case. They can be enabled by setting
+the environment variable ``SCIPY_XSLOW=1`` before running the test
+suite.
+
+
 .. _scikit-learn: http://scikit-learn.org
 
 .. _scikits-image: http://scikits-image.org/

diff --git a/scipy/sparse/sparsetools/bsr.h b/scipy/sparse/sparsetools/bsr.h
@@ -18,10 +18,11 @@ void bsr_diagonal(const I n_brow,
 	              const T Ax[],
 	                    T Yx[])
 {
-    const I D  = std::min(R*n_brow, C*n_bcol);
-    const I RC = R*C;
+    const npy_intp D  = std::min((npy_intp)R*n_brow,
+                                 (npy_intp)C*n_bcol);
+    const npy_intp RC = (npy_intp)R*C;
 
-    for(I i = 0; i < D; i++){
+    for (npy_intp i = 0; i < D; i++){
         Yx[i] = 0;
     }
 
@@ -31,8 +32,8 @@ void bsr_diagonal(const I n_brow,
         for(I i = 0; i < end; i++){
             for(I jj = Ap[i]; jj < Ap[i+1]; jj++){
                 if (i == Aj[jj]){
-                    I row = R*i;
-                    const T * val = Ax + RC*jj;
+                    npy_intp row = (npy_intp)R*i;
+                    const T * val = Ax + (npy_intp)RC*jj;
                     for(I bi = 0; bi < R; bi++){
                         Yx[row + bi] = *val;
                         val += C + 1;
@@ -47,9 +48,9 @@ void bsr_diagonal(const I n_brow,
         const I end = (D/R) + (D % R == 0 ? 0 : 1);
         for(I i = 0; i < end; i++){
             for(I jj = Ap[i]; jj < Ap[i+1]; jj++){
-                const I base_row = R*i;
-                const I base_col = C*Aj[jj];
-                const T * base_val = Ax + RC*jj;
+                const npy_intp base_row = (npy_intp)R*i;
+                const npy_intp base_col = (npy_intp)C*Aj[jj];
+                const T * base_val = Ax + (npy_intp)RC*jj;
 
                 for(I bi = 0; bi < R; bi++){
                     const I row = base_row + bi;
@@ -58,7 +59,7 @@ void bsr_diagonal(const I n_brow,
                     for(I bj = 0; bj < C; bj++){
                         const I col = base_col + bj;
                         if (row == col){
-                            Yx[row] = base_val[bi*C + bj];
+                            Yx[row] = base_val[(npy_intp)bi*C + bj];
                         }
                     }
                 }
@@ -85,16 +86,16 @@ void bsr_scale_rows(const I n_brow,
 	                      T Ax[],
 	                const T Xx[])
 {
-    const I RC = R*C;
+    const npy_intp RC = (npy_intp)R*C;
 
     for(I i = 0; i < n_brow; i++){
-        const T * row_scales = Xx + R*i;
+        const T * row_scales = Xx + (npy_intp)R*i;
 
         for(I jj = Ap[i]; jj < Ap[i+1]; jj++){
            T * block = Ax + RC*jj;
 
             for(I bi = 0; bi < R; bi++){
-                scal(C, row_scales[bi], block + C*bi);
+                scal(C, row_scales[bi], block + (npy_intp)C*bi);
             }
         }
     }
@@ -117,11 +118,11 @@ void bsr_scale_columns(const I n_brow,
 	                   const T Xx[])
 {
     const I bnnz = Ap[n_brow];
-    const I RC  = R*C;
+    const npy_intp RC  = (npy_intp)R*C;
     for(I i = 0; i < bnnz; i++){
-        const T * scales = Xx + C*Aj[i] ;
+        const T * scales = Xx + (npy_intp)C*Aj[i] ;
         T * block = Ax + RC*i;
-        
+
         for(I bi = 0; bi < R; bi++){
             for(I bj = 0; bj < C; bj++){
                 block[C*bi + bj] *= scales[bj];
@@ -162,8 +163,8 @@ void bsr_sort_indices(const I n_brow,
 
 
     const I nblks = Ap[n_brow];
-    const I RC    = R*C;
-    const I nnz   = RC*nblks;
+    const npy_intp RC    = (npy_intp)R*C;
+    const npy_intp nnz   = (npy_intp)RC*nblks;
 
     //compute permutation of blocks using CSR
     std::vector<I> perm(nblks);
@@ -224,7 +225,7 @@ void bsr_transpose(const I n_brow,
 	                     T Bx[])
 {  
     const I nblks = Ap[n_brow];
-    const I RC    = R*C;
+    const npy_intp RC    = (npy_intp)R*C;
 
     //compute permutation of blocks using tranpose(CSR)
     std::vector<I> perm_in (nblks);
@@ -240,7 +241,7 @@ void bsr_transpose(const I n_brow,
               T * Bx_blk = Bx + RC * i;
         for(I r = 0; r < R; r++){
             for(I c = 0; c < C; c++){
-                Bx_blk[c * R + r] = Ax_blk[r * C + c];
+                Bx_blk[(npy_intp)c * R + r] = Ax_blk[(npy_intp)r * C + c];
             }
         }
     }
@@ -263,16 +264,16 @@ void bsr_matmat_pass2(const I n_brow,  const I n_bcol,
         return;
     }
 
-    const I RC = R*C;
-    const I RN = R*N;
-    const I NC = N*C;
+    const npy_intp RC = (npy_intp)R*C;
+    const npy_intp RN = (npy_intp)R*N;
+    const npy_intp NC = (npy_intp)N*C;
 
     std::fill( Cx, Cx + RC * Cp[n_brow], 0 ); //clear output array
 
     std::vector<I>  next(n_bcol,-1);
     std::vector<T*> mats(n_bcol);
-    
-    I nnz = 0;
+
+    npy_intp nnz = 0;
     Cp[0] = 0;
 
     for(I i = 0; i < n_brow; i++){
@@ -357,7 +358,7 @@ void bsr_binop_bsr_general(const I n_brow, const I n_bcol,
                            const bin_op& op)
 {
     //Method that works for duplicate and/or unsorted indices
-    const I RC = R*C;
+    const npy_intp RC = (npy_intp)R*C;
 
     Cp[0] = 0;
     I nnz = 0;
@@ -446,7 +447,7 @@ void bsr_binop_bsr_canonical(const I n_brow, const I n_bcol,
                                    I Cp[],        I Cj[],       T2 Cx[],
                              const bin_op& op)
 {
-    const I RC = R*C;
+    const npy_intp RC = (npy_intp)R*C;
     T2 * result = Cx;
 
     Cp[0] = 0;
@@ -720,13 +721,13 @@ void bsr_matvec(const I n_brow,
         return;
     }
 
-    const I RC = R*C;
+    const npy_intp RC = (npy_intp)R*C;
     for(I i = 0; i < n_brow; i++){
-        T * y = Yx + R * i;
+        T * y = Yx + (npy_intp)R * i;
         for(I jj = Ap[i]; jj < Ap[i+1]; jj++){
             const I j = Aj[jj];
             const T * A = Ax + RC * jj;
-            const T * x = Xx + C * j;
+            const T * x = Xx + (npy_intp)C * j;
             gemv(R, C, A, x, y); // y += A*x
         }
     }
@@ -772,9 +773,9 @@ void bsr_matvecs(const I n_brow,
         return;
     }
 
-    const I A_bs = R*C;      //Ax blocksize
-    const I Y_bs = n_vecs*R; //Yx blocksize
-    const I X_bs = C*n_vecs; //Xx blocksize
+    const npy_intp A_bs = (npy_intp)R*C;      //Ax blocksize
+    const npy_intp Y_bs = (npy_intp)n_vecs*R; //Yx blocksize
+    const npy_intp X_bs = (npy_intp)C*n_vecs; //Xx blocksize
 
     for(I i = 0; i < n_brow; i++){
         T * y = Yx + Y_bs * i;

diff --git a/scipy/sparse/sparsetools/coo.h b/scipy/sparse/sparsetools/coo.h
@@ -114,12 +114,12 @@ void coo_todense(const I n_row,
 {
     if (!fortran) {
         for(I n = 0; n < nnz; n++){
-            Bx[ n_col * Ai[n] + Aj[n] ] += Ax[n];
+            Bx[ (npy_intp)n_col * Ai[n] + Aj[n] ] += Ax[n];
         }
     }
     else {
         for(I n = 0; n < nnz; n++){
-            Bx[ n_row * Aj[n] + Ai[n] ] += Ax[n];
+            Bx[ (npy_intp)n_row * Aj[n] + Ai[n] ] += Ax[n];
         }
     }
 }

diff --git a/scipy/sparse/sparsetools/csc.h b/scipy/sparse/sparsetools/csc.h
@@ -78,7 +78,7 @@ void csc_matvecs(const I n_row,
     for(I j = 0; j < n_col; j++){
         for(I ii = Ap[j]; ii < Ap[j+1]; ii++){
             const I i = Ai[ii];
-            axpy(n_vecs, Ax[ii], Xx + n_vecs * j, Yx + n_vecs * i);
+            axpy(n_vecs, Ax[ii], Xx + (npy_intp)n_vecs * j, Yx + (npy_intp)n_vecs * i);
         }
     }
 }

diff --git a/scipy/sparse/sparsetools/csr.h b/scipy/sparse/sparsetools/csr.h
@@ -456,13 +456,13 @@ void csr_toell(const I n_row,
 	                 I Bj[],
 	                 T Bx[])
 {
-    const I ell_nnz = row_length * n_row;
+    const npy_intp ell_nnz = (npy_intp)row_length * n_row;
     std::fill(Bj, Bj + ell_nnz, 0);
     std::fill(Bx, Bx + ell_nnz, 0);
 
     for(I i = 0; i < n_row; i++){
-        I * Bj_row = Bj + row_length * i;
-        T * Bx_row = Bx + row_length * i;
+        I * Bj_row = Bj + (npy_intp)row_length * i;
+        T * Bx_row = Bx + (npy_intp)row_length * i;
         for(I jj = Ap[i]; jj < Ap[i+1]; jj++){
             *Bj_row = Aj[jj];            
             *Bx_row = Ax[jj];            
@@ -535,16 +535,29 @@ void csr_matmat_pass1(const I n_row,
 
     I nnz = 0;
     for(I i = 0; i < n_row; i++){
+        npy_intp row_nnz = 0;
+
         for(I jj = Ap[i]; jj < Ap[i+1]; jj++){
             I j = Aj[jj];
             for(I kk = Bp[j]; kk < Bp[j+1]; kk++){
                 I k = Bj[kk];
                 if(mask[k] != i){
                     mask[k] = i;                        
-                    nnz++;
+                    row_nnz++;
                 }
             }
-        }         
+        }
+
+        npy_intp next_nnz = nnz + row_nnz;
+
+        if (row_nnz > NPY_MAX_INTP - nnz || next_nnz != (I)next_nnz) {
+            /*
+             * Index overflowed. Note that row_nnz <= n_col and cannot overflow
+             */
+            throw std::overflow_error("nnz of the result is too large");
+        }
+
+        nnz = next_nnz;
         Cp[i+1] = nnz;
     }
 }
@@ -1097,11 +1110,11 @@ void csr_matvecs(const I n_row,
 	                   T Yx[])
 {
     for(I i = 0; i < n_row; i++){
-        T * y = Yx + n_vecs * i;
+        T * y = Yx + (npy_intp)n_vecs * i;
         for(I jj = Ap[i]; jj < Ap[i+1]; jj++){
             const I j = Aj[jj];
             const T a = Ax[jj];
-            const T * x = Xx + n_vecs * j;
+            const T * x = Xx + (npy_intp)n_vecs * j;
             axpy(n_vecs, a, x, y);
         }
     }

diff --git a/scipy/sparse/sparsetools/dense.h b/scipy/sparse/sparsetools/dense.h
@@ -59,7 +59,7 @@ void gemv(const I m, const I n, const T * A, const T * x, T * y){
     for(I i = 0; i < m; i++){
         T dot = y[i];
         for(I j = 0; j < n; j++){
-            dot += A[n * i + j] * x[j];
+            dot += A[(npy_intp)n * i + j] * x[j];
         }
         y[i] = dot;
     }
@@ -70,11 +70,11 @@ template <class I, class T>
 void gemm(const I m, const I n, const I k, const T * A, const T * B, T * C){
     for(I i = 0; i < m; i++){
         for(I j = 0; j < n; j++){
-            T dot = C[n * i + j];
+            T dot = C[(npy_intp)n * i + j];
             for(I _d = 0; _d < k; _d++){
-                dot += A[k * i + _d] * B[n * _d + j];
+                dot += A[(npy_intp)k * i + _d] * B[(npy_intp)n * _d + j];
             }
-            C[n * i + j] = dot;
+            C[(npy_intp)n * i + j] = dot;
         }
     }
 }

diff --git a/scipy/sparse/sparsetools/dia.h b/scipy/sparse/sparsetools/dia.h
@@ -45,7 +45,7 @@ void dia_matvec(const I n_row,
 
         const I N = j_end - j_start;  //number of elements to process
 
-        const T * diag = diags + i*L + j_start;
+        const T * diag = diags + (npy_intp)i*L + j_start;
         const T * x = Xx + j_start;
               T * y = Yx + i_start;
 

diff --git a/scipy/sparse/sparsetools/scratch.h b/scipy/sparse/sparsetools/scratch.h
@@ -138,7 +138,7 @@ void csr_todense(const I  n_row,
                  const T  Ax[],
                        T  Mx[])
 {
-    I row_base = 0;
+    npy_intp row_base = 0;
     for(I i = 0; i < n_row; i++){
         I row_start = Ap[i];
         I row_end   = Ap[i+1];