From d6bd41648dbd41285c5c4336b6b526d113c7718e Mon Sep 17 00:00:00 2001
From: lewuathe
Date: Thu, 18 Jun 2015 21:52:50 +0900
Subject: [PATCH] Check existence of scipy.sparse

Make SciPy optional for pyspark.mllib.util: the module-level import of
scipy.sparse is wrapped in try/except and its availability is recorded
in _have_scipy.

MLUtils.appendBias no longer returns a scipy csc_matrix for SparseVector
input; the result is passed back through _convert_to_vector, and a
TypeError is raised when SciPy is not available. The sparse-vector test
is updated to expect a SparseVector equal to the input with 1.0 appended.

---
 python/pyspark/mllib/tests.py |  8 ++++----
 python/pyspark/mllib/util.py  | 14 ++++++++++++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index 58d6ee66a1515..6056fc40e291b 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -835,11 +835,11 @@ def test_append_bias_with_vector(self):
 
     def test_append_bias_with_sp_vector(self):
         data = Vectors.sparse(3, {0: 2.0, 2: 2.0})
-        # Returned value must be scipy.sparse matrix
+        expected = Vectors.sparse(4, {0: 2.0, 2: 2.0, 3: 1.0})
+        # Returned value must be SparseVector
         ret = MLUtils.appendBias(data)
-        self.assertEqual(ret.shape, (1, 4))
-        self.assertEqual(ret.toarray()[0][3], 1.0)
-        self.assertEqual(type(ret), sp.csc_matrix)
+        self.assertEqual(ret, expected)
+        self.assertEqual(type(ret), SparseVector)
 
     def test_load_vectors(self):
         import shutil
diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py
index 51e476572a3f6..8301afdcf0091 100644
--- a/python/pyspark/mllib/util.py
+++ b/python/pyspark/mllib/util.py
@@ -17,8 +17,13 @@
 
 import sys
 import numpy as np
-import scipy.sparse as sp
 import warnings
+try:
+    import scipy.sparse
+    _have_scipy = True
+except Exception:
+    # No usable SciPy in environment, but that's okay
+    _have_scipy = False
 
 if sys.version > '3':
     xrange = range
@@ -178,7 +183,12 @@ def appendBias(data):
         """
         vec = _convert_to_vector(data)
         if isinstance(vec, SparseVector):
-            return sp.csc_matrix(np.append(vec.toArray(), 1.0))
+            if _have_scipy:
+                biased = scipy.sparse.csc_matrix(np.append(vec.toArray(), 1.0))
+                return _convert_to_vector(biased.T)
+            else:
+                raise TypeError("Cannot append bias %s into sparse "
+                                "vector because of lack of scipy" % type(vec))
         elif isinstance(vec, Vector):
             vec = vec.toArray()
         return np.append(vec, 1.0).tolist()