diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py index 4c2cc26acde4b..1d0b16cade8bb 100644 --- a/python/pyspark/shuffle.py +++ b/python/pyspark/shuffle.py @@ -513,7 +513,7 @@ def load(f): chunks.append(load(open(path, 'rb'))) current_chunk = [] gc.collect() - batch /= 2 + batch //= 2 limit = self._next_limit() MemoryBytesSpilled += (used_memory - get_used_memory()) << 20 DiskBytesSpilled += os.path.getsize(path) diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 43e1144e25875..ea63a396da5b8 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -31,10 +31,8 @@ import time import zipfile import random -import itertools import threading import hashlib -from StringIO import StringIO from py4j.protocol import Py4JJavaError @@ -50,6 +48,11 @@ xrange = range basestring = str +if sys.version >= "3": + from io import StringIO +else: + from StringIO import StringIO + from pyspark.conf import SparkConf from pyspark.context import SparkContext