-
-
Notifications
You must be signed in to change notification settings - Fork 25.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MRG] PyPy support #11010
[MRG] PyPy support #11010
Changes from 52 commits
6e2f4df
13c2469
be622fa
5d7d25d
2857205
cb02f29
cb56013
7f381fc
40b12e7
f482cee
85e4ee7
dd3647d
7222ce7
11cc6fb
f86c60e
3d99fca
37a0294
8e90d56
275c6d7
7e80717
46878e2
4b239d6
c4c2995
456f94d
1b95ac9
02454b9
9a5d71e
5faf55e
b5fbe28
5958a23
ef14a60
2b98923
0bc6753
a300228
9c29561
ac8c286
2faea96
006ecc0
218c529
3634eaf
695585b
a7dd224
4636266
f3007a3
eb85b96
9cc0970
7cdc822
0d971d4
6c95101
5702aac
82302d6
dbc192c
4931d94
47f9f76
b538e8e
122f38b
71ec01c
9f9aebd
33f2f3a
66fcbe7
40ada79
b71e79f
6390cf6
ae76d01
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/usr/bin/env bash | ||
set -x | ||
set -e | ||
|
||
apt-get -yq update | ||
apt-get -yq install libatlas-dev libatlas-base-dev liblapack-dev gfortran | ||
|
||
pip install virtualenv | ||
|
||
if command -v pypy3; then | ||
virtualenv -p $(command -v pypy3) pypy-env | ||
elif command -v pypy; then | ||
virtualenv -p $(command -v pypy) pypy-env | ||
fi | ||
|
||
source pypy-env/bin/activate | ||
|
||
python --version | ||
which python | ||
|
||
pip install --extra-index https://antocuni.github.io/pypy-wheels/ubuntu numpy==1.14.4 Cython pytest | ||
pip install "scipy>=1.1.0" sphinx numpydoc docutils | ||
pip install -e . | ||
|
||
make test |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,8 +41,12 @@ | |
|
||
VERSION = sklearn.__version__ | ||
|
||
SCIPY_MIN_VERSION = '0.13.3' | ||
NUMPY_MIN_VERSION = '1.8.2' | ||
if '__pypy__' in sys.modules: | ||
SCIPY_MIN_VERSION = '1.1.0' | ||
NUMPY_MIN_VERSION = '1.4.0' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 1.14.0? |
||
else: | ||
SCIPY_MIN_VERSION = '0.13.3' | ||
NUMPY_MIN_VERSION = '1.8.2' | ||
|
||
|
||
# Optional setuptools features | ||
|
@@ -185,6 +189,10 @@ def setup_package(): | |
'Programming Language :: Python :: 3.4', | ||
'Programming Language :: Python :: 3.5', | ||
'Programming Language :: Python :: 3.6', | ||
('Programming Language :: Python :: ' | ||
'Implementation :: CPython'), | ||
('Programming Language :: Python :: ' | ||
'Implementation :: PyPy') | ||
], | ||
cmdclass=cmdclass, | ||
install_requires=[ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
from sklearn.utils.testing import assert_raises | ||
from sklearn.utils.testing import assert_raises_regex | ||
from sklearn.utils.testing import assert_in | ||
from sklearn.utils.testing import fails_if_pypy | ||
from sklearn.utils.fixes import sp_version | ||
|
||
import sklearn | ||
|
@@ -30,6 +31,8 @@ | |
invalidfile = os.path.join(currdir, "data", "svmlight_invalid.txt") | ||
invalidfile2 = os.path.join(currdir, "data", "svmlight_invalid_order.txt") | ||
|
||
pytestmark = fails_if_pypy | ||
|
||
|
||
def test_load_svmlight_file(): | ||
X, y = load_svmlight_file(datafile) | ||
|
@@ -119,7 +122,8 @@ def test_load_compressed(): | |
with NamedTemporaryFile(prefix="sklearn-test", suffix=".gz") as tmp: | ||
tmp.close() # necessary under windows | ||
with open(datafile, "rb") as f: | ||
shutil.copyfileobj(f, gzip.open(tmp.name, "wb")) | ||
with gzip.open(tmp.name, "wb") as fh_out: | ||
shutil.copyfileobj(f, fh_out) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the record, not using a context manager here results in an empty file when
|
||
Xgz, ygz = load_svmlight_file(tmp.name) | ||
# because we "close" it manually and write to it, | ||
# we need to remove it manually. | ||
|
@@ -130,7 +134,8 @@ def test_load_compressed(): | |
with NamedTemporaryFile(prefix="sklearn-test", suffix=".bz2") as tmp: | ||
tmp.close() # necessary under windows | ||
with open(datafile, "rb") as f: | ||
shutil.copyfileobj(f, BZ2File(tmp.name, "wb")) | ||
with BZ2File(tmp.name, "wb") as fh_out: | ||
shutil.copyfileobj(f, fh_out) | ||
Xbz, ybz = load_svmlight_file(tmp.name) | ||
# because we "close" it manually and write to it, | ||
# we need to remove it manually. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -931,12 +931,14 @@ def _resize_state(self): | |
raise ValueError('resize with smaller n_estimators %d < %d' % | ||
(total_n_estimators, self.estimators_[0])) | ||
|
||
self.estimators_.resize((total_n_estimators, self.loss_.K)) | ||
self.train_score_.resize(total_n_estimators) | ||
self.estimators_ = np.resize(self.estimators_, | ||
(total_n_estimators, self.loss_.K)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure the difference here between
Maybe @amueller or @glemaitre would have an opinion?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Your comment mixes up Probably numpy should never have supported such a dangerous operation in the first place, but these things happen... On CPython, numpy can use the refcount to check if the operation is even possibly-maybe safe, and only allows it if there are no other references to the array. On PyPy, there are no refcounts, so numpy is conservative and assumes views might exist. If you're really sure that this is what you want to do, and are prepared to accept the risks, then you can pass There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the insightful comment!
Yes, definitely I meant |
||
self.train_score_ = np.resize(self.train_score_, total_n_estimators) | ||
if (self.subsample < 1 or hasattr(self, 'oob_improvement_')): | ||
# if do oob resize arrays or create new if not available | ||
if hasattr(self, 'oob_improvement_'): | ||
self.oob_improvement_.resize(total_n_estimators) | ||
self.oob_improvement_ = np.resize(self.oob_improvement_, | ||
total_n_estimators) | ||
else: | ||
self.oob_improvement_ = np.zeros((total_n_estimators,), | ||
dtype=np.float64) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,9 +7,15 @@ | |
import numpy as np | ||
import scipy.sparse as sp | ||
|
||
from . import _hashing | ||
from ..utils import IS_PYPY | ||
from ..base import BaseEstimator, TransformerMixin | ||
|
||
if not IS_PYPY: | ||
from ._hashing import transform as _hashing_transform | ||
else: | ||
def _hashing_transform(*args, **kwargs): | ||
raise NotImplementedError('FeatureHasher is not compatible with PyPy.') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would help if this error message could also include the URL of a github issue that explains why this cannot be currently be implemented with the current versions of PyPy & Cython. |
||
|
||
|
||
def _iteritems(d): | ||
"""Like d.iteritems, but accepts any collections.Mapping.""" | ||
|
@@ -155,7 +161,7 @@ def transform(self, raw_X): | |
elif self.input_type == "string": | ||
raw_X = (((f, 1) for f in x) for x in raw_X) | ||
indices, indptr, values = \ | ||
_hashing.transform(raw_X, self.n_features, self.dtype, | ||
_hashing_transform(raw_X, self.n_features, self.dtype, | ||
self.alternate_sign) | ||
n_samples = indptr.shape[0] - 1 | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import os | ||
import sys | ||
|
||
|
||
def configuration(parent_package='', top_path=None): | ||
|
@@ -10,10 +11,11 @@ def configuration(parent_package='', top_path=None): | |
if os.name == 'posix': | ||
libraries.append('m') | ||
|
||
config.add_extension('_hashing', | ||
sources=['_hashing.pyx'], | ||
include_dirs=[numpy.get_include()], | ||
libraries=libraries) | ||
if "__pypy__" not in sys.modules: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be IS_PYPY There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh wait this is setup, never mind... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still, the following is cleaner: platform.python_implementation() == 'PyPy' |
||
config.add_extension('_hashing', | ||
sources=['_hashing.pyx'], | ||
include_dirs=[numpy.get_include()], | ||
libraries=libraries) | ||
config.add_subpackage("tests") | ||
|
||
return config |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
platform.python_implementation() == 'PyPy'