Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AttributeError: 'NeuralNetClassifier' object has no attribute 'score' #470

Merged
merged 8 commits into from May 28, 2019

Conversation

Projects
None yet
3 participants
@damienlancry
Copy link
Contributor

commented May 10, 2019

These changes make NeuralNetClassifier, NeuralNetBinaryClassifier, and NeuralNetRegressor inherit from sklearn.base.ClassifierMixin and sklearn.base.RegressorMixin to give them a score method. While making these changes I encountered the error Cannot import 'NeuralNetBinaryClassifier', so I added it to the __init__ file. This is in response to the issue #469 that I opened.

Here are working examples showing the efficiency of my changes:

#### Classification

from sklearn.model_selection import train_test_split
from skorch import NeuralNetClassifier 
from torch import nn, optim, tensor, cuda, Tensor
import torch.nn.functional as F 
from skorch.dataset import Dataset
from skorch.helper import predefined_split
from sklearn.datasets import load_digits, load_breast_cancer


digits = load_digits()
X_train, y_train = digits.data, digits.target
X_train = X_train.reshape((-1, 1, 8, 8)) / 16.
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2)

class ClassifierModule(nn.Module):
    """Small CNN for 8x8 digit images that outputs 10 class probabilities."""

    def __init__(self):
        super(ClassifierModule, self).__init__()
        # Two valid 3x3 convolutions shrink the 8x8 input to 4x4.
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, 3),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
        )
        self.d1 = nn.Dropout(0.25)
        self.f1 = nn.Linear(4 * 4 * 32, 64)
        self.d2 = nn.Dropout(0.5)
        self.f2 = nn.Linear(64, 10)

    def forward(self, x):
        # Convolve, flatten, then run the dropout/linear classification head.
        features = self.layer1(x).view(-1, 4 * 4 * 32)
        hidden = self.f1(self.d1(features))
        logits = self.f2(self.d2(hidden))
        # Probabilities over the 10 digit classes.
        return F.softmax(logits, dim=-1)

def tensorize_feats(*features, device="cuda"):
    """Yield each input array as a float32 tensor on *device*.

    The original version called ``.cuda()`` unconditionally, which raises on
    machines without a GPU; ``device`` keeps the old default behaviour while
    allowing ``device="cpu"``. The redundant ``list(...)`` wrap around the
    varargs tuple is also removed.
    """
    for array in features:
        yield tensor(array).float().to(device)
def tensorize_labels(*labels, device="cuda"):
    """Yield each input array as an int64 (long) tensor on *device*.

    ``device`` defaults to "cuda" to preserve the original behaviour but can
    be set to "cpu" so the example also runs without a GPU. The unnecessary
    ``list(...)`` copy of the varargs tuple is dropped.
    """
    for array in labels:
        yield tensor(array).long().to(device)
        
X_train, X_test = tensorize_feats(*(X_train, X_test))
y_train, y_test = tensorize_labels(*(y_train, y_test))

model = NeuralNetClassifier(ClassifierModule, 
                            max_epochs=100, 
                            batch_size=128, 
                            lr=0.001, 
                            device="cuda",
                            optimizer=optim.Adam, 
                            verbose=0
                           )

model.fit(X_train, y_train)
model.score(X_test.cpu(),y_test.cpu())

### Binary Classification

from skorch import NeuralNetBinaryClassifier 

import numpy as np  # missing from the original snippet; np.where below fails without it

# Restrict the digits dataset to classes 0 and 1 to get a binary problem,
# then reshape to NCHW and scale pixels (0-16) into [0, 1].
digits = load_digits()
X, y = digits.data, digits.target
idx = np.where(y < 2)[0]
X, y = X[idx], y[idx]
X = X.reshape((-1, 1, 8, 8)) / 16.



X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

class ClassifierModule(nn.Module):
    """Small CNN for 8x8 images emitting one raw logit per sample."""

    def __init__(self):
        super(ClassifierModule, self).__init__()
        # Two valid 3x3 convolutions shrink the 8x8 input to 4x4.
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, 3),
            nn.ReLU(),
            nn.Conv2d(16, 16, 3),
            nn.ReLU(),
        )
        self.d1 = nn.Dropout(0.25)
        self.f1 = nn.Linear(4 * 4 * 16, 32)
        self.d2 = nn.Dropout(0.5)
        self.f2 = nn.Linear(32, 1)

    def forward(self, x):
        features = self.layer1(x).view(-1, 4 * 4 * 16)
        logit = self.f2(self.d2(self.f1(self.d1(features))))
        # Flatten (N, 1) -> (N,): a 1-d logit vector per batch.
        return logit.view(-1)

def tensorize(*features, device="cuda"):
    """Yield each input array as a float32 tensor on *device*.

    The original called ``.cuda()`` unconditionally, which fails on CPU-only
    machines; ``device`` keeps the old default while allowing ``"cpu"``.
    The pointless ``list(...)`` around the varargs tuple is removed.
    """
    for array in features:
        yield tensor(array).float().to(device)
        
X_train, X_test = tensorize(*(X_train, X_test))
y_train, y_test = tensorize(*(y_train, y_test))

model = NeuralNetBinaryClassifier(ClassifierModule, 
                            max_epochs=100, 
                            batch_size=128, 
                            lr=0.001, 
                            device="cuda",
                            optimizer=optim.Adam,
                            verbose=0
                           )

model.fit(X_train, y_train)
model.score(X_test.cpu(),y_test.cpu())

### Regression

from skorch import NeuralNetRegressor

digits = load_digits()
X, y = digits.data, digits.target
X = X.reshape((-1, 1, 8, 8)) / 16.
y = y.reshape((-1, 1)) / 10.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


class RegressorModule(nn.Module):
    """Small CNN for 8x8 images that regresses one value per sample."""

    def __init__(self):
        super(RegressorModule, self).__init__()
        # Two valid 3x3 convolutions shrink the 8x8 input to 4x4.
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, 3),
            nn.ReLU(),
            nn.Conv2d(16, 16, 3),
            nn.ReLU(),
        )
        self.d1 = nn.Dropout(0.25)
        self.f1 = nn.Linear(4 * 4 * 16, 32)
        self.d2 = nn.Dropout(0.5)
        self.f2 = nn.Linear(32, 1)

    def forward(self, x):
        features = self.layer1(x).view(-1, 4 * 4 * 16)
        hidden = self.f1(self.d1(features))
        # Shape (N, 1): one regression target per sample, kept 2-d.
        return self.f2(self.d2(hidden))

def tensorize(*features, device="cuda"):
    """Yield each input array as a float32 tensor on *device*.

    ``device`` defaults to "cuda" for backward compatibility with the
    original (which called ``.cuda()`` unconditionally and therefore broke
    on CPU-only machines); pass ``device="cpu"`` to run without a GPU.
    """
    for array in features:
        yield tensor(array).float().to(device)
        
X_train, X_test = tensorize(*(X_train, X_test))
y_train, y_test = tensorize(*(y_train, y_test))

model = NeuralNetRegressor(RegressorModule, 
                            max_epochs=100, 
                            batch_size=128, 
                            lr=0.001, 
                            device="cuda",
                            optimizer=optim.Adam,
                            verbose=1
                          )

model.fit(X_train, y_train)
model.score(X_test.cpu(),y_test.cpu())
make NeuralNetClassifier, NeuralNetBinaryClassifier, and NeuralNetReg…
…ressor inherit from ClassifierMixin and RegressorMixin to gives them a score method
@BenjaminBossan

This comment has been minimized.

Copy link
Collaborator

commented May 11, 2019

Thank you for working on this.

Could you please

@damienlancry

This comment has been minimized.

Copy link
Contributor Author

commented May 13, 2019

Thank you for considering these changes.

Should I write tests in a specific file?
Are the examples i gave for classification, binary classification and regression sufficient tests?

@BenjaminBossan

This comment has been minimized.

Copy link
Collaborator

commented May 13, 2019

Should I write tests in a specific file?

Yes, please add them to test_classifier.py and test_regressor.py.

Are the examples i gave for classification, binary classification and regression sufficient tests?

I would say that these examples are a little bit too much even (from the point of view of LOCs and training time). Please consider using the existing fixtures for data and nets. E.g., look at this test here:

def test_predict_and_predict_proba(self, net_fit, data):
    X = data[0]
    y_proba = net_fit.predict_proba(X)
    assert np.allclose(y_proba.sum(1), 1, rtol=1e-5)

You can re-use the same net and data but instead of testing predict_proba, you would test score. You don't even need to test whether the score is good, just that its values conform to expectation (e.g. 0 <= p <= 1).

For the regression, I believe sklearn should be able to handle multioutput. Maybe you could add a minimal test for this as well. If you are unsure about something, just ask.

@damienlancry

This comment has been minimized.

Copy link
Contributor Author

commented May 15, 2019

ok one more question: I have no idea what dataset to use for the multi output regression

@BenjaminBossan

This comment has been minimized.

Copy link
Collaborator

commented May 15, 2019

You could use sklearn.dataset.make_regression(..., n_targets=3) for multiregression. As for the module, you could use skorch.toy.MLPModule with output_units=3.

@damienlancry

This comment has been minimized.

Copy link
Contributor Author

commented May 21, 2019

Hi
I added tests, updated the changes.md and the docs.
Here are simpler working examples (I added one for the multi target regression case):

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification, make_regression
from skorch.toy import make_classifier, make_binary_classifier, make_regressor
from skorch import NeuralNetClassifier, NeuralNetBinaryClassifier, NeuralNetRegressor
from torch import tensor, optim, nn

###################### CLASSIFICATION ###############################
n_classes  = 3
n_features = 5
X, y = make_classification(n_samples=10000, n_features=n_features, n_informative=3, n_classes=n_classes)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_test = tensor(X_train).float().cuda(), tensor(X_test).float()
y_train, y_test = tensor(y_train).long().cuda(),  tensor(y_test).long()

Net  = make_classifier(output_nonlin=None, input_units=n_features, output_units=n_classes, num_hidden=2)
model= NeuralNetClassifier(Net, device="cuda", optimizer=optim.Adam, criterion=nn.CrossEntropyLoss)

model.fit(X_train, y_train)
print(model.score(X_test,y_test))

###################### BINARY CLASSIFICATION #######################
n_classes  = 2
n_features = 5
X, y = make_classification(n_samples=10000, n_features=n_features, n_informative=3, n_classes=n_classes)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_test = tensor(X_train).float().cuda(), tensor(X_test).float()
y_train, y_test = tensor(y_train).float().cuda(), tensor(y_test).float()

Net  = make_binary_classifier(input_units=n_features, num_hidden=2, output_units=1)
model= NeuralNetBinaryClassifier(Net, device="cuda",optimizer=optim.Adam, criterion=nn.BCEWithLogitsLoss)

model.fit(X_train, y_train)
print(model.score(X_test, y_test))

###################### REGRESSION ##################################
n_features = 5
X, y = make_regression(n_samples=10000, n_features=n_features)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_test = tensor(X_train).float().cuda(), tensor(X_test).float()
y_train, y_test = tensor(y_train).float().cuda(), tensor(y_test).float()

y_train, y_test = y_train.view(-1, 1), y_test.view(-1, 1)

Net  = make_regressor(input_units=n_features)
model= NeuralNetRegressor(Net, device="cuda", optimizer=optim.Adam)

model.fit(X_train, y_train)
print(model.score(X_test, y_test))

###################### MULTI TARGET REGRESSION #####################
n_features = 5
n_targets  = 3
X, y = make_regression(n_samples=10000, n_features=n_features, n_targets=n_targets)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_test = tensor(X_train).float().cuda(), tensor(X_test).float()
y_train, y_test = tensor(y_train).float().cuda(), tensor(y_test).float()

y_train, y_test = y_train.view(-1, n_targets), y_test.view(-1, n_targets)

Net  = make_regressor(input_units=n_features, output_units=n_targets)
model= NeuralNetRegressor(Net, device="cuda", optimizer=optim.Adam)

model.fit(X_train, y_train)
print(model.score(X_test, y_test))
@damienlancry

This comment has been minimized.

Copy link
Contributor Author

commented May 21, 2019

It seems that "All checks have failed" and I have no idea why. I am not used to contributing to open source projects like this, so I do not really know where to look to diagnose the problem. Any suggestion?

@BenjaminBossan

This comment has been minimized.

Copy link
Collaborator

commented May 21, 2019

Thanks for the contribution, I will review this soon. Regarding the failing CI, this is unrelated to your changes, so don't worry about it.

@BenjaminBossan
Copy link
Collaborator

left a comment

So this looks good. I have made some comments about minor things you would need to address before merging.

One thing that is still missing is the unit test for multioutput regression. The test can be based on the example you gave, but you don't need to cast the numpy arrays to torch tensors for that. Also, training just 1 epoch should be enough to prove it works, otherwise the test takes too long.


This method returns the mean accuracy on the given test data and labels for
classifiers and the coefficient of determination R^2 of the prediction for
regressors.

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 21, 2019

Collaborator

Could you please add that NeuralNet has no score method and if a user needs it, they need to implement it themselves?

X, y = data
accuracy = net.score(X, y)
assert 0. <= accuracy <= 1.
return accuracy

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 21, 2019

Collaborator

No need for the return

X, y = data
accuracy = net.score(X, y)
assert 0. <= accuracy <= 1.
return accuracy

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 21, 2019

Collaborator

same as above

X, y = data
r2_score = net.score(X, y)
assert r2_score <= 1.
return r2_score

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 21, 2019

Collaborator

same as above

@BenjaminBossan

This comment has been minimized.

Copy link
Collaborator

commented May 21, 2019

And I forgot to mention: For the CI to pass (and presumably your local tests too), you need to either merge the current master or rebase on it.

damienlancry added some commits May 22, 2019

@BenjaminBossan
Copy link
Collaborator

left a comment

Thanks, you're almost there. Unfortunately, the test doesn't work right now. You probably didn't notice because pytest doesn't discover the test (since it's not called test_*). With the changes in the comments, the test should hopefully pass.

@@ -359,7 +359,8 @@ score(X, y)

This method returns the mean accuracy on the given test data and labels for
classifiers and the coefficient of determination R^2 of the prediction for
regressors.
regressors. NeuralNet Class still has no score method. If one needs it,

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 22, 2019

Collaborator

Could you please replace "NeuralNet Class" with ":class:`.NeuralNetClassifier`", this should create the correct link in the docs.

@@ -359,7 +359,8 @@ score(X, y)

This method returns the mean accuracy on the given test data and labels for
classifiers and the coefficient of determination R^2 of the prediction for
regressors.
regressors. NeuralNet Class still has no score method. If one needs it,
one needs to implement it himself.

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 22, 2019

Collaborator

We prefer to speak to the user directly in the docs: "If you need it, you have to implement it yourself."

@@ -90,4 +107,8 @@ def test_score(self, net, data):
X, y = data
r2_score = net.score(X, y)
assert r2_score <= 1.
return r2_score

def multi_target_test_score(self, multi_target_net, multi_target_data):

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 22, 2019

Collaborator

All tests have to be named test_* for pytest to discover them. You need to rename the test to something like test_multi_target_score.

def multi_target_regression_data():
X, y = make_regression(
1000, 20, n_targets=3, n_informative=10, bias=0, random_state=0)
X, y = X.astype(np.float32), y.astype(np.float32).reshape(-1, 1)

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 22, 2019

Collaborator

You should not reshape the y data to (-1, 1). This will make it a 1-dimensional array, defeating the purpose of multioutput regression.

@@ -78,6 +78,15 @@ def regression_data():
yt = StandardScaler().fit_transform(y)
return Xt, yt

@pytest.fixture(scope='module')
def multi_target_regression_data():

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 22, 2019

Collaborator

In sklearn lingo, this is called multioutput, not multitarget

@@ -36,6 +36,23 @@ def net(self, net_cls, module_cls):
lr=0.1,
)

@pytest.fixture(scope='module')
def multi_target_data(self, multi_target_regression_data):

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 22, 2019

Collaborator

Since this fixture doesn't really do anything, you can remove it. Below, in the test function that uses the data, you can directly reference the fixture you defined in conftest.py.

Show resolved Hide resolved skorch/tests/test_regressor.py Outdated
Show resolved Hide resolved skorch/tests/test_regressor.py Outdated
-updated neuralnet.rst
-fixed pytest recognition of multioutput regression unit test
-fixed reshaping
-renamed multioutput
-deleted useless fixture
-maxepochs to 1
-called fit before score
@damienlancry

This comment has been minimized.

Copy link
Contributor Author

commented May 23, 2019

Again the checks are failing, I am not sure if this is because I messed up when pushing my changes or if this is because there really is a failing test... Sorry about that

@BenjaminBossan

This comment has been minimized.

Copy link
Collaborator

commented May 23, 2019

Hey, no worries, this is why there are automatic tests.

The error is that you want to use the fixture multioutput_data, but in conftest it is called multioutput_regression_data. You should probably use the latter name, since it's more descriptive.

In general, you can also run the tests locally before pushing to catch any such errors. To do that, follow the instructions for developers in the installation section in the README. Or you could click on Details link next to the failing Travis CI check and inspect the error there.

@damienlancry

This comment has been minimized.

Copy link
Contributor Author

commented May 24, 2019

ok thanks for the instructions I think I fixed the remaining problems now :)

@BenjaminBossan
Copy link
Collaborator

left a comment

Just two minor things that I haven't seen the last time, then it's finished.

def multioutput_regression_data():
X, y = make_regression(
1000, 20, n_targets=3, n_informative=10, bias=0, random_state=0)
X, y = X.astype(np.float32), y.astype(np.float32).reshape(-1, 3)

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 24, 2019

Collaborator

The .reshape(-1, 3) should be unnecessary.

This comment has been minimized.

Copy link
@damienlancry

damienlancry May 27, 2019

Author Contributor

Yeah right


This method returns the mean accuracy on the given test data and labels for
classifiers and the coefficient of determination R^2 of the prediction for
regressors. :class:`.NeuralNet` Class still has no score method. If you need it,

This comment has been minimized.

Copy link
@BenjaminBossan

BenjaminBossan May 24, 2019

Collaborator

Class is not necessary.

@thomasjpfan
Copy link
Member

left a comment

Minor comment, otherwise LGTM

score(X, y)
^^^^^^^^^^^

This method returns the mean accuracy on the given test data and labels for

This comment has been minimized.

Copy link
@thomasjpfan

thomasjpfan May 24, 2019

Member

X could be test or training data.

Suggested change
This method returns the mean accuracy on the given test data and labels for
This method returns the mean accuracy on the given data and labels for

This comment has been minimized.

Copy link
@damienlancry

damienlancry May 27, 2019

Author Contributor

True True True

-updated neuralnet.rst
-removed reshape
@damienlancry

This comment has been minimized.

Copy link
Contributor Author

commented May 28, 2019

How about this last commit?

@BenjaminBossan

This comment has been minimized.

Copy link
Collaborator

commented May 28, 2019

Great, thank you for incorporating all the requested changes.

@BenjaminBossan BenjaminBossan merged commit c09546a into skorch-dev:master May 28, 2019

1 check passed

Travis CI - Pull Request Build Passed
Details
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.