In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from IPython import display

import os
import sys
import imp
import time
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import MultiStepLR, StepLR
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import Variable

import torchvision as tv
import lib.pytorch_trainer as ptt

use_gpu = torch.cuda.is_available()
print('GPU available:', use_gpu)
print('torch', torch.version.__version__)
print('Python', sys.version)


GPU available: False
torch 0.2.0_4
Python 3.6.2 |Anaconda custom (x86_64)| (default, Sep 21 2017, 18:29:43) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]


In [2]:
from sklearn.base import BaseEstimator
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import PredefinedSplit

import scipy.stats as st

from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer


In [3]:
# MNIST Dataset
# =============
train_ds = tv.datasets.MNIST('/data/datasets/MNIST/', train=True, 
                             transform=tv.transforms.ToTensor(), 
                             target_transform=None, 
                             download=True)

test_ds = tv.datasets.MNIST('/data/datasets/MNIST/', train=False, 
                             transform=tv.transforms.ToTensor(), 
                             target_transform=None, 
                             download=True)

def torch_datasets_to_sklearn_cv_data(train_ds, valid_ds):
    n_train, n_valid = len(train_ds), len(valid_ds)
    x, y = train_ds[0]
    all_tuples = list(train_ds) + list(valid_ds)
    all_labels = np.array([y for _, y in all_tuples], np.int)
    all_data = torch.cat([w.view(1, *x.shape) for w, _ in all_tuples], 0).numpy()
    valid_fold = np.zeros_like(all_labels)
    valid_fold[:n_train] = -1
    psplit = PredefinedSplit(valid_fold)
    return all_data, all_labels, psplit

def get_dataset(use_test_dataset=False, n_samples=60000, n_splits=6):
    if use_test_dataset:
        # using the test dataset as a fixed validation set (only one split)
        all_data, all_labels, psplit = torch_datasets_to_sklearn_cv_data(train_ds, valid_ds)

    else:
        all_labels = np.array([y for _, y in list(train_ds)], np.int)[:n_samples]
        all_data = torch.cat([w.view(1, 1, 28, 28) for w, _ in list(train_ds)], 0).numpy()[:n_samples]
        psplit = n_splits

    print('Training Data:   shape {}, min {:.3f}, max {:.3f}'.format(all_data.shape, 
                                                                     all_data.min(), all_data.max()))
    print('Training Labels: shape {}, min {}, max {}'.format(all_labels.shape, 
                                                             all_labels.min(), all_labels.max()))
    return all_data, all_labels, psplit


In [4]:
class MyNet(nn.Module):
    # Input size is (-1, 1, 28, 28)
    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv1 = nn.Conv2d(1, 6, 5)         # in_channels, out_channels, kernel_size
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool(x)
        x = x.view(-1, 16 * 4 * 4)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

    
class SklEstimator(BaseEstimator):
    
    skl_id = 0
    fit_num = 0
    
    def __init__(self, model_class=None, criterion_class='CrossEntropyLoss', optim_class='SGD', 
                 optim_lr=0.001, optim_momentum=0.9, weight_decay=0, 
                 sched_step=10, sched_gamma=1.0, mb_size=16, n_epochs=100, verbose=True):
        self.par_model_class = model_class
        self.par_criterion_class = criterion_class
        self.par_optim_class = optim_class
        self.par_optim_lr = optim_lr
        self.par_optim_momentum = optim_momentum
        self.par_weight_decay = weight_decay
        self.par_sched_step = sched_step
        self.par_sched_gamma = sched_gamma
        self.par_mb_size = mb_size
        self.par_n_epochs = n_epochs
        self.par_verbose = verbose
        
    def _initialize(self):
        SklEstimator.skl_id += 1
        self.idd = 'skl_model_{}'.format(SklEstimator.skl_id)
        
        if self.par_model_class is None:
            raise Exception('Model not specified.')
        
        self.model = self.par_model_class()
        
        if self.par_criterion_class == 'CrossEntropyLoss':
            self.criterion = nn.CrossEntropyLoss()
        elif self.par_criterion_class == 'MSELoss':
            self.criterion = nn.MSELoss()
        else:
            self.criterion = None
            raise Exception("Calma...")
            
        if self.par_optim_class == 'Adam':
            self.optim = torch.optim.Adam(self.model.parameters(), lr=self.par_optim_lr, 
                                          weight_decay=self.par_weight_decay)
        elif self.par_optim_class == 'SGD':
            self.optim = torch.optim.SGD(self.model.parameters(), lr=self.par_optim_lr, 
                                         momentum=self.par_optim_momentum, nesterov=True,
                                         weight_decay=self.par_weight_decay)
        else:
            self.optim = None
            raise Exception("Calma...")
            
        if self.par_sched_gamma < 1.0:
            self.lr_sched = StepLR(self.optim, step_size=self.par_sched_step, gamma=self.par_sched_gamma)
        else:
            self.lr_sched = None
        
        if self.par_verbose > 0:
            callbacks = [ptt.PrintCallback()]
        else:
            callbacks = None
        
        self.trainer = ptt.DeepNetTrainer(model=self.model, 
                                          criterion=self.criterion, 
                                          optimizer=self.optim, 
                                          lr_scheduler=self.lr_sched, 
                                          callbacks=callbacks)
    
    def get_params(self, deep):
        params = []
        for k, v in self.__dict__.items():
            if k.startswith('par_'):
                params.append((k[4:], v))
        return dict(params)
    
    def set_params(self, **params):
        for k, v in params.items():
            setattr(self, 'par_' + k, v)
        self._initialize()
        return self
    
    def fit(self, Xtrain, ytrain):
        SklEstimator.fit_num += 1
        self.t0 = time.time()
        print('\n***** Fit #{} '.format(SklEstimator.fit_num))
        Xtra = torch.from_numpy(Xtrain)
        ytra = torch.from_numpy(ytrain)
        self.trainer.fit(self.par_n_epochs, Xtra, ytra, batch_size=self.par_mb_size, shuffle=True)
    
    def score(self, Xtrain, ytrain):
        Xtra = torch.from_numpy(Xtrain)
        ytra = torch.from_numpy(ytrain)
        loss = self.trainer.score(Xtra, ytra, batch_size=self.par_mb_size)
        print('***** Score = {:.5f} [{} samples]  {:.2f}s'.format(loss, ytra.shape[0], time.time() - self.t0))
        return loss


In [5]:
use_test_dataset = False
n_samples = 6000
n_splits = 3

parameters = {
    'model_class':     Categorical([MyNet]),
    'optim_class':     Categorical(['Adam']), 
    'optim_lr':        Real(0.0001, 0.005, prior='uniform'),
    'weight_decay':    Real(0.0, 0.01, prior='uniform'),
    'sched_gamma':     Real(0.5, 1.0, prior='uniform'),
    'sched_step':      Integer(10, 10), 
    'mb_size':         Integer(100, 100),
    'n_epochs':        Integer(10, 10),
}

all_data, all_labels, psplit = get_dataset(use_test_dataset=use_test_dataset, 
                                           n_samples=n_samples, n_splits=n_splits)

validator = BayesSearchCV(SklEstimator(verbose=0), 
                          search_spaces=parameters, 
                          n_iter=20,
                          cv=psplit,
                          random_state=20171024,
                          verbose=3)


Training Data:   shape (6000, 1, 28, 28), min 0.000, max 1.000
Training Labels: shape (6000,), min 0, max 9


In [6]:
try:
    validator.fit(all_data, all_labels)

except KeyboardInterrupt:
    print('Interrupted!')

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00133969007934, sched_gamma=0.95756595501, sched_step=10, weight_decay=0.00137361722933 

***** Fit #1 
***** Score = -0.18722 [2000 samples]  13.28s
***** Score = -0.08612 [4000 samples]  13.68s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00133969007934, sched_gamma=0.95756595501, sched_step=10, weight_decay=0.00137361722933, score=-0.18721648156642914, total=  13.3s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00133969007934, sched_gamma=0.95756595501, sched_step=10, weight_decay=0.00137361722933 

***** Fit #2 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   13.7s remaining:    0.0s


***** Score = -0.14672 [2000 samples]  13.98s
***** Score = -0.09942 [4000 samples]  14.44s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00133969007934, sched_gamma=0.95756595501, sched_step=10, weight_decay=0.00137361722933, score=-0.14671684186905623, total=  14.0s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00133969007934, sched_gamma=0.95756595501, sched_step=10, weight_decay=0.00137361722933 

***** Fit #3 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   28.2s remaining:    0.0s


***** Score = -0.18343 [2000 samples]  13.98s
***** Score = -0.09940 [4000 samples]  14.42s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00133969007934, sched_gamma=0.95756595501, sched_step=10, weight_decay=0.00137361722933, score=-0.18342508487403392, total=  14.0s

***** Fit #4 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   42.6s finished


Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00356016135815, sched_gamma=0.907967287278, sched_step=10, weight_decay=0.0050081160186 

***** Fit #5 
***** Score = -0.18259 [2000 samples]  14.61s
***** Score = -0.08052 [4000 samples]  15.11s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00356016135815, sched_gamma=0.907967287278, sched_step=10, weight_decay=0.0050081160186, score=-0.18258965704590083, total=  14.6s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00356016135815, sched_gamma=0.907967287278, sched_step=10, weight_decay=0.0050081160186 

***** Fit #6 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   15.1s remaining:    0.0s


***** Score = -0.13105 [2000 samples]  14.56s
***** Score = -0.07202 [4000 samples]  14.98s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00356016135815, sched_gamma=0.907967287278, sched_step=10, weight_decay=0.0050081160186, score=-0.13104738146066666, total=  14.6s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00356016135815, sched_gamma=0.907967287278, sched_step=10, weight_decay=0.0050081160186 

***** Fit #7 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   30.1s remaining:    0.0s


***** Score = -0.19566 [2000 samples]  14.59s
***** Score = -0.07911 [4000 samples]  15.02s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00356016135815, sched_gamma=0.907967287278, sched_step=10, weight_decay=0.0050081160186, score=-0.19565789327025412, total=  14.6s

***** Fit #8 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   45.1s finished


Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00302405690814, sched_gamma=0.985706474557, sched_step=10, weight_decay=0.00615227320107 

***** Fit #9 
***** Score = -0.12945 [2000 samples]  14.21s
***** Score = -0.06661 [4000 samples]  14.65s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00302405690814, sched_gamma=0.985706474557, sched_step=10, weight_decay=0.00615227320107, score=-0.1294479535892606, total=  14.2s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00302405690814, sched_gamma=0.985706474557, sched_step=10, weight_decay=0.00615227320107 

***** Fit #10 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   14.7s remaining:    0.0s


***** Score = -0.16643 [2000 samples]  14.74s
***** Score = -0.12180 [4000 samples]  15.20s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00302405690814, sched_gamma=0.985706474557, sched_step=10, weight_decay=0.00615227320107, score=-0.16642842292785645, total=  14.7s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00302405690814, sched_gamma=0.985706474557, sched_step=10, weight_decay=0.00615227320107 

***** Fit #11 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   29.9s remaining:    0.0s


***** Score = -0.20669 [2000 samples]  15.02s
***** Score = -0.10050 [4000 samples]  15.47s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00302405690814, sched_gamma=0.985706474557, sched_step=10, weight_decay=0.00615227320107, score=-0.20668958481401206, total=  15.0s

***** Fit #12 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   45.4s finished


Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000409789991198, sched_gamma=0.699839809152, sched_step=10, weight_decay=0.00743350340545 

***** Fit #13 
***** Score = -0.33494 [2000 samples]  14.18s
***** Score = -0.26399 [4000 samples]  14.65s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000409789991198, sched_gamma=0.699839809152, sched_step=10, weight_decay=0.00743350340545, score=-0.3349364668130875, total=  14.2s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000409789991198, sched_gamma=0.699839809152, sched_step=10, weight_decay=0.00743350340545 

***** Fit #14 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   14.7s remaining:    0.0s


***** Score = -0.33441 [2000 samples]  14.93s
***** Score = -0.32446 [4000 samples]  15.37s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000409789991198, sched_gamma=0.699839809152, sched_step=10, weight_decay=0.00743350340545, score=-0.33440918028354644, total=  14.9s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000409789991198, sched_gamma=0.699839809152, sched_step=10, weight_decay=0.00743350340545 

***** Fit #15 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   30.0s remaining:    0.0s


***** Score = -0.32865 [2000 samples]  15.08s
***** Score = -0.26935 [4000 samples]  15.53s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000409789991198, sched_gamma=0.699839809152, sched_step=10, weight_decay=0.00743350340545, score=-0.32865416407585146, total=  15.1s

***** Fit #16 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   45.6s finished


Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00408485241484, sched_gamma=0.712167751793, sched_step=10, weight_decay=0.00815768522986 

***** Fit #17 
***** Score = -0.17023 [2000 samples]  14.50s
***** Score = -0.09070 [4000 samples]  14.96s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00408485241484, sched_gamma=0.712167751793, sched_step=10, weight_decay=0.00815768522986, score=-0.17022814340889453, total=  14.5s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00408485241484, sched_gamma=0.712167751793, sched_step=10, weight_decay=0.00815768522986 

***** Fit #18 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   15.0s remaining:    0.0s


***** Score = -0.27835 [2000 samples]  15.42s
***** Score = -0.21955 [4000 samples]  15.88s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00408485241484, sched_gamma=0.712167751793, sched_step=10, weight_decay=0.00815768522986, score=-0.2783523313701153, total=  15.4s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00408485241484, sched_gamma=0.712167751793, sched_step=10, weight_decay=0.00815768522986 

***** Fit #19 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   30.9s remaining:    0.0s


***** Score = -0.18424 [2000 samples]  15.44s
***** Score = -0.09355 [4000 samples]  15.89s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00408485241484, sched_gamma=0.712167751793, sched_step=10, weight_decay=0.00815768522986, score=-0.18423559926450253, total=  15.4s

***** Fit #20 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   46.8s finished


Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000938416974536, sched_gamma=0.537881363552, sched_step=10, weight_decay=0.00541932132868 

***** Fit #21 
***** Score = -0.18660 [2000 samples]  14.57s
***** Score = -0.13417 [4000 samples]  15.04s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000938416974536, sched_gamma=0.537881363552, sched_step=10, weight_decay=0.00541932132868, score=-0.18660393729805946, total=  14.6s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000938416974536, sched_gamma=0.537881363552, sched_step=10, weight_decay=0.00541932132868 

***** Fit #22 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   15.1s remaining:    0.0s


***** Score = -0.18976 [2000 samples]  15.97s
***** Score = -0.17260 [4000 samples]  16.44s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000938416974536, sched_gamma=0.537881363552, sched_step=10, weight_decay=0.00541932132868, score=-0.1897582434117794, total=  16.0s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000938416974536, sched_gamma=0.537881363552, sched_step=10, weight_decay=0.00541932132868 

***** Fit #23 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   31.5s remaining:    0.0s


***** Score = -0.19721 [2000 samples]  15.59s
***** Score = -0.13051 [4000 samples]  16.06s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.000938416974536, sched_gamma=0.537881363552, sched_step=10, weight_decay=0.00541932132868, score=-0.19720817171037197, total=  15.6s

***** Fit #24 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   47.6s finished


Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00015556481324, sched_gamma=0.610170243118, sched_step=10, weight_decay=0.00246702151701 

***** Fit #25 
***** Score = -0.49050 [2000 samples]  15.32s
***** Score = -0.43427 [4000 samples]  15.80s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00015556481324, sched_gamma=0.610170243118, sched_step=10, weight_decay=0.00246702151701, score=-0.49050371050834657, total=  15.3s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00015556481324, sched_gamma=0.610170243118, sched_step=10, weight_decay=0.00246702151701 

***** Fit #26 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   15.8s remaining:    0.0s


***** Score = -0.45898 [2000 samples]  15.92s
***** Score = -0.46587 [4000 samples]  16.40s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00015556481324, sched_gamma=0.610170243118, sched_step=10, weight_decay=0.00246702151701, score=-0.45898110419511795, total=  15.9s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00015556481324, sched_gamma=0.610170243118, sched_step=10, weight_decay=0.00246702151701 

***** Fit #27 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   32.2s remaining:    0.0s


***** Score = -0.44850 [2000 samples]  15.92s
***** Score = -0.40663 [4000 samples]  16.39s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00015556481324, sched_gamma=0.610170243118, sched_step=10, weight_decay=0.00246702151701, score=-0.44849756360054016, total=  15.9s

***** Fit #28 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   48.6s finished


Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00404293898021, sched_gamma=0.676384635832, sched_step=10, weight_decay=0.00182885895743 

***** Fit #29 
***** Score = -0.13964 [2000 samples]  14.98s
***** Score = -0.03756 [4000 samples]  15.46s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00404293898021, sched_gamma=0.676384635832, sched_step=10, weight_decay=0.00182885895743, score=-0.13963800501078366, total=  15.0s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00404293898021, sched_gamma=0.676384635832, sched_step=10, weight_decay=0.00182885895743 

***** Fit #30 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   15.5s remaining:    0.0s


***** Score = -0.11896 [2000 samples]  15.78s
***** Score = -0.04374 [4000 samples]  16.26s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00404293898021, sched_gamma=0.676384635832, sched_step=10, weight_decay=0.00182885895743, score=-0.11896471194922924, total=  15.8s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00404293898021, sched_gamma=0.676384635832, sched_step=10, weight_decay=0.00182885895743 

***** Fit #31 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   31.8s remaining:    0.0s


***** Score = -0.17897 [2000 samples]  15.82s
***** Score = -0.05192 [4000 samples]  16.30s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00404293898021, sched_gamma=0.676384635832, sched_step=10, weight_decay=0.00182885895743, score=-0.1789700049906969, total=  15.8s

***** Fit #32 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   48.1s finished


Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00219097068527, sched_gamma=0.650773816506, sched_step=10, weight_decay=0.00896144155293 

***** Fit #33 
***** Score = -0.18198 [2000 samples]  15.10s
***** Score = -0.09691 [4000 samples]  15.58s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00219097068527, sched_gamma=0.650773816506, sched_step=10, weight_decay=0.00896144155293, score=-0.18198240213096142, total=  15.1s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00219097068527, sched_gamma=0.650773816506, sched_step=10, weight_decay=0.00896144155293 

***** Fit #34 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   15.6s remaining:    0.0s


***** Score = -0.15577 [2000 samples]  16.10s
***** Score = -0.13567 [4000 samples]  16.58s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00219097068527, sched_gamma=0.650773816506, sched_step=10, weight_decay=0.00896144155293, score=-0.15576829575002193, total=  16.1s
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00219097068527, sched_gamma=0.650773816506, sched_step=10, weight_decay=0.00896144155293 

***** Fit #35 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   32.2s remaining:    0.0s


***** Score = -0.16824 [2000 samples]  16.31s
***** Score = -0.11438 [4000 samples]  16.78s
[CV]  mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00219097068527, sched_gamma=0.650773816506, sched_step=10, weight_decay=0.00896144155293, score=-0.16823645047843455, total=  16.3s

***** Fit #36 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   49.0s finished


ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [None]:
validator.best_params_

In [None]:
validator.best_index_

In [None]:
validator.best_estimator_.score(all_data, all_labels)

In [None]:
validator.cv_results_

In [None]:
BayesSearchCV?