In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from IPython import display

import os
import sys
import imp
import time
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import MultiStepLR, StepLR
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import Variable

import torchvision as tv
import lib.pytorch_trainer as ptt

use_gpu = torch.cuda.is_available()
print('GPU available:', use_gpu)
print('torch', torch.version.__version__)
print('Python', sys.version)


GPU available: True
torch 0.2.0_4
Python 3.6.1 |Anaconda custom (64-bit)| (default, May 11 2017, 13:09:58) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]


In [2]:
from sklearn.base import BaseEstimator
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import PredefinedSplit
from sklearn.utils import shuffle

import scipy.stats as st

from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer


In [3]:
# MNIST Dataset
# =============
train_ds = tv.datasets.MNIST('/data/datasets/MNIST/', train=True, 
                             transform=tv.transforms.ToTensor(), 
                             target_transform=None, 
                             download=True)

test_ds = tv.datasets.MNIST('/data/datasets/MNIST/', train=False, 
                             transform=tv.transforms.ToTensor(), 
                             target_transform=None, 
                             download=True)

def torch_datasets_to_sklearn_cv_data(train_ds, valid_ds):
    n_train, n_valid = len(train_ds), len(valid_ds)
    x, y = train_ds[0]
    all_tuples = list(train_ds) + list(valid_ds)
    all_labels = np.array([y for _, y in all_tuples], np.int)
    all_data = torch.cat([w.view(1, *x.shape) for w, _ in all_tuples], 0).numpy()
    valid_fold = np.zeros_like(all_labels)
    valid_fold[:n_train] = -1
    psplit = PredefinedSplit(valid_fold)
    return all_data, all_labels, psplit

def get_dataset(use_test_dataset=False, n_samples=60000, n_splits=6):
    if use_test_dataset:
        # using the test dataset as a fixed validation set (only one split)
        all_data, all_labels, psplit = torch_datasets_to_sklearn_cv_data(train_ds, valid_ds)

    else:
        all_labels = np.array([y for _, y in list(train_ds)], np.int)[:n_samples]
        all_data = torch.cat([w.view(1, 1, 28, 28) for w, _ in list(train_ds)], 0).numpy()[:n_samples]
        psplit = n_splits

    print('Training Data:   shape {}, min {:.3f}, max {:.3f}'.format(all_data.shape, 
                                                                     all_data.min(), all_data.max()))
    print('Training Labels: shape {}, min {}, max {}'.format(all_labels.shape, 
                                                             all_labels.min(), all_labels.max()))
    return all_data, all_labels, psplit


In [4]:
class MyNet(nn.Module):
    # Input size is (-1, 1, 28, 28)
    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv1 = nn.Conv2d(1, 6, 5)         # in_channels, out_channels, kernel_size
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool(x)
        x = x.view(-1, 16 * 4 * 4)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

    
class SklEstimator(BaseEstimator):
    
    skl_id = 0
    fit_num = 0
    
    def __init__(self, model_class=None, criterion_class='CrossEntropyLoss', optim_class='SGD', 
                 optim_lr=0.001, optim_momentum=0.9, weight_decay=0, 
                 sched_step=10, sched_gamma=1.0, mb_size=16, n_epochs=100, verbose=True):
        self.par_model_class = model_class
        self.par_criterion_class = criterion_class
        self.par_optim_class = optim_class
        self.par_optim_lr = optim_lr
        self.par_optim_momentum = optim_momentum
        self.par_weight_decay = weight_decay
        self.par_sched_step = sched_step
        self.par_sched_gamma = sched_gamma
        self.par_mb_size = mb_size
        self.par_n_epochs = n_epochs
        self.par_verbose = verbose
        
    def _initialize(self):
        SklEstimator.skl_id += 1
        self.idd = 'skl_model_{}'.format(SklEstimator.skl_id)
        
        if self.par_model_class is None:
            raise Exception('Model not specified.')
        
        self.model = self.par_model_class()
        
        if self.par_criterion_class == 'CrossEntropyLoss':
            self.criterion = nn.CrossEntropyLoss()
        elif self.par_criterion_class == 'MSELoss':
            self.criterion = nn.MSELoss()
        else:
            self.criterion = None
            raise Exception("Calma...")
            
        if self.par_optim_class == 'Adam':
            self.optim = torch.optim.Adam(self.model.parameters(), lr=self.par_optim_lr, 
                                          weight_decay=self.par_weight_decay)
        elif self.par_optim_class == 'SGD':
            self.optim = torch.optim.SGD(self.model.parameters(), lr=self.par_optim_lr, 
                                         momentum=self.par_optim_momentum, nesterov=True,
                                         weight_decay=self.par_weight_decay)
        else:
            self.optim = None
            raise Exception("Calma...")
            
        if self.par_sched_gamma < 1.0:
            self.lr_sched = StepLR(self.optim, step_size=self.par_sched_step, gamma=self.par_sched_gamma)
        else:
            self.lr_sched = None
        
        if self.par_verbose > 0:
            callbacks = [ptt.PrintCallback()]
        else:
            callbacks = None
        
        self.trainer = ptt.DeepNetTrainer(model=self.model, 
                                          criterion=self.criterion, 
                                          optimizer=self.optim, 
                                          lr_scheduler=self.lr_sched, 
                                          callbacks=callbacks)
    
    def get_params(self, deep):
        params = []
        for k, v in self.__dict__.items():
            if k.startswith('par_'):
                params.append((k[4:], v))
        return dict(params)
    
    def set_params(self, **params):
        for k, v in params.items():
            setattr(self, 'par_' + k, v)
        self._initialize()
        return self
    
    def fit(self, Xtrain, ytrain):
        SklEstimator.fit_num += 1
        self.t0 = time.time()
        print('\n***** Fit #{} '.format(SklEstimator.fit_num))
        Xtra = torch.from_numpy(Xtrain)
        ytra = torch.from_numpy(ytrain)
        self.trainer.fit(self.par_n_epochs, Xtra, ytra, batch_size=self.par_mb_size, shuffle=True)
    
    def score(self, Xtrain, ytrain):
        Xtra = torch.from_numpy(Xtrain)
        ytra = torch.from_numpy(ytrain)
        mdict = self.trainer.evaluate(Xtra, ytra, batch_size=self.par_mb_size)
        score = - mdict['losses'] # negativo pois é busca por máximo score
        print('***** Score = {:.5f} [{} samples]  {:.2f}s'.format(score, ytra.shape[0], time.time() - self.t0))
        return score

In [5]:
use_test_dataset = False
n_samples = 60000
n_splits = 3

parameters = {
    'model_class':     Categorical([MyNet]),
    'optim_class':     Categorical(['Adam']), 
    'optim_lr':        Real(0.0001, 0.005, prior='uniform'),
    'weight_decay':    Real(0.0, 0.01, prior='uniform'),
    'sched_gamma':     Real(0.5, 1.0, prior='uniform'),
    'sched_step':      Integer(10, 10), 
    'mb_size':         Integer(100, 100),
    'n_epochs':        Integer(10, 10),
}

all_data, all_labels, psplit = get_dataset(use_test_dataset=use_test_dataset, 
                                           n_samples=n_samples, n_splits=n_splits)

# all_data, all_labels = shuffle(all_data, all_labels)

validator = BayesSearchCV(SklEstimator(verbose=1), 
                          search_spaces=parameters, 
                          n_iter=9,
                          cv=psplit,
                          random_state=20171024,
                          verbose=3)


Training Data:   shape (60000, 1, 28, 28), min 0.000, max 1.000
Training Labels: shape (60000,), min 0, max 9


- a busca seguinte sempre termina no fit #36 com erro de NaN, independente dos dados e dos parâmetros!

In [6]:
try:
    validator.fit(all_data, all_labels)

except KeyboardInterrupt:
    print('Interrupted!')

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] mb_size=100, model_class=<class '__main__.MyNet'>, n_epochs=10, optim_class=Adam, optim_lr=0.00395978866345, sched_gamma=0.848192668981, sched_step=10, weight_decay=0.00585296751024 

***** Fit #1 
Start training for 10 epochs
  1:   2.5s   T: 0.32541 best
  2:   2.0s   T: 0.12295 best
  3:   2.0s   T: 0.10750 best
  4:   2.0s   T: 0.10546 best
  5:   2.0s   T: 0.10083 best
  6:   2.0s   T: 0.09518 best
  7:   2.0s   T: 0.09229 best
  8:   2.0s   T: 0.09108 best
  9:   2.0s   T: 0.09056 best
 10:   2.0s   T: 0.08706 best
Stop training at epoch: 10/10


AttributeError: 'DeepNetTrainer' object has no attribute 'score'

In [None]:
validator.best_params_

In [None]:
validator.best_index_

In [None]:
validator.best_estimator_.score(all_data, all_labels)

In [None]:
validator.cv_results_

In [None]:
BayesSearchCV?