In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from IPython import display

import os
import sys
import imp
import time
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import MultiStepLR, StepLR
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import Variable

import torchvision as tv
import lib.pytorch_trainer as ptt

use_gpu = torch.cuda.is_available()
print('GPU available:', use_gpu)
print('torch', torch.version.__version__)
print('Python', sys.version)


GPU available: True
torch 0.2.0_4
Python 3.6.1 |Anaconda custom (64-bit)| (default, May 11 2017, 13:09:58) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]


In [2]:
from sklearn.base import BaseEstimator
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import PredefinedSplit
import scipy.stats as st


In [3]:
# MNIST Dataset
# =============
train_ds = tv.datasets.MNIST('/data/datasets/MNIST/', train=True, 
                             transform=tv.transforms.ToTensor(), 
                             target_transform=None, 
                             download=True)

test_ds = tv.datasets.MNIST('/data/datasets/MNIST/', train=False, 
                             transform=tv.transforms.ToTensor(), 
                             target_transform=None, 
                             download=True)

def torch_datasets_to_sklearn_cv_data(train_ds, valid_ds):
    n_train, n_valid = len(train_ds), len(valid_ds)
    x, y = train_ds[0]
    all_tuples = list(train_ds) + list(valid_ds)
    all_labels = np.array([y for _, y in all_tuples], np.int)
    all_data = torch.cat([w.view(1, *x.shape) for w, _ in all_tuples], 0).numpy()
    valid_fold = np.zeros_like(all_labels)
    valid_fold[:n_train] = -1
    psplit = PredefinedSplit(valid_fold)
    return all_data, all_labels, psplit

def get_dataset(use_test_dataset=False, n_samples=60000, n_splits=6):
    if use_test_dataset:
        # using the test dataset as a fixed validation set (only one split)
        all_data, all_labels, psplit = torch_datasets_to_sklearn_cv_data(train_ds, valid_ds)

    else:
        all_labels = np.array([y for _, y in list(train_ds)], np.int)[:n_samples]
        all_data = torch.cat([w.view(1, 1, 28, 28) for w, _ in list(train_ds)], 0).numpy()[:n_samples]
        psplit = n_splits

    # print(all_data.shape, all_data.min(), all_data.max(), '***', all_labels.shape, all_labels.min(), all_labels.max())
    return all_data, all_labels, psplit


In [4]:
class MyNet(nn.Module):
    # Input size is (-1, 1, 28, 28)
    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv1 = nn.Conv2d(1, 6, 5)         # in_channels, out_channels, kernel_size
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool(x)
        x = x.view(-1, 16 * 4 * 4)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

    
class SklEstimator(BaseEstimator):
    
    skl_id = 0
    fit_num = 0
    
    def __init__(self, model_class=None, criterion_class='CrossEntropyLoss', optim_class='SGD', 
                 optim_lr=0.001, optim_momentum=0.9, weight_decay=0, 
                 sched_step=10, sched_gamma=1.0, mb_size=16, n_epochs=100, verbose=True):
        self.par_model_class = model_class
        self.par_criterion_class = criterion_class
        self.par_optim_class = optim_class
        self.par_optim_lr = optim_lr
        self.par_optim_momentum = optim_momentum
        self.par_weight_decay = weight_decay
        self.par_sched_step = sched_step
        self.par_sched_gamma = sched_gamma
        self.par_mb_size = mb_size
        self.par_n_epochs = n_epochs
        self.par_verbose = verbose
        
    def _initialize(self):
        SklEstimator.skl_id += 1
        self.idd = 'skl_model_{}'.format(SklEstimator.skl_id)
        
        if self.par_model_class is None:
            raise Exception('Model not specified.')
        
        self.model = self.par_model_class()
        
        if self.par_criterion_class == 'CrossEntropyLoss':
            self.criterion = nn.CrossEntropyLoss()
        elif self.par_criterion_class == 'MSELoss':
            self.criterion = nn.MSELoss()
        else:
            self.criterion = None
            raise Exception("Calma...")
            
        if self.par_optim_class == 'Adam':
            self.optim = torch.optim.Adam(self.model.parameters(), lr=self.par_optim_lr, 
                                          weight_decay=self.par_weight_decay)
        elif self.par_optim_class == 'SGD':
            self.optim = torch.optim.SGD(self.model.parameters(), lr=self.par_optim_lr, 
                                         momentum=self.par_optim_momentum, nesterov=True,
                                         weight_decay=self.par_weight_decay)
        else:
            self.optim = None
            raise Exception("Calma...")
            
        if self.par_sched_gamma < 1.0:
            self.lr_sched = StepLR(self.optim, step_size=self.par_sched_step, gamma=self.par_sched_gamma)
        else:
            self.lr_sched = None
        
        if self.par_verbose > 0:
            callbacks = [ptt.PrintCallback()]
        else:
            callbacks = None
        
        self.trainer = ptt.DeepNetTrainer(model=self.model, 
                                          criterion=self.criterion, 
                                          optimizer=self.optim, 
                                          lr_scheduler=self.lr_sched, 
                                          callbacks=callbacks)
    
    def get_params(self, deep):
        params = []
        for k, v in self.__dict__.items():
            if k.startswith('par_'):
                params.append((k[4:], v))
        return dict(params)
    
    def set_params(self, **params):
        for k, v in params.items():
            setattr(self, 'par_' + k, v)
        self._initialize()
        return self
    
    def fit(self, Xtrain, ytrain):
        SklEstimator.fit_num += 1
        self.t0 = time.time()
        print('\n***** Fit #{} '.format(SklEstimator.fit_num))
        Xtra = torch.from_numpy(Xtrain)
        ytra = torch.from_numpy(ytrain)
        self.trainer.fit(self.par_n_epochs, Xtra, ytra, batch_size=self.par_mb_size, shuffle=True)
    
    def score(self, Xtrain, ytrain):
        Xtra = torch.from_numpy(Xtrain)
        ytra = torch.from_numpy(ytrain)
        loss = self.trainer.score(Xtra, ytra, batch_size=self.par_mb_size)
        print('***** Score = {:.5f} [{} samples]  {:.2f}s'.format(loss, ytra.shape[0], time.time() - self.t0))
        return loss


In [5]:
use_test_dataset = False
n_samples = 60000
n_splits = 3

parameters = {
    'model_class':     [MyNet],
    'optim_class':     ['Adam'], 
    'optim_lr':        st.uniform(0.0001, 0.005),    # 0.0001:0.0051
    'weight_decay':    st.uniform(0.0, 0.01),
    'sched_step':      [10], 
    'sched_gamma':     st.uniform(0.5, 0.5),         # 0.5:1.0
    'mb_size':         [100],
    'n_epochs':        [50],
}

all_data, all_labels, psplit = get_dataset(use_test_dataset=use_test_dataset, 
                                           n_samples=n_samples, n_splits=n_splits)

validator = RandomizedSearchCV(SklEstimator(verbose=0), 
                               param_distributions=parameters, 
                               cv=psplit,
                               n_iter=20, 
                               verbose=1)


In [6]:
try:
    validator.fit(all_data, all_labels)

except KeyboardInterrupt:
    print('Interrupted!')

Fitting 3 folds for each of 20 candidates, totalling 60 fits

***** Fit #1 
***** Score = -0.10999 [20000 samples]  99.27s
***** Score = -0.10338 [40000 samples]  99.96s

***** Fit #2 
***** Score = -0.09813 [20000 samples]  99.31s
***** Score = -0.08952 [40000 samples]  99.99s

***** Fit #3 
***** Score = -0.10683 [20000 samples]  99.52s
***** Score = -0.09497 [40000 samples]  100.20s

***** Fit #4 
***** Score = -0.05181 [20000 samples]  98.62s
***** Score = -0.00091 [40000 samples]  99.30s

***** Fit #5 
***** Score = -0.05802 [20000 samples]  98.34s
***** Score = -0.00088 [40000 samples]  99.03s

***** Fit #6 
***** Score = -0.07049 [20000 samples]  98.61s
***** Score = -0.00122 [40000 samples]  99.29s

***** Fit #7 
***** Score = -0.04562 [20000 samples]  98.32s
***** Score = -0.02344 [40000 samples]  98.99s

***** Fit #8 
***** Score = -0.04245 [20000 samples]  98.22s
***** Score = -0.01888 [40000 samples]  98.90s

***** Fit #9 
***** Score = -0.04332 [20000 samples]  98.10s
****

[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed: 98.9min finished


In [7]:
validator.best_params_

{'mb_size': 100,
 'model_class': __main__.MyNet,
 'n_epochs': 50,
 'optim_class': 'Adam',
 'optim_lr': 0.0024771453065453647,
 'sched_gamma': 0.52432974219051665,
 'sched_step': 10,
 'weight_decay': 0.0006398623515844027}

In [8]:
validator.best_index_, validator.best_score_

(16, -0.039320223670268509)

In [9]:
validator.best_estimator_.score(all_data, all_labels)

***** Score = -0.00617 [60000 samples]  147.92s


-0.006170385721197817

In [10]:
validator.cv_results_

{'mean_fit_time': array([ 99.11292354,  98.2733856 ,  97.95920404,  97.83784604,
         97.64676237,  97.93762835,  98.22874602,  97.59057474,
         97.71171371,  97.55566843,  97.66143258,  97.720963  ,
         97.68668278,  97.83518895,  97.6853505 ,  97.93287142,
         97.7379024 ,  97.63070353,  97.8340145 ,  97.85715318]),
 'mean_score_time': array([ 0.34071938,  0.33981085,  0.33958745,  0.33723672,  0.33710432,
         0.33724475,  0.33690707,  0.33771674,  0.33657384,  0.33724825,
         0.33699314,  0.33715971,  0.33748976,  0.33663781,  0.33765125,
         0.3380936 ,  0.33713357,  0.33742332,  0.3384308 ,  0.3376445 ]),
 'mean_test_score': array([-0.10498181, -0.06010653, -0.04379633, -0.09950886, -0.05299287,
        -0.07325573, -0.05866176, -0.05536476, -0.06226666, -0.04064668,
        -0.07675833, -0.04570483, -0.08171712, -0.0706612 , -0.05026358,
        -0.07783582, -0.03932022, -0.09608498, -0.06854825, -0.06383154]),
 'mean_train_score': array([-0.0959