### Testing out Hyperspace on MNIST Data - Model Resiliency Test

In [1]:
from hyperspace import create_hyperspace
from ray import tune
import tensorflow as tf
from torch import nn
import pytorch_lightning as pl
from pytorch_lightning import loggers as pl_loggers
from ray.tune.suggest.skopt import SkOptSearch
from skopt import Optimizer
import ray
from tqdm import tqdm
import torch
import torchvision
import statistics



In [2]:
# Start the local Ray runtime used by Ray Tune below.
# ignore_reinit_error=True keeps this cell re-runnable in a live kernel:
# without it a second ray.init() call raises because Ray is already running.
ray.init(ignore_reinit_error=True)

2020-10-01 15:12:29,685	INFO services.py:1164 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


{'node_ip_address': '192.168.1.240',
 'raylet_ip_address': '192.168.1.240',
 'redis_address': '192.168.1.240:6379',
 'object_store_address': '/tmp/ray/session_2020-10-01_15-12-29_198752_57535/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-10-01_15-12-29_198752_57535/sockets/raylet',
 'webui_url': '127.0.0.1:8265',
 'session_dir': '/tmp/ray/session_2020-10-01_15-12-29_198752_57535',
 'metrics_export_port': 60139}

### TensorFlow Model Objective Function Definition

In [3]:
def mnist_tf_objective(config):
    """Train a small dense Keras classifier on MNIST and report its test loss to Ray Tune.

    Args:
        config: Mapping holding the hyperparameters ``learning_rate``,
            ``dropout``, ``epochs`` and ``batch_size``.

    Returns:
        The first value returned by ``model.evaluate`` on the test split,
        i.e. the loss. The same value is reported to Tune as ``test_loss``.
    """
    mnist = tf.keras.datasets.mnist

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Rescale the raw pixel values by 1/255 before feeding them to the network.
    x_train, x_test = x_train / 255.0, x_test / 255.0

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(config['dropout']),
        tf.keras.layers.Dense(10, activation='softmax')
    ])

    opt = tf.keras.optimizers.Adam(learning_rate=config['learning_rate'])

    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Coerce the integer-valued hyperparameters to built-in ints, mirroring the
    # int(...) cast the PyTorch model applies to batch_size; the search
    # algorithm is not guaranteed to hand back native Python ints.
    model.fit(x_train, y_train,
              epochs=int(config['epochs']),
              batch_size=int(config['batch_size']))
    res_test = model.evaluate(x_test, y_test)
    # res_test[0] is the evaluation loss, res_test[1] is the accuracy
    tune.report(test_loss=res_test[0])
    return res_test[0]

#### Run the hyperparameter search for the TensorFlow model

In [None]:
%%capture tf_run_output
hyperparameters = [(0.00000001, 0.1),  # learning_rate
                   (0.0, 0.9),  # dropout
                   (10, 100),  # epochs 
                   (10, 1000)]  # batch size
space = create_hyperspace(hyperparameters)

### for each space in hyperspace, we want to search the space using ray tune
i = 0
results = []
for section in tqdm(space):
    # create a skopt gp minimize object
    optimizer = Optimizer(section)
    search_algo = SkOptSearch(optimizer, ['learning_rate', 'dropout', 'epochs', 'batch_size'],
                              metric='test_loss', mode='min')
    # not using a gpu because running on local
    analysis = tune.run(mnist_tf_objective, search_alg=search_algo, num_samples=20)
    results.append(analysis)
    i += 1

# print out the best result
i = 0
for a in results:
    print("Best config for space "+str(i)+": "+a.get_best_config(metric="avg_test_loss", mode="min"))
    i +=1

2020-10-01 15:14:43,897	INFO (unknown file):0 -- gc.collect() freed 103 refs in 3.3938123600000267 seconds
2020-10-01 16:02:35,740	INFO (unknown file):0 -- gc.collect() freed 297 refs in 0.8725385370003096 seconds


In [None]:
# Keep a separate handle on the TensorFlow analyses: the PyTorch search cell
# further down rebinds `results` to a fresh list.
tf_results = results

### PyTorch Model Objective Function Definition

In [32]:
class NumberNet(pl.LightningModule):
    """Dense MNIST classifier (784 -> 128 -> 10) packaged as a LightningModule.

    ``config`` must provide ``dropout``, ``learning_rate`` and ``batch_size``.

    ``forward`` returns softmax class probabilities. The training and test
    losses are computed on the raw logits instead, because
    ``nn.CrossEntropyLoss`` already applies log-softmax internally and must
    not be fed normalised probabilities.

    After ``test_epoch_end`` has run, ``self.test_loss`` holds the mean of the
    per-batch test losses (it is ``None`` before that).
    """

    def __init__(self, config):
        super().__init__()
        # The network itself ends at the final Linear layer and emits logits;
        # the softmax is applied in forward() only.
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(784, 128),
            nn.ReLU(),
            nn.Dropout(config['dropout']),
            nn.Linear(128, 10))
        self.criterion = nn.CrossEntropyLoss()
        self.config = config
        self.test_loss = None

    def _mnist_loader(self, train):
        """Return a DataLoader over the MNIST split selected by ``train``."""
        dataset = torchvision.datasets.MNIST("~/resiliency/", train=train,
                                             transform=torchvision.transforms.ToTensor(),
                                             target_transform=None, download=True)
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=int(self.config['batch_size']))

    def train_dataloader(self):
        """DataLoader over the MNIST training split."""
        return self._mnist_loader(train=True)

    def test_dataloader(self):
        """DataLoader over the MNIST test split (train=False), so the test loss is held-out."""
        return self._mnist_loader(train=False)

    def configure_optimizers(self):
        """Adam over all parameters with the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.config['learning_rate'])

    def forward(self, x):
        """Return class probabilities for a batch ``x``."""
        # dim=1 is the class dimension of the (batch, 10) output of the last Linear layer.
        return torch.softmax(self.model(x), dim=1)

    def training_step(self, train_batch, batch_idx):
        """Compute the cross-entropy loss on one training batch."""
        x, y = train_batch
        logits = self.model(x)
        loss = self.criterion(logits, y)
        return {'loss': loss}

    def test_step(self, test_batch, batch_idx):
        """Compute the cross-entropy loss on one test batch."""
        x, y = test_batch
        logits = self.model(x)
        loss = self.criterion(logits, y)
        logs = {'test_loss': loss}
        return {'test_loss': loss, 'logs': logs}

    def test_epoch_end(self, outputs):
        """Average the per-batch test losses and store the result on ``self.test_loss``."""
        batch_losses = [float(out['test_loss']) for out in outputs]
        avg_loss = statistics.mean(batch_losses)
        tensorboard_logs = {'test_loss': avg_loss}
        self.test_loss = avg_loss
        return {'avg_test_loss': avg_loss, 'log': tensorboard_logs}


In [33]:
def mnist_pt_objective(config):
    """Train a NumberNet with the given hyperparameters and report its test loss to Ray Tune.

    Args:
        config: Mapping of hyperparameters. ``epochs`` is read here; the
            whole mapping is also passed on to ``NumberNet``.

    Returns:
        ``model.test_loss`` as set by the model during ``trainer.test``.
        The same value is reported to Tune as ``test_loss``.
    """
    model = NumberNet(config)
    # Coerce to a built-in int, as NumberNet does for batch_size; the search
    # algorithm is not guaranteed to hand back native Python ints.
    trainer = pl.Trainer(max_epochs=int(config['epochs']))
    trainer.fit(model)
    trainer.test(model)
    tune.report(test_loss=model.test_loss)
    return model.test_loss

In [34]:
%%capture tf_run_output
hyperparameters = [(0.00000001, 0.1),  # learning_rate
                   (0.0, 0.9),  # dropout
                   (10, 100),  # epochs 
                   (10, 1000)]  # batch size
space = create_hyperspace(hyperparameters)

### for each space in hyperspace, we want to search the space using ray tune
i = 0
results = []
for section in tqdm(space):
    # create a skopt gp minimize object
    optimizer = Optimizer(section)
    search_algo = SkOptSearch(optimizer, ['learning_rate', 'dropout', 'epochs', 'batch_size'],
                              metric='test_loss', mode='min')
    # not using a gpu because running on local
    analysis = tune.run(mnist_pt_objective, search_alg=search_algo, num_samples=20)
    results.append(analysis)
    i += 1

# print out the best result
i = 0
for a in results:
    print("Best config for space "+str(i)+": "+a.get_best_config(metric="avg_test_loss", mode="min"))
    i +=1

2020-10-01 14:48:43,414	ERROR trial_runner.py:567 -- Trial mnist_pt_objective_86ed9d7e: Error processing event.
Traceback (most recent call last):
  File "/Users/mzvyagin/miniconda3/envs/resiliency/lib/python3.8/site-packages/ray/tune/trial_runner.py", line 515, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/Users/mzvyagin/miniconda3/envs/resiliency/lib/python3.8/site-packages/ray/tune/ray_trial_executor.py", line 488, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/Users/mzvyagin/miniconda3/envs/resiliency/lib/python3.8/site-packages/ray/worker.py", line 1428, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(TuneError): [36mray::ImplicitFunc.train()[39m (pid=56945, ip=192.168.1.240)
  File "python/ray/_raylet.pyx", line 484, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 438, in ray._raylet.execute_task.function_executor
  File "/Users/mzvyagin/miniconda3/envs/resi

KeyboardInterrupt: 

Epoch 73:  20%|██        | 22/109 [00:03<00:11,  7.32it/s, loss=1.907, v_num=0]
Epoch 59:  54%|█████▎    | 36/67 [00:08<00:07,  4.38it/s, loss=1.881, v_num=0]
Epoch 55:  62%|██████▏   | 57/92 [00:09<00:05,  5.96it/s, loss=1.611, v_num=0]
