In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# (Re)load the autoreload extension; mode 2 re-imports edited modules before each cell executes.
%reload_ext autoreload
%autoreload 2

In [3]:
"""
 MNIST example with training and validation monitoring using TensorboardX and Tensorboard.
 Requirements:
    Optionally TensorboardX (https://github.com/lanpa/tensorboard-pytorch): `pip install tensorboardX`
    Tensorboard: `pip install tensorflow` (or just install tensorboard without the rest of tensorflow)
 Usage:
    Start tensorboard:
    ```bash
    tensorboard --logdir=/tmp/tensorboard_logs/
    ```
    Run the example:
    ```bash
    python mnist_with_tensorboard_logger.py --log_dir=/tmp/tensorboard_logs
    ```
"""
from __future__ import print_function

import sys
from argparse import ArgumentParser
import logging

import torch
from torch.utils.data import DataLoader
from torch import nn
import torch.nn.functional as F
from torch.optim import SGD
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, ToTensor, Normalize

from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss
from ignite.contrib.handlers.tensorboard_logger import *


# Logging interval constant.
# NOTE(review): nothing in the visible code reads this value; the TensorBoard handlers
# attached in run() hard-code every=100 instead — confirm whether it is still needed.
LOG_INTERVAL = 10


class Net(nn.Module):
    """Small convolutional classifier producing log-probabilities over 10 classes.

    Two 5x5 convolutions (each followed by 2x2 max-pooling and ReLU) feed two
    fully connected layers. The flattening step assumes 320 features per
    sample, which is what a 1x28x28 input yields (20 channels of 4x4).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Layer creation order determines the order of the random initialisation draws.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(in_features=320, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=10)

    def forward(self, x):
        """Return ``log_softmax`` scores (last dimension) for the batch ``x``."""
        # First stage: convolution -> 2x2 max-pool -> ReLU.
        stage1 = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))

        # Second stage: convolution -> channel dropout -> 2x2 max-pool -> ReLU.
        regularised = self.conv2_drop(self.conv2(stage1))
        stage2 = F.relu(F.max_pool2d(regularised, kernel_size=2))

        # Collapse everything except the feature dimension expected by fc1.
        flat = stage2.view(-1, 320)

        hidden = F.relu(self.fc1(flat))
        # Functional dropout must be told explicitly whether the module is training.
        hidden = F.dropout(hidden, training=self.training)

        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=-1)


def get_data_loaders(train_batch_size, val_batch_size):
    """Create the MNIST training and validation data loaders.

    Args:
        train_batch_size: batch size of the shuffled training loader.
        val_batch_size: batch size of the unshuffled validation loader.

    Returns:
        A ``(train_loader, val_loader)`` tuple of ``DataLoader`` objects.
    """
    # Shared preprocessing: convert to tensor, then normalise with mean 0.1307 / std 0.3081.
    data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

    # Only the training dataset is constructed with download=True; both use "." as root.
    train_set = MNIST(root=".", train=True, download=True, transform=data_transform)
    train_loader = DataLoader(train_set, batch_size=train_batch_size, shuffle=True)

    val_set = MNIST(root=".", train=False, download=False, transform=data_transform)
    val_loader = DataLoader(val_set, batch_size=val_batch_size, shuffle=False)

    return train_loader, val_loader


def run(train_batch_size, val_batch_size, epochs, lr, momentum, log_dir):
    """Train ``Net`` on MNIST and log training/validation data to TensorBoard.

    Args:
        train_batch_size: batch size for the training loader.
        val_batch_size: batch size for the validation loader.
        epochs: passed as ``max_epochs`` to the trainer.
        lr: learning rate for the SGD optimizer.
        momentum: momentum for the SGD optimizer.
        log_dir: directory handed to ``TensorboardLogger``.
    """
    train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)
    model = Net()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Move the model explicitly, and before the optimizer is built, so the optimizer
    # references the parameters that live on the target device instead of relying on
    # create_supervised_trainer to move the model implicitly.
    model.to(device)

    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()
    trainer = create_supervised_trainer(model, optimizer, criterion, device=device)

    if sys.version_info > (3,):
        # Imported lazily: only attempted on Python 3.
        from ignite.contrib.metrics.gpu_info import GpuInfo
        try:
            GpuInfo().attach(trainer)
        except RuntimeError:
            print("INFO: By default, in this example it is possible to log GPU information (used memory, utilization). "
                  "As there is no pynvml python package installed, GPU information won't be logged. Otherwise, please "
                  "install it : `pip install pynvml`")

    # The same metric objects are handed to both evaluators below.
    metrics = {
        'accuracy': Accuracy(),
        'loss': Loss(criterion)
    }

    train_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)
    validation_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)

    @trainer.on(Events.EPOCH_COMPLETED)
    def compute_metrics(engine):
        # After every training epoch, evaluate on both the training and validation sets.
        train_evaluator.run(train_loader)
        validation_evaluator.run(val_loader)

    tb_logger = TensorboardLogger(log_dir=log_dir)

    # Guarantee the logger is closed even if attaching a handler or training raises.
    try:
        # Batch loss (plus all trainer metrics) every 100 iterations.
        tb_logger.attach(trainer,
                         log_handler=OutputHandler(tag="training",
                                                   output_transform=lambda loss: {'batchloss': loss},
                                                   metric_names='all'),
                         event_name=Events.ITERATION_COMPLETED(every=100))

        # Epoch-level metrics of both evaluators; `another_engine=trainer` supplies the step.
        tb_logger.attach(train_evaluator,
                         log_handler=OutputHandler(tag="training",
                                                   metric_names=["loss", "accuracy"],
                                                   another_engine=trainer),
                         event_name=Events.EPOCH_COMPLETED)

        tb_logger.attach(validation_evaluator,
                         log_handler=OutputHandler(tag="validation",
                                                   metric_names=["loss", "accuracy"],
                                                   another_engine=trainer),
                         event_name=Events.EPOCH_COMPLETED)

        tb_logger.attach(trainer,
                         log_handler=OptimizerParamsHandler(optimizer),
                         event_name=Events.ITERATION_COMPLETED(every=100))

        tb_logger.attach(trainer,
                         log_handler=WeightsScalarHandler(model),
                         event_name=Events.ITERATION_COMPLETED(every=100))

        # NOTE(review): every=100 on EPOCH_COMPLETED fires only on every 100th epoch, so
        # with fewer than 100 epochs the two histogram handlers never log — confirm intent.
        tb_logger.attach(trainer,
                         log_handler=WeightsHistHandler(model),
                         event_name=Events.EPOCH_COMPLETED(every=100))

        tb_logger.attach(trainer,
                         log_handler=GradsScalarHandler(model),
                         event_name=Events.ITERATION_COMPLETED(every=100))

        tb_logger.attach(trainer,
                         log_handler=GradsHistHandler(model),
                         event_name=Events.EPOCH_COMPLETED(every=100))

        # kick everything off
        trainer.run(train_loader, max_epochs=epochs)
    finally:
        tb_logger.close()


if __name__ == "__main__":
    # Hyper-parameters are defined inline rather than parsed from the command line,
    # because this cell is executed inside a notebook kernel.
    args = {
        'batch_size': 64,
        'val_batch_size': 1000,
        'epochs': 5,
        'lr': .01,
        'momentum': .5,
        'log_dir': '/tmp/tensorboard_logs/mnist'
    }

    # Setup engine logger
    logger = logging.getLogger("ignite.engine.engine.Engine")
    logger.setLevel(logging.INFO)
    # Loggers are process-wide singletons: re-running this cell would otherwise stack
    # another StreamHandler on the same logger and print every record once per re-run.
    if not logger.handlers:
        handler = logging.StreamHandler()
        formatter = logging.Formatter("%(asctime)s %(name)-12s %(levelname)-8s %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    run(args['batch_size'], args['val_batch_size'], args['epochs'], args['lr'], args['momentum'], args['log_dir'])
    
    

2020-01-01 13:55:51,720 ignite.engine.engine.Engine INFO     Engine run starting with max_epochs=5.
2020-01-01 13:55:51,720 ignite.engine.engine.Engine INFO     Engine run starting with max_epochs=5.
INFO:ignite.engine.engine.Engine:Engine run starting with max_epochs=5.
2020-01-01 13:56:05,782 ignite.engine.engine.Engine INFO     Epoch[1] Complete. Time taken: 00:00:14
2020-01-01 13:56:05,782 ignite.engine.engine.Engine INFO     Epoch[1] Complete. Time taken: 00:00:14
INFO:ignite.engine.engine.Engine:Epoch[1] Complete. Time taken: 00:00:14
2020-01-01 13:56:05,784 ignite.engine.engine.Engine INFO     Engine run starting with max_epochs=1.
2020-01-01 13:56:05,784 ignite.engine.engine.Engine INFO     Engine run starting with max_epochs=1.
INFO:ignite.engine.engine.Engine:Engine run starting with max_epochs=1.
2020-01-01 13:56:17,877 ignite.engine.engine.Engine INFO     Epoch[1] Complete. Time taken: 00:00:12
2020-01-01 13:56:17,877 ignite.engine.engine.Engine INFO     Epoch[1] Complete. 

In [None]:
# Installs ignite from the pytorch-nightly conda channel (-y skips the confirmation prompt).
# NOTE(review): the training cell imports ignite, so on a fresh kernel this must run first.
!conda install ignite -c pytorch-nightly -y

In [None]:
!pip install tensorboardX