In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output
import pandas as pd
import time
import json

from itertools import product
from collections import namedtuple
from collections import OrderedDict


from torchsummary import summary

In [2]:
# check PyTorch versions
print(torch.__version__)
print(torchvision.__version__)

1.7.0.dev20200911
0.8.0.dev20200911


In [3]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    use_cuda = True
    print(f"Number of GPU's available : {torch.cuda.device_count()}")
    print(f"GPU device name : {torch.cuda.get_device_name(0)}")
else:
    print("No GPU available, using CPU instead")
    device = torch.device("cpu")
    use_cuda = False

Number of GPU's available : 1
GPU device name : GeForce RTX 2080 Ti


In [4]:
# Use standard FashionMNIST dataset
train_set = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train = True,
    download = True,
    transform = transforms.Compose([
        transforms.ToTensor()                                 
    ])
)

In [5]:
loader = torch.utils.data.DataLoader(
    train_set, batch_size = len(train_set), num_workers = 1, pin_memory = True
)
data = next(iter(loader))
mean = data[0].mean()
std = data[0].std()
mean, std

(tensor(0.2860), tensor(0.3530))

In [6]:
train_set_normal = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST'
    ,train = True
    ,download = True
    ,transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
)


In [7]:
sample = next(iter(train_set))

In [8]:
image, label = sample

In [9]:
image.shape

torch.Size([1, 28, 28])

In [10]:
out_classes = len(train_set.classes)
out_classes

10

In [11]:
trainsets = {
    'not_normal': train_set,
    'normal' : train_set_normal
}

In [12]:
# Build the neural network, expand on top of nn.Module
# class Network(nn.Module):
#     def __init__(self):
#         super().__init__()

#         # define layers
#         self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
#         self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

#         self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
#         self.fc2 = nn.Linear(in_features=120, out_features=60)
#         self.out = nn.Linear(in_features=60, out_features=10)

#         # define forward function
#     def forward(self, t):
#         # conv 1
#         t = self.conv1(t)
#         t = F.relu(t)
#         t = F.max_pool2d(t, kernel_size=2, stride=2)

#         # conv 2
#         t = self.conv2(t)
#         t = F.relu(t)
#         t = F.max_pool2d(t, kernel_size=2, stride=2)

#         # fc1
#         t = t.reshape(-1, 12*4*4)
#         t = self.fc1(t)
#         t = F.relu(t)

#         # fc2
#         t = self.fc2(t)
#         t = F.relu(t)

#         # output
#         t = self.out(t)
#         # don't need softmax here since we'll use cross-entropy as activation.

#         return t

In [13]:
torch.manual_seed(50)
Network1 = nn.Sequential(
    nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size = 2, stride = 2),
    nn.Conv2d(in_channels = 6, out_channels = 12, kernel_size = 5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size = 2, stride = 2),
    nn.Flatten(start_dim = 1),
    nn.Linear(in_features = 12*4*4, out_features = 120),
    nn.ReLU(),
    nn.Linear(in_features = 120, out_features = 60),
    nn.ReLU(),
    nn.Linear(in_features = 60, out_features = 10)
)

In [14]:
torch.manual_seed(50)
#  BatchNormalization computes statistics only with respect to a single axis 
#  (usually the channels axis, =-1 (last) by default); 
#  every other axis is collapsed, i.e. summed over for averaging.
NetworkBN = nn.Sequential(
    nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size = 2, stride = 2),
    nn.BatchNorm2d(6),
    nn.Conv2d(in_channels = 6, out_channels = 12, kernel_size = 5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size = 2, stride = 2),
    nn.Flatten(start_dim = 1),
    # 193 * 120 = num param
    nn.Linear(in_features = 12*4*4, out_features = 120),
    nn.ReLU(),
    nn.BatchNorm1d(120),
    nn.Linear(in_features = 120, out_features = 60),
    nn.ReLU(),
    nn.Linear(in_features = 60, out_features = 10)
)

In [15]:
print(NetworkBN)

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (5): ReLU()
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Flatten(start_dim=1, end_dim=-1)
  (8): Linear(in_features=192, out_features=120, bias=True)
  (9): ReLU()
  (10): BatchNorm1d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (11): Linear(in_features=120, out_features=60, bias=True)
  (12): ReLU()
  (13): Linear(in_features=60, out_features=10, bias=True)
)


In [16]:
#model = Network().to(device)

In [17]:
model = NetworkBN.to(device)
summary(model, input_size=(1, 28, 28), batch_size=128, device = str(torch.device("cuda")))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [128, 6, 24, 24]             156
              ReLU-2           [128, 6, 24, 24]               0
         MaxPool2d-3           [128, 6, 12, 12]               0
       BatchNorm2d-4           [128, 6, 12, 12]              12
            Conv2d-5            [128, 12, 8, 8]           1,812
              ReLU-6            [128, 12, 8, 8]               0
         MaxPool2d-7            [128, 12, 4, 4]               0
           Flatten-8                 [128, 192]               0
            Linear-9                 [128, 120]          23,160
             ReLU-10                 [128, 120]               0
      BatchNorm1d-11                 [128, 120]             240
           Linear-12                  [128, 60]           7,260
             ReLU-13                  [128, 60]               0
           Linear-14                  [

In [18]:
torch.manual_seed(50)
# layer normalization by computing the mean and variance used for normalization from all of the summed inputs 
# to the neurons in a layer on a single training case
NetworkLN = nn.Sequential(
    nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size = 2, stride = 2),
    nn.LayerNorm([6,12,12]),
    nn.Conv2d(in_channels = 6, out_channels = 12, kernel_size = 5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size = 2, stride = 2),
    nn.Flatten(start_dim = 1),
    nn.Linear(in_features = 12*4*4, out_features = 120),
    nn.ReLU(),
    nn.LayerNorm([120]),
    nn.Linear(in_features = 120, out_features = 60),
    nn.ReLU(),
    nn.Linear(in_features = 60, out_features = 10)
)

In [19]:
model = NetworkLN.to(device)
summary(model, input_size=(1, 28, 28), batch_size=128, device = str(torch.device("cuda")))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [128, 6, 24, 24]             156
              ReLU-2           [128, 6, 24, 24]               0
         MaxPool2d-3           [128, 6, 12, 12]               0
         LayerNorm-4           [128, 6, 12, 12]           1,728
            Conv2d-5            [128, 12, 8, 8]           1,812
              ReLU-6            [128, 12, 8, 8]               0
         MaxPool2d-7            [128, 12, 4, 4]               0
           Flatten-8                 [128, 192]               0
            Linear-9                 [128, 120]          23,160
             ReLU-10                 [128, 120]               0
        LayerNorm-11                 [128, 120]             240
           Linear-12                  [128, 60]           7,260
             ReLU-13                  [128, 60]               0
           Linear-14                  [

In [20]:
torch.manual_seed(50)
#  InstanceNorm2d and LayerNorm are very similar, but have some subtle differences. 
# InstanceNorm2d is applied on each channel of channeled data like RGB images, 
# but LayerNorm is usually applied on entire sample and often in NLP tasks.
# Additionally, LayerNorm applies elementwise affine transform, 
# while InstanceNorm2d usually don’t apply affine transform.
NetworkIN = nn.Sequential(
    nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size = 2, stride = 2),
    # num_features – C from an expected input of size (N,C,H,W)
    nn.InstanceNorm2d(6, affine=True),
    nn.Conv2d(in_channels = 6, out_channels = 12, kernel_size = 5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size = 2, stride = 2),
    nn.InstanceNorm2d(12, affine=True),
    nn.Flatten(start_dim = 1),
    nn.Linear(in_features = 12*4*4, out_features = 120),
    nn.ReLU(),
    # doesn't work InstanceNorm1d returns 0-filled tensor to 2D tensor.
    # This is because InstanceNorm1d reshapes inputs to(1, N * C, ...) 
    # from (N, C,...) and this makesvariances 0.
    # https://github.com/pytorch/pytorch/issues/11991
    # nn.InstanceNorm1d(120, affine=True),
    nn.Linear(in_features = 120, out_features = 60),
    nn.ReLU(),
    nn.Linear(in_features = 60, out_features = 10)
)

In [21]:
model = NetworkIN.to(device)
summary(model, input_size=(1, 28, 28), batch_size=128, device = str(torch.device("cuda")))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [128, 6, 24, 24]             156
              ReLU-2           [128, 6, 24, 24]               0
         MaxPool2d-3           [128, 6, 12, 12]               0
    InstanceNorm2d-4           [128, 6, 12, 12]              12
            Conv2d-5            [128, 12, 8, 8]           1,812
              ReLU-6            [128, 12, 8, 8]               0
         MaxPool2d-7            [128, 12, 4, 4]               0
    InstanceNorm2d-8            [128, 12, 4, 4]              24
           Flatten-9                 [128, 192]               0
           Linear-10                 [128, 120]          23,160
             ReLU-11                 [128, 120]               0
           Linear-12                  [128, 60]           7,260
             ReLU-13                  [128, 60]               0
           Linear-14                  [

In [22]:
# >>> input = torch.randn(20, 6, 10, 10)
# >>> # Separate 6 channels into 3 groups
# >>> m = nn.GroupNorm(3, 6)
# >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
# >>> m = nn.GroupNorm(6, 6)
# >>> # Put all 6 channels into a single group (equivalent with LayerNorm)
# >>> m = nn.GroupNorm(1, 6)
# >>> # Activating the module
# >>> output = m(input)

In [23]:
torch.manual_seed(50)
# The input channels are separated into num_groups groups, each containing num_channels / num_groups channels. 
# The mean and standard-deviation are calculated separately over the each group. 
NetworkGN = nn.Sequential(
    nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size = 2, stride = 2),
    nn.GroupNorm(3,6),
    nn.Conv2d(in_channels = 6, out_channels = 12, kernel_size = 5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size = 2, stride = 2),
    nn.Flatten(start_dim = 1),
    nn.Linear(in_features = 12*4*4, out_features = 120),
    nn.ReLU(),
    nn.GroupNorm(1, 120),
    nn.Linear(in_features = 120, out_features = 60),
    nn.ReLU(),
    nn.Linear(in_features = 60, out_features = 10)
)

In [24]:
model = NetworkGN.to(device)
summary(model, input_size=(1, 28, 28), batch_size=128, device = str(torch.device("cuda")))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [128, 6, 24, 24]             156
              ReLU-2           [128, 6, 24, 24]               0
         MaxPool2d-3           [128, 6, 12, 12]               0
         GroupNorm-4           [128, 6, 12, 12]              12
            Conv2d-5            [128, 12, 8, 8]           1,812
              ReLU-6            [128, 12, 8, 8]               0
         MaxPool2d-7            [128, 12, 4, 4]               0
           Flatten-8                 [128, 192]               0
            Linear-9                 [128, 120]          23,160
             ReLU-10                 [128, 120]               0
        GroupNorm-11                 [128, 120]             240
           Linear-12                  [128, 60]           7,260
             ReLU-13                  [128, 60]               0
           Linear-14                  [

In [25]:
def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [26]:
# import modules to build RunBuilder and RunManager helper classes
from collections  import OrderedDict
from collections import namedtuple
from itertools import product

# Read in the hyper-parameters and return a Run namedtuple containing all the 
# combinations of hyper-parameters
class RunBuilder():
    @staticmethod
    def get_runs(params):

        Run = namedtuple('Run', params.keys())

        runs = []
        for v in product(*params.values()):
            runs.append(Run(*v))

        return runs

In [27]:
# Helper class, help track loss, accuracy, epoch time, run time, 
# hyper-parameters etc. Also record to TensorBoard and write into csv, json
class RunManager():
    def __init__(self):

        # tracking every epoch count, loss, accuracy, time
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        # tracking every run count, run data, hyper-params used, time
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        # record model, loader and TensorBoard 
        self.network = None
        self.loader = None
        self.tb = None

    # record the count, hyper-param, model, loader of each run
    # record sample images and network graph to TensorBoard  
    def begin_run(self, run, network, loader):

        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')

        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

#         self.tb.add_image('images', grid)
#         self.tb.add_graph(self.network, images)

    # when run ends, close TensorBoard, zero epoch count
    def end_run(self):
        self.tb.close()
        self.epoch_count = 0

    # zero epoch count, loss, accuracy, 
    def begin_epoch(self):
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    # 
    def end_epoch(self):
        # calculate epoch duration and run duration(accumulate)
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        # record epoch loss and accuracy
        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = self.epoch_num_correct / len(self.loader.dataset)

        # Record epoch loss and accuracy to TensorBoard 
        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        # Record params to TensorBoard
        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        # Write into 'results' (OrderedDict) for all run related data
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results["loss"] = loss
        results["accuracy"] = accuracy
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration

        # Record hyper-params into 'results'
        for k,v in self.run_params._asdict().items(): results[k] = v
        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data, orient = 'columns')

        # display epoch information and show progress
        clear_output(wait=True)
        display(df)

    # accumulate loss of batch into entire epoch loss
    def track_loss(self, loss):
        # multiply batch size so variety of batch sizes can be compared
        self.epoch_loss += loss.item() * self.loader.batch_size

    # accumulate number of corrects of batch into entire epoch num_correct
    def track_num_correct(self, preds, labels):
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        return preds.argmax(dim=1).eq(labels).sum().item()

    # save end results of all runs into csv, json for further a
    def save(self, fileName):

        pd.DataFrame.from_dict(
            self.run_data, 
            orient = 'columns',
        ).to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [28]:
# Run multiple networks through testing framework
networks = {
    'no_batch_norm': Network1,
    'batch_norm': NetworkBN,
    'layer_norm': NetworkLN,
    'instance_norm': NetworkIN,
    'group_norm': NetworkGN
}
epochs = 20

In [29]:
# Test different configurations
# for every run [value] that is going to be used e.g [.001, .01] = two runs
params = OrderedDict(
    lr = [.0005],
    batch_size = [2, 32],
    num_workers = [1],
    device = ["cuda"],
    trainset = ["normal"],
    # try all the values in the dict network1, network2
    network = list(networks.keys())
)
m = RunManager()
# active run or current run
for run in RunBuilder.get_runs(params):
    
    device = torch.device(run.device)
    # redefine the network
    network = networks[run.network].to(device)
    loader = DataLoader(trainsets[run.trainset], batch_size = run.batch_size, num_workers = run.num_workers, 
                       pin_memory = True) 
    optimizer = optim.Adam(network.parameters(), lr = run.lr) 
    
    m.begin_run(run, network, loader)
    for epoch in range(epochs):
        m.begin_epoch()
        for batch in loader:
            
            images = batch[0].to(device)
            labels = batch[1].to(device)
            preds = network(images)
            loss = F.cross_entropy(preds, labels)
            #7.9
            #optimizer.zero_grad()
            #8 sec
            for p in network.parameters(): p.grad = None
            loss.backward() # Calculate gradients
            optimizer.step() # Update Weights
            
            m.track_loss(loss)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
m.save("results")

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device,trainset,network
0,1,1,0.501964,0.813250,104.005440,104.097855,0.0005,2,1,cuda,normal,no_batch_norm
1,1,2,0.357416,0.868633,101.602623,205.779470,0.0005,2,1,cuda,normal,no_batch_norm
2,1,3,0.320824,0.879733,102.224217,308.073121,0.0005,2,1,cuda,normal,no_batch_norm
3,1,4,0.299649,0.888950,101.741824,409.880468,0.0005,2,1,cuda,normal,no_batch_norm
4,1,5,0.283599,0.894933,100.488043,510.431862,0.0005,2,1,cuda,normal,no_batch_norm
...,...,...,...,...,...,...,...,...,...,...,...,...
195,10,16,0.058070,0.978417,9.381324,153.329192,0.0005,32,1,cuda,normal,group_norm
196,10,17,0.056952,0.978700,9.394866,162.814431,0.0005,32,1,cuda,normal,group_norm
197,10,18,0.055642,0.979683,9.403691,172.311783,0.0005,32,1,cuda,normal,group_norm
198,10,19,0.053220,0.979983,9.996461,182.390891,0.0005,32,1,cuda,normal,group_norm


In [34]:
pd.DataFrame.from_dict(m.run_data).sort_values("accuracy", ascending = False)

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device,trainset,network
178,9,19,0.028089,0.989967,11.541177,222.439402,0.0005,32,1,cuda,normal,instance_norm
179,9,20,0.027090,0.989917,11.553787,234.079134,0.0005,32,1,cuda,normal,instance_norm
176,9,17,0.029152,0.989750,11.535239,199.222934,0.0005,32,1,cuda,normal,instance_norm
177,9,18,0.028601,0.989533,11.501659,210.810308,0.0005,32,1,cuda,normal,instance_norm
175,9,16,0.031225,0.988800,11.610630,187.603899,0.0005,32,1,cuda,normal,instance_norm
...,...,...,...,...,...,...,...,...,...,...,...,...
28,2,9,0.870528,0.671367,127.477228,1148.852623,0.0005,2,1,cuda,normal,batch_norm
23,2,4,0.879747,0.663950,127.726983,510.655797,0.0005,2,1,cuda,normal,batch_norm
22,2,3,0.933278,0.643617,127.398721,382.845248,0.0005,2,1,cuda,normal,batch_norm
21,2,2,0.979345,0.634617,127.382731,255.363166,0.0005,2,1,cuda,normal,batch_norm


In [36]:
pd.set_option('display.max_rows', None)

In [37]:
pd.DataFrame.from_dict(m.run_data).sort_values("accuracy", ascending = False)

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device,trainset,network
178,9,19,0.028089,0.989967,11.541177,222.439402,0.0005,32,1,cuda,normal,instance_norm
179,9,20,0.02709,0.989917,11.553787,234.079134,0.0005,32,1,cuda,normal,instance_norm
176,9,17,0.029152,0.98975,11.535239,199.222934,0.0005,32,1,cuda,normal,instance_norm
177,9,18,0.028601,0.989533,11.501659,210.810308,0.0005,32,1,cuda,normal,instance_norm
175,9,16,0.031225,0.9888,11.61063,187.603899,0.0005,32,1,cuda,normal,instance_norm
174,9,15,0.032318,0.988633,12.152803,175.903975,0.0005,32,1,cuda,normal,instance_norm
173,9,14,0.03483,0.987717,11.53331,163.667644,0.0005,32,1,cuda,normal,instance_norm
172,9,13,0.036655,0.986667,11.574715,152.053688,0.0005,32,1,cuda,normal,instance_norm
171,9,12,0.038866,0.98585,11.514431,140.395772,0.0005,32,1,cuda,normal,instance_norm
170,9,11,0.040686,0.985283,11.526553,128.795341,0.0005,32,1,cuda,normal,instance_norm


In [38]:
# helper function to calculate all predictions of train set
def get_all_preds(model, loader):
    all_preds = torch.tensor([])
    for batch in loader:
        images, labels = batch

        preds = model(images)
        all_preds = torch.cat(
            (all_preds, preds),
            dim = 0
        )
    return all_preds

In [40]:
# bigger batch size since we only do FP
#prediction_loader = torch.utils.data.DataLoader(train_set, batch_size=1000)
#train_preds = get_all_preds(network, prediction_loader)

In [None]:
!pip install scikit-plot

In [None]:
# use scikitplot to plot the confusion matrix
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix
import scikitplot as skplt

cm = confusion_matrix(train_set.targets, train_preds.argmax(dim=1))
cm

In [None]:
skplt.metrics.plot_confusion_matrix(train_set.targets,train_preds.argmax(dim=1), normalize=True)

In [None]:
# use ngrok to display TensorBoard on Colab
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip


In [None]:
LOG_DIR = './runs'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

In [None]:
get_ipython().system_raw('./ngrok http 6006 &')


In [None]:
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"