In [10]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import tensorflow as tf
import numpy as np
import scipy.misc 
from io import BytesIO 

In [11]:
class Logger(object):
    """Small helper that writes TensorBoard summaries through a TF FileWriter.

    Every ``*_summary`` method builds a ``tf.Summary`` proto by hand, passes it
    to ``self.writer`` and then flushes the writer.
    """

    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable.

        Args:
            tag: Name under which the value is recorded.
            value: The scalar to record. It is passed through ``float()`` so
                that anything convertible to a Python float is accepted.
            step: Step index attached to the summary.
        """
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=float(value))])
        self.writer.add_summary(summary, step)
        self.writer.flush()

    def image_summary(self, tag, images, step):
        """Log a list of images.

        Args:
            tag: Base name; image ``i`` is recorded as ``"<tag>/<i>"``.
            images: Iterable of array-like images. ``shape[0]`` / ``shape[1]``
                of each item are stored as height / width.
            step: Step index attached to the summary.
        """
        img_summaries = []
        for i, img in enumerate(images):
            # PNG data is binary, so it is encoded into an in-memory bytes buffer.
            # NOTE(review): scipy.misc.toimage has been deprecated/removed in
            # newer SciPy releases -- confirm the SciPy version this runs on.
            buf = BytesIO()
            scipy.misc.toimage(img).save(buf, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(encoded_image_string=buf.getvalue(),
                                       height=img.shape[0],
                                       width=img.shape[1])
            # Create a Summary value
            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)
        self.writer.flush()

    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values.

        Args:
            tag: Name under which the histogram is recorded.
            values: Array-like of numbers (NumPy array, list, tuple, ...).
            step: Step index attached to the summary.
            bins: Forwarded to ``np.histogram``.

        Raises:
            ValueError: If ``values`` is empty (``np.min`` / ``np.max`` cannot
                reduce an empty array).
        """
        # Accept plain sequences as well as arrays; .size and ** need an ndarray.
        values = np.asarray(values)

        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(values.size)
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values ** 2))

        # np.histogram returns len(counts) + 1 edges; the leading edge is
        # dropped so that there is exactly one limit per bucket.
        hist.bucket_limit.extend(bin_edges[1:].tolist())
        hist.bucket.extend(counts.tolist())

        # Create and write Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()

In [12]:
# MNIST training split stored under ./data (download enabled), images converted to tensors
dataset = dsets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Input pipeline: shuffled mini-batches of 100 samples
data_loader = torch.utils.data.DataLoader(
    dataset=dataset,
    shuffle=True,
    batch_size=100,
)

In [13]:
def to_np(x):
    """Return the data held by ``x`` as a NumPy array, moving it to the CPU first."""
    host_tensor = x.data.cpu()
    return host_tensor.numpy()

def to_var(x):
    """Wrap ``x`` in a ``Variable``, moving it to the GPU first when CUDA is available."""
    placed = x.cuda() if torch.cuda.is_available() else x
    return Variable(placed)

In [14]:
# Neural Network Model (1 hidden layer)
class Net(nn.Module):
    """Fully connected classifier: linear layer -> ReLU -> linear layer."""

    def __init__(self, input_size=784, hidden_size=500, num_classes=10):
        super(Net, self).__init__()
        # The attribute names below determine the names reported by
        # named_parameters(), so they are part of the model's interface.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Apply fc1, the ReLU non-linearity and fc2, and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)


In [15]:
# Instantiate the model with its default sizes; use the GPU when CUDA is available
net = Net()
net = net.cuda() if torch.cuda.is_available() else net

In [16]:
# TensorBoard logger writing its event files to ./logs
logger = Logger(log_dir='./logs')

In [17]:
# Cross-entropy objective, optimized with Adam over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-5)

# Batch iterator plus the bookkeeping used by the training loop
data_iter = iter(data_loader)
iter_per_epoch = len(data_loader)
total_step = 50000

In [18]:
# Start training
for step in range(total_step):

    # Fetch the next batch, restarting the loader once it is exhausted.
    # Reacting to StopIteration (instead of counting steps) uses every batch
    # of every epoch and keeps this cell re-runnable when `data_iter` was
    # left partially consumed by an earlier, interrupted run.
    try:
        images, labels = next(data_iter)
    except StopIteration:
        data_iter = iter(data_loader)
        images, labels = next(data_iter)

    # Flatten each image to a vector and convert both tensors to variables
    images, labels = to_var(images.view(images.size(0), -1)), to_var(labels)

    # Forward, backward and optimize
    optimizer.zero_grad()  # zero the gradient buffer
    outputs = net(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    if (step+1) % 100 == 0:
        # Accuracy is only reported here, so it is only computed here
        _, argmax = torch.max(outputs, 1)
        accuracy = (labels == argmax.squeeze()).float().mean()

        # Going through NumPy yields a Python float whether the result is a
        # one-element tensor or a 0-dim tensor.
        loss_value = to_np(loss).item()
        accuracy_value = to_np(accuracy).item()

        print ('Step [%d/%d], Loss: %.4f, Acc: %.2f' 
               %(step+1, total_step, loss_value, accuracy_value))

        #============ TensorBoard logging ============#
        # (1) Log the scalar values
        scalar_info = {
            'loss': loss_value,
            'accuracy': accuracy_value
        }

        for tag, value in scalar_info.items():
            logger.scalar_summary(tag, value, step+1)

        # (2) Log values and gradients of the parameters (histogram)
        for tag, value in net.named_parameters():
            tag = tag.replace('.', '/')
            logger.histo_summary(tag, to_np(value), step+1)
            # Parameters that took no part in the backward pass have no gradient
            if value.grad is not None:
                logger.histo_summary(tag+'/grad', to_np(value.grad), step+1)

        # (3) Log the images (first 10 of the batch, reshaped to 28x28)
        image_info = {
            'images': to_np(images.view(-1, 28, 28)[:10])
        }

        # Distinct loop names so the batch variable `images` is not rebound
        for tag, image_batch in image_info.items():
            logger.image_summary(tag, image_batch, step+1)

Step [100/50000], Loss: 2.2013, Acc: 0.52
Step [200/50000], Loss: 2.0939, Acc: 0.66
Step [300/50000], Loss: 1.9519, Acc: 0.80
Step [400/50000], Loss: 1.8311, Acc: 0.79
Step [500/50000], Loss: 1.6748, Acc: 0.85
Step [600/50000], Loss: 1.5053, Acc: 0.87
Step [700/50000], Loss: 1.4880, Acc: 0.78
Step [800/50000], Loss: 1.3246, Acc: 0.85
Step [900/50000], Loss: 1.3188, Acc: 0.77
Step [1000/50000], Loss: 1.2063, Acc: 0.80
Step [1100/50000], Loss: 1.1358, Acc: 0.82
Step [1200/50000], Loss: 0.9550, Acc: 0.87
Step [1300/50000], Loss: 0.8603, Acc: 0.92
Step [1400/50000], Loss: 0.9213, Acc: 0.83
Step [1500/50000], Loss: 0.9349, Acc: 0.82
Step [1600/50000], Loss: 0.7830, Acc: 0.87
Step [1700/50000], Loss: 0.7285, Acc: 0.87
Step [1800/50000], Loss: 0.7723, Acc: 0.83
Step [1900/50000], Loss: 0.7345, Acc: 0.87
Step [2000/50000], Loss: 0.7791, Acc: 0.80
Step [2100/50000], Loss: 0.5747, Acc: 0.91
Step [2200/50000], Loss: 0.6322, Acc: 0.92
Step [2300/50000], Loss: 0.5405, Acc: 0.90
Step [2400/50000], L

Step [19000/50000], Loss: 0.1774, Acc: 0.94
Step [19100/50000], Loss: 0.1820, Acc: 0.95
Step [19200/50000], Loss: 0.2531, Acc: 0.89
Step [19300/50000], Loss: 0.2196, Acc: 0.91
Step [19400/50000], Loss: 0.1828, Acc: 0.96
Step [19500/50000], Loss: 0.1662, Acc: 0.95
Step [19600/50000], Loss: 0.1662, Acc: 0.95
Step [19700/50000], Loss: 0.3422, Acc: 0.91
Step [19800/50000], Loss: 0.1922, Acc: 0.94
Step [19900/50000], Loss: 0.2694, Acc: 0.93
Step [20000/50000], Loss: 0.1457, Acc: 0.97
Step [20100/50000], Loss: 0.1884, Acc: 0.96
Step [20200/50000], Loss: 0.2385, Acc: 0.92
Step [20300/50000], Loss: 0.1596, Acc: 0.96
Step [20400/50000], Loss: 0.2116, Acc: 0.94
Step [20500/50000], Loss: 0.2128, Acc: 0.95
Step [20600/50000], Loss: 0.2303, Acc: 0.93
Step [20700/50000], Loss: 0.1924, Acc: 0.95
Step [20800/50000], Loss: 0.2406, Acc: 0.93
Step [20900/50000], Loss: 0.2893, Acc: 0.89
Step [21000/50000], Loss: 0.2244, Acc: 0.94
Step [21100/50000], Loss: 0.1806, Acc: 0.98
Step [21200/50000], Loss: 0.1253

KeyboardInterrupt: 