In [2]:
## R-Dropout (regularized dropout) applied to the Fashion-MNIST dataset
### This code is adapted from an official PyTorch tutorial; see https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

batchsize = 32

# The train and test splits are built identically apart from the `train` flag,
# so construct them in one pass (train split first, then test split).
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train,
        download=True,
        transform=ToTensor(),
    )
    for is_train in (True, False)
)

# Wrap each split in a DataLoader using the shared batch size; no shuffling is requested.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batchsize)
    for split in (training_data, test_data)
)

In [None]:
# define an MLP model
class NeuralNetwork(nn.Module):
    """Fully connected classifier with dropout after each hidden ReLU layer.

    The input is flattened to 28*28 features, passed through two 512-unit
    hidden layers (Linear -> ReLU -> Dropout) and projected to 10 output
    scores. No softmax is applied here; callers normalise the output.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        hidden_units = 512
        # Layers are created in forward order so the Sequential indices
        # (0, 3, 6 for the Linear layers) stay the same.
        layers = [
            nn.Linear(28*28, hidden_units),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden_units, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw output scores (logits) for the batch `x`."""
        return self.linear_relu_stack(self.flatten(x))

In [21]:
# Initialize the loss function
import torch
import torch.nn as nn

# Shared loss building blocks. KL is summed over every element of the batch,
# and both of its arguments are expected to be log-probabilities.
log_softmax = nn.LogSoftmax(dim=-1)
kl_loss_fn = nn.KLDivLoss(reduction="sum", log_target=True)
nll_loss_fn = nn.NLLLoss()


def r_dropout_loss(model, data, y, alpha=0.05):
    """Compute the R-Dropout objective for one batch.

    The batch is duplicated and pushed through `model` in a single forward
    pass, so each sample is predicted twice. The result is

        0.5 * (NLL_1 + NLL_2) + alpha * 0.5 * (KL_12 + KL_21)

    where the KL terms use the module-level `kl_loss_fn` (sum reduction) and
    the NLL terms use the module-level `nll_loss_fn`.

    If `model` is deterministic (e.g. dropout is disabled) both halves of the
    output coincide and the KL term is zero.

    Args:
        model: callable mapping a batch of inputs to per-class scores.
        data: input batch; it is concatenated with itself along dim 0.
        y: target class indices for the original (non-duplicated) batch.
        alpha: regularization coefficient of the KL term; keep it small.

    Returns:
        Scalar loss tensor.
    """
    n_samples = data.size(0)
    doubled = torch.cat((data, data), dim=0)
    log_probs = log_softmax(model(doubled))

    # First half and second half are the two predictions for the same samples.
    first_pass = log_probs[:n_samples]
    second_pass = log_probs[n_samples:]

    nll_term = 0.5 * (nll_loss_fn(first_pass, y) + nll_loss_fn(second_pass, y))
    # Symmetrised KL: average of both directions.
    kl_term = 0.5 * (kl_loss_fn(first_pass, second_pass) + kl_loss_fn(second_pass, first_pass))
    return nll_term + alpha * kl_term

In [22]:
# define training loop and test loop
def train_loop(dataloader, model, nll_loss_fn, optimizer, R_Dropout=False, alpha=.05):
    """Run one optimisation pass over `dataloader`.

    Args:
        dataloader: iterable of `(X, y)` batches; `dataloader.dataset` must
            support `len()` (used only for the progress message).
        model: network to train; it is switched to training mode here.
        nll_loss_fn: loss applied to log-probabilities in the plain
            (non-R-Dropout) branch. It is not forwarded to `r_dropout_loss`.
        optimizer: optimizer holding the model parameters.
        R_Dropout: if True, the batch loss comes from `r_dropout_loss`.
        alpha: KL coefficient passed to `r_dropout_loss` when `R_Dropout` is True.
    """
    size = len(dataloader.dataset)
    # Dropout is only active in training mode. R-Dropout relies on two
    # different dropout masks, so make the mode explicit instead of depending
    # on whatever state the caller left the model in.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        if R_Dropout:
            loss = r_dropout_loss(model, X, y, alpha)
        else:
            pred = model(X)
            # torch.log_softmax keeps this branch independent of module-level
            # objects defined in other notebook cells.
            loss = nll_loss_fn(torch.log_softmax(pred, dim=-1), y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 400 == 0:
            # Use separate names so `loss` keeps referring to the tensor.
            loss_value, current = loss.item(), batch * len(y)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, nll_loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            log_probs = log_softmax(pred)
            test_loss += nll_loss_fn(log_probs, y).item()
            correct += (log_probs.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct

In [14]:
# training without R Dropout
learning_rate = 1e-3
epochs = 10
model = NeuralNetwork()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    # Pass `nll_loss_fn`, the NLLLoss instance defined alongside `r_dropout_loss`.
    # No `loss_fn` is defined anywhere in this notebook, so that name fails on a fresh kernel.
    train_loop(train_dataloader, model, nll_loss_fn, optimizer,R_Dropout=False)
    # `accuracy` keeps the last epoch's value; the R Dropout cell prints it for comparison.
    accuracy = test_loop(test_dataloader, model, nll_loss_fn)
print("Training without R Dropout is done! Training with R Dropout begins.")

Epoch 1
-------------------------------
loss: 2.261373  [    0/60000]
loss: 0.553106  [12800/60000]
loss: 0.592600  [25600/60000]
loss: 0.445376  [38400/60000]
loss: 0.397838  [51200/60000]
Test Error: 
 Accuracy: 81.3%, Avg loss: 0.512097 

Epoch 2
-------------------------------
loss: 0.421200  [    0/60000]
loss: 0.267619  [12800/60000]
loss: 0.521208  [25600/60000]
loss: 0.458661  [38400/60000]
loss: 0.522558  [51200/60000]
Test Error: 
 Accuracy: 82.3%, Avg loss: 0.504001 

Epoch 3
-------------------------------
loss: 0.373268  [    0/60000]
loss: 0.423727  [12800/60000]
loss: 0.623614  [25600/60000]
loss: 0.318316  [38400/60000]
loss: 0.515666  [51200/60000]
Test Error: 
 Accuracy: 83.2%, Avg loss: 0.467599 

Epoch 4
-------------------------------
loss: 0.313050  [    0/60000]
loss: 0.325350  [12800/60000]
loss: 0.395733  [25600/60000]
loss: 0.434972  [38400/60000]
loss: 0.476629  [51200/60000]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.481612 

Epoch 5
------------------------

In [23]:
# training with R Dropout
# Start from a freshly initialised network and optimizer; `learning_rate`,
# `epochs` and `accuracy` are the values set by the baseline training cell.
model = NeuralNetwork()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(
        dataloader=train_dataloader,
        model=model,
        nll_loss_fn=nll_loss_fn,
        optimizer=optimizer,
        R_Dropout=True,
        alpha=.05,
    )
    # Only the accuracy of the final epoch is kept for the comparison below.
    accuracy_R_Dropout = test_loop(
        dataloader=test_dataloader,
        model=model,
        nll_loss_fn=nll_loss_fn,
    )
print("Training with R Dropout is done!")
print(f"Regular training model gives an accuracy of {(100*accuracy):>0.2f}%")
print(f"R Dropout training model gives an accuracy of {(100*accuracy_R_Dropout):>0.2f}%")

Epoch 1
-------------------------------
loss: 2.312427  [    0/60000]
loss: 0.546848  [12800/60000]
loss: 0.843776  [25600/60000]
loss: 0.655903  [38400/60000]
loss: 0.589552  [51200/60000]
Test Error: 
 Accuracy: 81.2%, Avg loss: 0.505939 

Epoch 2
-------------------------------
loss: 0.564342  [    0/60000]
loss: 0.461426  [12800/60000]
loss: 0.587409  [25600/60000]
loss: 0.479830  [38400/60000]
loss: 0.432772  [51200/60000]
Test Error: 
 Accuracy: 82.7%, Avg loss: 0.477541 

Epoch 3
-------------------------------
loss: 0.485433  [    0/60000]
loss: 0.480954  [12800/60000]
loss: 0.439569  [25600/60000]
loss: 0.469620  [38400/60000]
loss: 0.434831  [51200/60000]
Test Error: 
 Accuracy: 82.9%, Avg loss: 0.463533 

Epoch 4
-------------------------------
loss: 0.395898  [    0/60000]
loss: 0.346613  [12800/60000]
loss: 0.500517  [25600/60000]
loss: 0.597435  [38400/60000]
loss: 0.461014  [51200/60000]
Test Error: 
 Accuracy: 83.6%, Avg loss: 0.444258 

Epoch 5
------------------------

R-Dropout indeed improves the performance of the model a little. You can check out the above code if you are interested.