From c96a0b4aa118d15326d4de29817e02b132a955d3 Mon Sep 17 00:00:00 2001
From: laubonghaudoi <liubanghoudai24@gmail.com>
Date: Sun, 2 Dec 2018 00:24:34 -0800
Subject: [PATCH 1/4] Update README and comments

---
 CapsNet.py     | 4 ++--
 Decoder.py     | 4 ++--
 DigitCaps.py   | 6 +++---
 PrimaryCaps.py | 4 ++--
 README.md      | 4 ++--
 main.py        | 2 +-
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/CapsNet.py b/CapsNet.py
index 88aaac1..2a611f6 100644
--- a/CapsNet.py
+++ b/CapsNet.py
@@ -34,10 +34,10 @@ def __init__(self, opt):
     def forward(self, x):
         '''
         Args:
-            `x`: [batch_size, 1, 28, 28] A MNIST sample
+            `x`: [batch_size, 1, 28, 28] MNIST samples
         
         Return:
-            `v`: [batch_size, 10, 16] CapsNet outputs, 16D rediction vectors of
+            `v`: [batch_size, 10, 16] CapsNet outputs, 16D prediction vectors of
                 10 digit capsules
 
         The dimension transformation procedure of an input tensor in each layer:
diff --git a/Decoder.py b/Decoder.py
index 2bfbc58..974ec47 100644
--- a/Decoder.py
+++ b/Decoder.py
@@ -28,8 +28,8 @@ def __init__(self, opt):
     def forward(self, v, target):
         '''
         Args:
-            v: [batch_size, 10, 16]
-            target: [batch_size, 10]
+            `v`: [batch_size, 10, 16]
+            `target`: [batch_size, 10]
 
         Return:
             `reconstruction`: [batch_size, 784]
diff --git a/DigitCaps.py b/DigitCaps.py
index 17b5eac..bc2d46c 100644
--- a/DigitCaps.py
+++ b/DigitCaps.py
@@ -7,8 +7,8 @@
 class DigitCaps(nn.Module):
     '''
     The `DigitCaps` layer consists of 10 16D capsules. Compared to the traditional
-    scalar output neurons in fully connected layers(FCN), the `DigitCaps` layer
-    can be seen as an FCN with 16-dimensional output neurons, where we call
+    scalar output neurons in fully connected networks (FCN), the `DigitCaps` layer
+    can be seen as an FCN with ten 16-dimensional output neurons; we call
     these neurons "capsules".
 
     In this layer, we take the input `[1152, 8]` tensor `u` as 1152 [8,] vectors
@@ -34,7 +34,7 @@ def __init__(self, opt):
         The the coupling coefficients `b` [1152, 10] is a temporary variable which
         does NOT belong to the layer's parameters. In other words, `b` is not updated
         by gradient back-propagations. Instead, we update `b` by Dynamic Routing
-        in every forward propagation. See docstring of `self.forward` for details.
+        in every forward propagation. See the docstring of `self.forward` for details.
         '''
         super(DigitCaps, self).__init__()
         self.opt = opt
diff --git a/PrimaryCaps.py b/PrimaryCaps.py
index d688519..9269d63 100644
--- a/PrimaryCaps.py
+++ b/PrimaryCaps.py
@@ -7,7 +7,7 @@ class PrimaryCaps(nn.Module):
     '''
     The `PrimaryCaps` layer consists of 32 capsule units. Each unit takes
     the output of the `Conv1` layer, which is a `[256, 20, 20]` feature
-    tensor (ignoring `batch_size`), and performs a 2D convolution with 8
+    tensor (omitting `batch_size`), and performs a 2D convolution with 8
     output channels, kernel size 9 and stride 2, thus outputing a [8, 6, 6]
     tensor. In other words, you can see these 32 capsules as 32 paralleled 2D
     convolutional layers. Then we concatenate these 32 capsules' outputs and
@@ -16,7 +16,7 @@ class PrimaryCaps(nn.Module):
 
     As indicated in Section 4, Page 4 in the paper, *One can see PrimaryCaps
     as a Convolution layer with Eq.1 as its block non-linearity.*, outputs of
-    the `PrimaryCaps` layer are squashed before passing to the next layer.
+    the `PrimaryCaps` layer are squashed before being passed to the next layer.
 
     Reference: Section 4, Fig. 1
     '''
diff --git a/README.md b/README.md
index 13226c7..831295b 100644
--- a/README.md
+++ b/README.md
@@ -13,13 +13,13 @@ As I am busy these days, I might not have time to checkout and fix every issue.
 
 ## Requirements
 
-- pytorch 0.2.0
+- pytorch 0.4.1
 - torchvision
 - pytorch-extras (For one-hot vector conversion)
 - tensorboard-pytorch
 - tqdm
 
-All codes are tested under Python 3.6.3.
+All code is tested under Python 3.6.
 
 ## Get Started
 
diff --git a/main.py b/main.py
index e8cf27c..aaa5fd7 100644
--- a/main.py
+++ b/main.py
@@ -17,7 +17,7 @@
 8. `train()` and `test()` in `main.py`
 
 You might find helpful with the paper *Dynamic Routing Between Capsules*
-at your hand for referencing.
+at hand for reference when reading this code.
 """
 
 import os

From 45f60844c3efb60ec0315506c5ddd9b97c91895f Mon Sep 17 00:00:00 2001
From: laubonghaudoi <liubanghoudai24@gmail.com>
Date: Mon, 3 Dec 2018 08:04:37 -0800
Subject: [PATCH 2/4] Fix Decoder

---
 Decoder.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Decoder.py b/Decoder.py
index 974ec47..7ecda61 100644
--- a/Decoder.py
+++ b/Decoder.py
@@ -53,8 +53,8 @@ def forward(self, v, target):
         assert v_masked.size() == torch.Size([batch_size, 16])
 
         # Forward
-        v = self.fc1(v_masked)
-        v = self.fc2(v)
+        v = F.relu(self.fc1(v_masked))
+        v = F.relu(self.fc2(v))
         reconstruction = torch.sigmoid(self.fc3(v))
 
         assert reconstruction.size() == torch.Size([batch_size, 784])

From 21614ec084292de38f682e1ab9ddbef970684340 Mon Sep 17 00:00:00 2001
From: Alexbanana19 <alexlimh23@gmail.com>
Date: Tue, 4 Dec 2018 04:59:56 +0000
Subject: [PATCH 3/4] fix cuda memory error

---
 .gitignore |   1 +
 "\\"       | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 main.py    |  24 ++++---
 3 files changed, 206 insertions(+), 11 deletions(-)
 create mode 100644 "\\"

diff --git a/.gitignore b/.gitignore
index 6a564d6..f0d97cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ __pycache__/
 # data directory
 data/
 ckpt/
+runs/
diff --git "a/\\" "b/\\"
new file mode 100644
index 0000000..5da0456
--- /dev/null
+++ "b/\\"
@@ -0,0 +1,192 @@
+"""
+A tutorial-style implementation of CapsNet in PyTorch.
+
+Paper link: https://arxiv.org/abs/1710.09829v2
+
+@author laubonghaudoi
+
+For better understanding, read the codes and comments in the following order:
+
+1. `__main__` in `main.py`
+2. `utils.py`
+3. `CapsNet.__init__()` and `CapsNet.forward()` in `CapsNet.py`
+4. `PrimaryCaps.py`
+5. `DigitCaps.py`
+6. `Decoder.py`
+7. `CapsNet.marginal_loss()`, `CapsNet.reconstruction_loss()` and` CapsNet.loss()` in `CapsNet.py`
+8. `train()` and `test()` in `main.py`
+
+You might find helpful with the paper *Dynamic Routing Between Capsules*
+at your hand for referencing when reading these codes.
+"""
+
+import os
+import time
+from tqdm import *
+
+import torch
+import torch_extras
+import torchvision.utils as vutils
+from tensorboardX import SummaryWriter
+from torch.autograd import Variable
+
+from CapsNet import CapsNet
+from utils import get_opts, get_dataloader
+
+# PyTorch does not provide one-hot vector conversion, we achieve this
+# by pytorch-extras
+setattr(torch, 'one_hot', torch_extras.one_hot)
+
+
+def train(opt, train_loader, test_loader, model, writer):
+    num_data = len(train_loader.dataset)
+    num_batches = len(train_loader)
+
+    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
+    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5)
+
+    model.train()
+    for epoch in range(opt.epochs):
+        # Update learning rate
+        scheduler.step()
+        print('Learning rate: {}'.format(scheduler.get_lr()[0]))
+
+        start_time = time.time()
+        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
+            batch_size = data.size(0)
+            global_step = batch_idx + epoch * num_batches
+
+            # Transform to one-hot indices: [batch_size, 10]
+            target = torch.one_hot((batch_size, 10), target.view(-1, 1))
+            assert target.size() == torch.Size([batch_size, 10])
+
+            # Use GPU if available
+            data, target = Variable(data), Variable(target)
+            if opt.use_cuda & torch.cuda.is_available():
+                data, target = data.cuda(), target.cuda()
+
+            # Train step
+            optimizer.zero_grad()
+            output = model(data)
+
+            L, m_loss, r_loss = model.loss(output, target, data)
+            L.backward()
+
+            optimizer.step()
+
+            # Log losses
+            writer.add_scalar('train/loss', L.item(), global_step)
+            writer.add_scalar('train/marginal_loss', m_loss.item(), global_step)
+            writer.add_scalar('train/reconstruction_loss', r_loss.item(), global_step)
+
+            # Print losses
+            if batch_idx % opt.print_every == 0:
+                tqdm.write('Epoch: {}    Loss: {:.6f}   Marginal loss: {:.6f}   Recons. loss: {:.6f}'.format(
+                    epoch, L.item(), m_loss.item(), r_loss.item()))
+
+        # Print time elapsed for every epoch
+        end_time = time.time()
+        print('Epoch {} takes {:.0f} seconds.'.format(
+            epoch, end_time - start_time))
+
+        # Test model
+        test(opt, test_loader, model, writer, epoch, num_batches)
+
+
+
+def test(opt, test_loader, model, writer, epoch, num_batches):
+    loss = 0
+    margin_loss = 0
+    recons_loss = 0
+
+    correct = 0
+
+    step = epoch * num_batches + num_batches
+    model.eval()
+    for data, target in test_loader:
+        # Store the indices for calculating accuracy
+        label = target.unsqueeze(0).type(torch.LongTensor)
+
+        batch_size = data.size(0)
+        # Transform to one-hot indices: [batch_size, 10]
+        target = torch.one_hot((batch_size, 10), target.view(-1, 1))
+        assert target.size() == torch.Size([batch_size, 10])
+
+        # Use GPU if available
+        data, target = Variable(data, volatile=True), Variable(target)
+        if opt.use_cuda & torch.cuda.is_available():
+            data, target = data.cuda(), target.cuda()
+
+        # Output predictions
+        output = model(data)
+        L, m_loss, r_loss = model.loss(output, target, data)
+        loss += L.item()
+        margin_loss += m_loss.item()
+        recons_loss += r_loss.item()
+
+        # Count correct numbers
+        # norms: [batch_size, 10, 16]
+        norms = torch.sqrt(torch.sum(output**2, dim=2))
+        # pred: [batch_size,]
+        pred = norms.data.max(1, keepdim=True)[1].type(torch.LongTensor)
+        correct += pred.eq(label.view_as(pred)).cpu().sum()
+
+    # Visualize reconstructed images of the last batch
+    recons = model.Decoder(output, target)
+    recons = recons.view(batch_size, 1, 28, 28)
+    recons = vutils.make_grid(recons.data, normalize=True, scale_each=True)
+    writer.add_image('Image-{}'.format(step), recons, step)
+
+    # Log test losses
+    loss /= len(test_loader)
+    margin_loss /= len(test_loader)
+    recons_loss /= len(test_loader)
+    acc = correct / len(test_loader.dataset)
+    writer.add_scalar('test/loss', loss.item(), step)
+    writer.add_scalar('test/marginal_loss', margin_loss.item(), step)
+    writer.add_scalar('test/reconstruction_loss', recons_loss.item(), step)
+    writer.add_scalar('test/accuracy', acc, step)
+
+    # Print test losses
+    print('\nTest loss: {:.4f}   Marginal loss: {:.4f}   Recons loss: {:.4f}'.format(
+        loss.item(), margin_loss.item(), recons_loss.item()))
+    print('Accuracy: {}/{} ({:.0f}%)\n'.format(correct, len(test_loader.dataset),
+        100. * correct / len(test_loader.dataset)))
+
+    # Checkpoint model
+    torch.save(model, './ckpt/epoch_{}-loss_{:.6f}-acc_{:.6f}.pt'.format(
+        epoch, loss.item(), acc))
+
+
+if __name__ == "__main__":
+    # Default configurations
+    opt = get_opts()
+    train_loader, test_loader = get_dataloader(opt)
+
+    # Initialize CapsNet
+    model = CapsNet(opt)
+
+    # Enable GPU usage
+    if opt.use_cuda & torch.cuda.is_available():
+        model.cuda()
+
+    # Print the model architecture and parameters
+    print("Model architectures: ")
+    print(model)
+
+    print("\nSizes of parameters: ")
+    for name, param in model.named_parameters():
+        print("{}: {}".format(name, list(param.size())))
+    n_params = sum([p.nelement() for p in model.parameters()])
+    # The coupling coefficients b_ij are not included in the parameter list,
+    # we need to add them mannually, which is 1152 * 10 = 11520.
+    print('\nTotal number of parameters: %d \n' % (n_params+11520))
+
+    # Make model checkpoint directory
+    if not os.path.exists('ckpt'):
+        os.makedirs('ckpt')
+
+    # Start training
+    writer = SummaryWriter()
+    train(opt, train_loader, test_loader, model, writer)
+    writer.close()
diff --git a/main.py b/main.py
index aaa5fd7..90755c7 100644
--- a/main.py
+++ b/main.py
@@ -61,7 +61,8 @@ def train(opt, train_loader, test_loader, model, writer):
             assert target.size() == torch.Size([batch_size, 10])
 
             # Use GPU if available
-            data, target = Variable(data), Variable(target)
+            with torch.no_grad():
+            	data, target = Variable(data), Variable(target)
             if opt.use_cuda & torch.cuda.is_available():
                 data, target = data.cuda(), target.cuda()
 
@@ -113,23 +114,24 @@ def test(opt, test_loader, model, writer, epoch, num_batches):
         assert target.size() == torch.Size([batch_size, 10])
 
         # Use GPU if available
-        data, target = Variable(data, volatile=True), Variable(target)
+        with torch.no_grad():
+            data, target = Variable(data), Variable(target)
         if opt.use_cuda & torch.cuda.is_available():
             data, target = data.cuda(), target.cuda()
 
         # Output predictions
         output = model(data)
         L, m_loss, r_loss = model.loss(output, target, data)
-        loss += L
-        margin_loss += m_loss
-        recons_loss += r_loss
+        loss += L.item()
+        margin_loss += m_loss.item()
+        recons_loss += r_loss.item()
 
         # Count correct numbers
         # norms: [batch_size, 10, 16]
         norms = torch.sqrt(torch.sum(output**2, dim=2))
         # pred: [batch_size,]
         pred = norms.data.max(1, keepdim=True)[1].type(torch.LongTensor)
-        correct += pred.eq(label.view_as(pred)).cpu().sum()
+        correct += pred.eq(label.view_as(pred)).cpu().sum().item()
 
     # Visualize reconstructed images of the last batch
     recons = model.Decoder(output, target)
@@ -142,20 +144,20 @@ def test(opt, test_loader, model, writer, epoch, num_batches):
     margin_loss /= len(test_loader)
     recons_loss /= len(test_loader)
     acc = correct / len(test_loader.dataset)
-    writer.add_scalar('test/loss', loss.item(), step)
-    writer.add_scalar('test/marginal_loss', margin_loss.item(), step)
-    writer.add_scalar('test/reconstruction_loss', recons_loss.item(), step)
+    writer.add_scalar('test/loss', loss, step)
+    writer.add_scalar('test/marginal_loss', margin_loss, step)
+    writer.add_scalar('test/reconstruction_loss', recons_loss, step)
     writer.add_scalar('test/accuracy', acc, step)
 
     # Print test losses
     print('\nTest loss: {:.4f}   Marginal loss: {:.4f}   Recons loss: {:.4f}'.format(
-        loss.item(), margin_loss.item(), recons_loss.item()))
+        loss, margin_loss, recons_loss))
     print('Accuracy: {}/{} ({:.0f}%)\n'.format(correct, len(test_loader.dataset),
         100. * correct / len(test_loader.dataset)))
 
     # Checkpoint model
     torch.save(model, './ckpt/epoch_{}-loss_{:.6f}-acc_{:.6f}.pt'.format(
-        epoch, loss.item(), acc))
+        epoch, loss, acc))
 
 
 if __name__ == "__main__":

From ea9d7527e288cc09f345eca60fc50c448f97e5a8 Mon Sep 17 00:00:00 2001
From: laubonghaudoi <liubanghoudai24@gmail.com>
Date: Tue, 4 Dec 2018 00:02:56 -0800
Subject: [PATCH 4/4] Remove trash

---
 "\\" | 192 -----------------------------------------------------------
 1 file changed, 192 deletions(-)
 delete mode 100644 "\\"

diff --git "a/\\" "b/\\"
deleted file mode 100644
index 5da0456..0000000
--- "a/\\"
+++ /dev/null
@@ -1,192 +0,0 @@
-"""
-A tutorial-style implementation of CapsNet in PyTorch.
-
-Paper link: https://arxiv.org/abs/1710.09829v2
-
-@author laubonghaudoi
-
-For better understanding, read the codes and comments in the following order:
-
-1. `__main__` in `main.py`
-2. `utils.py`
-3. `CapsNet.__init__()` and `CapsNet.forward()` in `CapsNet.py`
-4. `PrimaryCaps.py`
-5. `DigitCaps.py`
-6. `Decoder.py`
-7. `CapsNet.marginal_loss()`, `CapsNet.reconstruction_loss()` and` CapsNet.loss()` in `CapsNet.py`
-8. `train()` and `test()` in `main.py`
-
-You might find helpful with the paper *Dynamic Routing Between Capsules*
-at your hand for referencing when reading these codes.
-"""
-
-import os
-import time
-from tqdm import *
-
-import torch
-import torch_extras
-import torchvision.utils as vutils
-from tensorboardX import SummaryWriter
-from torch.autograd import Variable
-
-from CapsNet import CapsNet
-from utils import get_opts, get_dataloader
-
-# PyTorch does not provide one-hot vector conversion, we achieve this
-# by pytorch-extras
-setattr(torch, 'one_hot', torch_extras.one_hot)
-
-
-def train(opt, train_loader, test_loader, model, writer):
-    num_data = len(train_loader.dataset)
-    num_batches = len(train_loader)
-
-    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
-    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5)
-
-    model.train()
-    for epoch in range(opt.epochs):
-        # Update learning rate
-        scheduler.step()
-        print('Learning rate: {}'.format(scheduler.get_lr()[0]))
-
-        start_time = time.time()
-        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
-            batch_size = data.size(0)
-            global_step = batch_idx + epoch * num_batches
-
-            # Transform to one-hot indices: [batch_size, 10]
-            target = torch.one_hot((batch_size, 10), target.view(-1, 1))
-            assert target.size() == torch.Size([batch_size, 10])
-
-            # Use GPU if available
-            data, target = Variable(data), Variable(target)
-            if opt.use_cuda & torch.cuda.is_available():
-                data, target = data.cuda(), target.cuda()
-
-            # Train step
-            optimizer.zero_grad()
-            output = model(data)
-
-            L, m_loss, r_loss = model.loss(output, target, data)
-            L.backward()
-
-            optimizer.step()
-
-            # Log losses
-            writer.add_scalar('train/loss', L.item(), global_step)
-            writer.add_scalar('train/marginal_loss', m_loss.item(), global_step)
-            writer.add_scalar('train/reconstruction_loss', r_loss.item(), global_step)
-
-            # Print losses
-            if batch_idx % opt.print_every == 0:
-                tqdm.write('Epoch: {}    Loss: {:.6f}   Marginal loss: {:.6f}   Recons. loss: {:.6f}'.format(
-                    epoch, L.item(), m_loss.item(), r_loss.item()))
-
-        # Print time elapsed for every epoch
-        end_time = time.time()
-        print('Epoch {} takes {:.0f} seconds.'.format(
-            epoch, end_time - start_time))
-
-        # Test model
-        test(opt, test_loader, model, writer, epoch, num_batches)
-
-
-
-def test(opt, test_loader, model, writer, epoch, num_batches):
-    loss = 0
-    margin_loss = 0
-    recons_loss = 0
-
-    correct = 0
-
-    step = epoch * num_batches + num_batches
-    model.eval()
-    for data, target in test_loader:
-        # Store the indices for calculating accuracy
-        label = target.unsqueeze(0).type(torch.LongTensor)
-
-        batch_size = data.size(0)
-        # Transform to one-hot indices: [batch_size, 10]
-        target = torch.one_hot((batch_size, 10), target.view(-1, 1))
-        assert target.size() == torch.Size([batch_size, 10])
-
-        # Use GPU if available
-        data, target = Variable(data, volatile=True), Variable(target)
-        if opt.use_cuda & torch.cuda.is_available():
-            data, target = data.cuda(), target.cuda()
-
-        # Output predictions
-        output = model(data)
-        L, m_loss, r_loss = model.loss(output, target, data)
-        loss += L.item()
-        margin_loss += m_loss.item()
-        recons_loss += r_loss.item()
-
-        # Count correct numbers
-        # norms: [batch_size, 10, 16]
-        norms = torch.sqrt(torch.sum(output**2, dim=2))
-        # pred: [batch_size,]
-        pred = norms.data.max(1, keepdim=True)[1].type(torch.LongTensor)
-        correct += pred.eq(label.view_as(pred)).cpu().sum()
-
-    # Visualize reconstructed images of the last batch
-    recons = model.Decoder(output, target)
-    recons = recons.view(batch_size, 1, 28, 28)
-    recons = vutils.make_grid(recons.data, normalize=True, scale_each=True)
-    writer.add_image('Image-{}'.format(step), recons, step)
-
-    # Log test losses
-    loss /= len(test_loader)
-    margin_loss /= len(test_loader)
-    recons_loss /= len(test_loader)
-    acc = correct / len(test_loader.dataset)
-    writer.add_scalar('test/loss', loss.item(), step)
-    writer.add_scalar('test/marginal_loss', margin_loss.item(), step)
-    writer.add_scalar('test/reconstruction_loss', recons_loss.item(), step)
-    writer.add_scalar('test/accuracy', acc, step)
-
-    # Print test losses
-    print('\nTest loss: {:.4f}   Marginal loss: {:.4f}   Recons loss: {:.4f}'.format(
-        loss.item(), margin_loss.item(), recons_loss.item()))
-    print('Accuracy: {}/{} ({:.0f}%)\n'.format(correct, len(test_loader.dataset),
-        100. * correct / len(test_loader.dataset)))
-
-    # Checkpoint model
-    torch.save(model, './ckpt/epoch_{}-loss_{:.6f}-acc_{:.6f}.pt'.format(
-        epoch, loss.item(), acc))
-
-
-if __name__ == "__main__":
-    # Default configurations
-    opt = get_opts()
-    train_loader, test_loader = get_dataloader(opt)
-
-    # Initialize CapsNet
-    model = CapsNet(opt)
-
-    # Enable GPU usage
-    if opt.use_cuda & torch.cuda.is_available():
-        model.cuda()
-
-    # Print the model architecture and parameters
-    print("Model architectures: ")
-    print(model)
-
-    print("\nSizes of parameters: ")
-    for name, param in model.named_parameters():
-        print("{}: {}".format(name, list(param.size())))
-    n_params = sum([p.nelement() for p in model.parameters()])
-    # The coupling coefficients b_ij are not included in the parameter list,
-    # we need to add them mannually, which is 1152 * 10 = 11520.
-    print('\nTotal number of parameters: %d \n' % (n_params+11520))
-
-    # Make model checkpoint directory
-    if not os.path.exists('ckpt'):
-        os.makedirs('ckpt')
-
-    # Start training
-    writer = SummaryWriter()
-    train(opt, train_loader, test_loader, model, writer)
-    writer.close()