In [1]:
import numpy as np
import os
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader
from torch.autograd import Variable
import torchvision.transforms as transforms
from custom_dataset import *
from mhbn import *
import time
import util

In [2]:
print('Loading data')

# Per-channel mean / std used to normalise the image tensors
# (these values match the commonly used ImageNet statistics).
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Pre-processing applied to every view image, in this order:
# centre crop (500) -> resize (224) -> tensor conversion -> normalisation.
preprocessing_steps = [
    transforms.CenterCrop(500),
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
transform = transforms.Compose(preprocessing_steps)

Loading data


In [3]:
# Pick the compute device: the first CUDA GPU when one is usable, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [4]:
# Root path of the data, handed to MultiViewDataSet below.
# Set the MHBN_DATA_ROOT environment variable to point at a different location;
# when it is unset the original default path is used.
root = os.environ.get("MHBN_DATA_ROOT", "/home/alan/Desktop/view/classes/")

In [5]:
# Number of samples per mini-batch; the same value is used for the training
# and the validation DataLoader.
batch_size = 4

In [6]:
# Load dataset
# Training split: reshuffled on every pass so batches differ between epochs.
dset_train = MultiViewDataSet(root, 'train', transform=transform)
train_loader = DataLoader(dset_train, batch_size=batch_size, shuffle=True, num_workers=2)

# 'test' split, used for validation. It is not shuffled: evaluation does not
# benefit from a random order, and a fixed order keeps the batch composition
# (and therefore the averaged per-batch loss) reproducible between runs.
dset_val = MultiViewDataSet(root, 'test', transform=transform)
val_loader = DataLoader(dset_val, batch_size=batch_size, shuffle=False, num_workers=2)

In [7]:
# Class list and class-to-index lookup, both exposed by the training dataset.
classes = dset_train.classes
class_to_idx = dset_train.class_to_idx

In [8]:
# Number of local features: passed as the third argument to the mhbnn
# constructor and appended to the checkpoint name ('mhbnn_<n>').
num_local_features = 10

In [9]:
# Build the network. Positional arguments: a boolean flag (presumably
# "use pretrained weights" - confirm in mhbn), the number of classes and the
# number of local features.
n_classes = len(classes)
model = mhbnn(True, n_classes, num_local_features)

# Move the parameters to the selected device.
model.to(device)

# Enable cuDNN's benchmark mode.
cudnn.benchmark = True

print('Running on {}'.format(device))

Running on cuda:0


In [10]:
# --- Schedule ---------------------------------------------------------------
n_epochs = 100        # last epoch index (exclusive) of the training loop
lr = 1e-4             # initial learning rate given to Adam
lr_decay = 0.1        # factor the learning rate is multiplied by ...
lr_decay_freq = 30    # ... every this many epochs
print_freq = 10       # training iterations between two loss printouts

# --- Loss and optimiser -----------------------------------------------------
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# --- Bookkeeping ------------------------------------------------------------
# start_epoch / best_acc are overwritten by load_checkpoint() when resuming;
# best_acc / best_loss are updated whenever validation accuracy improves.
start_epoch = 0
best_acc, best_loss = 0.0, 0.0

In [11]:
# Checkpoint directory. Not referenced by any other visible cell - presumably
# it mirrors the folder util.save_checkpoint writes to (TODO confirm).
resume_path = './checkpoint'
# Path of a checkpoint file to resume from. The empty string means "train from
# scratch"; load_checkpoint() asserts that the path is an existing file.
resume_ckp_path = ''

In [12]:
def load_checkpoint():
    """Restore training state from the file at ``resume_ckp_path``.

    Loads the model weights and optimizer state in place and updates the
    module-level ``best_acc`` and ``start_epoch`` from the checkpoint's
    'best_acc' and 'epoch' entries.

    Raises:
        AssertionError: if ``resume_ckp_path`` is not an existing file.
    """
    global best_acc, start_epoch
    # Load checkpoint.
    print('\n==> Loading checkpoint..')
    assert os.path.isfile(resume_ckp_path), 'Error: no checkpoint file found!'

    # map_location remaps the stored tensors onto the device selected for this
    # session, so a checkpoint written on a GPU can also be restored on a
    # CPU-only machine (or on a different GPU).
    # NOTE: torch.load unpickles the file - only load checkpoints you trust.
    checkpoint = torch.load(resume_ckp_path, map_location=device)
    best_acc = checkpoint['best_acc']
    start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])

In [13]:
def train():
    """Run one pass over ``train_loader``, taking an optimizer step per batch.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and
    ``device``. The loss of every ``print_freq``-th iteration is printed.
    The caller is responsible for putting the model in training mode.
    """
    train_size = len(train_loader)

    for i, (inputs, targets) in enumerate(train_loader):
        # The loader yields the views of a batch as a sequence; stack them
        # along a new dim 1 to get [batch_size, views, channels, width, height].
        # Stacking directly in torch avoids a tensor -> numpy -> tensor round trip.
        inputs = torch.stack(list(inputs), dim=1)

        # .to(device) works for both CPU and CUDA devices, whereas .cuda()
        # raises when the selected device is the CPU.
        inputs, targets = inputs.to(device), targets.to(device)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # compute gradient and take an optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % print_freq == 0:
            print("\tIter [%d/%d] Loss: %.4f" % (i + 1, train_size, loss.item()))

In [14]:
def eval(data_loader, is_test=False):
    """Evaluate the module-level ``model`` on every batch of ``data_loader``.

    Args:
        data_loader: iterable yielding ``(inputs, targets)`` batches, where
            ``inputs`` is a sequence of per-view tensors.
        is_test: when True, ``load_checkpoint()`` is called first so the
            checkpointed weights are evaluated.

    Returns:
        ``(avg_test_acc, avg_loss)`` as 0-dim tensors: the accuracy in percent
        over all samples and the mean of the per-batch losses. Tensors are
        returned so that callers using ``.item()`` keep working.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no batches.

    The caller is responsible for putting the model in evaluation mode.
    """
    if is_test:
        load_checkpoint()

    # Running totals are kept as plain Python numbers (via .item()) so nothing
    # accumulates on the GPU and the accuracy is always a true float division,
    # independent of tensor dtype-promotion rules.
    total = 0
    correct = 0
    total_loss = 0.0
    n = 0

    with torch.no_grad():
        for inputs, targets in data_loader:
            # Stack the per-view tensors along a new dim 1:
            # [batch_size, views, channels, width, height].
            inputs = torch.stack(list(inputs), dim=1)

            # .to(device) also works when the selected device is the CPU.
            inputs, targets = inputs.to(device), targets.to(device)

            # compute output
            outputs = model(inputs)
            total_loss += criterion(outputs, targets).item()
            n += 1

            # index of the highest score along dim 1 is the predicted class
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    avg_test_acc = torch.tensor(100.0 * correct / total)
    avg_loss = torch.tensor(total_loss / n)

    return avg_test_acc, avg_loss

In [15]:
# Resume from a saved checkpoint only when a checkpoint path is configured
# (an empty string is falsy, so by default nothing is loaded).
if resume_ckp_path:
    load_checkpoint()

In [16]:
# Main loop: train one epoch, validate, checkpoint on improvement, decay the LR.
for epoch in range(start_epoch, n_epochs):
    print('\n-----------------------------------')
    print('Epoch: [%d/%d]' % (epoch+1, n_epochs))
    start = time.time()

    model.train()
    train()
    print('Time taken: %.2f sec.' % (time.time() - start))

    model.eval()
    avg_test_acc, avg_loss = eval(val_loader)
    # Work with plain floats from here on, so the best-so-far values and the
    # checkpoint entries do not hold on to tensors.
    val_acc, val_loss = float(avg_test_acc), float(avg_loss)

    print('\nEvaluation:')
    print('\tVal Acc: %.2f - Loss: %.4f' % (val_acc, val_loss))
    print('\tCurrent best  val acc: %.2f' % best_acc)

    # Log epoch to tensorboard
    # See log using: tensorboard --logdir='logs' --port=6006
#     util.logEpoch(logger, model, epoch + 1, avg_loss, avg_test_acc)

    # Save model whenever the validation accuracy improves
    if val_acc > best_acc:
        print('\tSaving checkpoint - Acc: %.2f' % val_acc)
        best_acc = val_acc
        best_loss = val_loss
        util.save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': val_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, 'mhbnn_'+str(num_local_features))

    # Decaying Learning Rate
    # Scale the learning rate of the existing optimizer in place. Creating a
    # new Adam instance here would discard its running moment estimates and
    # ignore a learning rate restored from a checkpoint.
    if (epoch + 1) % lr_decay_freq == 0:
        for param_group in optimizer.param_groups:
            param_group['lr'] *= lr_decay
        lr = optimizer.param_groups[0]['lr']
        print('Learning rate:', lr)


-----------------------------------
Epoch: [1/100]
	Iter [10/2461] Loss: 3.6883
	Iter [20/2461] Loss: 3.6968
	Iter [30/2461] Loss: 3.6757
	Iter [40/2461] Loss: 3.6590
	Iter [50/2461] Loss: 3.5795
	Iter [60/2461] Loss: 3.6115
	Iter [70/2461] Loss: 3.4769
	Iter [80/2461] Loss: 3.5802
	Iter [90/2461] Loss: 3.6035
	Iter [100/2461] Loss: 3.5225
	Iter [110/2461] Loss: 3.0842
	Iter [120/2461] Loss: 3.2837
	Iter [130/2461] Loss: 3.5027
	Iter [140/2461] Loss: 3.4837
	Iter [150/2461] Loss: 3.4527
	Iter [160/2461] Loss: 3.3980
	Iter [170/2461] Loss: 3.3681
	Iter [180/2461] Loss: 3.8943
	Iter [190/2461] Loss: 3.4648
	Iter [200/2461] Loss: 3.3594
	Iter [210/2461] Loss: 3.6478
	Iter [220/2461] Loss: 3.1512
	Iter [230/2461] Loss: 3.6209
	Iter [240/2461] Loss: 3.6048
	Iter [250/2461] Loss: 2.7274
	Iter [260/2461] Loss: 3.3344
	Iter [270/2461] Loss: 3.4610
	Iter [280/2461] Loss: 2.8464
	Iter [290/2461] Loss: 2.9396
	Iter [300/2461] Loss: 2.1151
	Iter [310/2461] Loss: 3.2575
	Iter [320/2461] Loss: 3.42

	Iter [170/2461] Loss: 0.4038
	Iter [180/2461] Loss: 1.5911
	Iter [190/2461] Loss: 1.8787
	Iter [200/2461] Loss: 0.9013


Process Process-6:
Traceback (most recent call last):
Process Process-5:
  File "/home/alan/anaconda3/envs/ml/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/alan/anaconda3/envs/ml/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/alan/anaconda3/envs/ml/lib/python3.6/site-packages/torchvision-0.2.1-py3.6.egg/torchvision/transforms/transforms.py", line 49, in __call__
    img = t(img)
  File "/home/alan/anaconda3/envs/ml/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 106, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/alan/anaconda3/envs/ml/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 106, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
Traceback (most recent call last):
  File "/home/alan/Desktop/MHBN/custom_dataset.py", line 43, in __getitem__
    im = self.t

KeyboardInterrupt: 