In [1]:
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader
from torch.autograd import Variable
import tensorflow as tf

import torchvision.transforms as transforms
from PIL.Image import BILINEAR
from multiprocessing import Process, freeze_support, set_start_method

import argparse
import numpy as np
import time
import uuid
import os
import sys


from models.resnet import *
#import augment
from augment import augment_on_GPU

import util
from logger import Logger
from custom_dataset import MultiViewDataSet

Instructions for updating:
Use the retry module or similar alternatives.


In [2]:
parser = argparse.ArgumentParser(description='MVCNN-PyTorch')
parser.add_argument('--data', metavar='DIR', default='/localscratch/Users/amotahari/MV_CNN_views', help='path to dataset')
parser.add_argument('--resnet', default=18, choices=[18, 34, 50, 101, 152], type=int, metavar='N', help='resnet depth (default: resnet18)')
parser.add_argument('--epochs', default=10000, type=int, metavar='N', help='number of total epochs to run (default: 100)')
parser.add_argument('-b', '--batch-size', default=22, type=int,
                    metavar='N', help='mini-batch size (default: 4)')
parser.add_argument('--lr', '--learning-rate', default=0.00001, type=float,
                    metavar='LR', help='initial learning rate (default: 0.01)')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum (default: 0.9)')
parser.add_argument('--lr-decay-freq', default=200, type=float,
                    metavar='W', help='learning rate decay (default: 30)')
parser.add_argument('--lr-decay', default=0.5, type=float,
                    metavar='W', help='learning rate decay (default: 0.1)')
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('-r', '--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-o', '--output', default='/Shared/CTmechanics_COPDGene/Amin/Airway_PyTorch', type=str, metavar='PATH',
                    help='path to Output folder for logs and checkpoints (default: none)')
parser.add_argument('-w', '--workers', default=1, type=int,
                    metavar='N', help='Number of workers in input pipe (default: 4)')
parser.add_argument('-wd', '--weight_decay', default=0.0, type=float,
                    metavar='W', help='Weight decay factor (default: 0.1)')

parser.add_argument('-f', '--fun', default='', type=str, metavar='PATH',
                    help='path to Output folder for logs and checkpoints (default: none)')
#parser.add_argument('--pretrained', dest='pretrained', action='store_true', help='use pre-trained model')

args = parser.parse_args()


In [3]:
# Compute device: the first CUDA GPU when one is visible, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Per-image preprocessing applied by both dataset splits:
# Resize(224) followed by conversion to a tensor.
preprocessing_steps = [
    transforms.Resize(224),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# Build the training and validation splits from the same dataset root.
print('Loading data')
dset_train = MultiViewDataSet(args.data, 'train', transform=transform)
print("\nTraining Data Loaded!")

dset_val = MultiViewDataSet(args.data, 'validation', transform=transform)
print("\nValidation Data Loaded!")

Loading data
/localscratch/Users/amotahari/MV_CNN_views/sets/train.txt
Loading train data: 100% 
Training Data Loaded!
/localscratch/Users/amotahari/MV_CNN_views/sets/validation.txt
Loading validation data: 100% 
Validation Data Loaded!


In [4]:
# Validation batches are served in a fixed order: shuffling brings no benefit
# for evaluation and only makes successive evaluation runs harder to compare.
val_loader = DataLoader(dset_val, batch_size=args.batch_size, shuffle=False, num_workers=args.workers)
# Training batches are reshuffled at the start of every epoch.
train_loader = DataLoader(dset_train, batch_size=args.batch_size, shuffle=True, num_workers=args.workers)

In [5]:
#torch.cuda.empty_cache()

# Give this run its own sub-directory (random hex name) under the requested
# output root. NOTE: args.output is rewritten in place because later code reads
# the run directory from it, so re-running this cell in the same kernel nests a
# new run directory inside the previous one.
args.output = os.path.join(args.output, uuid.uuid4().hex)
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(args.output, exist_ok=True)

# Record the configuration of this run next to its logs and checkpoints.
with open(os.path.join(args.output, 'Arguments.txt'), "w") as text_file:
    print(args, file=text_file)

classes = dset_train.classes
print(len(classes), classes)

# Instantiate the requested ResNet depth with one output per class.
if args.resnet == 18:
    resnet = resnet18(num_classes=len(classes))
elif args.resnet == 34:
    resnet = resnet34(num_classes=len(classes))
elif args.resnet == 50:
    resnet = resnet50(num_classes=len(classes))
elif args.resnet == 101:
    resnet = resnet101(num_classes=len(classes))
elif args.resnet == 152:
    resnet = resnet152(num_classes=len(classes))
else:
    # Without this branch an unexpected depth would leave `resnet` undefined
    # and only fail later with an unrelated NameError.
    raise ValueError('Unsupported resnet depth: %s' % args.resnet)

print('Using resnet' + str(args.resnet))
resnet.to(device)
# DataParallel documents device_ids as a list, so materialise the range;
# this also makes the printed value show the actual device indices.
device_ids = list(range(torch.cuda.device_count()))
print("CUDA devices available: ",device_ids)
resnet = nn.DataParallel(resnet, device_ids=device_ids)
cudnn.benchmark = True

print('Running on ' + str(device))

logger = Logger(os.path.join(args.output, 'logs'))

# Loss and Optimizer
lr = args.lr
n_epochs = args.epochs
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet.parameters(), lr=lr, weight_decay=args.weight_decay)

# Training progress; best_acc and start_epoch are overwritten by load_checkpoint().
best_acc = 0.0
best_loss = 0.0
start_epoch = 0

# Helper functions
def load_checkpoint():
    """Restore model, optimizer and training progress from ``args.resume``.

    Loads the checkpoint's ``'state_dict'`` entry into the global ``resnet``
    and its ``'optimizer'`` entry into the global ``optimizer``, and overwrites
    the module-level ``best_acc`` and ``start_epoch`` with the stored
    ``'best_acc'`` and ``'epoch'`` values.

    Raises:
        FileNotFoundError: if ``args.resume`` is not an existing file.
    """
    global best_acc, start_epoch
    # Load checkpoint.
    print('\n==> Loading checkpoint..')
    # Explicit check rather than `assert`, which disappears under `python -O`
    # and would let torch.load fail with a less helpful error.
    if not os.path.isfile(args.resume):
        raise FileNotFoundError('Error: no checkpoint file found! (%s)' % args.resume)

    checkpoint = torch.load(args.resume)
    best_acc = checkpoint['best_acc']
    start_epoch = checkpoint['epoch']
    resnet.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])


def train():
    """Run one optimisation pass over the global ``train_loader``.

    For every batch the inputs are passed through ``augment_on_GPU`` (whose
    result is converted back from a NumPy array), inputs and targets are moved
    to the global ``device``, the global ``resnet`` is evaluated, and one
    ``optimizer`` step is taken on the ``criterion`` loss. The current loss is
    printed every ``args.print_freq`` iterations.
    """
    train_size = len(train_loader)

    for i, (inputs, targets) in enumerate(train_loader):
        # augment_on_GPU hands back a NumPy array; wrap it as a tensor again and
        # move it to the configured device instead of hard-coding .cuda().
        inputs = torch.from_numpy(augment_on_GPU(inputs)).to(device)
        targets = targets.to(device)

        # Forward pass. Tensors carry autograd state themselves, so the
        # deprecated Variable wrappers are no longer needed.
        outputs = resnet(inputs)
        loss = criterion(outputs, targets)

        # Backpropagate and apply one optimizer update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % args.print_freq == 0:
            print("\tIter [%d/%d] Loss: %.4f" % (i + 1, train_size, loss.item()))


# Validation and Testing
def eval(data_loader, is_test=False):
    """Evaluate the global ``resnet`` on every batch of ``data_loader``.

    Args:
        data_loader: iterable yielding ``(inputs, targets)`` batches.
        is_test: when true, ``load_checkpoint()`` is called first so the
            checkpoint named by ``args.resume`` is evaluated.

    Returns:
        ``(avg_test_acc, avg_loss)`` as 0-dim tensors: the accuracy in percent
        over all samples seen, and the mean of the per-batch ``criterion``
        losses (batches are weighted equally, whatever their size).

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    if is_test:
        load_checkpoint()

    total = 0
    correct = 0
    total_loss = 0.0
    n = 0

    # No gradients are needed anywhere in evaluation, so disable them once.
    with torch.no_grad():
        for inputs, targets in data_loader:
            # augment_on_GPU hands back a NumPy array; wrap it as a tensor and
            # move it to the configured device instead of hard-coding .cuda().
            inputs = torch.from_numpy(augment_on_GPU(inputs)).to(device)
            targets = targets.to(device)

            # compute output
            outputs = resnet(inputs)
            total_loss += criterion(outputs, targets)
            n += 1

            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            # Count hits as a Python int. Accumulating the tensor itself keeps
            # an integer dtype on PyTorch versions without int/float type
            # promotion, which silently truncates the percentage computed below.
            correct += (predicted == targets).sum().item()

    if n == 0:
        raise ValueError('data_loader produced no batches')

    # Float arithmetic first, then wrap as a tensor so callers can keep using
    # .item() and tensor comparisons on the result.
    avg_test_acc = torch.tensor(100.0 * correct / total)
    avg_loss = total_loss / n

    return avg_test_acc, avg_loss

# Training / Eval loop
# Optionally continue from a saved checkpoint (restores weights, optimizer
# state, best_acc and start_epoch).
if args.resume:
    load_checkpoint()

for epoch in range(start_epoch, n_epochs):
    print('\n-----------------------------------')
    print('Epoch: [%d/%d]' % (epoch+1, n_epochs))
    start = time.time()

    resnet.train()
    train()
    print('Time taken: %.2f sec.' % (time.time() - start))
    if (epoch + 1) % 5 == 0: # Eval every 5 epoch
        resnet.eval()
        avg_test_acc, avg_loss = eval(val_loader)

        print('\nEvaluation:')
        print('\tVal Acc: %.2f - Loss: %.4f' % (avg_test_acc.item(), avg_loss.item()))
        # Printed before the comparison below, so this is the best accuracy
        # seen up to (not including) the current evaluation.
        print('\tCurrent best val acc: %.2f' % best_acc)

        # Log epoch to tensorboard
        # See log using: tensorboard --logdir='logs' --port=6006
        util.logEpoch(logger, resnet, epoch + 1, avg_loss, avg_test_acc)

        # Save model only when validation accuracy improves.
        if avg_test_acc > best_acc:
            print('\tSaving checkpoint - Acc: %.2f' % avg_test_acc)
            best_acc = avg_test_acc
            best_loss = avg_loss
            util.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': resnet.state_dict(),
                'acc': avg_test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
                },
                checkpoint = args.output
            )

    # Decaying Learning Rate
    if (epoch + 1) % args.lr_decay_freq == 0:
        # Scale the rate of the existing optimizer in place. Building a fresh
        # Adam instance here would throw away its running moment estimates
        # (and any optimizer state restored from a checkpoint) at every decay.
        for param_group in optimizer.param_groups:
            param_group['lr'] *= args.lr_decay
        lr = optimizer.param_groups[0]['lr']
        print('Learning rate:', lr)

2 ['Standard', 'Abnormal']
Using resnet18
CUDA devices available:  range(0, 4)
Running on cuda:0

-----------------------------------
Epoch: [1/10000]
	Iter [10/66] Loss: 0.6123
	Iter [20/66] Loss: 0.6407
	Iter [30/66] Loss: 0.6109
	Iter [40/66] Loss: 0.6551
	Iter [50/66] Loss: 0.6270
	Iter [60/66] Loss: 0.6740
Time taken: 99.86 sec.

-----------------------------------
Epoch: [2/10000]
	Iter [10/66] Loss: 0.7055
	Iter [20/66] Loss: 0.5993
	Iter [30/66] Loss: 0.5816
	Iter [40/66] Loss: 0.6489
	Iter [50/66] Loss: 0.7227
	Iter [60/66] Loss: 0.6690
Time taken: 82.85 sec.

-----------------------------------
Epoch: [3/10000]
	Iter [10/66] Loss: 0.7882
	Iter [20/66] Loss: 0.6532
	Iter [30/66] Loss: 0.7693
	Iter [40/66] Loss: 0.7006
	Iter [50/66] Loss: 0.5269
	Iter [60/66] Loss: 0.6738
Time taken: 83.43 sec.

-----------------------------------
Epoch: [4/10000]
	Iter [10/66] Loss: 0.6330
	Iter [20/66] Loss: 0.7583
	Iter [30/66] Loss: 0.6441
	Iter [40/66] Loss: 0.7043
	Iter [50/66] Loss: 0.59

	Iter [60/66] Loss: 0.6901
Time taken: 83.20 sec.

-----------------------------------
Epoch: [33/10000]
	Iter [10/66] Loss: 0.4737
	Iter [20/66] Loss: 0.6843
	Iter [30/66] Loss: 0.6378
	Iter [40/66] Loss: 0.6537
	Iter [50/66] Loss: 0.6488
	Iter [60/66] Loss: 0.7192
Time taken: 82.46 sec.

-----------------------------------
Epoch: [34/10000]
	Iter [10/66] Loss: 0.6810
	Iter [20/66] Loss: 0.6489
	Iter [30/66] Loss: 0.6292
	Iter [40/66] Loss: 0.6502
	Iter [50/66] Loss: 0.7176
	Iter [60/66] Loss: 0.6683
Time taken: 82.95 sec.

-----------------------------------
Epoch: [35/10000]
	Iter [10/66] Loss: 0.6634
	Iter [20/66] Loss: 0.7238
	Iter [30/66] Loss: 0.5541
	Iter [40/66] Loss: 0.6828
	Iter [50/66] Loss: 0.6897
	Iter [60/66] Loss: 0.6137
Time taken: 82.17 sec.

Evaluation:
	Val Acc: 63.00 - Loss: 0.6547
	Current best val acc: 63.00

-----------------------------------
Epoch: [36/10000]
	Iter [10/66] Loss: 0.6875
	Iter [20/66] Loss: 0.7628
	Iter [30/66] Loss: 0.6971
	Iter [40/66] Loss: 0

	Iter [10/66] Loss: 0.6326
	Iter [20/66] Loss: 0.4624
	Iter [30/66] Loss: 0.6613
	Iter [40/66] Loss: 0.7320
	Iter [50/66] Loss: 0.7152
	Iter [60/66] Loss: 0.6734
Time taken: 79.90 sec.

Evaluation:
	Val Acc: 63.00 - Loss: 1.2412
	Current best val acc: 63.00

-----------------------------------
Epoch: [66/10000]
	Iter [10/66] Loss: 0.7707
	Iter [20/66] Loss: 0.6435
	Iter [30/66] Loss: 0.6305
	Iter [40/66] Loss: 0.7598
	Iter [50/66] Loss: 0.6468
	Iter [60/66] Loss: 0.5874
Time taken: 82.25 sec.

-----------------------------------
Epoch: [67/10000]
	Iter [10/66] Loss: 0.6864
	Iter [20/66] Loss: 0.7226
	Iter [30/66] Loss: 0.6812
	Iter [40/66] Loss: 0.7159
	Iter [50/66] Loss: 0.7291
	Iter [60/66] Loss: 0.6577
Time taken: 83.86 sec.

-----------------------------------
Epoch: [68/10000]
	Iter [10/66] Loss: 0.6910
	Iter [20/66] Loss: 0.6474
	Iter [30/66] Loss: 0.6273
	Iter [40/66] Loss: 0.7013
	Iter [50/66] Loss: 0.5740
	Iter [60/66] Loss: 0.6807
Time taken: 83.71 sec.

---------------------

	Iter [20/66] Loss: 0.6437
	Iter [30/66] Loss: 0.6566
	Iter [40/66] Loss: 0.6577
	Iter [50/66] Loss: 0.6754
	Iter [60/66] Loss: 0.6808
Time taken: 81.33 sec.

-----------------------------------
Epoch: [98/10000]
	Iter [10/66] Loss: 0.6060
	Iter [20/66] Loss: 0.7141
	Iter [30/66] Loss: 0.6246
	Iter [40/66] Loss: 0.6619
	Iter [50/66] Loss: 0.6798
	Iter [60/66] Loss: 0.6016
Time taken: 83.28 sec.

-----------------------------------
Epoch: [99/10000]
	Iter [10/66] Loss: 0.6335
	Iter [20/66] Loss: 0.6456
	Iter [30/66] Loss: 0.6252
	Iter [40/66] Loss: 0.7370
	Iter [50/66] Loss: 0.6467
	Iter [60/66] Loss: 0.6850
Time taken: 83.92 sec.

-----------------------------------
Epoch: [100/10000]
	Iter [10/66] Loss: 0.5410
	Iter [20/66] Loss: 0.6574
	Iter [30/66] Loss: 0.7148
	Iter [40/66] Loss: 0.6044
	Iter [50/66] Loss: 0.5700
	Iter [60/66] Loss: 0.5944
Time taken: 84.06 sec.

Evaluation:
	Val Acc: 63.00 - Loss: 0.6602
	Current best val acc: 63.00

-----------------------------------
Epoch: [101

	Iter [40/66] Loss: 0.6841
	Iter [50/66] Loss: 0.6031
	Iter [60/66] Loss: 0.5245
Time taken: 83.18 sec.

-----------------------------------
Epoch: [130/10000]
	Iter [10/66] Loss: 0.6773
	Iter [20/66] Loss: 0.5907
	Iter [30/66] Loss: 0.6254
	Iter [40/66] Loss: 0.7440
	Iter [50/66] Loss: 0.6775
	Iter [60/66] Loss: 0.6572
Time taken: 83.67 sec.

Evaluation:
	Val Acc: 63.00 - Loss: 0.7090
	Current best val acc: 63.00

-----------------------------------
Epoch: [131/10000]
	Iter [10/66] Loss: 0.6268
	Iter [20/66] Loss: 0.5914
	Iter [30/66] Loss: 0.6432
	Iter [40/66] Loss: 0.6767
	Iter [50/66] Loss: 0.6846
	Iter [60/66] Loss: 0.6323
Time taken: 83.65 sec.

-----------------------------------
Epoch: [132/10000]
	Iter [10/66] Loss: 0.6564
	Iter [20/66] Loss: 0.6383
	Iter [30/66] Loss: 0.6155
	Iter [40/66] Loss: 0.5907
	Iter [50/66] Loss: 0.6511
	Iter [60/66] Loss: 0.6795
Time taken: 83.93 sec.

-----------------------------------
Epoch: [133/10000]
	Iter [10/66] Loss: 0.6939
	Iter [20/66] Los

	Iter [30/66] Loss: 0.5967
	Iter [40/66] Loss: 0.6281
	Iter [50/66] Loss: 0.6574
	Iter [60/66] Loss: 0.6218
Time taken: 80.55 sec.

-----------------------------------
Epoch: [162/10000]
	Iter [10/66] Loss: 0.6579
	Iter [20/66] Loss: 0.6953
	Iter [30/66] Loss: 0.7403
	Iter [40/66] Loss: 0.6597
	Iter [50/66] Loss: 0.7406
	Iter [60/66] Loss: 0.7339
Time taken: 84.07 sec.

-----------------------------------
Epoch: [163/10000]
	Iter [10/66] Loss: 0.7237
	Iter [20/66] Loss: 0.6616
	Iter [30/66] Loss: 0.6013
	Iter [40/66] Loss: 0.7012
	Iter [50/66] Loss: 0.6599
	Iter [60/66] Loss: 0.6941
Time taken: 82.70 sec.

-----------------------------------
Epoch: [164/10000]
	Iter [10/66] Loss: 0.6606
	Iter [20/66] Loss: 0.6785
	Iter [30/66] Loss: 0.7614
	Iter [40/66] Loss: 0.6020
	Iter [50/66] Loss: 0.5966
	Iter [60/66] Loss: 0.7007
Time taken: 83.18 sec.

-----------------------------------
Epoch: [165/10000]
	Iter [10/66] Loss: 0.8346
	Iter [20/66] Loss: 0.6739
	Iter [30/66] Loss: 0.6349
	Iter [40

	Iter [50/66] Loss: 0.6890
	Iter [60/66] Loss: 0.6890
Time taken: 82.11 sec.

-----------------------------------
Epoch: [194/10000]
	Iter [10/66] Loss: 0.6501
	Iter [20/66] Loss: 0.6576
	Iter [30/66] Loss: 0.6575
	Iter [40/66] Loss: 0.6495
	Iter [50/66] Loss: 0.6811
	Iter [60/66] Loss: 0.6730
Time taken: 83.47 sec.

-----------------------------------
Epoch: [195/10000]
	Iter [10/66] Loss: 0.6890
	Iter [20/66] Loss: 0.6649
	Iter [30/66] Loss: 0.6890
	Iter [40/66] Loss: 0.6728
	Iter [50/66] Loss: 0.6482
	Iter [60/66] Loss: 0.6726
Time taken: 80.62 sec.

Evaluation:
	Val Acc: 63.00 - Loss: 0.6723
	Current best val acc: 63.00

-----------------------------------
Epoch: [196/10000]
	Iter [10/66] Loss: 0.6643
	Iter [20/66] Loss: 0.6808
	Iter [30/66] Loss: 0.6393
	Iter [40/66] Loss: 0.6724
	Iter [50/66] Loss: 0.7391
	Iter [60/66] Loss: 0.6471
Time taken: 80.72 sec.

-----------------------------------
Epoch: [197/10000]
	Iter [10/66] Loss: 0.6890
	Iter [20/66] Loss: 0.6805
	Iter [30/66] Los

	Iter [60/66] Loss: 0.6787
Time taken: 83.24 sec.

Evaluation:
	Val Acc: 63.00 - Loss: 0.6674
	Current best val acc: 63.00

-----------------------------------
Epoch: [226/10000]
	Iter [10/66] Loss: 0.6787
	Iter [20/66] Loss: 0.6466
	Iter [30/66] Loss: 0.6787
	Iter [40/66] Loss: 0.7108
	Iter [50/66] Loss: 0.6144
	Iter [60/66] Loss: 0.6894
Time taken: 82.71 sec.

-----------------------------------
Epoch: [227/10000]
	Iter [10/66] Loss: 0.6572
	Iter [20/66] Loss: 0.6679
	Iter [30/66] Loss: 0.6465
	Iter [40/66] Loss: 0.6464
	Iter [50/66] Loss: 0.7001
	Iter [60/66] Loss: 0.7109
Time taken: 82.96 sec.

-----------------------------------
Epoch: [228/10000]
	Iter [10/66] Loss: 0.7001
	Iter [20/66] Loss: 0.6678
	Iter [30/66] Loss: 0.6571
	Iter [40/66] Loss: 0.6355
	Iter [50/66] Loss: 0.6462
	Iter [60/66] Loss: 0.6786
Time taken: 82.27 sec.

-----------------------------------
Epoch: [229/10000]
	Iter [10/66] Loss: 0.6678
	Iter [20/66] Loss: 0.6462
	Iter [30/66] Loss: 0.6570
	Iter [40/66] Los

	Iter [50/66] Loss: 0.6896
	Iter [60/66] Loss: 0.6550
Time taken: 81.67 sec.

-----------------------------------
Epoch: [258/10000]
	Iter [10/66] Loss: 0.6550
	Iter [20/66] Loss: 0.6435
	Iter [30/66] Loss: 0.6434
	Iter [40/66] Loss: 0.6549
	Iter [50/66] Loss: 0.6202
	Iter [60/66] Loss: 0.6665
Time taken: 79.83 sec.

-----------------------------------
Epoch: [259/10000]
	Iter [10/66] Loss: 0.6781
	Iter [20/66] Loss: 0.6550
	Iter [30/66] Loss: 0.6781
	Iter [40/66] Loss: 0.6666
	Iter [50/66] Loss: 0.6781
	Iter [60/66] Loss: 0.6319
Time taken: 83.07 sec.

-----------------------------------
Epoch: [260/10000]
	Iter [10/66] Loss: 0.6665
	Iter [20/66] Loss: 0.6434
	Iter [30/66] Loss: 0.6665
	Iter [40/66] Loss: 0.6781
	Iter [50/66] Loss: 0.6665
	Iter [60/66] Loss: 0.6665
Time taken: 82.09 sec.

Evaluation:
	Val Acc: 63.00 - Loss: 0.6658
	Current best val acc: 63.00

-----------------------------------
Epoch: [261/10000]
	Iter [10/66] Loss: 0.7012
	Iter [20/66] Loss: 0.6897
	Iter [30/66] Los

Time taken: 82.60 sec.

-----------------------------------
Epoch: [290/10000]
	Iter [10/66] Loss: 0.6542
	Iter [20/66] Loss: 0.6898
	Iter [30/66] Loss: 0.6423
	Iter [40/66] Loss: 0.6660
	Iter [50/66] Loss: 0.6542
	Iter [60/66] Loss: 0.6542
Time taken: 81.20 sec.

Evaluation:
	Val Acc: 63.00 - Loss: 0.6653
	Current best val acc: 63.00

-----------------------------------
Epoch: [291/10000]
	Iter [10/66] Loss: 0.6542
	Iter [20/66] Loss: 0.7016
	Iter [30/66] Loss: 0.6779
	Iter [40/66] Loss: 0.6779
	Iter [50/66] Loss: 0.6660
	Iter [60/66] Loss: 0.6779
Time taken: 82.45 sec.

-----------------------------------
Epoch: [292/10000]
	Iter [10/66] Loss: 0.6779
	Iter [20/66] Loss: 0.6898
	Iter [30/66] Loss: 0.6779
	Iter [40/66] Loss: 0.6779
	Iter [50/66] Loss: 0.6542
	Iter [60/66] Loss: 0.6898
Time taken: 83.14 sec.

-----------------------------------
Epoch: [293/10000]
	Iter [10/66] Loss: 0.6779
	Iter [20/66] Loss: 0.6423
	Iter [30/66] Loss: 0.6660
	Iter [40/66] Loss: 0.6779
	Iter [50/66] Los

	Iter [60/66] Loss: 0.6779
Time taken: 82.47 sec.

-----------------------------------
Epoch: [322/10000]
	Iter [10/66] Loss: 0.6659
	Iter [20/66] Loss: 0.6779
	Iter [30/66] Loss: 0.6779
	Iter [40/66] Loss: 0.6300
	Iter [50/66] Loss: 0.6659
	Iter [60/66] Loss: 0.6898
Time taken: 80.18 sec.

-----------------------------------
Epoch: [323/10000]
	Iter [10/66] Loss: 0.6898
	Iter [20/66] Loss: 0.7137
	Iter [30/66] Loss: 0.6420
	Iter [40/66] Loss: 0.6779
	Iter [50/66] Loss: 0.7137
	Iter [60/66] Loss: 0.6659
Time taken: 82.91 sec.

-----------------------------------
Epoch: [324/10000]
	Iter [10/66] Loss: 0.7137
	Iter [20/66] Loss: 0.6421
	Iter [30/66] Loss: 0.6421
	Iter [40/66] Loss: 0.6540
	Iter [50/66] Loss: 0.6898
	Iter [60/66] Loss: 0.7137
Time taken: 81.27 sec.

-----------------------------------
Epoch: [325/10000]
	Iter [10/66] Loss: 0.6659
	Iter [20/66] Loss: 0.6659
	Iter [30/66] Loss: 0.6659
	Iter [40/66] Loss: 0.6659
	Iter [50/66] Loss: 0.6659
	Iter [60/66] Loss: 0.6539
Time take

	Iter [10/66] Loss: 0.6538
	Iter [20/66] Loss: 0.6658
	Iter [30/66] Loss: 0.6658
	Iter [40/66] Loss: 0.6178
	Iter [50/66] Loss: 0.6658
	Iter [60/66] Loss: 0.6298
Time taken: 82.87 sec.

-----------------------------------
Epoch: [355/10000]
	Iter [10/66] Loss: 0.7018
	Iter [20/66] Loss: 0.6778
	Iter [30/66] Loss: 0.6778
	Iter [40/66] Loss: 0.6658
	Iter [50/66] Loss: 0.7018
	Iter [60/66] Loss: 0.6538
Time taken: 83.04 sec.

Evaluation:
	Val Acc: 63.00 - Loss: 0.6651
	Current best val acc: 63.00

-----------------------------------
Epoch: [356/10000]
	Iter [10/66] Loss: 0.6778
	Iter [20/66] Loss: 0.6299
	Iter [30/66] Loss: 0.7258
	Iter [40/66] Loss: 0.6419
	Iter [50/66] Loss: 0.6299
	Iter [60/66] Loss: 0.6778
Time taken: 82.72 sec.

-----------------------------------
Epoch: [357/10000]
	Iter [10/66] Loss: 0.7258
	Iter [20/66] Loss: 0.6658
	Iter [30/66] Loss: 0.6538
	Iter [40/66] Loss: 0.6538
	Iter [50/66] Loss: 0.6658
	Iter [60/66] Loss: 0.6658
Time taken: 80.68 sec.

------------------

	Iter [10/66] Loss: 0.6538
	Iter [20/66] Loss: 0.6658
	Iter [30/66] Loss: 0.7258
	Iter [40/66] Loss: 0.6299
	Iter [50/66] Loss: 0.6778
	Iter [60/66] Loss: 0.6538
Time taken: 82.40 sec.

-----------------------------------
Epoch: [387/10000]
	Iter [10/66] Loss: 0.7018
	Iter [20/66] Loss: 0.6418
	Iter [30/66] Loss: 0.6419
	Iter [40/66] Loss: 0.6898
	Iter [50/66] Loss: 0.6778
	Iter [60/66] Loss: 0.6418
Time taken: 80.74 sec.

-----------------------------------
Epoch: [388/10000]
	Iter [10/66] Loss: 0.6538
	Iter [20/66] Loss: 0.7018
	Iter [30/66] Loss: 0.6538
	Iter [40/66] Loss: 0.6778
	Iter [50/66] Loss: 0.6658
	Iter [60/66] Loss: 0.6778
Time taken: 82.49 sec.

-----------------------------------
Epoch: [389/10000]
	Iter [10/66] Loss: 0.7019
	Iter [20/66] Loss: 0.6298
	Iter [30/66] Loss: 0.7139
	Iter [40/66] Loss: 0.6538
	Iter [50/66] Loss: 0.7019
	Iter [60/66] Loss: 0.6658
Time taken: 83.22 sec.

-----------------------------------
Epoch: [390/10000]
	Iter [10/66] Loss: 0.6898
	Iter [20

	Iter [20/66] Loss: 0.6658
	Iter [30/66] Loss: 0.6658
	Iter [40/66] Loss: 0.6297
	Iter [50/66] Loss: 0.6418
	Iter [60/66] Loss: 0.6297
Time taken: 83.06 sec.

-----------------------------------
Epoch: [419/10000]
	Iter [10/66] Loss: 0.6538
	Iter [20/66] Loss: 0.6417
	Iter [30/66] Loss: 0.6658
	Iter [40/66] Loss: 0.6658
	Iter [50/66] Loss: 0.6658
	Iter [60/66] Loss: 0.6537
Time taken: 82.80 sec.

-----------------------------------
Epoch: [420/10000]
	Iter [10/66] Loss: 0.6658
	Iter [20/66] Loss: 0.6177
	Iter [30/66] Loss: 0.7379
	Iter [40/66] Loss: 0.7139
	Iter [50/66] Loss: 0.7019
	Iter [60/66] Loss: 0.6057
Time taken: 82.34 sec.

Evaluation:
	Val Acc: 63.00 - Loss: 0.6651
	Current best val acc: 63.00

-----------------------------------
Epoch: [421/10000]
	Iter [10/66] Loss: 0.6538
	Iter [20/66] Loss: 0.6538
	Iter [30/66] Loss: 0.6658
	Iter [40/66] Loss: 0.6778
	Iter [50/66] Loss: 0.6418
	Iter [60/66] Loss: 0.6418
Time taken: 81.50 sec.

-----------------------------------
Epoch: [4

Process Process-514:
Traceback (most recent call last):
  File "/opt/apps/python/3.6.4/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/apps/python/3.6.4/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/apps/pytorch/0.4.0_cuda-9.0.176.1_python-3.6.4/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 52, in _worker_loop
    r = index_queue.get()
  File "/opt/apps/python/3.6.4/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/opt/apps/python/3.6.4/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/opt/apps/python/3.6.4/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/opt/apps/python/3.6.4/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


KeyboardInterrupt: 

In [7]:
# Time how long the input pipeline alone needs to serve one full epoch
# (batches are only read, nothing is trained).
start = time.time()
n_batches = len(train_loader)
for i, (inputs, targets) in enumerate(train_loader):
    if i == 0:
        print("Input tensor shape: ", inputs.shape)

    # The trailing carriage return keeps the counter on one rewritten line.
    progress = 'Reading batch {} of {} \r'.format(i + 1, n_batches)
    print(progress, end='', flush=True)
elapsed = time.time() - start
print('\nTime to read one epoch:  %.2f seconds.' % elapsed)

Input tensor shape:  torch.Size([22, 48, 3, 224, 224])
Reading batch 66 of 66 
Time to read one epoch:  65.46 seconds.


In [None]:
from tensorflow.python.client import device_lib

# Show the name of the second device TensorFlow reports (index 1).
# NOTE(review): presumably the first GPU; this raises IndexError when fewer
# than two devices are listed -- confirm the intended index.
local_devices = device_lib.list_local_devices()
print(local_devices[1].name)

In [None]:
# Shape check on `inputs`, i.e. whatever batch an earlier cell left in the kernel.
print(inputs[0:2, :].shape)

# augment_on_GPU's result is converted from a NumPy array back into a tensor
# so it can be compared with the original batch.
A = torch.from_numpy(augment_on_GPU(inputs))
print(A.shape)

In [None]:
import matplotlib.pyplot as plt

print(inputs[0, 0, :].shape)

# Element-wise difference between the augmented batch and the raw batch.
C = A - inputs
# Take sample 0, index 5 along the second axis, and move the leading axis of
# that slice to the end (axes 0,1,2 -> 1,2,0) so imshow gets the channel axis last.
B = C[0, 5, :].permute(1, 2, 0)
plt.imshow(B)


In [1]:
# Shell escape: export the notebook interactive_controller.ipynb to a Python script with nbconvert.
!jupyter nbconvert --to script interactive_controller.ipynb

[NbConvertApp] Converting notebook interactive_controller.ipynb to script
[NbConvertApp] Writing 10478 bytes to interactive_controller.py


In [None]:
# Ask PyTorch to release the unused GPU memory currently held by its caching allocator.
torch.cuda.empty_cache()
