In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import sampler
import math
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [2]:
#torch.cuda.set_device(0)
# specify dtype
use_cuda = torch.cuda.is_available()
if use_cuda:
    dtype = torch.cuda.FloatTensor
else:
    dtype = torch.FloatTensor
print(torch.cuda.device_count())
print(use_cuda)

1
True


In [3]:
# Hyperparameters
batch_size = 32
learning_rate = 4e-3
momentum = 0.9
num_epoch = 15

In [4]:
# Data
class ChunkSampler(sampler.Sampler):
    """Samples elements sequentially from some offset. 
    Arguments:
        num_samples: # of desired datapoints
        start: offset where we should start selecting from
    """
    def __init__(self, num_samples, start = 0):
        self.num_samples = num_samples
        self.start = start

    def __iter__(self):
        return iter(range(self.start, self.start + self.num_samples))

    def __len__(self):
        return self.num_samples

data_path = '/home/put_data/frank840925/IDP/data'
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

trainset = torchvision.datasets.CIFAR10(root=data_path, 
                                        train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, 
                                          num_workers=4)

valset = torchvision.datasets.CIFAR10(root=data_path, 
                                       train=False, download=True, transform=transform)
valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False, sampler=ChunkSampler(5000,0), 
                                        num_workers=4)

testset = torchvision.datasets.CIFAR10(root=data_path, 
                                       train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, sampler=ChunkSampler(5000,5000),
                                        num_workers=4)

print(trainset.__len__())
print(valset.__len__())
print(testset.__len__())

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
50000
10000
10000


In [5]:
# Helper functions
# functions to show an image
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

def plot_losses(loss_history1=None, loss_history2=None):
    plt.clf()
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    if loss_history1:
        ax1.plot(loss_history1, color="blue", label="train")
    if loss_history2:
        ax1.plot(loss_history2, color="green", label="val")
    #ax2 = ax1.twinx()
    #ax2.set_yscale('log')
    plt.xlabel("epoch") 
    plt.ylabel("loss") 
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title("Cross-entropy loss")
    #plt.savefig('output_losses.png')

def plot_accuracy(accuracy1=None, accuracy2=None):
    plt.clf()
    fig2 = plt.figure()
    ax1 = fig2.add_subplot(111)
    if accuracy1:
        ax1.plot(accuracy1, color="red", label="train")
    if accuracy2:
        ax1.plot(accuracy2, color="black", label="val")
    plt.xlabel("epoch") 
    plt.ylabel("accuracy")
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title("Train/Val accuracy") 
    #plt.savefig('accuracy.png')

# get some random training images
#dataiter = iter(trainloader)
#images, labels = dataiter.next()
#print(images.shape)

In [6]:
# self-defined modules(layers)
class Flatten(nn.Module):
    def forward(self, x):
        # read in N, C, H, W
        N, C, H, W = x.size()
        # flatten the C * H * W values into a single vector per image
        return x.view(N, -1)  
        
class first_idp_tensor_3(nn.Module):
    def __init__(self, idp1=0.35, idp2=0.7, idp3=1):
        super(first_idp_tensor_3, self).__init__()
        self.idp1 = idp1
        self.idp2 = idp2
        self.idp3 = idp3
    def forward(self, c):
        #input the first conv-Relu-Linear output, N*C*H*W, replicate and apply idp and concat in first dim (N)
        N, C, H, W = c.size()
        non_zero_channel_1 = int(C*(self.idp1))
        non_zero_channel_2 = int(C*(self.idp2))
        non_zero_channel_3 = int(C*(self.idp3))
        if C-non_zero_channel_1 > 0:
            #zeros = Variable(torch.zeros(N, C-non_zero_channel_1, H, W)).type(dtype)
            zeros = Variable(torch.zeros(C-non_zero_channel_1).view(C-non_zero_channel_1,1)).type(dtype) #C
            zeros = zeros.expand(C-non_zero_channel_1, H) #C*H
            zeros = torch.stack([zeros]*W,1) #C*H*W
            zeros = torch.stack([zeros]*N) #N*C*H*W
            c1 = torch.cat([c[:, :non_zero_channel_1, :, :].clone(), zeros], 1).type(dtype)
        else:
            c1 = c
        if C-non_zero_channel_2 > 0:
            #zeros = Variable(torch.zeros(N, C-non_zero_channel_2, H, W)).type(dtype)
            zeros = Variable(torch.zeros(C-non_zero_channel_2).view(C-non_zero_channel_2,1)).type(dtype) #C
            zeros = zeros.expand(C-non_zero_channel_2, H) #C*H
            zeros = torch.stack([zeros]*W,1) #C*H*W
            zeros = torch.stack([zeros]*N) #N*C*H*W
            c2 = torch.cat([c[:, :non_zero_channel_2, :, :].clone(), zeros], 1).type(dtype)
        else:
            c2 = c
        if C-non_zero_channel_3 > 0:
            #zeros = Variable(torch.zeros(N, C-non_zero_channel_3, H, W)).type(dtype)
            zeros = Variable(torch.zeros(C-non_zero_channel_3).view(C-non_zero_channel_3,1)).type(dtype) #C
            zeros = zeros.expand(C-non_zero_channel_3, H) #C*H
            zeros = torch.stack([zeros]*W,1) #C*H*W
            zeros = torch.stack([zeros]*N) #N*C*H*W
            c3 = torch.cat([c[:, :non_zero_channel_3, :, :].clone(), zeros], 1).type(dtype)
        else:
            c3 = c
        out = torch.cat([c1, c2, c3], 0)
        return out
        
class middle_idp_tensor_3(nn.Module):
    def __init__(self, idp1=0.35, idp2=0.7, idp3=1):
        super(middle_idp_tensor_3, self).__init__()
        self.idp1 = idp1
        self.idp2 = idp2
        self.idp3 = idp3
    def forward(self, c):
        #input a middle conv-Relu-Linear output, (3*N)*C*H*W, apply IDP1, IDP2, IDP3 to each
        NN, C, H, W = c.size()
        if NN>=3:
            N = int(NN/3)
            non_zero_channel_1 = int(C*(self.idp1))
            non_zero_channel_2 = int(C*(self.idp2))
            non_zero_channel_3 = int(C*(self.idp3))
            c1 = c[:N,:,:,:]
            c2 = c[N:2*N,:,:,:]
            c3 = c[2*N:,:,:,:]
            if C-non_zero_channel_1 > 0:
                #zeros = Variable(torch.zeros(N, C-non_zero_channel_1, H, W)).type(dtype)
                zeros = Variable(torch.zeros(C-non_zero_channel_1).view(C-non_zero_channel_1,1)).type(dtype) #C
                zeros = zeros.expand(C-non_zero_channel_1, H) #C*H
                zeros = torch.stack([zeros]*W,1) #C*H*W
                zeros = torch.stack([zeros]*N) #N*C*H*W
                c1 = torch.cat([c1[:, :non_zero_channel_1, :, :].clone(), zeros], 1).type(dtype)
            else:
                c1 = c1
            if C-non_zero_channel_2 > 0:
                #zeros = Variable(torch.zeros(N, C-non_zero_channel_2, H, W)).type(dtype)
                zeros = Variable(torch.zeros(C-non_zero_channel_2).view(C-non_zero_channel_2,1)).type(dtype) #C
                zeros = zeros.expand(C-non_zero_channel_2, H) #C*H
                zeros = torch.stack([zeros]*W,1) #C*H*W
                zeros = torch.stack([zeros]*N) #N*C*H*W
                c2 = torch.cat([c2[:, :non_zero_channel_2, :, :].clone(), zeros], 1).type(dtype)
            else:
                c2 = c2
            if C-non_zero_channel_3 > 0:
                #zeros = Variable(torch.zeros(N, C-non_zero_channel_3, H, W)).type(dtype)
                zeros = Variable(torch.zeros(C-non_zero_channel_3).view(C-non_zero_channel_3,1)).type(dtype) #C
                zeros = zeros.expand(C-non_zero_channel_3, H) #C*H
                zeros = torch.stack([zeros]*W,1) #C*H*W
                zeros = torch.stack([zeros]*N) #N*C*H*W
                c3 = torch.cat([c3[:, :non_zero_channel_3, :, :].clone(), zeros], 1).type(dtype)
            else:
                c3 = c3
            out = torch.cat([c1, c2, c3],0)
            return out
        else:
            return c
    
class func_trainable(nn.Module):
    def __init__(self):
        super(func_trainable, self).__init__()
        self.register_parameter('coeff_tensor', None)
        #self.coeff_tensor =  nn.Parameter(torch.ones().type(dtype)) 
    
    def reset_parameters(self, x):
        self.coeff_tensor = nn.Parameter(torch.ones(x.size()[1]).type(dtype), requires_grad=True)  
    
    def forward(self, x):
        N, C, H, W = x.size()
        if self.coeff_tensor is None:
            self.reset_parameters(x)
        coeff_tensor = self.coeff_tensor.view(C, 1)
        coeff_tensor = coeff_tensor.expand(C, H) #C*H
        coeff_tensor = torch.stack([coeff_tensor]*W,1) #C*H*W
        coeff_tensor = torch.stack(([coeff_tensor]*N)) #N*C*H*W
        
        x = torch.mul(x.clone(), coeff_tensor)
        return x

In [41]:
x = Variable(torch.Tensor([-2]), requires_grad=True, volatile=False).type(dtype)
y = Variable(torch.Tensor([5]), requires_grad=True).type(dtype)
z = Variable(torch.Tensor([-4]),requires_grad=True).type(dtype)
f = (x+y)*z
print(q)
print(f)
f.backward()
print(y.grad)

Variable containing:
 3
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
-12
[torch.cuda.FloatTensor of size 1 (GPU 0)]

None


In [None]:
m= func_trainable().type(dtype)
print(len(list(m.parameters())))
#print(list(m.parameters())[0])
out = m(y).mean()
print(len(list(m.parameters())))

In [42]:
class atp_tesla_coef_idp_VGG_3branch(nn.Module):
    def __init__(self, idp_layers, model=models.vgg16(pretrained=True).type(dtype), idp1=0.35, idp2=0.7, idp3=1):
        #idp is 0~1, idp_layers is a set, specify which layer in features should apply idp
        #now idp_layers is 3,6,8,11,13,15,18,20,22,25,27,29
        super(atp_tesla_coef_idp_VGG_3branch, self).__init__()
        self.idp1 = idp1
        self.idp2 = idp2
        self.idp3 = idp3
        self.idp_layers = idp_layers
        self.features = nn.Sequential(*(self.new_features_list(model)))
        self.classifier = nn.Sequential(nn.Linear(512, 512),
                              nn.ReLU(inplace=True),
                              nn.Dropout(0.5),
                              nn.Linear(512, 10))
        print(len(list(self.features.parameters())))
        print(len(list(self.classifier.parameters())))
        
    def new_features_list(self, model):
        new_layers = nn.ModuleList()
        #create from pre-trained resnet
        for i, layer in enumerate(list(model.features.children())):
            if i ==1:
                new_layers.append(layer)
                new_layers.append(func_trainable())
                new_layers.append(first_idp_tensor_3(self.idp1, self.idp2, self.idp3))
            elif i not in self.idp_layers:
                new_layers.append(layer)                
            else:
                new_layers.append(layer)
                new_layers.append(func_trainable())
                new_layers.append(middle_idp_tensor_3(self.idp1, self.idp2, self.idp3))
        new_layers.append(Flatten())
        return new_layers        
    
    def forward(self, x):
        f = self.features(x)
        NN, M = f.size()
        N = int(NN/3)
        o1 = f[:N, :]
        o2 = f[N:2*N, :]
        o3 = f[2*N:,:]
        o1 = self.classifier(o1)
        o2 = self.classifier(o2)
        o3 = self.classifier(o3)
        return o1, o2, o3

In [43]:
def atp_train_tesla_3(num_epoch, net, criterion, alpha1=0, alpha2=0, alpha3=1, index=2):
    train_loss = []
    test_loss = []
    train_acc = []
    test_acc = []

    # start training
    for epoch in range(num_epoch):
        if epoch%2 == 0:
            running_loss1 = []
            running_loss2 = []
            running_correct1=[]
            running_correct2=[]
            #train weight and bias
            for i, data in enumerate(trainloader, 0):
                correct = 0
                net.train(True)
                # get the inputs
                inputs, labels = data
                # wrap them in Variable
                inputs, labels = Variable(inputs).type(dtype), Variable(labels).type(torch.cuda.LongTensor)
                #
                params_w_b = []
                params_gamma = []

                for i, (name, param) in enumerate(net.named_parameters()):
                    if name.endswith('coeff_tensor'):
                        #print(i, name, param.size())
                        params_gamma.append(param)
                    else:
                        params_w_b.append(param)
                        
                optimizer1 = optim.SGD(params_w_b, lr=learning_rate, momentum=momentum)
                # zero the parameter gradients
                optimizer1.zero_grad()
                # forward + backward + optimize
                outputs = net(inputs)
                loss1 = criterion(outputs[0], labels) #idp-low
                loss2 = criterion(outputs[1], labels) #idp-mid
                loss3 = criterion(outputs[2], labels) #idp-high
                loss = alpha1*loss1+alpha2*loss2+alpha3*loss3
                loss.backward()
                optimizer1.step()
                # print statistics
                # loss
                running_loss1.append(loss.data[0])
                # accuracy
                _, predicted = torch.max(outputs[index].data, 1)
                correct += (predicted == labels.data).sum()
                correct = correct/labels.size(0)*100
                running_correct1.append(correct)

            for i, tdata in enumerate(valloader, 0):
                test_correct = 0
                net.train(False)
                tinputs, tlabels = tdata
                tinputs, tlabels = Variable(tinputs).type(dtype), Variable(tlabels).type(torch.cuda.LongTensor)
                toutputs = net(tinputs)
                tloss1 = criterion(toutputs[0], tlabels)
                tloss2 = criterion(toutputs[1], tlabels)
                tloss3 = criterion(toutputs[2], tlabels)
                tloss = alpha1*tloss1+alpha2*tloss2+alpha3*loss3
                running_loss2.append(tloss.data[0])
                _, tpredicted = torch.max(toutputs[index].data, 1)
                test_correct += (tpredicted == tlabels.data).sum()
                test_correct = test_correct/tlabels.size(0)*100
                running_correct2.append(test_correct)
        
        
            train_loss.append(np.mean(running_loss1))
            test_loss.append(np.mean(running_loss2))
            train_acc.append(np.mean(running_correct1))
            test_acc.append(np.mean(running_correct2))
        
            #statistics
            print('Epoch [%d/%d], Train Loss:%.3f, Val Loss:%.3f, Train Accuracy:%.3f percent, Val Accuracy:%.3f percent' 
                        %(epoch+1, num_epoch, train_loss[-1], test_loss[-1], train_acc[-1], test_acc[-1]))
        
        elif epoch%2 ==1:
            running_loss1 = []
            running_loss2 = []
            running_correct1=[]
            running_correct2=[]
            #train gamma, and manually clip it 
            for i, data in enumerate(trainloader, 0):
                correct = 0
                net.train(True)
                # get the inputs
                inputs, labels = data
                # wrap them in Variable
                inputs, labels = Variable(inputs).type(dtype), Variable(labels).type(torch.cuda.LongTensor)
                #
                params_w_b = []
                params_gamma = []

                for i, (name, param) in enumerate(net.named_parameters()):
                    if name.endswith('coeff_tensor'):
                        #print(i, name, param.size())
                        params_gamma.append(param)
                    else:
                        params_w_b.append(param)
                
                optimizer2 = optim.SGD(params_gamma, lr=learning_rate, momentum=momentum)
                # zero the parameter gradients
                optimizer2.zero_grad()
                # forward + backward + optimize
                outputs = net(inputs)
                loss1 = criterion(outputs[0], labels) #idp-low
                loss2 = criterion(outputs[1], labels) #idp-mid
                loss3 = criterion(outputs[2], labels) #idp-high
                loss = alpha1*loss1+alpha2*loss2+alpha3*loss3
                loss.backward()
                optimizer2.step()
                # print statistics
                # loss
                running_loss1.append(loss.data[0])
                # accuracy
                _, predicted = torch.max(outputs[index].data, 1)
                correct += (predicted == labels.data).sum()
                correct = correct/labels.size(0)*100
                running_correct1.append(correct)

            for i, tdata in enumerate(valloader, 0):
                test_correct = 0
                net.train(False)
                tinputs, tlabels = tdata
                tinputs, tlabels = Variable(tinputs).type(dtype), Variable(tlabels).type(torch.cuda.LongTensor)
                toutputs = net(tinputs)
                tloss1 = criterion(toutputs[0], tlabels)
                tloss2 = criterion(toutputs[1], tlabels)
                tloss3 = criterion(toutputs[2], tlabels)
                tloss = alpha1*tloss1+alpha2*tloss2+alpha3*loss3
                running_loss2.append(tloss.data[0])
                _, tpredicted = torch.max(toutputs[index].data, 1)
                test_correct += (tpredicted == tlabels.data).sum()
                test_correct = test_correct/tlabels.size(0)*100
                running_correct2.append(test_correct)
                
            #clip weight manually
            for i, (name, param) in enumerate(net.named_parameters()):
                if name.endswith('coeff_tensor'):
                    print(param.data)
                    param = param.clamp(max=param.data[0])
                    print(param.data)
        
            train_loss.append(np.mean(running_loss1))
            test_loss.append(np.mean(running_loss2))
            train_acc.append(np.mean(running_correct1))
            test_acc.append(np.mean(running_correct2))

            #statistics
            print('Epoch [%d/%d], Train Loss:%.3f, Val Loss:%.3f, Train Accuracy:%.3f percent, Val Accuracy:%.3f percent' 
                        %(epoch+1, num_epoch, train_loss[-1], test_loss[-1], train_acc[-1], test_acc[-1]))
        
        #save model every 10 epoch
        if epoch!=0 and epoch%10 ==0:
            file = 'saved_models/'+str(epoch)+'_epoch_tesla_linear_idp_model.pkl'
            torch.save(net.state_dict(), file)
        #early stopping
        if epoch >= 3:
            if (test_acc[-1] - test_acc[-2]) <=0 and (test_acc[-1] - test_acc[-3]) <= 0:
                print('Early stopping')
                return train_loss, test_loss, train_acc, test_acc
            else:
                continue
        
    print('Finished Training')
    return train_loss, test_loss, train_acc, test_acc

In [44]:
# define models, optimizers, training
idp_ranges = [0.1, 0.35, 1]
all_train_loss = []
all_test_loss = []
all_train_acc = []
all_test_acc = []

criterion = nn.CrossEntropyLoss()

idp_layers = {3,6,8,11,13,15,18,20,22,25,27,29}
net = atp_tesla_coef_idp_VGG_3branch(idp_layers, idp1=0.2, idp2=0.5, idp3=1).type(dtype)

26
4


In [45]:
net

atp_tesla_coef_idp_VGG_3branch (
  (features): Sequential (
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU (inplace)
    (2): func_trainable (
    )
    (3): first_idp_tensor_3 (
    )
    (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU (inplace)
    (6): func_trainable (
    )
    (7): middle_idp_tensor_3 (
    )
    (8): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (9): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU (inplace)
    (11): func_trainable (
    )
    (12): middle_idp_tensor_3 (
    )
    (13): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU (inplace)
    (15): func_trainable (
    )
    (16): middle_idp_tensor_3 (
    )
    (17): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (18): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (19): ReLU (inplace)
    (20): func_trainable (

In [14]:
for index, idp in enumerate(idp_ranges):
    if index == 0:
        print('train with idp3')
        train_loss, test_loss, train_acc, test_acc = atp_train_tesla_3(num_epoch, net, criterion, 
                                                                 alpha1=0, alpha2=0, alpha3=1, index=2)
        all_train_loss.extend(train_loss)
        all_test_loss.extend(test_loss)
        all_train_acc.extend(train_acc)
        all_test_acc.extend(test_acc)
    elif index ==1:
        print('train with idp2')
        train_loss, test_loss, train_acc, test_acc = atp_train_tesla_3(num_epoch, net, criterion,
                                                                 alpha1=0, alpha2=0.5, alpha3=1, index=1)        
        all_train_loss.extend(train_loss)
        all_test_loss.extend(test_loss)
        all_train_acc.extend(train_acc)
        all_test_acc.extend(test_acc)
    else:
        print('train with idp1')
        train_loss, test_loss, train_acc, test_acc = atp_train_tesla_3(num_epoch, net, criterion, 
                                                                  alpha1=0.5, alpha2=0.25, alpha3=0.25, index=0)        
        all_train_loss.extend(train_loss)
        all_test_loss.extend(test_loss)
        all_train_acc.extend(train_acc)
        all_test_acc.extend(test_acc)

train with idp3
Epoch [1/15], Train Loss:0.517, Val Loss:0.293, Train Accuracy:82.802 percent, Val Accuracy:84.634 percent

 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
[torch.cuda.FloatTensor of size 64 (GPU 0)]


 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
[torch.cuda.FloatTensor of size 64 (GPU 0)]


 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
[torch.cuda.FloatTensor of size 64 (GPU 0)]


 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1


Epoch [3/15], Train Loss:0.361, Val Loss:0.352, Train Accuracy:87.732 percent, Val Accuracy:81.608 percent

 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
[torch.cuda.FloatTensor of size 64 (GPU 0)]


 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
[torch.cuda.FloatTensor of size 64 (GPU 0)]


 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
[torch.cuda.FloatTensor of size 64 (GPU 0)]


 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 

Process Process-49:
Process Process-51:
Process Process-50:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/frank840925/miniconda3/envs/IDP/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/frank840925/miniconda3/envs/IDP/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/frank840925/miniconda3/envs/IDP/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/frank840925/miniconda3/envs/IDP/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/frank840925/miniconda3/envs/IDP/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/frank840925/miniconda3/envs/IDP/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs

KeyboardInterrupt: 

  File "/home/frank840925/miniconda3/envs/IDP/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/home/frank840925/miniconda3/envs/IDP/lib/python3.6/multiprocessing/queues.py", line 342, in get
    res = self._reader.recv_bytes()
  File "/home/frank840925/miniconda3/envs/IDP/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/home/frank840925/miniconda3/envs/IDP/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/home/frank840925/miniconda3/envs/IDP/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt
