In [5]:
from torchvision import utils
from basic_fcn import *
from dataloader import *
from utils import *
import torchvision
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import time
import math
from tqdm import tqdm
import gc

In [6]:
def init_weights(m):
    """Xavier-initialise the parameters of convolutional layers.

    Written to be passed to ``Module.apply``: every sub-module is offered to
    this function, and only ``nn.Conv2d`` / ``nn.ConvTranspose2d`` instances
    are modified. All other modules are left untouched.

    Args:
        m: the module currently being visited.
    """
    if not isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        return

    torch.nn.init.xavier_uniform_(m.weight.data)

    # Layers constructed with bias=False expose ``m.bias`` as None; there is
    # nothing to initialise in that case.
    if m.bias is not None:
        # xavier_uniform_ requires at least two dimensions, so the 1-D bias is
        # initialised through an (n, 1) view that shares its storage.
        torch.nn.init.xavier_uniform_(m.bias.data.view(m.bias.data.shape[0], 1))
        
        


In [7]:

    
def train(model, criterion, epochs, train_loader, val_loader, test_loader, use_gpu, name):
    """Train ``model`` with Adam and validate after every epoch, with early stopping.

    After each epoch the module-level ``val`` function is called on
    ``val_loader``. Whenever the validation loss improves, the whole model
    object is saved to ``name`` with ``torch.save``. Training stops early once
    more than 10 epochs have run and the best loss is more than 5 epochs old.

    Args:
        model: network to train. Wrapped in ``DataParallel`` and moved to
            ``cuda:0`` when ``use_gpu`` is true.
        criterion: loss callable invoked as ``criterion(outputs, labels.long())``.
        epochs: maximum number of epochs.
        train_loader: iterable of ``(inputs, tar, labels)`` batches; ``tar`` is
            not used for training.
        val_loader: loader forwarded to ``val``.
        test_loader: unused here; kept so existing call sites keep working.
        use_gpu: whether to run on ``cuda:0``.
        name: path the best model is saved to.

    Returns:
        ``(val_loss_set, val_acc_set, val_iou_set, predictions)``. The first
        three are per-epoch lists; IoU entries are stored exactly as ``val``
        returns them. ``predictions`` is always an empty list: this function
        never collects predictions, the slot only preserves the 4-tuple shape.
    """
    device = None
    if use_gpu:
        device = torch.device("cuda:0")
        model = torch.nn.DataParallel(model)
        model.to(device)

    # Optimise the parameters of the model that was passed in (not a global),
    # and build the optimizer only after the model sits on its final device.
    optimizer = optim.Adam(model.parameters(), lr=5e-3)

    val_loss_set = []
    val_acc_set = []
    val_iou_set = []
    predictions = []

    # Early-stop bookkeeping
    min_loss = float("inf")
    min_loss_epoch = 0
    earliest_stop_epoch = 10
    early_stop_delta = 5

    for epoch in range(epochs):
        ts = time.time()

        # Validation puts the model in eval mode; make sure every training
        # epoch runs in train mode (matters for BatchNorm / Dropout layers).
        model.train()

        for step, (inputs, tar, labels) in tqdm(enumerate(train_loader)):
            # The extra target tensor is not needed for the loss; drop it early.
            del tar
            optimizer.zero_grad()

            if use_gpu:
                inputs = inputs.to(device)
                labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()

            # Keep only the Python float so no tensors outlive the step.
            loss_value = loss.item()
            del inputs, labels, outputs, loss

            if step % 10 == 0:
                print("epoch{}, iter{}, loss: {}".format(epoch, step, loss_value))

        print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))

        # Validation metrics for this epoch. float() accepts plain floats as
        # well as 0-d tensors / numpy scalars; the IoU is kept as returned.
        val_loss, val_acc, val_iou = val(model, val_loader, criterion, use_gpu)
        val_loss = float(val_loss)
        val_loss_set.append(val_loss)
        val_acc_set.append(float(val_acc))
        val_iou_set.append(val_iou)

        if val_loss < min_loss:
            # New best: persist the model and remember when it happened.
            torch.save(model, name)
            min_loss = val_loss
            min_loss_epoch = epoch
        elif epoch > earliest_stop_epoch and (epoch - min_loss_epoch) > early_stop_delta:
            # Past the warm-up period and no improvement for too long.
            print("Stopping early at {}".format(min_loss_epoch))
            break

    return val_loss_set, val_acc_set, val_iou_set, predictions


def val(model, val_loader, criterion, use_gpu):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    The model is switched to eval mode for the duration of the call and put
    back into whatever mode it was in before returning.

    Args:
        model: network to evaluate. It is not moved here; only the batches
            are sent to ``cuda:0`` when ``use_gpu`` is true.
        val_loader: iterable of ``(X, tar, Y)`` batches. ``tar.shape[1]`` sizes
            the IoU accumulator (assumes one channel per class on dim 1 —
            TODO confirm against the dataset).
        criterion: loss callable invoked as ``criterion(outputs, Y.long())``.
        use_gpu: whether batches are moved to ``cuda:0``.

    Returns:
        ``(avg_loss, acc, IOU)``: mean loss and mean pixel accuracy over the
        batches as Python floats, and the batch-averaged IoU as an array of
        shape ``(1, tar.shape[1])``.

    Raises:
        ValueError: if ``val_loader`` yields no batches.
    """
    softmax = nn.Softmax(dim=1)
    device = torch.device("cuda:0") if use_gpu else None

    losses = []
    accs = []
    iou_sum = None
    n_batches = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X, tar, Y in tqdm(val_loader):
                if iou_sum is None:
                    iou_sum = np.zeros((1, tar.shape[1]))

                if use_gpu:
                    inputs = X.to(device)
                    labels = Y.to(device)
                else:
                    inputs, labels = X, Y

                outputs = model(inputs)
                losses.append(criterion(outputs, labels.long()).item())
                prediction = softmax(outputs)
                accs.append(pixel_acc(prediction, labels).item())
                iou_sum = iou_sum + np.array(iou(prediction, labels))
                n_batches += 1
    finally:
        # Do not leave the caller's model stuck in eval mode.
        model.train(was_training)

    if n_batches == 0:
        raise ValueError("val_loader produced no batches")

    avg_loss = sum(losses) / n_batches
    acc = sum(accs) / n_batches
    # Average over the number of batches, not the index of the last one.
    IOU = iou_sum / n_batches

    return avg_loss, acc, IOU
       
    
    
    
def test(model, use_gpu):
    
    softmax = nn.Softmax(dim = 1)
    
    pred = []
    acc = []
    if use_gpu:
        device = torch.device("cuda:0")
        
        model.to(device)
    
    IOU_init = False
    for iter, (X, tar, Y) in enumerate(test_loader):
        
        if not IOU_init:
            IOU_init = True
            IOU = np.zeros((1,tar.shape[1]))
        
        if use_gpu:
            inputs = X.to(device)
            labels = Y.to(device)
        else:
            inputs, labels = X, Y
                    
        
        outputs = fcn_model(inputs)  
        
        prediction = softmax(outputs)
        acc.append(pixel_acc(prediction, labels))
        IOU = IOU + np.array(iou(prediction, Y))
        
    acc = sum(acc)/len(acc)        
    IOU = IOU/iter

    #Complete this function - Calculate accuracy and IoU 
    # Make sure to include a softmax after the output from your model
    
    return acc, IOU
    


In [8]:
def checkM():
    """Debug helper: print the type and size of every live tensor.

    Walks all objects tracked by the garbage collector and prints
    ``type(obj)`` and ``obj.size()`` for each object that is a tensor or
    whose ``data`` attribute is a tensor.
    """
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
                print(type(obj), obj.size())
        except Exception:
            # Best effort: probing arbitrary objects can raise, and those are
            # simply skipped. KeyboardInterrupt / SystemExit still propagate,
            # so the scan can be interrupted.
            continue

if __name__ == "__main__":
    # The names assigned below are module-level globals; functions defined in
    # this notebook may read some of them (e.g. `test_loader`, `fcn_model`),
    # so they keep their original spelling.

    # --- configuration -----------------------------------------------------
    n_class = 34              # number of output classes of the FCN
    epochs = 100              # upper bound; train() may stop earlier
    checkpoint_name = "FCN"   # path train() saves the best model to

    # --- data --------------------------------------------------------------
    train_dataset = CityScapesDataset(csv_file='train.csv')
    val_dataset = CityScapesDataset(csv_file='val.csv')
    test_dataset = CityScapesDataset(csv_file='test.csv')

    # NOTE(review): the recorded run of this cell ended in a CUDA
    # out-of-memory error with batch_size=6. One float32 output batch of
    # shape (6, 34, 1024, 2048) is already ~1.59 GiB, which matches the
    # failed allocation. Lower the batch size if this recurs.
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=6,
                              num_workers=8,
                              shuffle=True)
    # Evaluation does not benefit from shuffling; a fixed order keeps the
    # per-batch metrics reproducible between runs.
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=8,
                            num_workers=8,
                            shuffle=False)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=4,
                             num_workers=4,
                             shuffle=False)

    # --- model -------------------------------------------------------------
    criterion = torch.nn.CrossEntropyLoss()
    fcn_model = FCN(n_class=n_class)
    fcn_model.apply(init_weights)

    use_gpu = torch.cuda.is_available()

    # --- training ----------------------------------------------------------
    history = train(fcn_model, criterion, epochs, train_loader, val_loader,
                    test_loader, use_gpu, checkpoint_name)

    # --- reload the best checkpoint ---------------------------------------
    # Load from the path that was handed to train(); './save_param' is never
    # written by this notebook. The checkpoint may be a pickled module
    # (possibly wrapped in DataParallel) or a plain state dict, so both forms
    # are accepted.
    # NOTE(review): recent PyTorch releases default torch.load to
    # weights_only=True, which rejects pickled modules — pass
    # weights_only=False there if the checkpoint is a whole model.
    checkpoint = torch.load(checkpoint_name)
    if isinstance(checkpoint, torch.nn.DataParallel):
        checkpoint = checkpoint.module
    if isinstance(checkpoint, torch.nn.Module):
        state_dict = checkpoint.state_dict()
    else:
        state_dict = checkpoint
        # State dicts taken from a DataParallel wrapper prefix every key with
        # "module."; strip it so the keys match the bare model.
        prefix = "module."
        if state_dict and all(key.startswith(prefix) for key in state_dict):
            state_dict = {key[len(prefix):]: value for key, value in state_dict.items()}
    fcn_model.load_state_dict(state_dict)
    


0it [00:00, ?it/s][A


**********************************************
Iter
<class 'torch.Tensor'> torch.Size([6, 3, 1024, 2048])
<class 'torch.Tensor'> torch.Size([6, 34, 1024, 2048])
<class 'torch.Tensor'> torch.Size([6, 1024, 2048])
<class 'torch.Tensor'> torch.Size([6, 3, 1024, 2048])
<class 'torch.Tensor'> torch.Size([6, 34, 1024, 2048])
<class 'torch.Tensor'> torch.Size([6, 1024, 2048])
<class 'torch.Tensor'> torch.Size([6, 3, 1024, 2048])
<class 'torch.Tensor'> torch.Size([6, 34, 1024, 2048])
<class 'torch.Tensor'> torch.Size([6, 1024, 2048])
<class 'torch.nn.parameter.Parameter'> torch.Size([512, 256, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([256])
<class 'torch.nn.parameter.Parameter'> torch.Size([256])
<class 'torch.nn.parameter.Parameter'> torch.Size([256])
<class 'torch.nn.parameter.Parameter'> torch.Size([512, 512, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([512])
<class 'torch.nn.parameter.Parameter'> torch.Size([512])
<class 'torch.nn.parameter.Parameter'> torch.S

  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


RuntimeError: CUDA out of memory. Tried to allocate 1.59 GiB (GPU 0; 10.92 GiB total capacity; 9.04 GiB already allocated; 255.50 MiB free; 1.11 GiB cached) (malloc at /opt/conda/conda-bld/pytorch_1573049310284/work/c10/cuda/CUDACachingAllocator.cpp:267)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x47 (0x7f6c5e78f687 in /opt/conda/lib/python3.7/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x1ea29 (0x7f6c5e9d2a29 in /opt/conda/lib/python3.7/site-packages/torch/lib/libc10_cuda.so)
frame #2: <unknown function> + 0x1fade (0x7f6c5e9d3ade in /opt/conda/lib/python3.7/site-packages/torch/lib/libc10_cuda.so)
frame #3: at::native::empty_cuda(c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) + 0x3f4 (0x7f6c6aa75854 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #4: <unknown function> + 0x3d94528 (0x7f6c69013528 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #5: <unknown function> + 0x384dc28 (0x7f6c68accc28 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #6: torch::cuda::gather(c10::ArrayRef<at::Tensor>, long, c10::optional<int>) + 0xa25 (0x7f6c6940ce15 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch.so)
frame #7: <unknown function> + 0x794762 (0x7f6c93747762 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #8: <unknown function> + 0x2065e6 (0x7f6c931b95e6 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #9: _PyMethodDef_RawFastCallKeywords + 0x264 (0x55fcc50aa774 in /opt/conda/bin/python)
frame #10: _PyCFunction_FastCallKeywords + 0x21 (0x55fcc50aa891 in /opt/conda/bin/python)
frame #11: _PyEval_EvalFrameDefault + 0x4ede (0x55fcc5117fce in /opt/conda/bin/python)
frame #12: _PyEval_EvalCodeWithName + 0x2f9 (0x55fcc5058929 in /opt/conda/bin/python)
frame #13: _PyFunction_FastCallKeywords + 0x325 (0x55fcc50a9f25 in /opt/conda/bin/python)
frame #14: _PyEval_EvalFrameDefault + 0x4b69 (0x55fcc5117c59 in /opt/conda/bin/python)
frame #15: _PyEval_EvalCodeWithName + 0xba9 (0x55fcc50591d9 in /opt/conda/bin/python)
frame #16: _PyFunction_FastCallDict + 0x1d5 (0x55fcc50599f5 in /opt/conda/bin/python)
frame #17: THPFunction_apply(_object*, _object*) + 0x8d6 (0x7f6c93453086 in /opt/conda/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #18: PyCFunction_Call + 0xe7 (0x55fcc507b007 in /opt/conda/bin/python)
frame #19: _PyEval_EvalFrameDefault + 0x5c64 (0x55fcc5118d54 in /opt/conda/bin/python)
frame #20: _PyEval_EvalCodeWithName + 0xba9 (0x55fcc50591d9 in /opt/conda/bin/python)
frame #21: _PyFunction_FastCallKeywords + 0x387 (0x55fcc50a9f87 in /opt/conda/bin/python)
frame #22: _PyEval_EvalFrameDefault + 0x416 (0x55fcc5113506 in /opt/conda/bin/python)
frame #23: _PyEval_EvalCodeWithName + 0xba9 (0x55fcc50591d9 in /opt/conda/bin/python)
frame #24: _PyFunction_FastCallKeywords + 0x387 (0x55fcc50a9f87 in /opt/conda/bin/python)
frame #25: _PyEval_EvalFrameDefault + 0x14dc (0x55fcc51145cc in /opt/conda/bin/python)
frame #26: _PyFunction_FastCallKeywords + 0xfb (0x55fcc50a9cfb in /opt/conda/bin/python)
frame #27: _PyEval_EvalFrameDefault + 0x4b69 (0x55fcc5117c59 in /opt/conda/bin/python)
frame #28: _PyEval_EvalCodeWithName + 0x2f9 (0x55fcc5058929 in /opt/conda/bin/python)
frame #29: _PyFunction_FastCallDict + 0x1d5 (0x55fcc50599f5 in /opt/conda/bin/python)
frame #30: _PyObject_Call_Prepend + 0x63 (0x55fcc5078e23 in /opt/conda/bin/python)
frame #31: PyObject_Call + 0x6e (0x55fcc506b51e in /opt/conda/bin/python)
frame #32: _PyEval_EvalFrameDefault + 0x1f4c (0x55fcc511503c in /opt/conda/bin/python)
frame #33: _PyEval_EvalCodeWithName + 0x2f9 (0x55fcc5058929 in /opt/conda/bin/python)
frame #34: _PyFunction_FastCallDict + 0x1d5 (0x55fcc50599f5 in /opt/conda/bin/python)
frame #35: _PyObject_Call_Prepend + 0x63 (0x55fcc5078e23 in /opt/conda/bin/python)
frame #36: <unknown function> + 0x1702ea (0x55fcc50b22ea in /opt/conda/bin/python)
frame #37: _PyObject_FastCallKeywords + 0x3fb (0x55fcc50b316b in /opt/conda/bin/python)
frame #38: _PyEval_EvalFrameDefault + 0x4ac6 (0x55fcc5117bb6 in /opt/conda/bin/python)
frame #39: _PyFunction_FastCallKeywords + 0xfb (0x55fcc50a9cfb in /opt/conda/bin/python)
frame #40: _PyEval_EvalFrameDefault + 0x416 (0x55fcc5113506 in /opt/conda/bin/python)
frame #41: _PyEval_EvalCodeWithName + 0x2f9 (0x55fcc5058929 in /opt/conda/bin/python)
frame #42: PyEval_EvalCodeEx + 0x44 (0x55fcc50597e4 in /opt/conda/bin/python)
frame #43: PyEval_EvalCode + 0x1c (0x55fcc505980c in /opt/conda/bin/python)
frame #44: <unknown function> + 0x1e0c70 (0x55fcc5122c70 in /opt/conda/bin/python)
frame #45: _PyMethodDef_RawFastCallKeywords + 0xe9 (0x55fcc50aa5f9 in /opt/conda/bin/python)
frame #46: _PyCFunction_FastCallKeywords + 0x21 (0x55fcc50aa891 in /opt/conda/bin/python)
frame #47: _PyEval_EvalFrameDefault + 0x47d4 (0x55fcc51178c4 in /opt/conda/bin/python)
frame #48: _PyGen_Send + 0x2a2 (0x55fcc50b3ea2 in /opt/conda/bin/python)
frame #49: _PyEval_EvalFrameDefault + 0x1acc (0x55fcc5114bbc in /opt/conda/bin/python)
frame #50: _PyGen_Send + 0x2a2 (0x55fcc50b3ea2 in /opt/conda/bin/python)
frame #51: _PyEval_EvalFrameDefault + 0x1acc (0x55fcc5114bbc in /opt/conda/bin/python)
frame #52: _PyGen_Send + 0x2a2 (0x55fcc50b3ea2 in /opt/conda/bin/python)
frame #53: _PyMethodDef_RawFastCallKeywords + 0x8c (0x55fcc50aa59c in /opt/conda/bin/python)
frame #54: _PyMethodDescr_FastCallKeywords + 0x4f (0x55fcc50b2cdf in /opt/conda/bin/python)
frame #55: _PyEval_EvalFrameDefault + 0x4cbc (0x55fcc5117dac in /opt/conda/bin/python)
frame #56: _PyFunction_FastCallKeywords + 0xfb (0x55fcc50a9cfb in /opt/conda/bin/python)
frame #57: _PyEval_EvalFrameDefault + 0x416 (0x55fcc5113506 in /opt/conda/bin/python)
frame #58: _PyFunction_FastCallKeywords + 0xfb (0x55fcc50a9cfb in /opt/conda/bin/python)
frame #59: _PyEval_EvalFrameDefault + 0x6f0 (0x55fcc51137e0 in /opt/conda/bin/python)
frame #60: _PyEval_EvalCodeWithName + 0x2f9 (0x55fcc5058929 in /opt/conda/bin/python)
frame #61: _PyFunction_FastCallDict + 0x400 (0x55fcc5059c20 in /opt/conda/bin/python)
frame #62: _PyObject_Call_Prepend + 0x63 (0x55fcc5078e23 in /opt/conda/bin/python)
frame #63: PyObject_Call + 0x6e (0x55fcc506b51e in /opt/conda/bin/python)




