In [1]:
from torchvision import utils
from basic_fcn import *
from dataloader import *
from utils import *
import torchvision
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import time
import math
from tqdm import tqdm
import gc
import os

In [2]:
def init_weights(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
        torch.nn.init.xavier_uniform_(m.weight.data)
        torch.nn.init.xavier_uniform_(m.bias.data.view(m.bias.data.shape[0],1))
        #a = math.sqrt(3) * math.sqrt(2/m.bias.data.shape[0])
        #torch.nn.init._no_grad_uniform_(m.bias.data, -a, a)
        
        


In [3]:

    
def train(model, criterion, epochs, train_loader, val_loader, test_loader, use_gpu, name):
    
    #Create non-existing logfiles
    logname = 'logfile.txt'
    if os.path.exists('logfile.txt') == True:
        i = 1
        logname = 'logfile' + str(i) + '.txt'
        while os.path.exists('logfile' + str(i) + '.txt'):
            i+=1
            logname = 'logfile' + str(i) + '.txt'

    print('Loading results to logfile: ' + logname)
    with open(logname, "a") as file:
        file.write("Lofile DATA: Validation Loss and Accuracy\n") 
    
    logname_summary = 'logfile' + str(i) + '_summary.txt'    
    print('Loading Summary to : ' + logname_summary) 
    
    
    optimizer = optim.Adam(fcn_model.parameters(), lr=5e-3)
    if use_gpu:
        device = torch.device("cuda:0")
        model = torch.nn.DataParallel(model)
        model.to(device)
        
        
    
    val_loss_set = []
    val_acc_set = []
    val_iou_set = []
    
    
    training_loss = []
    
    # Early Stop criteria
    minLoss = 1e6
    minLossIdx = 0
    earliestStopEpoch = 10
    earlyStopDelta = 5
    for epoch in range(epochs):
        ts = time.time()
        
        #print(np.array(val_loss).shape)
        # early-stopping 
#         if epoch > 11:
#             if val_loss[-1] < val_loss[-10]:
#                 open('save_param', 'w').close()
#                 torch.save(fcn_model.state_dict(), 'save_param')
                
                  
        for iter, (inputs, tar, labels) in tqdm(enumerate(train_loader)):
            #print("\n**********************************************\nIter")
            #checkM()
            optimizer.zero_grad()
            del tar
            
            if use_gpu:
                inputs = inputs.to(device)# Move your inputs onto the gpu
                labels = labels.to(device) # Move your labels onto the gpu
            
                
            outputs = model(inputs)
            del inputs
            loss = criterion(outputs, Variable(labels.long()))
            del labels
            del outputs
            #print("\n**********************************************\nPre back")
            #checkM()
            loss.backward()
            loss = loss#.item()
            optimizer.step()

            if iter % 10 == 0:
                print("epoch{}, iter{}, loss: {}".format(epoch, iter, loss))

                    
                    
            #print("\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n")
        
        # calculate val loss each epoch
        val_loss, val_acc, val_iou = val(model, val_loader, criterion, use_gpu)
        val_loss_set.append(val_loss)
        val_acc_set.append(val_acc)
        val_iou_set.append(val_iou)
        
        print("epoch {}, time {}, train loss {}, val loss {}, val acc {}, val iou {}".format(epoch, time.time() - ts,
                                                                                                loss, val_loss,
                                                                                                val_acc,
                                                                                                val_iou))        
        training_loss.append(loss)
        
        with open(logname, "a") as file:
            file.write("writing!\n")
            file.write("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))
            file.write("\n training Loss:   " + str(loss.item()))
            file.write("\n Validation Loss: " + str(val_loss_set[-1]))
            file.write("\n Validation acc:  " + str(val_acc_set[-1]))
            file.write("\n Validation iou:  " + str(val_iou_set[-1]) + "\n ")                                             
                                                                                                
                                                                                                
        
        # Early stopping
        if val_loss < minLoss:
            # Store new best
            torch.save(model, name)
            minLoss = val_loss#.item()
            minLossIdx = epoch
            
        # If passed min threshold, and no new min has been reached for delta epochs
        elif epoch > earliestStopEpoch and (epoch - minLossIdx) > earlyStopDelta:
            print("Stopping early at {}".format(minLossIdx))
            break
        # TODO what is this for?
        #model.train()

        
        
    with open(logname_summary, "a") as file:
            file.write("Summary!\n")
            file.write("Stopped early at {}".format(minLossIdx))
            file.write("\n training Loss:   " + str(training_loss))        
            file.write("\n Validation Loss: " + str(val_loss_set))
            file.write("\n Validation acc:  " + str(val_acc_set))
            file.write("\n Validation iou:  " + str(val_iou_set) + "\n ")
            
        
    return val_loss_set, val_acc_set, val_iou_set


def val(model, val_loader, criterion, use_gpu):
    
    # set to evaluation mode 
    model.eval()

    softmax = nn.Softmax(dim = 1)
    
    loss = []
    pred = []
    acc = []
    
    IOU_init = False
    if use_gpu:
        device = torch.device("cuda:0")
        
        #model.to(device)
        
    for iter, (X, tar, Y) in tqdm(enumerate(val_loader)):
        
        if not IOU_init:
            IOU_init = True
            IOU = np.zeros((1,tar.shape[1]))
            
        if use_gpu:
            inputs = X.to(device)
            labels = Y.to(device)
            
        else:
            inputs, labels = X, Y

            
        with torch.no_grad():   
            outputs = model(inputs)    
            loss.append(criterion(outputs, labels.long()).item())
            prediction = softmax(outputs) 
            acc.append(pixel_acc(prediction, labels).item())
            IOU = IOU + np.array(iou(prediction, labels))
        
    
    acc = sum(acc)/len(acc)
    avg_loss = sum(loss)/len(loss) 
    IOU = IOU/iter  
    
    return avg_loss, acc, IOU      
       
    
    
    
def test(model, use_gpu):
    
    softmax = nn.Softmax(dim = 1)
    
    pred = []
    acc = []
    if use_gpu:
        device = torch.device("cuda:0")
        
        model.to(device)
    
    IOU_init = False
    for iter, (X, tar, Y) in enumerate(test_loader):
        
        if not IOU_init:
            IOU_init = True
            IOU = np.zeros((1,tar.shape[1]))
        
        if use_gpu:
            inputs = X.to(device)
            labels = Y.to(device)
        else:
            inputs, labels = X, Y
                    
        
        outputs = fcn_model(inputs)  
        
        prediction = softmax(outputs)
        acc.append(pixel_acc(prediction, labels))
        IOU = IOU + np.array(iou(prediction, Y))
        
    acc = sum(acc)/len(acc)        
    IOU = IOU/iter

    #Complete this function - Calculate accuracy and IoU 
    # Make sure to include a softmax after the output from your model
    
    return acc, IOU
    

In [4]:
def checkM():
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
                print(type(obj), obj.size())
        except:
            pass

if __name__ == "__main__":
    train_dataset = CityScapesDataset(csv_file='train.csv')
    val_dataset = CityScapesDataset(csv_file='val.csv')
    test_dataset = CityScapesDataset(csv_file='test.csv')
    train_loader = DataLoader(dataset=train_dataset,
                          batch_size=3,
                          num_workers=8,
                          shuffle=True)
    val_loader = DataLoader(dataset=val_dataset,
                          batch_size=3,
                          num_workers=8,
                          shuffle=True)
    test_loader = DataLoader(dataset=test_dataset,
                          batch_size=4,
                          num_workers=4,
                          shuffle=True)
    
    
    epochs     = 100
    criterion = torch.nn.CrossEntropyLoss()
    # Fix magic number
    fcn_model = FCN(n_class=34)
    fcn_model.apply(init_weights)
    
    
    epochs     = 100
    use_gpu = torch.cuda.is_available()
#     if use_gpu:
#         device = torch.device("cuda:0")
#         fcn_model = torch.nn.DataParallel(fcn_model)
#         fcn_model.to(device)
#     val(fcn_model, val_loader, criterion, use_gpu)
    train(fcn_model, criterion, epochs, train_loader, val_loader, test_loader, use_gpu, "FCN")
    
    
    fcn_model.load_state_dict(torch.load('./save_param'))
    

Loading results to logfile: logfile13.txt
Loading Summary to : logfile13_summary.txt


2it [00:15, 10.90s/it]

epoch0, iter0, loss: 3.587266683578491


12it [00:22,  1.03it/s]

epoch0, iter10, loss: 2.6518564224243164


20it [00:29,  1.13it/s]

epoch0, iter20, loss: 2.22672176361084



0it [00:00, ?it/s][A
1it [00:07,  7.60s/it][A

34



2it [00:08,  5.47s/it][A

34



3it [00:09,  4.19s/it][A

34



4it [00:09,  3.06s/it][A

34



5it [00:11,  2.55s/it][A

34



6it [00:11,  1.91s/it][A

34



7it [00:13,  1.85s/it][A

34



8it [00:13,  1.42s/it][A

34



9it [00:15,  1.56s/it][A

34



10it [00:15,  1.21s/it][A

34



11it [00:17,  1.35s/it][A

34



12it [00:18,  1.10s/it][A

34



13it [00:19,  1.28s/it][A

34



14it [00:20,  1.02s/it][A

34



15it [00:21,  1.25s/it][A

34



16it [00:22,  1.03s/it][A

34



17it [00:24,  1.20s/it][A

34



18it [00:24,  1.01it/s][A

34



19it [00:26,  1.12s/it][A

34



20it [00:26,  1.05it/s][A

34



21it [00:28,  1.12s/it][A

34



22it [00:28,  1.07it/s][A

34



23it [00:30,  1.14s/it][A

34



24it [00:30,  1.06it/s][A

34



25it [00:32,  1.17s/it][A

34



26it [00:32,  1.06it/s][A

34



27it [00:34,  1.11s/it][A

34



28it [00:34,  1.10it/s][A

34



29it [00:36,  1.07s/it][A

34



30it [00:36,  1.14it/s][A

34



31it [00:38,  1.12s/it][A

34



32it [00:38,  1.11it/s][A

34



33it [00:40,  1.17s/it][A

34



34it [00:40,  1.06it/s][A

34



35it [00:42,  1.08s/it][A

34



36it [00:42,  1.11it/s][A

34



37it [00:44,  1.08s/it][A

34



38it [00:44,  1.10it/s][A

34



39it [00:46,  1.18s/it][A

34



40it [00:47,  1.05it/s][A

34



41it [00:48,  1.11s/it][A

34



42it [00:48,  1.08it/s][A

34



43it [00:50,  1.13s/it][A

34



44it [00:51,  1.10it/s][A

34



45it [00:52,  1.09s/it][A

34



46it [00:52,  1.14it/s][A

34



47it [00:54,  1.04s/it][A

34



48it [00:54,  1.18it/s][A

34



49it [00:56,  1.08s/it][A

34



50it [00:56,  1.11it/s][A

34



51it [00:58,  1.11s/it][A

34



52it [00:58,  1.12it/s][A

34



53it [01:00,  1.08s/it][A

34



54it [01:00,  1.10it/s][A

34



55it [01:02,  1.11s/it][A

34



56it [01:02,  1.10it/s][A

34



57it [01:04,  1.08s/it][A

34



58it [01:04,  1.14it/s][A

34



59it [01:06,  1.09s/it][A

34



60it [01:06,  1.13it/s][A

34



61it [01:08,  1.15s/it][A

34



62it [01:08,  1.07it/s][A

34



63it [01:10,  1.10s/it][A

34



64it [01:10,  1.12it/s][A

34



65it [01:12,  1.05s/it][A

34



66it [01:12,  1.14it/s][A

34



67it [01:14,  1.12s/it][A

34



68it [01:14,  1.07it/s][A

34



69it [01:16,  1.11s/it][A

34



70it [01:16,  1.11it/s][A

34



71it [01:18,  1.16s/it][A

34



72it [01:19,  1.07it/s][A

34



73it [01:20,  1.11s/it][A

34



74it [01:20,  1.11it/s][A

34



75it [01:22,  1.10s/it][A

34



76it [01:22,  1.11it/s][A

34



77it [01:24,  1.10s/it][A

34



78it [01:24,  1.12it/s][A

34



79it [01:26,  1.13s/it][A

34



80it [01:26,  1.10it/s][A

34



81it [01:28,  1.06s/it][A

34



82it [01:28,  1.16it/s][A

34



83it [01:30,  1.02s/it][A

34



84it [01:30,  1.15it/s][A

34



85it [01:32,  1.08s/it][A

34



86it [01:32,  1.14it/s][A

34



87it [01:34,  1.14s/it][A

34



88it [01:34,  1.04it/s][A

34



89it [01:36,  1.15s/it][A

34



90it [01:37,  1.07it/s][A

34



91it [01:38,  1.10s/it][A

34



92it [01:38,  1.11it/s][A

34



93it [01:40,  1.08s/it][A

34



94it [01:40,  1.12it/s][A

34



95it [01:42,  1.05s/it][A

34



96it [01:42,  1.14it/s][A

34



97it [01:44,  1.05s/it][A

34



98it [01:44,  1.15it/s][A

34



99it [01:46,  1.01s/it][A

34



100it [01:46,  1.09it/s][A

34



101it [01:48,  1.12s/it][A

34



102it [01:48,  1.12it/s][A

34



103it [01:50,  1.03s/it][A

34



104it [01:50,  1.16it/s][A

34



105it [01:52,  1.08s/it][A

34



106it [01:52,  1.13it/s][A

34



107it [01:53,  1.03s/it][A

34



108it [01:54,  1.15it/s][A

34



109it [01:55,  1.06s/it][A

34



110it [01:56,  1.08it/s][A

34



111it [01:58,  1.15s/it][A

34



112it [01:58,  1.08it/s][A

34



113it [01:59,  1.07s/it][A

34



114it [02:00,  1.18it/s][A

34



115it [02:01,  1.04s/it][A

34



116it [02:02,  1.15it/s][A

34



117it [02:04,  1.15s/it][A

34



118it [02:04,  1.07it/s][A

34



119it [02:06,  1.13s/it][A

34



120it [02:06,  1.09it/s][A

34



121it [02:07,  1.04s/it][A

34



122it [02:08,  1.19it/s][A

34



123it [02:09,  1.04s/it][A

34



124it [02:10,  1.18it/s][A

34



125it [02:12,  1.22s/it][A

34



126it [02:12,  1.02it/s][A

34



127it [02:14,  1.21s/it][A

34



128it [02:14,  1.03it/s][A

34



129it [02:16,  1.07s/it][A

34



130it [02:16,  1.17it/s][A

34



131it [02:17,  1.08it/s][A

34



132it [02:17,  1.30it/s][A

34



133it [02:19,  1.01s/it][A

34



134it [02:19,  1.20it/s][A

34



135it [02:21,  1.02s/it][A

34



136it [02:21,  1.15it/s][A

34



137it [02:23,  1.06s/it][A

34



138it [02:23,  1.15it/s][A

34



139it [02:25,  1.03s/it][A

34



140it [02:25,  1.20it/s][A

34



141it [02:26,  1.02it/s][A

34



142it [02:27,  1.21it/s][A

34



143it [02:29,  1.07s/it][A

34



144it [02:29,  1.16it/s][A

34



145it [02:30,  1.06it/s][A

34



146it [02:30,  1.26it/s][A

34



147it [02:32,  1.11it/s][A

34



148it [02:32,  1.34it/s][A

34



149it [02:33,  1.04it/s][A

34



150it [02:34,  1.21it/s][A

34



151it [02:35,  1.03s/it][A

34



152it [02:36,  1.18it/s][A

34



153it [02:37,  1.05it/s][A

34



154it [02:37,  1.32it/s][A

34



155it [02:38,  1.35it/s][A

34



156it [02:38,  1.69it/s][A

34



157it [02:39,  1.65it/s][A

34



158it [02:39,  1.98it/s][A

34



159it [02:40,  1.85it/s][A

34



160it [02:40,  2.21it/s][A

34



161it [02:41,  2.06it/s][A

34



162it [02:41,  2.42it/s][A

34



163it [02:42,  2.18it/s][A

34



164it [02:42,  2.51it/s][A

34



165it [02:42,  2.21it/s][A

34



166it [02:43,  2.52it/s][A

34



167it [02:43,  2.28it/s][A

34


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


epoch 0, time 196.4854850769043, train loss 2.22672176361084, val loss 2.5506451758082043, val acc 0.0, val iou [[       nan 0.                nan 0.         0.                nan
         nan 0.08659349 0.                nan        nan 0.15022947
         nan        nan        nan        nan        nan 0.
         nan        nan 0.         0.0042119         nan 0.05364144
         nan        nan 0.                nan        nan        nan
         nan        nan        nan        nan]]


FileNotFoundError: [Errno 2] No such file or directory: './save_param'