In [1]:
import sys
sys.path.append('./models/')
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import time
import os
from data_loader import Dataset,Options
import models.unet_normals as unet
from tensorboardX import SummaryWriter
# import OpenEXR, Imath

### Setup Options
Set the various parameters:
- dataroot: The folder where the training data is stored
- file_list: List of filenames of images for training
- batchSize: Batch size for model
- shuffle: If true, will shuffle the dataset
- phase: If 'train', then it's in training mode.
- num_epochs: Number of epochs to train the model for
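- imsize: Side length of the (square) input images
- num_classes: Number of channels in the model output (3: the x, y, z components of the surface normal)
- gpu: Index of the GPU device to use, given as a string (e.g. '0')
- logs_path: The path where the log files (tensorboard) and model checkpoints will be saved. This folder must not already exist.
- use_pretrained: If True, the model weights are loaded from a previously saved checkpoint before training starts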

In [2]:
class OPT:
    '''
    Plain container holding every tunable setting of this notebook.
    All values are fixed in the constructor; edit them here before running the cells below.
    '''

    def __init__(self):
        # --- Data ---
        self.dataroot = './data/'            # folder where the training data is stored
        self.file_list = './data/datalist'   # list of filenames of the training images
        self.batchSize = 32                  # number of samples per batch
        self.shuffle = True                  # if true, the dataset is shuffled
        # --- Training ---
        self.phase = 'train'                 # 'train' means training mode
        self.num_epochs = 500                # number of epochs to train for
        self.imsize = 224                    # dimensions of the (square) image
        self.num_classes = 3                 # number of classes/channels in the model output
        # --- Runtime / bookkeeping ---
        self.gpu = '0'                       # GPU index, kept as a string (appended to "cuda:")
        self.logs_path = 'logs/exp10'        # where the tensorboard logs are written
        self.use_pretrained = False          # if True, weights are loaded from a checkpoint

# Single shared options instance used by the rest of the notebook.
opt = OPT()

### Setup logging and dataloaders

In [3]:
###################### Options #############################
phase = opt.phase

# Use the configured GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:" + opt.gpu)
else:
    device = torch.device("cpu")

###################### TensorBoardX #############################
# Refuse to write into an existing log folder so a previous experiment is never mixed with this one.
if os.path.exists(opt.logs_path):
    message = 'The folder "{}" already exists! Define a new log path or delete old contents.'
    raise Exception(message.format(opt.logs_path))

writer = SummaryWriter(opt.logs_path, comment='create-graph')
# Flipped to True by the training loop once the model graph has been logged.
graph_created = False

###################### DataLoader #############################
dataloader = Dataset(opt)


shuffling the dataset


### Create the model
We use a UNet model. The last few layers of this model are modified to return a 3 channel image, containing the x,y,z values of surface normal vectors.

In [4]:
###################### ModelBuilder #############################
model = unet.Unet(num_classes=opt.num_classes)

# Optionally start from the weights of a previous experiment.
if opt.use_pretrained:
    checkpoint_path = 'logs/exp7/checkpoints/checkpoint.pth'
    # map_location='cpu' lets a checkpoint that was saved from a GPU be loaded on a machine
    # without CUDA (or with a different GPU index). The model is moved to `device` right below.
    model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))

model = model.to(device)
model.train()

###################### Setup Optimization #############################
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
# Multiplies the learning rate by 0.1 every 7 calls to exp_lr_scheduler.step().
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

###################### Loss function #############################
def loss_fn(input_vec, target_vec, reduction='elementwise_mean'):
    '''
    Cosine loss between two batches of vectors, i.e. 1 - cosine_similarity computed along dim 1.

    @input:
    input_vec, target_vec: The 2 tensors whose cosine loss is to be calculated.
        The dimensions of the tensors are expected to be (batchSize, 3, imsize, imsize).
    reduction: How the per-pixel losses are combined:
        elementwise_mean (or its alias mean): will return the sum of all losses divided by num of elements
        none: The loss will be calculated to be of size (batchSize, imsize, imsize) containing cosine loss of each pixel

    @return:
    A scalar tensor for 'elementwise_mean'/'mean', or the per-pixel loss tensor for 'none'.

    @raises:
    ValueError: if reduction is not one of the values listed above.
    '''
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    loss_val = 1.0 - cos(input_vec, target_vec)
    # 'mean' is accepted alongside the older 'elementwise_mean' spelling; both behave identically.
    if reduction in ('elementwise_mean', 'mean'):
        return torch.mean(loss_val)
    elif reduction == 'none':
        return loss_val
    else:
        # ValueError is a subclass of Exception, so callers catching Exception still work.
        raise ValueError(
            'Invalid reduction \'{}\'. Please use \'elementwise_mean\', \'mean\' or \'none\''.format(reduction))


### Train the model


In [None]:
###################### Train Model #############################
# Number of optimisation steps taken so far across all epochs.
# Used as the x-axis of the per-batch 'loss' scalar in tensorboard.
total_iter_num = 0

# Create the checkpoint folder once, up front, so that the final save below
# also succeeds when the epoch loop runs zero times.
checkpoint_dir = opt.logs_path + '/checkpoints/'
os.makedirs(checkpoint_dir, exist_ok=True)

# NOTE(review): exp_lr_scheduler is created together with the optimizer but is never stepped
# in this loop, so the learning rate stays constant for the whole run. Confirm whether that
# is intended before adding exp_lr_scheduler.step().

for epoch in range(opt.num_epochs):
    print('Epoch {}/{}'.format(epoch, opt.num_epochs - 1))
    print('-' * 10)

    # Only a training phase is run here; there is no validation pass.
    running_loss = 0.0

    # Only whole batches are drawn: a remainder smaller than batchSize is not used this epoch.
    num_batches = int(dataloader.size() / opt.batchSize)

    # Iterate over data.
    for i in range(num_batches):
        total_iter_num += 1

        # Get data
        inputs, labels = dataloader.get_batch()
        inputs = inputs.to(device)
        labels = labels.to(device)

        #ToDo: get labels into correct format

        ## Create Graph ##
        # The model graph is logged to tensorboard once, using the first batch as example input.
        if not graph_created:
            graph_created = True
            writer.add_graph(model, inputs, verbose=False)

        # Forward + Backward Prop
        optimizer.zero_grad()
        torch.set_grad_enabled(True)
        normal_vectors = model(inputs)
        # Scale every predicted vector to unit L2 norm along the channel dimension.
        normal_vectors_norm = nn.functional.normalize(normal_vectors, p=2, dim=1)

        loss = loss_fn(normal_vectors_norm, labels, reduction='elementwise_mean')
        loss.backward()
        optimizer.step()

        # statistics
        batch_loss = loss.item()
        running_loss += batch_loss
        writer.add_scalar('loss', batch_loss, total_iter_num)

        if i % 10 == 0:
            print('Epoch{} Batch{} Loss: {:.4f}'.format(epoch, i, batch_loss))

    # Average over the batches that were actually run. Dividing by the fractional
    # size()/batchSize would understate the mean whenever the dataset size is not a
    # multiple of the batch size. max(..., 1) avoids a division by zero if no batch ran.
    epoch_loss = running_loss / max(num_batches, 1)
    writer.add_scalar('epoch_loss', epoch_loss, epoch)
    print('{} Loss: {:.4f}'.format(phase, epoch_loss))

    # Save the model checkpoint every 5th epoch
    if epoch % 5 == 0:
        filename = checkpoint_dir + 'checkpoint-epoch_{}.pth'.format(epoch)
        torch.save(model.state_dict(), filename)


# Save final Checkpoint
filename = checkpoint_dir + 'checkpoint.pth'
torch.save(model.state_dict(), filename)


Epoch 0/499
----------
Epoch0 Batch0 Loss: 0.4729
Epoch0 Batch10 Loss: 0.2546
Epoch0 Batch20 Loss: 0.2095
Epoch0 Batch30 Loss: 0.2140
Epoch0 Batch40 Loss: 0.2456
Epoch0 Batch50 Loss: 0.1891
Epoch0 Batch60 Loss: 0.2391
Epoch0 Batch70 Loss: 0.2504
shuffling the dataset
train Loss: 0.2354
Epoch 1/499
----------
Epoch1 Batch0 Loss: 0.2073
Epoch1 Batch10 Loss: 0.2040
Epoch1 Batch20 Loss: 0.2815
Epoch1 Batch30 Loss: 0.1884
Epoch1 Batch40 Loss: 0.1900
Epoch1 Batch50 Loss: 0.2387
Epoch1 Batch60 Loss: 0.2338
Epoch1 Batch70 Loss: 0.2166
shuffling the dataset
train Loss: 0.2220
Epoch 2/499
----------
Epoch2 Batch0 Loss: 0.2118
Epoch2 Batch10 Loss: 0.2367
Epoch2 Batch20 Loss: 0.2001
Epoch2 Batch30 Loss: 0.2276
Epoch2 Batch40 Loss: 0.2048
Epoch2 Batch50 Loss: 0.1665
Epoch2 Batch60 Loss: 0.1885
Epoch2 Batch70 Loss: 0.1945
shuffling the dataset
train Loss: 0.2162
Epoch 3/499
----------
Epoch3 Batch0 Loss: 0.2504
Epoch3 Batch10 Loss: 0.2218
Epoch3 Batch20 Loss: 0.2444
Epoch3 Batch30 Loss: 0.2307
Epoch

Epoch28 Batch0 Loss: 0.1104
Epoch28 Batch10 Loss: 0.1445
Epoch28 Batch20 Loss: 0.1147
Epoch28 Batch30 Loss: 0.1155
Epoch28 Batch40 Loss: 0.1237
Epoch28 Batch50 Loss: 0.0910
Epoch28 Batch60 Loss: 0.1342
Epoch28 Batch70 Loss: 0.1288
shuffling the dataset
train Loss: 0.1361
Epoch 29/499
----------
Epoch29 Batch0 Loss: 0.1254
Epoch29 Batch10 Loss: 0.1216
Epoch29 Batch20 Loss: 0.1255
Epoch29 Batch30 Loss: 0.1349
Epoch29 Batch40 Loss: 0.1655
Epoch29 Batch50 Loss: 0.1442
Epoch29 Batch60 Loss: 0.1627
Epoch29 Batch70 Loss: 0.1153
shuffling the dataset
train Loss: 0.1359
Epoch 30/499
----------
Epoch30 Batch0 Loss: 0.1064
Epoch30 Batch10 Loss: 0.1067
Epoch30 Batch20 Loss: 0.1171
Epoch30 Batch30 Loss: 0.1306
Epoch30 Batch40 Loss: 0.1185
Epoch30 Batch50 Loss: 0.1420
Epoch30 Batch60 Loss: 0.1224
Epoch30 Batch70 Loss: 0.1361
shuffling the dataset
train Loss: 0.1302
Epoch 31/499
----------
Epoch31 Batch0 Loss: 0.1540
Epoch31 Batch10 Loss: 0.1107
Epoch31 Batch20 Loss: 0.1267
Epoch31 Batch30 Loss: 0.15

Epoch55 Batch70 Loss: 0.0560
shuffling the dataset
train Loss: 0.0516
Epoch 56/499
----------
Epoch56 Batch0 Loss: 0.0509
Epoch56 Batch10 Loss: 0.0509
Epoch56 Batch20 Loss: 0.0508
Epoch56 Batch30 Loss: 0.0444
Epoch56 Batch40 Loss: 0.0497
Epoch56 Batch50 Loss: 0.0521
Epoch56 Batch60 Loss: 0.0495
Epoch56 Batch70 Loss: 0.0459
shuffling the dataset
train Loss: 0.0511
Epoch 57/499
----------
Epoch57 Batch0 Loss: 0.0475
Epoch57 Batch10 Loss: 0.0504
Epoch57 Batch20 Loss: 0.0457
Epoch57 Batch30 Loss: 0.0474
Epoch57 Batch40 Loss: 0.0438
Epoch57 Batch50 Loss: 0.0471
Epoch57 Batch60 Loss: 0.0511
Epoch57 Batch70 Loss: 0.0429
shuffling the dataset
train Loss: 0.0494
Epoch 58/499
----------
Epoch58 Batch0 Loss: 0.0585
Epoch58 Batch10 Loss: 0.0550
Epoch58 Batch20 Loss: 0.0452
Epoch58 Batch30 Loss: 0.0434
Epoch58 Batch40 Loss: 0.0455
Epoch58 Batch50 Loss: 0.0448
Epoch58 Batch60 Loss: 0.0453
Epoch58 Batch70 Loss: 0.0456
shuffling the dataset
train Loss: 0.0473
Epoch 59/499
----------
Epoch59 Batch0 Los

Epoch83 Batch40 Loss: 0.0243
Epoch83 Batch50 Loss: 0.0234
Epoch83 Batch60 Loss: 0.0284
Epoch83 Batch70 Loss: 0.0333
shuffling the dataset
train Loss: 0.0278
Epoch 84/499
----------
Epoch84 Batch0 Loss: 0.0304
Epoch84 Batch10 Loss: 0.0255
Epoch84 Batch20 Loss: 0.0533
Epoch84 Batch30 Loss: 0.0348
Epoch84 Batch40 Loss: 0.0243
Epoch84 Batch50 Loss: 0.0354
Epoch84 Batch60 Loss: 0.0290
Epoch84 Batch70 Loss: 0.0204
shuffling the dataset
train Loss: 0.0275
Epoch 85/499
----------
Epoch85 Batch0 Loss: 0.0296
Epoch85 Batch10 Loss: 0.0262
Epoch85 Batch20 Loss: 0.0272
Epoch85 Batch30 Loss: 0.0258
Epoch85 Batch40 Loss: 0.0316
Epoch85 Batch50 Loss: 0.0292
Epoch85 Batch60 Loss: 0.0241
Epoch85 Batch70 Loss: 0.0300
shuffling the dataset
train Loss: 0.0272
Epoch 86/499
----------
Epoch86 Batch0 Loss: 0.0237
Epoch86 Batch10 Loss: 0.0287
Epoch86 Batch20 Loss: 0.0336
Epoch86 Batch30 Loss: 0.0245
Epoch86 Batch40 Loss: 0.0301
Epoch86 Batch50 Loss: 0.0207
Epoch86 Batch60 Loss: 0.0255
Epoch86 Batch70 Loss: 0.0

shuffling the dataset
train Loss: 0.0212
Epoch 111/499
----------
Epoch111 Batch0 Loss: 0.0244
Epoch111 Batch10 Loss: 0.0262
Epoch111 Batch20 Loss: 0.0224
Epoch111 Batch30 Loss: 0.0183
Epoch111 Batch40 Loss: 0.0160
Epoch111 Batch50 Loss: 0.0163
Epoch111 Batch60 Loss: 0.0184
Epoch111 Batch70 Loss: 0.0224
shuffling the dataset
train Loss: 0.0218
Epoch 112/499
----------
Epoch112 Batch0 Loss: 0.0143
Epoch112 Batch10 Loss: 0.0211
Epoch112 Batch20 Loss: 0.0225
Epoch112 Batch30 Loss: 0.0195
Epoch112 Batch40 Loss: 0.0214
Epoch112 Batch50 Loss: 0.0201
Epoch112 Batch60 Loss: 0.0209
Epoch112 Batch70 Loss: 0.0142
shuffling the dataset
train Loss: 0.0200
Epoch 113/499
----------
Epoch113 Batch0 Loss: 0.0182
Epoch113 Batch10 Loss: 0.0164
Epoch113 Batch20 Loss: 0.0194
Epoch113 Batch30 Loss: 0.0253
Epoch113 Batch40 Loss: 0.0165
Epoch113 Batch50 Loss: 0.0197
Epoch113 Batch60 Loss: 0.0196
Epoch113 Batch70 Loss: 0.0207
shuffling the dataset
train Loss: 0.0200
Epoch 114/499
----------
Epoch114 Batch0 Los