In [None]:
import torch
import numpy as np
from torch.autograd import Variable
import time
import skvideo.io
from models import UNet11, LinkNet34
from torchvision import transforms
img_transform = transforms.Compose([
    lambda x: x[:544],
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.255])
])

In [None]:
video = skvideo.io.vread('Example/test_video.mp4')

In [None]:
model.eval()
start = time.time()
frames = 0
out = None
for rgb_frame in video:
    input_img = torch.unsqueeze(img_transform(rgb_frame).cuda(), dim=0)
    out = model(input_img)
    frames += 1
end = time.time()    
print("fps: {}".format(frames/(end-start)))

In [None]:
import glob

In [None]:
train = glob.glob('data/Train/CameraRGB/*.png')

In [None]:
val = np.random.choice(train, 80, replace=False)

In [None]:
val = [f.split('/')[-1] for f in val]

In [None]:
import shutil

trainRGB = 'data/Train/CameraRGB/'
trainSeg = 'data/Train/CameraSeg/'
valRGB = 'data/Val/CameraRGB/'
valSeg = 'data/Val/CameraSeg/'

for f in val:
    shutil.move(trainRGB+f, valRGB)
    shutil.move(trainSeg+f, valSeg)

In [None]:
from torchvision import transforms
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        lambda x: x[:544],
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        lambda x: x[:544],
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# data_dir = 'data'
# image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
#                                           data_transforms[x])
#                   for x in ['train', 'val']}
# dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
#                                              shuffle=True, num_workers=4)
#               for x in ['train', 'val']}
# dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
# class_names = image_datasets['train'].classes

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
import cv2
import torch
from torch.utils.data import Dataset
import numpy as np

class UDSegDataset(Dataset):
    def __init__(self, file_names, path, transform):
        self.rgb_file_names = [path + 'CameraRGB/' + f for f in file_names]
        self.seg_file_names = [path + 'CameraSeg/' + f for f in file_names]
        self.transform = transform

    def __len__(self):
        return len(self.rgb_file_names)

    def __getitem__(self, idx):
        bgr_img = cv2.imread(self.rgb_file_names[idx])
        rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
        seg_img = cv2.imread(self.seg_file_names[idx])[:544, :, 2]
        
        img = self.transform(rgb_img)
        road_mask = np.where((seg_img == 7) | (seg_img == 6), 1, 0)
        car_mask = np.where(seg_img == 10, 2, 0)
        car_mask[496:] = 0
#         mask = np.dstack((road_mask, car_mask)).transpose((2, 0, 1))
        mask = road_mask+car_mask
        return img, torch.tensor(mask, dtype=torch.float)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)

In [None]:
imshow(img)

In [None]:
plt.imshow(mask.numpy())

In [1]:
import copy
import torch
import os
import time
import glob
import gc
from torchvision import transforms
from dataset import UDSegDataset
from torch.utils.data import DataLoader
from models import LinkNet34
from loss import LossMulti
import torch.optim as optim

In [2]:
## training loop
w = 800
h = 544
batch_sz = 8

files = { x: [os.path.basename(f) for f in glob.glob(
    'data/'+x+'/CameraRGB/*.png')] for x in {'train', 'val'}}
data_transforms = {
    'train': transforms.Compose([
        lambda x: x[:544],
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        lambda x: x[:544],
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

dataloader = {x: 
              DataLoader(
                  UDSegDataset(files[x], 'data/'+x+'/', data_transforms[x]), 
                  batch_size=8, 
                  shuffle=True, 
              ) 
              for x in {'train','val'}}

# def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
#     since = time.time()
    
#     best_model_wts = copy.deepcopy(model.state_dict())
#     best_acc = 0.0
    
#     for epoch in range(num_epochs):
#         print('Epoch {}/{}'.format(epoch, num_epochs-1))
#         print('-' * 10)
        
#         for phase in ['train', 'val']:
#             if phase == 'train':
#                 scheduler.step()
#                 model.train()
#             else:
#                 model.eval()
                
#             running_loss = 0.0
#             running_corrects = 0.0
            
#             for bi, (inputs, labels) in enumerate(dataloader[phase]):
#                 for obj in gc.get_objects():
#                     if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
#                         print(type(obj), obj.size())
#                 print('starting batch {}'.format(bi))
# #                 inputs = inputs.cuda()
# #                 labels = labels.cuda()
                
#                 optimizer.zero_grad()
                
#                 with torch.set_grad_enabled(phase == 'train'):
#                     outputs = model(inputs)
#                     _, preds = torch.max(outputs, dim=1)
#                     loss = criterion(outputs, labels)
                    
#                     if phase == 'train':
#                         loss.backward()
#                         optimizer.step()
                        
#                 running_loss += loss.item() * inputs.size(0)
#                 running_corrects += torch.sum(preds == labels.data)
                
#             epoch_loss = running_loss / len(dataloader[phase])
#             epoch_acc = running_corrects.double() / (len(dataloader[phase]) * w * h)
            
#             if phase == 'val' and epoch_acc > best_acc:
#                 best_acc = epoch_acc
#                 best_model_wts = copy.deepcopy(model.state_dict())

#         print()

#     time_elapsed = time.time() - since
#     print('Training complete in {:.0f}m {:.0f}s'.format(
#         time_elapsed // 60, time_elapsed % 60))
#     print('Best val Acc: {:4f}'.format(best_acc))

#     # load best model weights
#     model.load_state_dict(best_model_wts)
#     return model

In [3]:
model = LinkNet34(num_classes=3, pretrained=False).cuda()
model.load_state_dict(torch.load('./best.pt'))
criterion = LossMulti(jaccard_weight=1, num_classes=3, class_weights=torch.tensor([.1, .65, .25], dtype=torch.float).cuda())
optimizer = optim.Adam(model.parameters())
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
num_epochs = 2
road_beta, car_beta = 0.5, 2

In [6]:
num_epochs = 10

In [9]:
# train_model(model, criterion, optimizer, exp_lr_scheduler)
since = time.time()

best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs-1))
    print('-' * 10)

    for phase in ['train', 'val']:
#     for phase in ['val']:
        if phase == 'train':
            scheduler.step()
            model.train()
        else:
            model.eval()

        running_loss = 0.0
        running_corrects = 0.0
        road_prec, road_recall, car_prec, car_recall = 0,0,0,0
        for bi, (inputs, labels) in enumerate(dataloader[phase]):

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, dim=1)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            
            running_loss += loss.item() * inputs.size(0)
            
            road_prec += (torch.sum((preds == 1) * (labels.data == 1)).item() / 
                              torch.sum(preds == 1).item())
            road_recall += (torch.sum((preds == 1) * (labels.data == 1)).item() / 
                              torch.sum(labels.data == 1).item())
            
            car_prec += (torch.sum((preds == 2) * (labels.data == 2)).item() / 
                              torch.sum(preds == 2).item())
            car_recall += (torch.sum((preds == 2) * (labels.data == 2)).item() / 
                              torch.sum(labels.data == 2).item())
            
#             print(('{} Loss: {:.4f}, car_re: {:.3f}, car_pre: {:.3f}, road_re: {:.3f}, road_pre: {:.4f}')
#                   .format(
#                       bi+1, running_loss/(bi+1), car_recall/(bi+1), car_prec/(bi+1), 
#                       road_recall/(bi+1), road_prec/(bi+1)))
        
        car_prec /= len(dataloader[phase])
        car_recall /= len(dataloader[phase])
        road_prec /= len(dataloader[phase])
        road_recall /= len(dataloader[phase])
        
        road_f1 = (1+road_beta**2) * ((road_prec*road_recall)/
                                          (road_beta**2*road_prec+road_recall))
        car_f1 = (1+car_beta**2) * ((car_prec*car_recall)/
                                      (car_beta**2*car_prec+car_recall))
        epoch_acc = (road_f1+car_f1)/2
        
        
        epoch_loss = running_loss / len(dataloader[phase])

        print(('{} Loss: {:.4f}, car_re: {:.3f}, car_pre: {:.3f}, road_re: {:.3f}, ' + 
              'road_pre: {:.4f}, total_score: {:.4f}').format(
                phase, epoch_loss, car_recall, car_prec, road_recall, road_prec, epoch_acc))
        
        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            #torch.save(best_model_wts, './best.pt')
            

    print()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
    time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))

# load best model weights
model.load_state_dict(best_model_wts)

Epoch 0/9
----------
train Loss: 1.7962, car_re: 0.705, car_pre: 0.793, road_re: 0.996, road_pre: 0.9541, total_score: 0.8416
val Loss: 0.7387, car_re: 0.919, car_pre: 0.873, road_re: 0.998, road_pre: 0.9842, total_score: 0.9480

Epoch 1/9
----------
train Loss: 1.6615, car_re: 0.734, car_pre: 0.794, road_re: 0.996, road_pre: 0.9567, total_score: 0.8546
val Loss: 0.7244, car_re: 0.921, car_pre: 0.880, road_re: 0.998, road_pre: 0.9840, total_score: 0.9497

Epoch 2/9
----------
train Loss: 1.6451, car_re: 0.745, car_pre: 0.786, road_re: 0.996, road_pre: 0.9577, total_score: 0.8592
val Loss: 0.7210, car_re: 0.919, car_pre: 0.884, road_re: 0.998, road_pre: 0.9850, total_score: 0.9495

Epoch 3/9
----------
train Loss: 1.6166, car_re: 0.749, car_pre: 0.798, road_re: 0.996, road_pre: 0.9579, total_score: 0.8617
val Loss: 0.7298, car_re: 0.921, car_pre: 0.874, road_re: 0.998, road_pre: 0.9847, total_score: 0.9491

Epoch 4/9
----------
train Loss: 1.5924, car_re: 0.744, car_pre: 0.794, road_re:

In [None]:
torch.save(best_model_wts, './new_best.pt')

In [None]:
tdl = dataloader['train']

In [None]:
imgs, masks = None, None
for i, m in tdl:
    imgs, masks = i, m
    break

In [None]:
outputs = model(imgs)

In [None]:
_, preds = torch.max(outputs, dim=1)

In [None]:
plt.imshow(preds[0].cpu())

In [None]:
plt.imshow(masks[0].cpu())

In [None]:
import cv2

In [None]:
img = imgs[0].cpu().numpy().transpose(1, 2, 0)
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
img = std * img + mean
img = np.clip(img, 0, 1)
mask = masks[0].cpu().numpy()
my_mask = outputs[0].cpu().detach().numpy()

In [None]:
my_mask = np.argmax(my_mask, axis=0).astype(np.float64)

In [None]:
masked_img = np.copy(img)

In [None]:
masked_img[(my_mask==1).nonzero()] = (0, 0, 1)
masked_img[(my_mask==2).nonzero()] = (0, 1, 0)

In [None]:
plt.imshow(masked_img)

In [None]:
weighted_img = cv2.addWeighted(img, .7, masked_img, .3, 0)

In [None]:
plt.imshow(weighted_img)

In [None]:
plt.imshow(np.pad(weighted_img, ((0, 56), (0, 0), (0,0)), 'constant', constant_values=(0)))