## Initialisation

In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import numpy as np
from torch.autograd import Variable

# for dataloader
from torch.utils.data import DataLoader, TensorDataset
from torchvision.datasets.vision import VisionDataset
import glob
from torchvision import transforms,datasets

#visualisation
import matplotlib.pyplot as plt
import cv2
from PIL import Image

#saving model
from datetime import datetime

#Testing
import time
from tqdm import tqdm


## Encoder which takes 3 channel input

In [None]:
class encoder(nn.Module):
    """Pretrained VGG16 feature extractor for a 3-channel input.

    The ``features`` stack of torchvision's VGG16 is split into three
    consecutive stages (layers ``[:17]``, ``[17:24]`` and ``[24:]``) so that
    the forward pass can return the activation after each stage.

    Args:
        num_classes: Accepted for interface compatibility only; the encoder
            does not use it.
    """

    def __init__(self,num_classes = 2):
        super().__init__()
        vgg_layers = list(models.vgg16(pretrained=True).features.children())

        # The attribute names end up in the state_dict keys, so they are kept
        # unchanged to stay compatible with previously saved checkpoints.
        self.features1_3 = nn.Sequential(*vgg_layers[:17])
        self.features1_4 = nn.Sequential(*vgg_layers[17:24])
        self.features1_5 = nn.Sequential(*vgg_layers[24:])

    def forward(self,img):
        """Run ``img`` through the three stages.

        Returns:
            Tuple ``(pool1_3, pool1_4, pool1_5)`` holding the output of the
            first, second and third stage respectively.
        """
        pool1_3 = self.features1_3(img)
        pool1_4 = self.features1_4(pool1_3)
        pool1_5 = self.features1_5(pool1_4)
        return pool1_3,pool1_4,pool1_5

## Encoder which takes 6 channel input

In [None]:
class encoder_2(nn.Module):
    """VGG16-based feature extractor for a 6-channel input.

    The first convolution + ReLU of the pretrained VGG16 (layers 0 and 1) is
    replaced by a freshly initialised 6 -> 64 channel convolution + ReLU; the
    remaining pretrained layers are reused and split into three stages
    (``[2:17]``, ``[17:24]`` and ``[24:]``).

    Args:
        num_classes: Accepted for interface compatibility only; the encoder
            does not use it.
    """

    def __init__(self,num_classes = 2):
        super().__init__()
        vgg_layers = list(models.vgg16(pretrained=True).features.children())

        # Attribute names (including the singular ``feature1_2``) are part of
        # the state_dict keys and therefore kept as they are.
        self.feature1_2 = nn.Sequential(
            nn.Conv2d(6, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
        )
        self.features1_3 = nn.Sequential(*vgg_layers[2:17])
        self.features1_4 = nn.Sequential(*vgg_layers[17:24])
        self.features1_5 = nn.Sequential(*vgg_layers[24:])

    def forward(self,img):
        """Run the 6-channel ``img`` through the stem and the three stages.

        Returns:
            Tuple ``(pool1_3, pool1_4, pool1_5)`` with the output of each of
            the three VGG stages (the stem output is not returned).
        """
        pool1_2 = self.feature1_2(img)
        pool1_3 = self.features1_3(pool1_2)
        pool1_4 = self.features1_4(pool1_3)
        pool1_5 = self.features1_5(pool1_4)
        return pool1_3,pool1_4,pool1_5

## Decoder part

In [None]:
class decoder(nn.Module):
    """Decoder that fuses three feature maps into a per-class score map.

    ``pool5`` is scored and upsampled by 2, added to the scored ``pool4``,
    upsampled by 2 again, added to the scored ``pool3`` and finally upsampled
    by 8 and centre-cropped to the requested spatial size.

    Args:
        n_classes: Number of output channels.
    """

    def __init__(self,n_classes = 2):
        super().__init__()
        # 1x1 convolutions turning the skip features into class scores.
        self.score_pool3 = nn.Conv2d(256, n_classes, kernel_size=1)
        self.score_pool4 = nn.Conv2d(512, n_classes, kernel_size=1)

        # Transposed convolutions: stride 2 (used twice) and stride 8.
        self.upsampling2 = nn.ConvTranspose2d(
            n_classes, n_classes, kernel_size=4, stride=2, bias=False)
        self.upsampling8 = nn.ConvTranspose2d(
            n_classes, n_classes, kernel_size=16, stride=8, bias=False)

        self.classifier = nn.Sequential(
            nn.Conv2d(512, n_classes, kernel_size=1),
            nn.Sigmoid(),
        )
        self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialise every ``Conv2d`` and ``Linear`` sub-module in place."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(
                    module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

    @staticmethod
    def _crop_like(scores, reference):
        """Crop ``scores`` (offset 1) to the height and width of ``reference``."""
        rows, cols = reference.size()[2], reference.size()[3]
        return scores[:, :, 1:1 + rows, 1:1 + cols]

    def forward(self,pool3,pool4,pool5,x_size):
        """Decode the three feature maps.

        Args:
            pool3, pool4, pool5: Feature maps with 256, 512 and 512 channels.
            x_size: Size of the network input; indices 2 and 3 give the
                height and width of the returned map.
        """
        fused = self.upsampling2(self.classifier(pool5))

        skip4 = self.score_pool4(pool4)
        fused = self._crop_like(fused, skip4) + skip4

        fused = self.upsampling2(fused)

        skip3 = self.score_pool3(pool3)
        fused = self._crop_like(fused, skip3) + skip3

        fused = self.upsampling8(fused)
        out_h, out_w = x_size[2], x_size[3]
        # Centre crop back to the input resolution.
        left = int((fused.shape[3] - out_w) / 2)
        top = int((fused.shape[2] - out_h) / 2)
        return fused[:, :, top:top + out_h, left:left + out_w]

## ModNet model with 1 x 1 convolution with dropout

In [None]:
def conv_1_1(in_features, out_features):
    """Build a 1x1 convolution followed by dropout with p=0.5."""
    pointwise = nn.Conv2d(
        in_channels=in_features, out_channels=out_features, kernel_size=1)
    return nn.Sequential(pointwise, nn.Dropout(p=0.5))

In [None]:
class modnet(nn.Module):
    """Two-stream network with 1x1-conv + dropout fusion and two decoders.

    The RGB image and the optical-flow image are each passed through their
    own ``encoder``. The matching feature maps of both streams are summed,
    passed through a 1x1 convolution with dropout (``conv_1_1``) and then fed
    to two independent ``decoder`` heads.
    """

    def __init__(self):
        super(modnet,self).__init__()
        self.encoder1 = encoder()
        self.encoder2 = encoder()

        self.decoder1 = decoder()
        self.decoder2 = decoder()
        self.conv1_3 = conv_1_1(256,256)
        self.conv1_4 = conv_1_1(512,512)
        self.conv1_5 = conv_1_1(512,512)

    def forward(self, rgb, of):
        """Return ``(spatial_out, motion_out)``, both cropped to ``rgb``'s size.

        Args:
            rgb: Input passed to the first encoder.
            of: Input passed to the second encoder.
        """
        x_size = rgb.size()
        pool1_3, pool1_4, pool1_5 = self.encoder1(rgb)
        pool2_3, pool2_4, pool2_5 = self.encoder2(of)
        # Fuse the two streams: element-wise sum, then 1x1 conv + dropout.
        pool3 = self.conv1_3(pool1_3 + pool2_3)
        pool4 = self.conv1_4(pool1_4 + pool2_4)
        pool5 = self.conv1_5(pool1_5 + pool2_5)
        # Both decoders receive the same fused features.
        spatial_out = self.decoder1(pool3,pool4,pool5,x_size)
        motion_out = self.decoder2(pool3,pool4,pool5,x_size)

        return spatial_out,motion_out
    
    
model = modnet()
# Smoke test: feed random 448x448 inputs and print both output shapes.
out = model(torch.rand(1, 3, 448, 448), torch.rand(1, 3, 448, 448))
print(*(o.shape for o in out))
# Move the model to the GPU when one is available.
gpu_available = torch.cuda.is_available()
model = model.cuda() if gpu_available else model

## ModNet model - Configuration 1

In [None]:
class modnet(nn.Module):
    """Two-stream network (configuration 1): summed features, two decoders.

    The RGB image and the optical-flow image are each passed through their
    own ``encoder``; the matching feature maps are summed element-wise and
    fed to two independent ``decoder`` heads.
    """

    def __init__(self):
        super(modnet,self).__init__()
        self.encoder1 = encoder()
        self.encoder2 = encoder()

        self.decoder1 = decoder()
        self.decoder2 = decoder()

    def forward(self, rgb, of):
        """Return ``(spatial_out, motion_out)``, both cropped to ``rgb``'s size.

        Args:
            rgb: Input passed to the first encoder.
            of: Input passed to the second encoder.
        """
        x_size = rgb.size()
        pool1_3, pool1_4, pool1_5 = self.encoder1(rgb)
        pool2_3, pool2_4, pool2_5 = self.encoder2(of)
        # Fuse the two streams by element-wise addition.
        pool3 = pool1_3 + pool2_3
        pool4 = pool1_4 + pool2_4
        pool5 = pool1_5 + pool2_5
        # Both decoders receive the same fused features.
        spatial_out = self.decoder1(pool3,pool4,pool5,x_size)
        motion_out = self.decoder2(pool3,pool4,pool5,x_size)

        return spatial_out,motion_out
    
    
model = modnet()
# Smoke test: feed random 448x448 inputs and print both output shapes.
out = model(torch.rand(1, 3, 448, 448), torch.rand(1, 3, 448, 448))
print(*(o.shape for o in out))
# Move the model to the GPU when one is available.
gpu_available = torch.cuda.is_available()
model = model.cuda() if gpu_available else model

## ModNet model - Single decoder

In [None]:
class modnet_single_decoder(nn.Module):
    """Two-stream network with a single 3-channel decoder.

    The RGB image and the optical-flow image are each passed through their
    own ``encoder``; the matching feature maps are summed element-wise and
    decoded by one ``decoder`` built with ``n_classes = 3``.
    """

    def __init__(self):
        super(modnet_single_decoder,self).__init__()
        self.encoder1 = encoder()
        self.encoder2 = encoder()

        self.decoder1 = decoder(n_classes = 3)

    def forward(self, rgb, of):
        """Return the decoder output, cropped to the spatial size of ``rgb``.

        Args:
            rgb: Input passed to the first encoder.
            of: Input passed to the second encoder.
        """
        x_size = rgb.size()
        pool1_3, pool1_4, pool1_5 = self.encoder1(rgb)
        pool2_3, pool2_4, pool2_5 = self.encoder2(of)
        # Fuse the two streams by element-wise addition.
        pool3 = pool1_3 + pool2_3
        pool4 = pool1_4 + pool2_4
        pool5 = pool1_5 + pool2_5
        out = self.decoder1(pool3,pool4,pool5,x_size)

        return out
    
    
model = modnet_single_decoder()
# Smoke test: feed random 370x1226 inputs and print the output shape.
smoke_shape = (1, 3, 370, 1226)
out = model(torch.rand(*smoke_shape), torch.rand(*smoke_shape))
print(out.shape)
# Move the model to the GPU when one is available.
gpu_available = torch.cuda.is_available()
model = model.cuda() if gpu_available else model

##  ModNet model - Configuration 2 - 6ch combination

In [None]:
class modnet_2(nn.Module):
    """Two-stream network (configuration 2) with a 6-channel second stream.

    The first ``encoder`` sees the RGB image alone; the second stream is an
    ``encoder_2`` fed with the RGB image and the optical-flow image
    concatenated along the channel axis. The matching feature maps are
    summed element-wise and fed to two independent ``decoder`` heads.
    """

    def __init__(self):
        super(modnet_2,self).__init__()
        self.encoder1 = encoder()
        self.encoder2 = encoder_2()

        self.decoder1 = decoder()
        self.decoder2 = decoder()

    def forward(self, rgb, of):
        """Return ``(spatial_out, motion_out)``, both cropped to ``rgb``'s size.

        Args:
            rgb: Input passed to the first encoder and to the concatenation.
            of: Second input, concatenated with ``rgb`` on ``dim=1``.
        """
        x_size = rgb.size()
        combined_flow = torch.cat((rgb, of),dim=1)
        pool1_3, pool1_4, pool1_5 = self.encoder1(rgb)
        pool2_3, pool2_4, pool2_5 = self.encoder2(combined_flow)
        # Fuse the two streams by element-wise addition.
        pool3 = pool1_3 + pool2_3
        pool4 = pool1_4 + pool2_4
        pool5 = pool1_5 + pool2_5
        # Both decoders receive the same fused features.
        spatial_out = self.decoder1(pool3,pool4,pool5,x_size)
        motion_out = self.decoder2(pool3,pool4,pool5,x_size)

        return spatial_out,motion_out
    
    
model = modnet_2()
# Smoke test: feed random 224x224 inputs and print both output shapes.
out = model(torch.rand(1, 3, 224, 224), torch.rand(1, 3, 224, 224))
print(*(o.shape for o in out))
# Move the model to the GPU when one is available.
gpu_available = torch.cuda.is_available()
model = model.cuda() if gpu_available else model

## Dataloader

In [None]:
class synDataset(VisionDataset):
    """Training dataset yielding ``(rgb, flow, mask1, mask2)`` tensors.

    Files are read from the ``images``, ``flownet``, ``mask1`` and ``mask2``
    sub-directories of ``root``. Each list is sorted by file name and the
    four lists are paired by position.

    Args:
        inp_dim: Size passed to ``PIL.Image.resize`` for every image.
        root: Dataset directory; defaults to the original training location.
    """

    DEFAULT_ROOT = "/homebackup/dataset/2011_09_30/Kitti/dataset_odometry_annotation/dataset_for_training"

    def __init__(self, inp_dim, root=DEFAULT_ROOT):
        self.transforms = transforms.ToTensor()
        self.inp_dim = inp_dim
        self.rgb_dir = self._sorted_pngs(root, "images")
        self.flow_dir = self._sorted_pngs(root, "flownet")
        self.mask1_dir = self._sorted_pngs(root, "mask1")
        self.mask2_dir = self._sorted_pngs(root, "mask2")

    @staticmethod
    def _sorted_pngs(root, subdir):
        """Return the sorted list of ``*.png`` files in ``root/subdir``."""
        return sorted(glob.glob("{}/{}/*.png".format(root, subdir)))

    def _load(self, path, as_rgb):
        """Open ``path``, optionally convert to RGB, and resize to ``inp_dim``."""
        image = Image.open(path)
        if as_rgb:
            image = image.convert('RGB')
        # NOTE(review): no resample filter is passed, so PIL's default is
        # used for the masks too - consider Image.NEAREST for label images.
        return image.resize(self.inp_dim)

    def __getitem__(self, index):
        rgb = self._load(self.rgb_dir[index], as_rgb=True)
        flow = self._load(self.flow_dir[index], as_rgb=True)
        # Masks keep the mode they are stored in.
        mask1 = self._load(self.mask1_dir[index], as_rgb=False)
        mask2 = self._load(self.mask2_dir[index], as_rgb=False)

        if self.transforms is not None:
            rgb = self.transforms(rgb)
            flow = self.transforms(flow)
            mask1 = self.transforms(mask1)
            mask2 = self.transforms(mask2)
        return rgb, flow, mask1, mask2

    def __len__(self):
        # The RGB list defines the dataset length.
        return len(self.rgb_dir)
    

batch_size = 1
# Only a training loader is defined here; images are resized to 224x224.
dataloaders = dict(
    train=DataLoader(
        synDataset(inp_dim=(224, 224)),
        batch_size=batch_size,
        shuffle=True,
        num_workers=2,
    ),
)

## Loss function

### Dice loss + Binary cross entropy

In [None]:
def dice_loss(pred, target):
    """Return the per-sample Dice loss between ``pred`` and ``target``.

    Both tensors are flattened to ``(N, -1)`` where ``N`` is the size of
    their first dimension; ``target`` is cast to float. The result has one
    entry per sample: ``1 - 2*sum(p*t) / (sum(p*p) + sum(t*t) + 0.002)``.
    """
    smooth = 0.001
    flat_pred = pred.reshape(pred.shape[0], -1)
    flat_target = target.reshape(target.shape[0], -1).float()

    overlap = (flat_pred * flat_target).sum(dim=1)
    pred_energy = (flat_pred * flat_pred).sum(dim=1) + smooth
    target_energy = (flat_target * flat_target).sum(dim=1) + smooth
    return 1 - (2 * overlap) / (pred_energy + target_energy)

In [None]:
from collections import defaultdict
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

#from loss import dice_loss

def calc_loss(pred, target, bce_weight=0.5):
    """Return a scalar blend of BCE-with-logits and Dice loss.

    Args:
        pred: Raw network output; treated as logits by both loss terms.
        target: Tensor with the same shape as ``pred``.
        bce_weight: Weight of the BCE term; the Dice term is weighted by
            ``1 - bce_weight``.

    Returns:
        A 0-dim tensor, so ``backward()`` can be called on it directly for
        any batch size.
    """
    bce = F.binary_cross_entropy_with_logits(pred, target).type(dtype)

    pred = torch.sigmoid(pred).type(dtype)
    # dice_loss returns one value per sample; average it so the combined
    # loss stays scalar when the batch holds more than one sample.
    dice = dice_loss(pred, target).mean()

    loss = bce * bce_weight + dice * (1 - bce_weight)

    return loss

## Hyperparameters

In [None]:
# Adam over the trainable parameters only; the learning rate is multiplied
# by 0.1 every 10 scheduler steps.
optimizer = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad),
    lr=1e-4,
)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
num_epochs = 30

## For labels

In [None]:
def label_conv(labels,targetimage, mask1=True):
    """Fill ``labels`` in place with a two-channel one-hot encoding.

    Channel 0 is the background indicator and channel 1 the foreground
    indicator.

    Args:
        labels: 4-D tensor ``(B, C, H, W)`` with at least two channels; it is
            written in place.
        targetimage: Mask of shape ``(B, 1, H, W)`` or ``(B, H, W)``.
        mask1: When true, foreground is every non-zero pixel; otherwise
            foreground is every pixel exactly equal to 1.

    Returns:
        The same ``labels`` tensor.
    """
    target = targetimage
    # Drop the singleton channel axis so the assignment also works for
    # batches with more than one sample.
    if target.dim() == labels.dim() and target.size(1) == 1:
        target = target.squeeze(1)

    foreground = (target != 0) if mask1 else (target == 1)
    labels[:,0,:,:] = ~foreground
    labels[:,1,:,:] = foreground
    return labels

## Label conv for single decoder combination

In [None]:
def label_conv(labels,targetimage, mask1=True):
    """Fill ``labels`` in place with a one-hot encoding of ``targetimage``.

    Args:
        labels: 4-D tensor ``(B, C, H, W)``; it is written in place. With
            ``mask1`` true it needs at least three channels.
        targetimage: Mask of shape ``(B, 1, H, W)`` or ``(B, H, W)``.
        mask1: When true, channels 0, 1 and 2 become the indicators of the
            pixel values 0, 1 and 2. Otherwise only channels 0 and 1 are
            written (``!= 1`` and ``== 1``); further channels are left
            untouched.

    Returns:
        The same ``labels`` tensor.
    """
    target = targetimage
    # Drop the singleton channel axis so the assignment also works for
    # batches with more than one sample.
    if target.dim() == labels.dim() and target.size(1) == 1:
        target = target.squeeze(1)

    if mask1:
        for class_id in range(3):
            labels[:,class_id,:,:] = target == class_id
    else:
        is_one = target == 1
        labels[:,0,:,:] = ~is_one
        labels[:,1,:,:] = is_one
    return labels

## Class weight calculation for class imbalance

In [None]:
# Count how often each class (0, 1, 2) occurs in mask1 + mask2 over the
# training set, then turn the counts into weights min_count / count so that
# the rarest class present gets weight 1.
class_weight = np.zeros(3)

for data in tqdm(dataloaders['train']):
    imgs, flowimage, mask1, mask2 = data
    # Binarise BEFORE the integer cast: casting first would truncate every
    # value below 1.0 to 0 and make the 0.5 threshold meaningless.
    mask1 = (mask1.detach().numpy() > 0.5).astype('int8')
    mask2 = (mask2.detach().numpy() > 0.5).astype('int8')
    true_masks = mask1+mask2
    unique, counts = np.unique(true_masks, return_counts=True)
    class_weight[unique] += counts

# Classes that never occur keep weight 0 instead of producing 0/0 = NaN.
seen = class_weight > 0
if seen.any():
    weights = np.zeros_like(class_weight)
    weights[seen] = class_weight[seen].min() / class_weight[seen]
    class_weight = weights

## Training

In [None]:
# Training loop for the two-decoder models: one calc_loss term per decoder.
loss_values =[]

for epoch in range(num_epochs):  # loop over the dataset multiple times
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    for param_group in optimizer.param_groups:
        print("LR", param_group['lr'])

    # Make sure dropout is active even if an evaluation cell ran before.
    model.train()
    running_loss = 0.0  # summed loss of the current 10-batch window
    epoch_total = 0.0   # summed loss of the whole epoch

    for i, data in enumerate(dataloaders['train']):
        inputimage, flowimage, mask1, mask2 = data

        if gpu_available:
            inputimage = inputimage.cuda()
            flowimage = flowimage.cuda()
            mask1 = mask1.cuda()
            mask2 = mask2.cuda()

        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputimage,flowimage)

        # empty_like allocates on the same device as the outputs, so no
        # extra transfer of the label tensors is needed.
        lab_channel1 = label_conv(torch.empty_like(outputs[0],dtype=torch.float),mask1,True)
        lab_channel2 = label_conv(torch.empty_like(outputs[1],dtype=torch.float),mask2,True)

        loss_dec1 = calc_loss(outputs[0],lab_channel1)
        loss_dec2 = calc_loss(outputs[1],lab_channel2)
        losses = loss_dec1+loss_dec2

        losses.backward()
        optimizer.step()

        # print statistics
        batch_loss = losses.item()
        running_loss += batch_loss
        epoch_total += batch_loss
        loss_values.append(batch_loss)
        if i % 10 == 9:
            print('[%d, %5d] loss: %.10f' %(epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0

    # Step the LR schedule once per epoch, after the optimizer updates.
    scheduler.step()
    # Average over all batches, not over the remainder of the last window.
    epoch_loss = epoch_total / len(dataloaders['train'])
    print('epoch loss: %.4f'%(epoch_loss))

## Training single decoder configuration

In [None]:
# Training loop for the single-decoder model: class-weighted cross-entropy
# against the class-index map mask1 + mask2 (values 0, 1, 2).
loss_values =[]

# Per-class weights for F.cross_entropy, placed where the model lives.
ce_weight = torch.as_tensor(class_weight, dtype=torch.float)
if gpu_available:
    ce_weight = ce_weight.cuda()

for epoch in range(num_epochs):  # loop over the dataset multiple times
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    for param_group in optimizer.param_groups:
        print("LR", param_group['lr'])

    # Make sure the model is in training mode even if an evaluation cell
    # ran before.
    model.train()
    running_loss = 0.0  # summed loss of the current 10-batch window
    epoch_total = 0.0   # summed loss of the whole epoch

    for i, data in enumerate(dataloaders['train']):
        inputimage, flowimage, mask1, mask2 = data
        # Binarise both masks completely (values <= 0.5 become 0 as well)
        # so that their sum is an integer class index in {0, 1, 2}.
        mask = (mask1 > 0.5).long() + (mask2 > 0.5).long()

        if gpu_available:
            inputimage = inputimage.cuda()
            flowimage = flowimage.cuda()
            mask = mask.cuda()

        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputimage,flowimage)

        # cross_entropy expects (B, C, H, W) scores and a (B, H, W) target.
        # The previous zip(outputs, labels) compared the scores against the
        # background channel only and applied just the first class weight.
        # assumes the masks have a single channel - TODO confirm
        target = mask.squeeze(1)
        loss = F.cross_entropy(outputs, target, weight=ce_weight)

        loss.backward()
        optimizer.step()

        # print statistics
        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_total += batch_loss
        loss_values.append(batch_loss)
        if i % 10 == 9:
            print('[%d, %5d] loss: %.10f' %(epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0

    # Step the LR schedule once per epoch, after the optimizer updates.
    scheduler.step()
    # Average over all batches, not over the remainder of the last window.
    epoch_loss = epoch_total / len(dataloaders['train'])
    print('epoch loss: %.4f'%(epoch_loss))

## Saving models

In [None]:
# Checkpoint file name: fixed prefix, current timestamp, run description.
model_name = ''.join([
    'cpkt_modnet_',
    datetime.now().strftime("%Y_%m_%d_%H%M%S"),
    '_15ep_singledec_bceweight_reddata.pth',
])
# Only the weights (state_dict) are stored, not the module object itself.
torch.save(model.state_dict(), model_name)

## Loading models

In [None]:
# Notes on previously trained checkpoints:
# checkpoint_modnet_2021_06_27_004245_newdataset - not clear
# checkpoint_modnet_2021_06_29_133752_0_5_bc3_448res - professor
# checkpoint_modnet_2021_06_30_022156_newar_bothloss_448 - works fine with false positive
# cpkt_modnet_2021_07_04_070214_15ep_cfg2_combloss_reddata.pth - better shape in most of the cases
# cpkt_modnet_2021_07_07_083336_30ep_cfg1_combloss_reddata_orgdim - 120mb - poor performance
# cpkt_modnet_2021_07_09_072004_25ep_cfg1_bce_reddata_orgdim - not clear shape
# cpkt_modnet_2021_07_09_001123_30ep_cfg1_closs_dloss_reddata_orgdim - poor performance
# cpkt_modnet_2021_07_10_075834_30ep_cfg2_closs_reddata_orgdim.pth - Better



#model.load_state_dict(torch.load('./checkpoint_modnet_2021_06_11_013934.pth'))
#model.load_state_dict(torch.load('./checkpoint_modnet_2021_06_21_221234_newflownet_olddataset.pth'))
#model.load_state_dict(torch.load('./cpkt_modnet_2021_07_06_071731_30ep_cfg1_combloss_reddata_orgdim.pth')) # ok
#model.load_state_dict(torch.load('./cpkt_modnet_2021_07_09_072004_25ep_cfg1_bce_reddata_orgdim.pth'))
#model.load_state_dict(torch.load('./cpkt_modnet_2021_07_09_144506_30ep_singledec_bceweight_reddata_orgdim.pth'))

# map_location='cpu' lets a checkpoint saved on a GPU be read on a CPU-only
# machine; load_state_dict then copies the values into the model's own
# parameters on whatever device they are on.
model.load_state_dict(torch.load('./cpkt_modnet_2021_07_18_184304_30ep_cfg1_closs_actdata_orgdim.pth', map_location='cpu'))

## Testing

In [None]:
class synDataset_test(VisionDataset):
    """Evaluation dataset yielding ``(rgb, flow)`` tensors without masks.

    Files are read from the ``image_3`` and ``flownet`` sub-directories of
    ``root``. Both lists are sorted by file name and paired by position.

    Args:
        inp_dim: Size passed to ``PIL.Image.resize`` for every image.
        root: Sequence directory; defaults to the original evaluation
            location.
    """

    DEFAULT_ROOT = "/homebackup/dataset/2011_09_30/Kitti/Thesis_evaluation/07"

    def __init__(self, inp_dim, root=DEFAULT_ROOT):
        self.transforms = transforms.ToTensor()
        self.inp_dim = inp_dim
        self.rgb_dir = sorted(glob.glob("{}/image_3/*.png".format(root)))
        self.flow_dir = sorted(glob.glob("{}/flownet/*.png".format(root)))

    def _load_rgb(self, path):
        """Open ``path`` as an RGB image resized to ``inp_dim``."""
        return Image.open(path).convert('RGB').resize(self.inp_dim)

    def __getitem__(self, index):
        rgb = self._load_rgb(self.rgb_dir[index])
        flow = self._load_rgb(self.flow_dir[index])

        if self.transforms is not None:
            rgb = self.transforms(rgb)
            flow = self.transforms(flow)

        return rgb, flow

    def __len__(self):
        # The RGB list defines the dataset length.
        return len(self.rgb_dir)
    

batch_size = 1
# Replaces the training loaders: only a 'test' entry exists afterwards.
# Images are resized to 1226x370 (PIL size order is width, height).
dataloaders = dict(
    test=DataLoader(
        synDataset_test(inp_dim=(1226, 370)),
        batch_size=batch_size,
        shuffle=True,
        num_workers=2,
    ),
)


In [None]:
# Run the two-decoder model on one test batch and plot both predictions
# next to the RGB and flow inputs.
start_time = time.time()
model = model.eval()
# Inference only: no_grad() avoids building the autograd graph.
with torch.no_grad():
    for i, data in enumerate(dataloaders['test']):
        inputimage, flowimage= data

        if gpu_available:
            inputimage = inputimage.cuda()
            flowimage = flowimage.cuda()

        outputs = model(inputimage,flowimage)
        break  # only the first batch is visualised
print("--- %s seconds ---" % (time.time() - start_time))

# Zero out probabilities below 0.3, then take the per-pixel arg-max class.
pred1 = torch.sigmoid(outputs[0])
pred1[pred1<0.3]=0
pred1 = torch.argmax(pred1, dim=1)
pred2 = torch.sigmoid(outputs[1])
pred2[pred2<0.3]=0
pred2 = torch.argmax(pred2, dim=1)
#printing image
fig,((ax1,ax2),(ax4,ax5))=plt.subplots(2,2,figsize=(20,20),facecolor='w')
ax1.imshow(pred1.cpu().detach().numpy().squeeze(0),  interpolation='none',cmap='jet')
ax2.imshow(pred2.cpu().detach().numpy().squeeze(0),  interpolation='none',cmap='jet')
ax4.imshow(inputimage.squeeze(0).cpu().permute(1,2,0))
ax5.imshow(flowimage.squeeze(0).cpu().permute(1,2,0))
ax1.axis('off')
ax2.axis('off')
ax4.axis('off')
ax5.axis('off')
ax1.set_title('prediction_spatial',fontsize=20)
ax2.set_title('prediction_motion',fontsize=20)

ax4.set_title('Input',fontsize=20)
ax5.set_title('Flow',fontsize=20)
plt.show()

## Testing - single decoder configuration

In [None]:
# Run the single-decoder model on one test batch and plot the prediction
# above the RGB and flow inputs.
start_time = time.time()
model = model.eval()
# Inference only: no_grad() avoids building the autograd graph.
with torch.no_grad():
    for i, data in enumerate(dataloaders['test']):
        inputimage, flowimage= data

        if gpu_available:
            inputimage = inputimage.cuda()
            flowimage = flowimage.cuda()

        outputs = model(inputimage,flowimage)
        break  # only the first batch is visualised
print("--- %s seconds ---" % (time.time() - start_time))

# Per-pixel arg-max over the class channels of the raw output.
pred2 = torch.argmax(outputs, dim=1)
#printing image
fig,((ax2,ax4,ax5))=plt.subplots(3,1,figsize=(20,20),facecolor='w')
ax2.imshow(pred2.cpu().detach().numpy().squeeze(0),  interpolation='none',cmap='jet')
ax4.imshow(inputimage.squeeze(0).cpu().permute(1,2,0))
ax5.imshow(flowimage.squeeze(0).cpu().permute(1,2,0))

ax2.axis('off')
ax4.axis('off')
ax5.axis('off')
ax2.set_title('prediction_motion',fontsize=20)
ax4.set_title('Input',fontsize=20)
ax5.set_title('Flow',fontsize=20)
plt.show()

In [None]:
# Show the sigmoid of channel 2 of the (batch-squeezed) output.
plt.imshow(torch.sigmoid(outputs).detach().cpu().squeeze(0)[2, :, :].numpy())

## Result without loader

In [None]:
# Tensor conversion used for the images loaded by hand in the next cell.
transform = transforms.ToTensor()
# Switch the model to evaluation mode (eval() returns the module itself).
model = model.eval()

In [None]:
# Load one RGB frame and its flow visualisation by hand and run the model.
inputimage = Image.open('/homebackup/dataset/2011_09_30/Kitti/Thesis_evaluation/07/image_3/000056.png').convert('RGB')
# Remember the original (width, height) so the prediction can be resized back.
orig_size = inputimage.size
inputimage = inputimage.resize((1226,370))
inputimage = transform(inputimage)

flowimage = Image.open('/homebackup/dataset/2011_09_30/Kitti/Thesis_evaluation/07/flownet/000056-vis.png').convert('RGB')
flowimage = flowimage.resize((1226,370))
flowimage = transform(flowimage)

if gpu_available:
    inputimage = inputimage.cuda()
    flowimage = flowimage.cuda()

# Inference only: no_grad() avoids building the autograd graph.
# unsqueeze(0) adds the batch dimension the model expects.
with torch.no_grad():
    outputs = model(inputimage.unsqueeze(0),flowimage.unsqueeze(0))

In [None]:
# Load the flow visualisations of two consecutive frames (978 and 979).
# NOTE(review): cv2.imread returns None instead of raising when a file
# cannot be read - verify the paths if the next cell fails.
flowimage1 = cv2.imread('/homebackup/dataset/2011_09_30/Kitti/Thesis_evaluation/07/flownet/000978-vis.png')
flowimage2= cv2.imread('/homebackup/dataset/2011_09_30/Kitti/Thesis_evaluation/07/flownet/000979-vis.png')

In [None]:
# Show the absolute per-pixel difference of the two flow images. The images
# are uint8, so a plain subtraction would wrap around modulo 256; widen to
# int16 first and take the magnitude.
plt.imshow(np.abs(flowimage1.astype(np.int16) - flowimage2.astype(np.int16)).astype(np.uint8))

In [None]:
# Per-pixel arg-max class of the second output.
pred2 = outputs[1].argmax(dim=1)

# Flow image as an (H, W, C) numpy array.
flow_np = flowimage.detach().cpu().permute(1, 2, 0).numpy()
print(flow_np.max(), flow_np.min())
# 1.0 where at least one channel is not above 0.97, 0.0 where all are.
flow = (~(flow_np > 0.97)).any(axis=2).astype(np.float64)
# Keep the prediction only where the flow mask is set.
prediction = pred2.detach().cpu().numpy().squeeze(0) * flow

prediction = prediction.astype('uint8')
# Resize back to the original image size (width, height).
prediction = cv2.resize(prediction, orig_size, interpolation=cv2.INTER_AREA)

In [None]:
# Zero out probabilities below 0.6, then take the per-pixel arg-max class
# for each of the two outputs.
pred1 = torch.sigmoid(outputs[0])
pred1[pred1 < 0.6] = 0
pred1 = pred1.argmax(dim=1)
pred2 = torch.sigmoid(outputs[1])
pred2[pred2 < 0.6] = 0
pred2 = pred2.argmax(dim=1)

# Flow image as an (H, W, C) numpy array.
flow_np = flowimage.detach().cpu().permute(1, 2, 0).numpy()
print(flow_np.max(), flow_np.min())
# 1.0 where at least one channel is not above 0.9, 0.0 where all are.
flow = (~(flow_np > 0.9)).any(axis=2).astype(np.float64)
# Keep the second prediction only where the flow mask is set.
prediction = pred2.detach().cpu().numpy().squeeze(0) * flow

prediction = prediction.astype('uint8')
# Resize back to the original image size (width, height).
prediction = cv2.resize(prediction, orig_size, interpolation=cv2.INTER_AREA)

In [None]:
# Display the RGB input (channels moved last for matplotlib).
plt.figure(figsize=(20, 20))
plt.imshow(inputimage.detach().cpu().numpy().transpose(1, 2, 0))

In [None]:
# Display the flow input (channels moved last for matplotlib).
plt.figure(figsize=(20, 20))
plt.imshow(flowimage.detach().cpu().numpy().transpose(1, 2, 0))

In [None]:
# Display the binary flow mask computed in the post-processing cell.
plt.figure(figsize=(20,20))
plt.imshow(flow)

In [None]:
# Display the class map of the first output (batch axis removed).
plt.figure(figsize=(20, 20))
plt.imshow(np.squeeze(pred1.detach().cpu().numpy(), axis=0))

In [None]:
# Display the class map of the second output (batch axis removed).
plt.figure(figsize=(20, 20))
plt.imshow(np.squeeze(pred2.detach().cpu().numpy(), axis=0))

In [None]:
# Display the flow-masked prediction resized to the original image size.
plt.figure(figsize=(20,20))
plt.imshow(prediction)

In [None]:
# Read the tracking mask as a single-channel image.
solo_mask = cv2.imread(
    '/homebackup/dataset/2011_09_30/Kitti/Thesis_evaluation/07/track/000066_track.png',
    cv2.IMREAD_GRAYSCALE,
)

## IOU calculation

In [None]:
def iou_calc(target,prediction,i):
    """Return the IoU between the region ``target == i`` and ``prediction``.

    Args:
        target: Integer label array.
        prediction: Array of the same shape; every non-zero entry counts as
            predicted.
        i: Label value selecting the region of ``target`` to compare.

    Returns:
        Intersection over union as a float; ``0.0`` when both regions are
        empty (instead of the NaN a 0/0 division would give).
    """
    instance = np.asarray(target) == i
    predicted = np.asarray(prediction) != 0
    union = np.sum(np.logical_or(instance, predicted))
    if union == 0:
        return 0.0
    intersection = np.sum(np.logical_and(instance, predicted))
    return intersection / union

## Combining SOLOv2 output with motion segmentation

In [None]:
# Build an output mask from the instance mask: background stays 1, an
# instance is painted 155 when it overlaps the prediction with IoU above
# 0.001 and 55 otherwise.
mask = np.ones(solo_mask.shape)
# Instance ids that share at least one pixel with the prediction; computed
# once instead of once per instance.
overlapping_ids = set(np.unique(prediction*solo_mask).tolist())
for i in np.unique(solo_mask):
    if i == 0:
        continue  # 0 is skipped (treated as background)
    value = 55
    if int(i) in overlapping_ids:
        iou = iou_calc(solo_mask,prediction,i)
        if iou>0.001:
            value = 155
    mask[solo_mask==i]=value

In [None]:
# Convert the combined mask to an 8-bit PIL image at 1226x370 and display
# it as the cell output.
final_mask = Image.fromarray(mask.astype('uint8')).resize((1226, 370))
final_mask