In [1]:
%load_ext autoreload

In [7]:
%autoreload
import torch
import numpy as np
import torch.nn as nn
from PIL import Image
from pprint import pprint
import matplotlib.pyplot as plt
from torch.autograd import Variable
from mmdet.models.backbones import ResNet
from mmdet.models.flow_heads import FlowHead
from torchvision.transforms.functional import resize
from spatial_correlation_sampler import SpatialCorrelationSampler

# Test images: frames 000000_10 and 000000_11 from the KITTI 2015 training set.
KITTI_IMAGE_DIR = '/external/datasets/kitti/2015/training/image_2'
INPUT_SIZE = (256, 832)  # size argument handed to torchvision's `resize`


def load_frame(path, size=INPUT_SIZE):
    """Load one image file as a normalised CUDA tensor.

    The image is opened with PIL, resized to `size`, converted to a numpy
    array, moved from channel-last to channel-first layout, given a leading
    batch dimension of 1, copied to the GPU and divided by 255.

    Args:
        path: path of the image file to open.
        size: target size passed unchanged to `resize`.

    Returns:
        A CUDA tensor with a leading batch dimension of 1.
    """
    frame = np.asarray(resize(Image.open(path), size))
    return torch.from_numpy(frame).permute(2, 0, 1).unsqueeze(0).cuda() / 255.


img0 = load_frame(KITTI_IMAGE_DIR + '/000000_10.png')
img1 = load_frame(KITTI_IMAGE_DIR + '/000000_11.png')

# Run both frames through the same backbone instance; every returned feature
# map is made contiguous before it is handed to the flow head.
backbone = ResNet(depth=18).cuda()
print(backbone)
features0 = [f.contiguous() for f in backbone(img0)]
features1 = [f.contiguous() for f in backbone(img1)]
pprint([f.shape for f in features0])

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kerne

In [30]:
def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):
    """Build a biased 2-D convolution followed by LeakyReLU(0.1).

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size: convolution kernel size (default 3).
        stride: convolution stride (default 1).
        padding: zero padding on each side (default 1).
        dilation: kernel dilation (default 1).

    Returns:
        An `nn.Sequential` holding the convolution (index 0) and the
        activation (index 1).
    """
    convolution = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=True,
    )
    activation = nn.LeakyReLU(0.1)
    return nn.Sequential(convolution, activation)

def predict_flow(in_planes):
    """Build the flow prediction layer.

    Args:
        in_planes: number of input channels.

    Returns:
        A biased 3x3 `nn.Conv2d` (stride 1, padding 1) with 2 output channels.
    """
    flow_channels = 2
    return nn.Conv2d(
        in_planes,
        flow_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=True,
    )

def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
    """Build a biased 2-D transposed convolution.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size: kernel size (default 4).
        stride: stride (default 2).
        padding: padding (default 1).

    Returns:
        The configured `nn.ConvTranspose2d` layer.
    """
    return nn.ConvTranspose2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=True,
    )

class FlowHead(nn.Module):
    """
        PWC-DC net flow decoder with dilated convolutions and DenseNet-style
        connections, driven by feature maps computed outside this module.

        This notebook-local class shadows the `FlowHead` imported from
        `mmdet.models.flow_heads` at the top of the notebook.

        Only decoder levels 4, 3 and 2 are instantiated. No image encoder is
        built here: `forward` consumes two 4-level feature lists instead.

        Sub-module naming (kept stable so state-dict keys do not change):
            conv{L}_{0..4}   dense block of level L
            predict_flow{L}  2-channel flow prediction of level L
            deconv{L}        transposed conv applied to the level-L flow
            upfeat{L}        transposed conv applied to the level-L features
            dc_conv1..7      dilated-convolution refinement of the level-2 flow
    """

    # Output widths of the five convolutions in every dense block.
    _DENSE_CHANNELS = (128, 128, 96, 64, 32)

    def __init__(self, md=4):
        """
        input: md --- maximum displacement (for correlation. default: 4), after warping

        """
        super(FlowHead,self).__init__()

        # Replacement for the original NVIDIA correlation module:
        # Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1)
        self.corr    = SpatialCorrelationSampler(kernel_size=1, patch_size=(2*md + 1), stride=1, padding=0, dilation=1, dilation_patch=1)
        self.leakyRELU = nn.LeakyReLU(0.1)

        # Channels of the flattened correlation volume: one per displacement
        # in the (2*md+1) x (2*md+1) patch.
        nd = (2*md+1)**2
        # Running totals of the dense-block widths, as plain Python ints so
        # layer constructors never receive numpy scalars.
        dd = np.cumsum(self._DENSE_CHANNELS).tolist()

        # Level 4 decodes the correlation volume alone.
        od = nd
        self._add_decoder_level(4, od, dd)

        # Levels 3 and 2 decode cat(correlation, first-image feature, up_flow,
        # up_feat). The trailing 4 is up_flow (2) + up_feat (2); 256 and 128
        # must equal the channel counts of feats0[2] and feats0[1].
        od = nd+256+4
        self._add_decoder_level(3, od, dd)

        od = nd+128+4
        # TODO: level 1 (c11/c21) is not integrated yet. deconv2/upfeat2 are
        # created here but are not used by forward().
        self._add_decoder_level(2, od, dd)

        # Refinement network on the level-2 dense features.
        self.dc_conv1 = conv(od+dd[4], 128, kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)
        self.dc_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)
        self.dc_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)
        self.dc_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)
        self.dc_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc_conv7 = predict_flow(32)

        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
                if m.bias is not None:
                    m.bias.data.zero_()

    def _add_decoder_level(self, level, od, dd):
        """Register the dense block, flow predictor and upsamplers of one level.

        Modules are created in the order conv{L}_0..4, predict_flow{L},
        deconv{L}, upfeat{L}.

        level: integer used in the attribute names.
        od:    number of channels entering the dense block.
        dd:    cumulative sums of `_DENSE_CHANNELS`.
        """
        for idx, width in enumerate(self._DENSE_CHANNELS):
            # Each conv sees the block input plus every earlier conv output.
            in_planes = od if idx == 0 else od + dd[idx - 1]
            setattr(self, 'conv%d_%d' % (level, idx),
                    conv(in_planes, width, kernel_size=3, stride=1))
        setattr(self, 'predict_flow%d' % level, predict_flow(od+dd[4]))
        setattr(self, 'deconv%d' % level, deconv(2, 2, kernel_size=4, stride=2, padding=1))
        setattr(self, 'upfeat%d' % level, deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1))

    def _correlate(self, feat1, feat2):
        """Correlate two feature maps and flatten the displacement patch.

        The sampler output is unpacked as five dimensions; dimensions 1 and 2
        (the displacement patch) are merged into one channel dimension and
        LeakyReLU is applied.
        """
        cost = self.corr(feat1, feat2)
        b, ph, pw, h, w = cost.shape
        return self.leakyRELU(cost.reshape(b, ph*pw, h, w))

    def _decode(self, level, x):
        """Run the dense block and flow predictor of `level` on `x`.

        Each conv output is concatenated in front of its own input, so the
        channel count grows by that conv's width at every step.

        Returns (dense features, predicted flow).
        """
        for idx in range(len(self._DENSE_CHANNELS)):
            layer = getattr(self, 'conv%d_%d' % (level, idx))
            x = torch.cat((layer(x), x), 1)
        return x, getattr(self, 'predict_flow%d' % level)(x)

    def warp(self, x, flo):
        """
        warp an image/tensor (im2) back to im1, according to the optical flow

        x: [B, C, H, W] (im2)
        flo: [B, 2, H, W] flow

        Channel 0 of `flo` is added to the column index and channel 1 to the
        row index. Output positions whose bilinear sample is not (almost)
        fully inside `x` are set to zero.

        Works on whatever device `x` lives on (CPU or any GPU).
        """
        B, C, H, W = x.size()
        # Pixel-coordinate mesh grid, built directly on the input's device.
        xx = torch.arange(W, dtype=torch.float32, device=x.device).view(1,1,1,W).expand(B,1,H,W)
        yy = torch.arange(H, dtype=torch.float32, device=x.device).view(1,1,H,1).expand(B,1,H,W)
        vgrid = torch.cat((xx,yy),1) + flo

        # Scale grid to [-1,1]. Pixel 0 maps to -1 and pixel W-1 (H-1) to +1,
        # which is the align_corners=True convention of grid_sample.
        vx = 2.0*vgrid[:,0,:,:] / max(W-1,1) - 1.0
        vy = 2.0*vgrid[:,1,:,:] / max(H-1,1) - 1.0
        vgrid = torch.stack((vx,vy), dim=3)  # [B, H, W, 2]

        # align_corners=True matches the normalisation above; the current
        # default (False) would make a zero flow a non-identity warp.
        output = nn.functional.grid_sample(x, vgrid, align_corners=True)
        # Sampling an all-ones tensor yields the in-bounds bilinear weight.
        mask = nn.functional.grid_sample(torch.ones_like(x), vgrid, align_corners=True)
        mask = (mask >= 0.9999).to(x.dtype)

        return output*mask

    def forward(self,x, feats0=None, feats1=None):
        """Estimate flow coarse-to-fine from two feature lists.

        x:      accepted for interface compatibility; not used here.
        feats0: four feature maps of the first image. Index 3 is decoded
                first, then index 2, then index 1; index 0 is not used yet.
        feats1: the matching four feature maps of the second image.

        Returns (flow2, flow3, flow4) when `self.training` is true, otherwise
        only flow2.

        Raises ValueError if either feature list is missing.
        """
        if feats0 is None or feats1 is None:
            raise ValueError('FlowHead.forward requires both feats0 and feats1')

        # c11 / c21 are reserved for the not-yet-integrated level 1.
        c11, c12, c13, c14 = feats0
        c21, c22, c23, c24 = feats1

        # Level 4: no flow estimate yet, so correlate the raw features.
        x, flow4 = self._decode(4, self._correlate(c14, c24))
        up_flow4 = self.deconv4(flow4)
        up_feat4 = self.upfeat4(x)

        # Level 3. The 2.5 factor is carried over unchanged from the original
        # code; presumably it rescales flow to feature-map pixels -- confirm.
        warp3 = self.warp(c23, up_flow4*2.5)
        corr3 = self._correlate(c13, warp3)
        x, flow3 = self._decode(3, torch.cat((corr3, c13, up_flow4, up_feat4), 1))
        up_flow3 = self.deconv3(flow3)
        up_feat3 = self.upfeat3(x)

        # Level 2 (5.0: same remark as for 2.5 above).
        warp2 = self.warp(c22, up_flow3*5.0)
        corr2 = self._correlate(c12, warp2)
        x, flow2 = self._decode(2, torch.cat((corr2, c12, up_flow3, up_feat3), 1))

        # TODO: integrate level 1 here using c11/c21 with deconv2/upfeat2.

        # Dilated-convolution refinement added to the level-2 flow.
        x = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x))))
        flow2 = flow2 + self.dc_conv7(self.dc_conv6(self.dc_conv5(x)))

        if self.training:
            return flow2,flow3,flow4
        else:
            return flow2

    def loss(self, pred_flow, gt_flow):
        """
            Supervised loss is the dense L1-norm.

            Placeholder: not implemented yet, always returns None.
        """
        loss = None
        return loss

# Smoke test: stack both frames along the channel axis and run the flow head
# on the backbone features of each frame.
imgs = torch.cat((img0, img1), dim=1)
flow_head = FlowHead()
flow_head = flow_head.cuda()
outs = flow_head(imgs, feats0=features0, feats1=features1)
# A freshly built module is in training mode, so `outs` is the
# (flow2, flow3, flow4) tuple and outs[0] is flow2.
print(outs[0].shape)

torch.Size([1, 512, 16, 52]) torch.Size([1, 256, 32, 104]) torch.Size([1, 128, 64, 208]) torch.Size([1, 64, 128, 416])


NameError: name 'asdf' is not defined