In [1]:
# Shell escape: print the GPU, driver and CUDA version visible to this environment.
!nvidia-smi

Sun Aug 21 12:33:40 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 512.72       Driver Version: 512.72       CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   48C    P8     3W /  N/A |      0MiB /  6144MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## PyTorch Lightning PWCNet

In [2]:
import ptlflow

08/21/2022 12:33:44 - INFO: Loading faiss with AVX2 support.
08/21/2022 12:33:44 - INFO: Successfully loaded faiss with AVX2 support.


In [3]:
# ptlflow.get_trainable_model_names()

In [4]:
from ptlflow.utils.correlation import IterSpatialCorrelationSampler as SpatialCorrelationSampler

import torch
import torch.nn as nn
import numpy as np

In [5]:
# Seed PyTorch's global RNG so the randomly initialised PWCNet weights and the
# random inputs/targets created later are reproducible on "Restart & Run All".
SEED = 0
torch.manual_seed(SEED)

# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [6]:
def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):
    """Return a biased 2-D convolution followed by a LeakyReLU with slope 0.1.

    All arguments are forwarded unchanged to ``nn.Conv2d``.
    """
    convolution = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=True,
    )
    activation = nn.LeakyReLU(0.1)
    return nn.Sequential(convolution, activation)


def predict_flow(in_planes):
    """Return a biased 3x3, stride-1, padding-1 convolution with 2 output channels."""
    flow_channels = 2
    return nn.Conv2d(
        in_planes,
        flow_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=True,
    )


def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
    """Return a biased transposed 2-D convolution.

    With the default arguments (kernel 4, stride 2, padding 1) the output has
    twice the input's height and width.
    """
    return nn.ConvTranspose2d(
        in_planes,
        out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=True,
    )


In [7]:
class PWCNet(nn.Module):
    """Coarse-to-fine optical-flow estimator built on a six-level feature pyramid.

    Both images pass through the same convolutional pyramid (levels 1-6, each
    level starting with a stride-2 convolution). From level 6 down to level 2,
    a correlation volume between the first image's features and the (warped)
    second image's features is fed to a densely connected convolution stack
    that predicts a 2-channel flow for that level.

    ``forward`` returns ``[flow2, flow3, flow4, flow5, flow6]`` in training
    mode and, in eval mode, ``flow2 * div_flow`` bilinearly upsampled by 4.
    """

    def __init__(self):
        super(PWCNet, self).__init__()

        # The eval-mode output is flow2 multiplied by this factor.
        self.div_flow = 20.0

        # Correlation search radius: the correlation patch is 2*md+1 wide.
        self.md = 4

        # Feature pyramid; the first convolution of every level has stride 2.
        self.conv1a  = conv(3,   16, kernel_size=3, stride=2)
        self.conv1aa = conv(16,  16, kernel_size=3, stride=1)
        self.conv1b  = conv(16,  16, kernel_size=3, stride=1)
        self.conv2a  = conv(16,  32, kernel_size=3, stride=2)
        self.conv2aa = conv(32,  32, kernel_size=3, stride=1)
        self.conv2b  = conv(32,  32, kernel_size=3, stride=1)
        self.conv3a  = conv(32,  64, kernel_size=3, stride=2)
        self.conv3aa = conv(64,  64, kernel_size=3, stride=1)
        self.conv3b  = conv(64,  64, kernel_size=3, stride=1)
        self.conv4a  = conv(64,  96, kernel_size=3, stride=2)
        self.conv4aa = conv(96,  96, kernel_size=3, stride=1)
        self.conv4b  = conv(96,  96, kernel_size=3, stride=1)
        self.conv5a  = conv(96, 128, kernel_size=3, stride=2)
        self.conv5aa = conv(128,128, kernel_size=3, stride=1)
        self.conv5b  = conv(128,128, kernel_size=3, stride=1)
        self.conv6aa = conv(128,196, kernel_size=3, stride=2)
        self.conv6a  = conv(196,196, kernel_size=3, stride=1)
        self.conv6b  = conv(196,196, kernel_size=3, stride=1)

        self.leakyRELU = nn.LeakyReLU(0.1)

        self.corr = SpatialCorrelationSampler(kernel_size=1, patch_size=2*self.md+1, padding=0)

        # nd: channels of the flattened correlation volume, (2*md+1)^2.
        nd = (2*self.md+1)**2
        # dd[i]: channels added by the first i+1 layers of a dense stack, whose
        # layers output 128, 128, 96, 64 and 32 channels and are concatenated
        # with their own input.
        dd = np.cumsum([128,128,96,64,32])

        # Level 6: the stack input is the correlation volume only.
        od = nd
        self.conv6_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv6_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv6_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv6_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv6_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow6 = predict_flow(od+dd[4])
        self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat6 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        # Levels 5..2: the stack input is correlation + level features +
        # upsampled flow (2 channels) + upsampled decoder features (2 channels).
        od = nd+128+4
        self.conv5_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv5_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv5_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv5_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv5_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow5 = predict_flow(od+dd[4])
        self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat5 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        od = nd+96+4
        self.conv4_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv4_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv4_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv4_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv4_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow4 = predict_flow(od+dd[4])
        self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat4 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        od = nd+64+4
        self.conv3_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv3_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv3_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv3_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv3_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow3 = predict_flow(od+dd[4])
        self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat3 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        od = nd+32+4
        self.conv2_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv2_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv2_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv2_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv2_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow2 = predict_flow(od+dd[4])

        # Kaiming-normal weights and zero biases for every (transposed) convolution.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
                if m.bias is not None:
                    m.bias.data.zero_()
        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True)


    def warp(self, x, flo):
        """
        Warp an image/tensor (im2) back to im1, according to the optical flow.

        x: [B, C, H, W] (im2)
        flo: [B, 2, H, W] flow in pixels (channel 0 is added to the column
             index, channel 1 to the row index)

        Returns x bilinearly sampled at (pixel position + flow); positions whose
        sample is not fully inside x are set to zero.
        """
        B, _, H, W = x.size()
        # Pixel-coordinate mesh grid: channel 0 = column index, channel 1 = row index.
        xx = torch.arange(0, W).view(1,-1).repeat(H,1)
        yy = torch.arange(0, H).view(-1,1).repeat(1,W)
        xx = xx.view(1,1,H,W).repeat(B,1,1,1)
        yy = yy.view(1,1,H,W).repeat(B,1,1,1)
        grid = torch.cat((xx,yy),1).float()

        # Always match x's dtype and device. Doing this only for CUDA inputs
        # left the grid as a float32 CPU tensor for other devices/dtypes.
        grid = grid.to(dtype=x.dtype, device=x.device)
        vgrid = grid + flo

        # Scale the sampling positions to [-1, 1], as grid_sample expects, and
        # put the (x, y) pair last: [B, H, W, 2].
        vgrid_x = 2.0*vgrid[:,0,:,:] / max(W-1,1) - 1.0
        vgrid_y = 2.0*vgrid[:,1,:,:] / max(H-1,1) - 1.0
        vgrid = torch.stack((vgrid_x, vgrid_y), dim=-1)

        output = nn.functional.grid_sample(x, vgrid, align_corners=True)
        # Warping an all-ones tensor gives (almost exactly) 1 where the sample
        # lies fully inside x and less where it touches the zero padding.
        mask = torch.ones_like(x)
        mask = nn.functional.grid_sample(mask, vgrid, align_corners=True)

        mask[mask<0.9999] = 0
        mask[mask>0] = 1

        return output*mask


    def _correlate(self, feat1, feat2):
        """Correlation volume of two feature maps, flattened to [B, D, H, W], divided by the channel count and passed through LeakyReLU."""
        corr = self.corr(feat1, feat2)
        corr = corr.view(corr.shape[0], -1, corr.shape[3], corr.shape[4])
        corr = corr / feat1.shape[1]
        return self.leakyRELU(corr)


    def _dense(self, x, layers):
        """Apply each layer in turn, concatenating its output in front of its input along the channel axis."""
        for layer in layers:
            x = torch.cat((layer(x), x), 1)
        return x


    def forward(self, im1, im2):
        """Estimate the flow from im1 to im2 (both [B, 3, H, W]).

        Returns the list [flow2, flow3, flow4, flow5, flow6] when
        ``self.training`` is true; otherwise ``flow2 * div_flow`` upsampled by 4.
        """
        # Feature pyramids; c1k / c2k are the level-k features of im1 / im2.
        c11 = self.conv1b(self.conv1aa(self.conv1a(im1)))
        c21 = self.conv1b(self.conv1aa(self.conv1a(im2)))
        c12 = self.conv2b(self.conv2aa(self.conv2a(c11)))
        c22 = self.conv2b(self.conv2aa(self.conv2a(c21)))
        c13 = self.conv3b(self.conv3aa(self.conv3a(c12)))
        c23 = self.conv3b(self.conv3aa(self.conv3a(c22)))
        c14 = self.conv4b(self.conv4aa(self.conv4a(c13)))
        c24 = self.conv4b(self.conv4aa(self.conv4a(c23)))
        c15 = self.conv5b(self.conv5aa(self.conv5a(c14)))
        c25 = self.conv5b(self.conv5aa(self.conv5a(c24)))
        c16 = self.conv6b(self.conv6a(self.conv6aa(c15)))
        c26 = self.conv6b(self.conv6a(self.conv6aa(c25)))

        # Level 6: no previous flow exists, so the features are correlated unwarped.
        corr6 = self._correlate(c16, c26)
        x = self._dense(corr6, (self.conv6_0, self.conv6_1, self.conv6_2,
                                self.conv6_3, self.conv6_4))
        flow6 = self.predict_flow6(x)
        up_flow6 = self.deconv6(flow6)
        up_feat6 = self.upfeat6(x)

        # Levels 5..2: warp the second image's features with the upsampled flow.
        # The warp factors equal 20/32, 20/16, 20/8 and 20/4, i.e. the default
        # div_flow over the accumulated stride of the level.
        # NOTE(review): they are hard-coded and do not follow self.div_flow.
        warp5 = self.warp(c25, up_flow6*0.625)
        corr5 = self._correlate(c15, warp5)
        x = torch.cat((corr5, c15, up_flow6, up_feat6), 1)
        x = self._dense(x, (self.conv5_0, self.conv5_1, self.conv5_2,
                            self.conv5_3, self.conv5_4))
        flow5 = self.predict_flow5(x)
        up_flow5 = self.deconv5(flow5)
        up_feat5 = self.upfeat5(x)

        warp4 = self.warp(c24, up_flow5*1.25)
        corr4 = self._correlate(c14, warp4)
        x = torch.cat((corr4, c14, up_flow5, up_feat5), 1)
        x = self._dense(x, (self.conv4_0, self.conv4_1, self.conv4_2,
                            self.conv4_3, self.conv4_4))
        flow4 = self.predict_flow4(x)
        up_flow4 = self.deconv4(flow4)
        up_feat4 = self.upfeat4(x)

        warp3 = self.warp(c23, up_flow4*2.5)
        corr3 = self._correlate(c13, warp3)
        x = torch.cat((corr3, c13, up_flow4, up_feat4), 1)
        x = self._dense(x, (self.conv3_0, self.conv3_1, self.conv3_2,
                            self.conv3_3, self.conv3_4))
        flow3 = self.predict_flow3(x)
        up_flow3 = self.deconv3(flow3)
        up_feat3 = self.upfeat3(x)

        warp2 = self.warp(c22, up_flow3*5.0)
        corr2 = self._correlate(c12, warp2)
        x = torch.cat((corr2, c12, up_flow3, up_feat3), 1)
        x = self._dense(x, (self.conv2_0, self.conv2_1, self.conv2_2,
                            self.conv2_3, self.conv2_4))
        flow2 = self.predict_flow2(x)

        if self.training:
            # Finest level first.
            return [flow2, flow3, flow4, flow5, flow6]

        # Eval mode: rescale the finest flow and upsample it by 4.
        return self.upsample1(flow2*self.div_flow)

In [8]:
# Instantiate the PWCNet class defined in the previous cell.
ptflow_model = PWCNet()
# The model is left on the CPU; to run on the GPU, move it and both input images to `device`.

In [9]:
# Two random 3-channel 256x256 tensors (batch size 1) used as the image pair.
img1, img2 = (torch.randn(1, 3, 256, 256) for _ in range(2))

# The inputs are created on the CPU and are not moved to `device`.
img1.device

device(type='cpu')

In [10]:
# The module has not been switched to eval mode yet, so forward() takes the
# `self.training` branch and returns the list of per-level flows.
flows = ptflow_model(img1, img2)

In [11]:
# First entry of the training-mode output: flow2, the finest predicted level.
flows[0].shape

torch.Size([1, 2, 64, 64])

In [12]:
# Switch to inference mode. The trailing semicolon suppresses the module repr
# that would otherwise be echoed as the cell output.
ptflow_model.eval();




In [13]:
# In eval mode forward() returns a single tensor: flow2 multiplied by div_flow
# and bilinearly upsampled by 4.
flow = ptflow_model(img1, img2)

flow.shape

torch.Size([1, 2, 256, 256])

___

## EzFlow PWCNet

In [14]:
from ezflow.models import build_model

In [15]:
# Build the 'PWCNet' model with ezflow's build_model from a YAML config; the
# path is relative to the notebook's working directory.
ezflow_model = build_model('PWCNet', cfg_path='../configs/pwcnet/models/ezflow.yaml', custom_cfg=True)

  (out_channels, in_channels // groups, *kernel_size), **factory_kwargs))


In [16]:
# Run the same random image pair through the ezflow model; no eval() call has
# been made on it yet. This overwrites `flows` from the earlier cell.
flows = ezflow_model(img1, img2)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [17]:
# Shape of the first returned prediction (the output below is 1/4 of the 256x256 input).
flows[0].shape

torch.Size([1, 2, 64, 64])

In [18]:
# Switch to eval mode and run again; the result is used as a single tensor here.
ezflow_model.eval()

flow = ezflow_model(img1, img2)
flow.shape

torch.Size([1, 2, 256, 256])

___

### nnflow PWCNetV1

In [19]:
# NOTE(review): the star import hides which names come from nnflow. In particular
# it is unclear whether `build_model` used below is nnflow's or the one imported
# from ezflow.models earlier — confirm and import the needed names explicitly.
from nnflow import *

In [20]:
# Build the 'PWCNetV1' model from its YAML config (path relative to the working directory).
nnflow_model = build_model('PWCNetV1', cfg_path='../configs/pwcnet/models/nnflow_v1.yaml', custom_cfg=True)

In [21]:
# Attributes of the built model; `md` and `div_flow` show the same values as the
# PWCNet class defined earlier in this notebook (4 and 20.0).
nnflow_model.md, nnflow_model.div_flow, nnflow_model.padding

(4, 20.0, 0)

In [22]:
# Explicitly select training mode before the forward pass.
nnflow_model.train()
flows = nnflow_model(img1, img2)


# One shape per returned flow (the output below runs from 64x64 down to 4x4).
# Note: this loop rebinds `flow`, the name used for eval-mode results in earlier cells.
for flow in flows:
    print(flow.shape)

torch.Size([1, 2, 64, 64])
torch.Size([1, 2, 32, 32])
torch.Size([1, 2, 16, 16])
torch.Size([1, 2, 8, 8])
torch.Size([1, 2, 4, 4])


In [38]:
# Unlike the two models above, the eval-mode output is unpacked into two values
# here: the upsampled flow and the predictions that are later passed to the loss.
nnflow_model.eval()

flow_up, flow_preds_v1 = nnflow_model(img1, img2)
flow_up.shape

torch.Size([1, 2, 256, 256])

In [24]:
# `flow_preds` is never defined in this notebook and raises NameError on a fresh
# kernel; inspect the V1 eval-mode predictions unpacked in the previous cell instead.
len(flow_preds_v1)

5

___

### nnflow PWCNetV2

In [25]:
#nnflow_model_v2 = build_model('PWCNetV2', cfg_path='../configs/pwcnet/models/nnflow_v2.yaml', custom_cfg=True)

In [26]:
# NOTE(review): the variable is named v3 and loads nnflow_v3.yaml, but the model
# key is 'PWCNetV2' — confirm the naming is intended.
nnflow_model_v3 = build_model('PWCNetV2', cfg_path='../configs/pwcnet/models/nnflow_v3.yaml', custom_cfg=True)

In [27]:
# No train()/eval() call precedes this, so the model runs in whatever mode
# build_model left it in. The result is indexed by level name in the next cell.
flows = nnflow_model_v3(img1, img2)

In [28]:
# Iterating `flows` yields the level names, which are then used to index it;
# the output below lists level6 (4x4) down to level2 (64x64).
for level in flows:
    print(f"{level} {flows[level].shape}")

level6 torch.Size([1, 2, 4, 4])
level5 torch.Size([1, 2, 8, 8])
level4 torch.Size([1, 2, 16, 16])
level3 torch.Size([1, 2, 32, 32])
level2 torch.Size([1, 2, 64, 64])


In [36]:
# Eval mode: the output is unpacked into the upsampled flow and the predictions
# later passed to the loss. Note: this overwrites `flow_up` from the V1 cell.
nnflow_model_v3.eval()

flow_up, flow_preds_v3 = nnflow_model_v3(img1, img2)
flow_up.shape

torch.Size([1, 2, 256, 256])

___

In [30]:
# Random 2-channel 256x256 tensor used as the loss target; same shape as the
# upsampled eval-mode flow shown above.
target = torch.randn(1,2,256,256)

In [31]:
from ezflow.functional import FUNCTIONAL_REGISTRY

In [32]:
# Look up the loss class by its registered name; the output below shows that it
# resolves to nnflow.losses.MultiLevelEPE.
loss = FUNCTIONAL_REGISTRY.get('MultiLevelEPE')
# Alternative registry key: loss = FUNCTIONAL_REGISTRY.get('MultiScale')
loss

nnflow.losses.MultiLevelEPE

In [33]:
# Instantiate the loss class with no arguments.
loss_fn = loss()

In [39]:
# The target is divided by 20.0, the same value as `nnflow_model.div_flow` shown
# earlier — presumably to match the scale of the predictions; TODO confirm.
loss_fn(flow_preds_v1, target / 20.0)

tensor(0.7070, grad_fn=<DivBackward0>)

In [37]:
# NOTE(review): 20.0 is hard-coded; the `div_flow` of nnflow_model_v3 is not
# shown in this notebook — confirm that it matches.
loss_fn(flow_preds_v3, target / 20.0)

tensor(1.4985, grad_fn=<DivBackward0>)