In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import models, transforms, utils
import copy
%matplotlib inline

2がblock1_conv2に対応
16がblock3_conv4に対応

In [2]:
image_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )
    ])

In [3]:
sample = cv2.imread('sample1.jpg')

In [4]:
image = image_transform(sample).unsqueeze(0)

In [5]:
sample_template = cv2.imread('template1.png')
template = image_transform(sample_template).unsqueeze(0)

In [6]:
class Featex():
    def __init__(self, model, use_cuda):
        self.use_cuda = use_cuda
        self.feature1 = None
        self.feature2 = None
        self.model= copy.deepcopy(model.eval())
        self.model = self.model[:17]
        if self.use_cuda:
            self.model = self.model.cuda()
        self.model[2].register_forward_hook(self.save_feature1)
        self.model[16].register_forward_hook(self.save_feature2)
        
    def save_feature1(self, module, input, output):
        self.feature1 = output.detach()
    
    def save_feature2(self, module, input, output):
        self.feature2 = output.detach()
        
    def __call__(self, input):
        if self.use_cuda:
            input = input.cuda()
        _ = self.model(input)
        # resize feature2 to the same size of feature1
        self.feature2 = F.interpolate(self.feature2, size=(self.feature1.size()[2], self.feature1.size()[3]), mode='bilinear', align_corners=True)
        return torch.cat((self.feature1, self.feature2), dim=1)

In [7]:
def call( self, x ):
        x1, x2 = x
        bs, H, W, _ = [tf.shape(x1)[i] for i in range(4)]
        _, h, w, _ = [tf.shape(x2)[i] for i in range(4)]
        x1 = tf.reshape(x1, ( bs, H*W, -1 ) )
        x2 = tf.reshape(x2, ( bs, h*w, -1 ) )
        concat = tf.concat([x1, x2], axis=1)
        x_mean = K.mean( concat, axis=1, keepdims=True )
        x_std = K.std( concat, axis=1, keepdims = True )
        x1 = (x1 - x_mean) / x_std
        x2 = (x2 - x_mean) / x_std
        x1 = tf.reshape(x1, ( bs, H, W, -1 ) )
        x2 = tf.reshape(x2, ( bs, h, w, -1 ) )
        return [x1, x2]

In [8]:
class MyNormLayer():
    def __call__(self, x1, x2):
        bs, _ , H, W = x1.size()
        _, _, h, w = x2.size()
        x1 = x1.view(bs, -1, H*W)
        x2 = x2.view(bs, -1, h*w)
        concat = torch.cat((x1, x2), dim=2)
        x_mean = torch.mean(concat, dim=2, keepdim=True)
        x_std = torch.std(concat, dim=2, keepdim=True)
        x1 = (x1 - x_mean) / x_std
        x2 = (x2 - x_mean) / x_std
        x1 = x1.view(bs, -1, H, W)
        x2 = x2.view(bs, -1, h, w)
        return [x1, x2]

In [9]:
class CreateModel():
    def __init__(self, alpha, model, use_cuda):
        self.alpha = alpha
        self.featex = Featex(model, use_cuda)
    def __call__(self, template, image):
        T_feat = self.featex(template)
        I_feat = self.featex(image)
        I_feat, T_feat = MyNormLayer()(I_feat, T_feat)
        print(T_feat.size())
        print(I_feat.size())
        dist = torch.einsum("xcab,xcde->xabde", I_feat / torch.norm(I_feat, dim=1, keepdim=True), T_feat / torch.norm(T_feat, dim=1, keepdim=True))
        conf_map = QATM(self.alpha)(dist)
        return conf_map

In [10]:
def call( self, x ):
        batch_size, ref_row, ref_col, qry_row, qry_col = [ tf.shape(x)[k] for k in range(5) ]
        x = tf.reshape( x, [batch_size, ref_row * ref_col, qry_row * qry_col ] )
        xm_ref = x - K.max(x,axis=1,keepdims=True)
        conf_ref = tf.nn.softmax( self.coef_ref*xm_ref, axis=1 )
        xm_qry = x - K.max(x,axis=2,keepdims=True)
        conf_qry = tf.nn.softmax( self.coef_qry*xm_qry, axis=2 )
        confidence = K.sqrt(conf_ref * conf_qry )
        conf_values, ind3 = tf.nn.top_k( confidence, k=1 ) # batch_size, ref_size, 1
        ind1, ind2 = tf.meshgrid( tf.range( batch_size ), 
                                  tf.range( ref_row * ref_col ), indexing='ij' )
        ind1 = K.flatten( ind1 )
        ind2 = K.flatten( ind2 )
        ind3 = K.flatten( ind3 )
        indices = K.stack([ind1,ind2,ind3],axis=1)
        values = tf.gather_nd( confidence, indices )
        values = tf.reshape( values, [batch_size, ref_row, ref_col, 1])
        return values
    
    def compute_output_shape(self, input_shape):
        bs, H, W, _, _ = input_shape
        return (bs, H, W, 1)

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 20)

In [11]:
class QATM():
    def __init__(self, alpha):
        self.alpha = alpha
        
    def __call__(self, x):
        batch_size, ref_row, ref_col, qry_row, qry_col = x.size()
        print("x", x.size())
        x = x.view(batch_size, ref_row*ref_col, qry_row*qry_col)
        xm_ref = x - torch.max(x, dim=1, keepdim=True)[0]
        conf_ref = F.softmax(self.alpha*xm_ref, dim=1)
        print("conf_ref", conf_ref.size())
        xm_qry = x - torch.max(x, dim=2, keepdim=True)[0]
        conf_qry = F.softmax(self.alpha * xm_qry, dim=2)
        print("conf_qry", conf_qry.size())
        confidence = torch.sqrt(conf_ref * conf_qry)
        conf_values, ind3 = torch.topk(confidence, 1)
        ind1, ind2 = torch.meshgrid(torch.arange(batch_size), torch.arange(ref_row*ref_col))
        ind1 = ind1.flatten()
        ind2 = ind2.flatten()
        ind3 = ind3.flatten()
        if x.is_cuda:
            ind1 = ind1.cuda()
            ind2 = ind2.cuda()
        indices = torch.stack([ind1, ind2, ind3], dim=1)
        
        print("confidence", confidence.size())
        print("indices", indices.size())
        #values = confidence.masked_select(indices)
        values = confidence[indices.tolist()]
        values = torch.reshape(values, [batch_size, ref_row, ref_col, 1])
        return values
    def compute_output_shape( self, input_shape ):
        bs, H, W, _, _ = input_shape
        return (bs, H, W, 1)

In [2]:
t = torch.tensor([[1,2],[3,4]])
torch.gather(t, 1, torch.tensor([[0,1],[0,1]]))

tensor([[1, 2],
        [3, 4]])

In [38]:
indices = torch.tensor([[0, 1, 1],[ 1,  2, 1]])
params  = torch.tensor([[1, 2,5, 7], [3, 4,6, 8]])
params[indices.tolist()]

tensor([2, 6, 4])

In [39]:
indices.size()

torch.Size([2, 3])

In [40]:
params.size()

torch.Size([2, 4])

In [45]:
params[indices[0].tolist()]

tensor([[1, 2, 5, 7],
        [3, 4, 6, 8],
        [3, 4, 6, 8]])

In [49]:
params.shape

torch.Size([2, 4])

In [56]:
ones = torch.ones((1, 5, 3))

In [57]:
ones.shape

torch.Size([1, 5, 3])

In [62]:
ones[torch.tensor([0, 0, 0])]

tensor([[[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]])

In [77]:
ix1, ix2 = torch.meshgrid(torch.arange(2), torch.arange(10))

In [85]:
ix2.flatten()

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [89]:
ix3 = tor(20)

In [91]:
ix3

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])

In [78]:
ones = torch.ones((2, 10, 5))

In [94]:
ones[ix1.flatten(), ix2.flatten(), ix1.flatten()].shape

torch.Size([20])

In [76]:
ones[0].shape

torch.Size([10, 5])

In [50]:
params[[1, 1, 1]].shape

torch.Size([3, 4])

In [30]:
params[[0,1]]

tensor([[1, 2, 5],
        [3, 4, 6]])

In [35]:
indices = torch.tensor([[1], [0]])
params  = torch.tensor([[1, 2], [3, 4]])
params[indices[0, :], indices[1,:]]

tensor([3])

In [28]:
indices[:, 0]

tensor([0, 0])

In [21]:
lookup=torch.ones((15, 720, 30))
children=torch.randint(0,15,(15, 720, 19, 2),dtype=torch.long)
lookup[children[:,:,:,0],children[:,:,:,1],:].shape

torch.Size([15, 720, 19, 30])

In [15]:
model = CreateModel(model=models.vgg19(pretrained=True).features, alpha=25, use_cuda=True)

In [16]:
def run_one_sample(template, sample):
    val = model(template, sample)
    print(val)
    # compute geometry average on score map
    val = np.log( val )
    gray = val[0,:,:,0]
    gray = cv2.resize( gray, (image.shape[1], image.shape[0]) )
    score = compute_score( gray, w, h ) 
    score[score>-1e-7] = score.min()
    score = np.exp(score / (h*w)) # reverse number range back after computing geometry average
    
    # plot result
    x, y, w, h = locate_bbox( score, w, h )
    image_plot = cv2.rectangle( image_plot, (int(x), int(y)), (int(x+w), int(y+h)), (255, 0, 0), 2 )
    fig, ax = plt.subplots( 1, 3, figsize=(20, 5) )
    ax[0].imshow(template_plot)
    ax[1].imshow(image_plot)
    
    ax[2].imshow(score, 'jet')
    plt.show()

In [17]:
run_one_sample(template, image)

torch.Size([1, 320, 40, 40])
torch.Size([1, 320, 225, 384])
x torch.Size([1, 225, 384, 40, 40])
conf_ref torch.Size([1, 86400, 1600])
conf_qry torch.Size([1, 86400, 1600])
confidence torch.Size([1, 86400, 1600])
indices torch.Size([86400, 3])


RuntimeError: CUDA out of memory. Tried to allocate 133483.89 GiB (GPU 0; 7.94 GiB total capacity; 3.31 GiB already allocated; 4.18 GiB free; 4.07 MiB cached)

In [17]:
a = torch.rand(2,3,4)

In [None]:
torch