## Visual Place Recognition under opposing viewpoints with varying scene appearance 

The following code uses pre-computed features to demonstrate the approach described in the ICRA 2019 paper titled:

*Look No Deeper: Recognizing Places from Opposing Viewpoints under Varying Scene Appearance using Single-View Depth Estimation*

The example data used here - a subset of the [Oxford RobotCar dataset](https://robotcar-dataset.robots.ox.ac.uk/) - pairs the **rear-view** images from the **Autumn** traverse with the **front-view** images from the **Summer** traverse.

In [1]:
import numpy as np
import os, sys, cv2, time

from scipy.spatial.distance import cdist


In [2]:
# Pre-computed data paths
# All paths are relative, so they resolve against the current working directory.

# Reference set ("1-m"): depth masks, global image descriptors, and a directory
# holding one dense conv5 tensor file per image (named by zero-padded image index).
refPath_depthData = "../precomputed/depthData/1-m.npz"
refPath_imgDesc = "../precomputed/imgGlobalDesc/1-m.npz"
refPath_kpDense = "../precomputed/kpDesc_C5Tensor/1-m/"

# Query set ("5-s"): global image descriptors and dense conv5 tensors; no depth data path is defined for the query side.
queryPath_imgDesc = "../precomputed/imgGlobalDesc/5-s.npz"
queryPath_kpDense = "../precomputed/kpDesc_C5Tensor/5-s/"

In [3]:
# system parameters

topN = 5 # number of top global-descriptor matches per query kept for re-ranking
seqL = 1 # number of adjacent reference frames to consider around a candidate r (setting 4 uses frames r-2 .. r+1, clipped to the valid index range)
depT = 98 # upper depth bound for keypoints: only reference keypoints with 0 < depth < depT are used
MULTITHREADING_ON = False # False: process the queries in a serial loop; True: use the thread-pool variant instead


### Find top-N image matches for query data

In [4]:
print("Loading global image descriptors for reference and query datasets...")

# np.load on an .npz file returns an archive object that keeps the file open;
# the with-blocks close it as soon as the array has been read into memory.
with np.load(refPath_imgDesc) as refArchive:
    imgDescRef = refArchive['arr_0']
with np.load(queryPath_imgDesc) as queryArchive:
    imgDescQuery = queryArchive['arr_0']

print("Descriptor Data Shapes:",imgDescRef.shape,imgDescQuery.shape)

Loading global image descriptors for reference and query datasets...
Descriptor Data Shapes: (761, 4096) (814, 4096)


In [5]:
print("Finding global matches using cosine distance...")

# Pairwise cosine distances: rows index reference images, columns index query images.
distMat = cdist(imgDescRef, imgDescQuery, metric="cosine")
numRefImgs, numQueryImgs = distMat.shape[:2]

# For every query (column), sort the reference indices by increasing distance and keep the first topN rows.
topMatches = np.argsort(distMat, axis=0)[:topN]

print("Distance Matrix Shape:",distMat.shape)

Finding global matches using cosine distance...
Distance Matrix Shape: (761, 814)


### Seq2Single 

In [6]:
# Load pre-computed depth masks and c5 tensors (for keypoint and descriptor extraction) 

def loadC5Tensor(inPath,idx):
    """
    Load the pre-computed array stored for image `idx` inside directory `inPath`.

    inPath: directory containing one .npz file per image
    idx: integer image index; the file name is the index zero-padded to 7 digits (e.g. 0000012.npz)
    returns: the array saved under the key 'arr_0' in that file
    """
    tensorPath = os.path.join(inPath,"{0:07d}.npz".format(idx))
    # np.load on an .npz keeps the underlying file open until the archive is closed;
    # this function is called once per image, so close it right after reading the array.
    with np.load(tensorPath) as archive:
        return archive['arr_0']
  
print("Loading dense conv5 tensors, required for local keypoint and descriptor extraction...")

# One tensor per reference image, stacked along a new leading axis (all of them are held in memory at once).
denseFtRefAll = np.array([loadC5Tensor(refPath_kpDense,i1) for i1 in range(numRefImgs)])
numFmaps = denseFtRefAll.shape[2]

print("Dense Descriptor shape [numImages,numRows*numCols,numFeatureMaps]:",denseFtRefAll.shape)


print("\nLoading depth masks...")

# Close the .npz archive once the depth array has been read; np.load would otherwise leave the file open.
with np.load(refPath_depthData) as depthArchive:
    depImgRefAll = depthArchive['arr_0']
dImgRows, dImgCols = depImgRefAll.shape[1], depImgRefAll.shape[2]

print("Depth masks shape [numImages,numRows,numCols]:",depImgRefAll.shape)


Loading dense conv5 tensors, required for local keypoint and descriptor extraction...
Dense Descriptor shape [numImages,numRows*numCols,numFeatureMaps]: (761, 620, 2048)

Loading depth masks...
Depth masks shape [numImages,numRows,numCols]: (761, 640, 1024)


In [7]:
# grid shape of the c5 tensor; this depends on the spatial resolution of the network used (ResNet101 from RefineNet)
c5Shape = np.array([20,31])

# per-axis factor between the depth image size and the c5 grid size;
# keypoints found on the c5 grid are multiplied by it to look up their depth value
mulFac_dep2c5 = np.array([dImgRows,dImgCols],dtype=float)/c5Shape
print(mulFac_dep2c5)

[32.         33.03225806]


In [8]:
def getKP2d(kpInds,resShape_,mulFac_):
    """
    Turn flat keypoint indices on a grid of shape resShape_ into scaled 2D coordinates.

    Each flat index is unravelled into per-axis indices, multiplied element-wise
    by mulFac_, has its column order reversed (so a (row, col) pair becomes
    (col, row)) and is truncated to int.
    returns: integer array with one row per keypoint
    """
    gridCoords = np.array(np.unravel_index(kpInds,resShape_)).T
    scaledCoords = mulFac_*gridCoords
    return scaledCoords[:,::-1].astype(int)

def getRefSeqInfo(refIdx,maxIndVal,seqLen):
    """
    Return the indices of the frame window of length seqLen around refIdx.

    The window starts floor(seqLen/2) frames before refIdx and ends just before
    refIdx + ceil(seqLen/2); it is clipped to the range [0, maxIndVal).
    """
    halfLen = seqLen/2.0
    firstIdx = refIdx-int(np.floor(halfLen))
    endIdx = refIdx+int(np.ceil(halfLen))  # exclusive
    return np.arange(max(firstIdx,0),min(endIdx,maxIndVal))

def filterKPs_depth(kp_d,minD,maxD):
    """Return the indices of the entries of kp_d lying strictly between minD and maxD."""
    withinRange = (kp_d>minD) & (kp_d<maxD)
    return np.argwhere(withinRange).T[0]

def get_cosine_corresponding_vectors(d1,d2):
    """
    Cosine distance between corresponding rows of d1 and d2.

    Row i of the result is 1 minus the cosine similarity of d1[i] and d2[i];
    no all-pairs comparison is made.
    """
    dotProducts = np.sum(d1*d2,axis=1)
    normProducts = np.linalg.norm(d1,axis=1)*np.linalg.norm(d2,axis=1)
    return 1 - dotProducts/normProducts

def getKpDep(kpIndFlat,depImg,resShape,mulFac):
    """
    Look up the depth value of each keypoint.

    The flat keypoint indices are converted with getKP2d; column 0 of its result
    indexes the second axis of depImg and column 1 indexes the first axis.
    returns: 1D array holding one value of depImg per keypoint
    """
    kpCoords = getKP2d(kpIndFlat,resShape,mulFac)
    secondAxisInds, firstAxisInds = kpCoords[:,0], kpCoords[:,1]
    return depImg[firstAxisInds,secondAxisInds]

In [9]:
def matchData(denseDescQuery,topMatchIndsList,c5ShapeIn,mulFac,numRefImgs_,numFmaps_,seqLen=seqL,depThresh=depT):
    """
    Score every candidate reference image against one query image.

    uses global variables: denseFtRefAll, depImgRefAll

    Keypoints are the argmax locations along axis 0 of a dense descriptor array,
    one per column. For each candidate, each frame in the window around it
    (see getRefSeqInfo) is compared with the query: reference keypoints whose depth
    lies strictly between 0 and depThresh are kept, and the cosine distance between
    the reference and query descriptors at the matching keypoints is computed.
    Per keypoint the minimum over the window is taken, and the score is the mean
    of those minima over all keypoints that were kept in at least one frame.

    denseDescQuery: 2D query descriptor array (keypoints are found along axis 0)
    topMatchIndsList: iterable of candidate reference image indices
    c5ShapeIn: grid shape used to unravel the flat keypoint indices
    mulFac: factor that scales grid coordinates to depth image coordinates
    numRefImgs_: exclusive upper bound used to clip the frame window
    numFmaps_: not used inside this function; kept so existing calls stay valid
    seqLen: number of adjacent reference frames in the window
    depThresh: upper depth bound for reference keypoints

    returns: 1D array with one score per candidate, in the order of topMatchIndsList.
             A candidate with no keypoint inside the depth range gets np.inf.
    """
    # placeholder meaning "no distance computed for this keypoint/frame"
    unsetDist = 100

    scores = []

    kp2IndFlat = np.argmax(denseDescQuery,axis=0)

    for rIdx in topMatchIndsList:

        seqRange = getRefSeqInfo(rIdx,numRefImgs_,seqLen)

        distMat = np.ones([denseDescQuery.shape[1],len(seqRange)])*unsetDist
        for lc,k1 in enumerate(seqRange):

            # only read from below, so the reference tensor does not need to be copied
            denseDescRef = denseFtRefAll[k1]

            kp1IndFlatRef = np.argmax(denseDescRef,axis=0)
            kp1_depth = getKpDep(kp1IndFlatRef,depImgRefAll[k1],c5ShapeIn,mulFac)

            inRangeInds = filterKPs_depth(kp1_depth,0,depThresh)

            # descriptors at the keypoint locations, taken for the same columns in reference and query
            descRefSubset = denseDescRef[kp1IndFlatRef[inRangeInds]]
            descQuerySubset = denseDescQuery[kp2IndFlat[inRangeInds]]

            distMat[inRangeInds,lc] = get_cosine_corresponding_vectors(descRefSubset,descQuerySubset)

        minDists = np.min(distMat,axis=1)
        distVals = minDists[minDists!=unsetDist]

        if distVals.size == 0:
            # np.mean of an empty array is nan (plus a RuntimeWarning), and a nan score
            # cannot be ranked sensibly by an argmin over the scores; use +inf instead
            # so this candidate ranks last.
            scores.append(np.inf)
        else:
            scores.append(np.mean(distVals))

    return np.array(scores)

#### Process the query data

In [10]:
# wrapped in a function for multithreading implemented in the next block
def processQueryIndex(qIdx):
    """
    Compute the re-ranking scores of one query image.

    Takes column qIdx of topMatches as the candidate list, loads the query's dense
    tensor, reshapes it to [-1, numFmaps] and passes both to matchData.
    Reads the globals topMatches, queryPath_kpDense, numFmaps, c5Shape,
    mulFac_dep2c5 and numRefImgs.
    returns: the score array produced by matchData
    """
    candidateRefInds = topMatches[:,qIdx]
    queryDesc = loadC5Tensor(queryPath_kpDense,qIdx).reshape([-1,numFmaps])
    return matchData(
        queryDesc,
        candidateRefInds,
        c5Shape,
        mulFac_dep2c5,
        numRefImgs,
        numFmaps,
    )


# One score array per query image, in query order.
finalScores = []

if not MULTITHREADING_ON:
    for i in range(numQueryImgs):
        finalScores.append(processQueryIndex(i))
        # progress report on every fifth query
        if not i%5:
            print("processed",i)

processed 0
processed 5
processed 10
processed 15
processed 20
processed 25
processed 30
processed 35
processed 40
processed 45
processed 50
processed 55
processed 60
processed 65
processed 70
processed 75
processed 80
processed 85
processed 90
processed 95
processed 100
processed 105
processed 110
processed 115
processed 120
processed 125
processed 130
processed 135
processed 140
processed 145
processed 150
processed 155
processed 160
processed 165
processed 170
processed 175
processed 180
processed 185
processed 190
processed 195
processed 200
processed 205
processed 210
processed 215
processed 220
processed 225
processed 230
processed 235
processed 240
processed 245
processed 250
processed 255
processed 260
processed 265
processed 270
processed 275
processed 280
processed 285
processed 290
processed 295
processed 300
processed 305
processed 310
processed 315
processed 320
processed 325
processed 330
processed 335
processed 340
processed 345
processed 350
processed 355
processed 360


#### With Multithreading

In [11]:
if MULTITHREADING_ON:
    from multiprocessing.dummy import Pool as ThreadPool

    numT = 4  # number of worker threads

    # A single pool serves all queries. Creating and joining a new pool per batch
    # adds start-up cost and makes every batch wait for its slowest query.
    scores = []
    with ThreadPool(numT) as pool:
        # imap yields the results in query order, so progress can be reported
        # on every fifth query, as in the serial loop
        for i,s in enumerate(pool.imap(processQueryIndex,range(numQueryImgs))):
            scores.append(s)
            if i%5==0:
                print("processed",i)
    # one row of scores per query
    finalScores = np.array(scores)

### Write the output 

In [12]:
# For every query, find the row of its lowest score and read the reference index
# stored at that row in the query's column of topMatches.
bestRows = np.argmin(finalScores,axis=1)
bestMatch = topMatches[bestRows,np.arange(numQueryImgs)]

In [14]:
# Each output row holds two integers: query index, matched reference index.
queryInds = np.arange(len(bestMatch))
# matches after re-ranking
np.savetxt("../out/matchesOut.txt",np.vstack([queryInds,bestMatch]).T,fmt='%d')
# baseline: first row of topMatches, i.e. the closest reference by global descriptor distance
np.savetxt("../out/matchesBaseline.txt",np.vstack([queryInds,topMatches[0,:]]).T,fmt='%d')