#### Docker command
docker run -it -p 127.0.0.1:8887:8888 -v ~/workspace/sequenceSLAM/src/ipy:/workspace -v ~/dataset/:/workspace/dataset/ souravgarg/opencv-python-udacity-caffe:cpu

nvidia-docker run -it -p 192.168.5.78:8887:8888 -v ~/workspace/dataset/:/workspace/dataset/ -v ~/workspace/:/workspace/ caffe-gpu-jupyter-opencv

In [None]:
import numpy as np
import os
import StringIO
import cv2
from matplotlib import pyplot as plt
%matplotlib inline

#### Set up input/output for annotations

In [None]:
folderPath = '../../dataset/transient_attributes/annotations/'
if os.path.exists(folderPath) is False:
    print("Folder path doesn't exist.")

# Attribute names, loaded as strings (presumably one name per line -- verify against the file)
attributes = np.loadtxt(folderPath+'attributes.txt',dtype='str')

annotationsFileName = folderPath+'annotations.tsv'

# Column 0 is the file name; every following attribute column holds 'score,conf'.
# Replacing the commas with tabs splits each of those into two columns, so a
# zero-based attribute index i has its score at column 2i+1 and its conf at 2i+2.
# The file is read inside a `with` block so the handle is closed right away.
with open(annotationsFileName) as annotationsFile:
    s = annotationsFile.read().replace(',','\t')
annotations = np.loadtxt(StringIO.StringIO(s),dtype='str')


#### Set up input/output for images 

In [None]:
# Folder the aligned input images are read from
folderPath2 = '../../dataset/transient_attributes/imageAlignedLD/'
if os.path.exists(folderPath2) is False:
    print("Folder path doesn't exist.")

# Root folder for the generated train/val/test image pairs
folderPath3 = '../../dataset/transient_attributes/outData/'
# Check and create every split folder on its own: testing only 'train/' would
# raise if 'val/' or 'test/' already existed, and os.mkdir cannot create the
# missing parent folder, so os.makedirs is used instead.
for splitName in ('train/', 'val/', 'test/'):
    if not os.path.isdir(folderPath3+splitName):
        os.makedirs(folderPath3+splitName)

# Text file (written into folderPath3) listing the source names of each pair
outImgNamesFile = 'concatImgsList.txt'

#### Collect unique webcams 

In [None]:
# Folder part of every annotated file name identifies the webcam
allCams = [os.path.split(elem)[0] for elem in annotations[:,0].tolist()]
# uniCams is sorted; retIdx holds the row of the first occurrence of each cam
uniCams, retIdx = np.unique(allCams,return_index=True)

# retIdx can only be used as split points when (a) the first occurrences are
# already in increasing row order and (b) every cam occupies one contiguous run
# of rows. Summing (sorted - original) is always zero for a permutation, so the
# order is compared element-wise and the number of runs is counted explicitly.
camNames = np.asarray(allCams)
numRuns = 1 + np.count_nonzero(camNames[1:] != camNames[:-1])
camsInOrder = np.array_equal(np.sort(retIdx),retIdx) and numRuns == len(uniCams)
if camsInOrder:
    print("cam folders are in order, retIdx can be used as splitting range for cam folder images")
else:
    print("WARNING: cam folders not in order")

# One annotation sub-array per cam folder
camFolderAnnot = np.split(annotations,retIdx[1:])
print(len(camFolderAnnot))

#### Set up variables 

In [None]:
# Attributes to pair up, given as indices into attributes.txt
# (1 is for daylight, 2 for night etc...)
desiredAttributes = [1, 2]

# Minimum number of confidence votes an annotation needs for the image to be considered
minConf = 5

# Minimum score (0 to 1) an annotation needs for the image to be considered
minScore = 0.8

#### Collect the image names for desired attribute based on confidence and score values 

In [None]:
def getIndicesForDesiredAttribute(inputArr,desiredAtt,minScore,minConf):
    """Return the image names whose annotation for one attribute passes both thresholds.

    inputArr   -- 2-D annotation array; column 0 is the image name and the
                  attribute with index `desiredAtt` has its score in column
                  2*desiredAtt+1 and its confidence in the column after it.
    desiredAtt -- index of the attribute to test.
    minScore   -- a row is kept only if its score is >= this value.
    minConf    -- a row is kept only if its confidence is >= this value.

    Returns the column-0 entries of the rows that satisfy both conditions.
    """
    scoreCol = 2*desiredAtt+1
    scoreAndConf = inputArr[:,scoreCol:scoreCol+2].astype(float)
    # Broadcasting compares every row against the same [minScore, minConf] pair
    thresholds = np.array([minScore,minConf]).astype(float)
    passed = np.greater_equal(scoreAndConf, thresholds)
    # A row is selected only when both the score and the conf test pass
    keepRows = np.logical_and(passed[:,0], passed[:,1])
    return inputArr[keepRows,0]

In [None]:
# For every desired attribute gather, per cam folder, the qualifying image
# names (desiredImgs) and how many there are (numImgs).
desiredImgs = []
numImgs = []
for att in desiredAttributes:
    print("Processing desired attribte ",att)
    # list of ndarrays, one array of image names per cam folder
    imgsList = [getIndicesForDesiredAttribute(camAnnot,att,minScore,minConf)
                for camAnnot in camFolderAnnot]
    num = np.array([names.shape[0] for names in imgsList])
    print("Number of images for each cam folder",num)
    desiredImgs.append(imgsList)
    numImgs.append(num)

#### Create pairs of image names for selected attribute pair 

In [None]:
# Build, per cam folder, a list of (first-attribute image, second-attribute image)
# name pairs. The attribute with more images is sampled without replacement
# (every image used once); the other one is sampled with replacement to match
# that count.
imgPairIndices = []
numPairs = 0
for index in range(numImgs[0].shape[0]):

    # A cam folder lacking images for either attribute yields no pairs
    if numImgs[0][index] == 0 or numImgs[1][index] == 0:
        imgPairIndices.append([])
        continue

    # greaterIdx: attribute with more images (drawn without replacement)
    # lesserIdx: attribute with fewer images (drawn with replacement)
    if numImgs[0][index] < numImgs[1][index]:
        greaterIdx, lesserIdx = 1, 0
    else:
        greaterIdx, lesserIdx = 0, 1

    # Get the random image names from both the attributes
    ind1 = np.random.choice(desiredImgs[greaterIdx][index],replace=False,size=numImgs[greaterIdx][index])
    ind2 = np.random.choice(desiredImgs[lesserIdx][index],replace=True,size=numImgs[greaterIdx][index])

    # Keep the first desired attribute on the left of every pair. The zip is
    # materialised as a list so the pairs can be iterated more than once
    # (a bare zip object is a one-shot iterator on Python 3).
    if greaterIdx == 0:
        imgPairIndices.append(list(zip(ind1,ind2)))
    else:
        imgPairIndices.append(list(zip(ind2,ind1)))

    numPairs += ind1.shape[0]

print("Total number of image pairs generated - ", numPairs)

#### Read images 

In [None]:
# Read every image pair, resize both images to 256x256, stack them side by side
# and write the result into train/ (first 65% of numPairs), val/ (next 15%) or
# test/ (remainder) under folderPath3. The output file name is the running
# counter, and concatImgNames keeps the source names in that same order.
counter = 0
concatImgNames = []
trainNum = 0.65*numPairs
valNum = 0.15*numPairs
testNum = numPairs - trainNum - valNum

for idx,arr in enumerate(imgPairIndices):
    print("Processing cam folder",idx)
    for pair in arr:

        img1 = cv2.imread(folderPath2+str(pair[0]))
        img2 = cv2.imread(folderPath2+str(pair[1]))

        # cv2.imread returns None instead of raising when a file cannot be
        # read; skip the pair so cv2.resize does not fail with an obscure error
        # and so concatImgNames stays aligned with the written files.
        if img1 is None or img2 is None:
            print("WARNING: could not read image pair, skipping", pair)
            continue

        img1 = cv2.resize(img1,(256,256))
        img2 = cv2.resize(img2,(256,256))

        imgPair = np.hstack([img1,img2])

        concatImgNames.append(pair)

        # Pick the split folder from the running counter
        writePath = folderPath3
        if counter < trainNum:
            writePath += 'train/'
        elif counter < (trainNum+valNum):
            writePath += 'val/'
        else:
            writePath += 'test/'

        cv2.imwrite(writePath+str(counter)+'.jpg',imgPair)

        counter += 1

# Row k of this file holds the two source names of image k.jpg
np.savetxt(folderPath3+outImgNamesFile,concatImgNames,fmt='%s')

#### Create another test dataset with some random images  

In [None]:
# Turn every consecutively numbered image test2/1.png, test2/2.png, ... into a
# 256x256 image duplicated side by side, saved as test2/<n>_.png. Stops at the
# first number for which no readable image exists.
counter = 0
while True:
    counter += 1
    img = cv2.imread(folderPath3+'test2/'+str(counter)+'.png')

    # Identity test: `img == None` on a numpy array is an element-wise
    # comparison whose truth value is ambiguous, so it cannot be used in `if`.
    if img is None:
        break

    img = cv2.resize(img,(256,256))
    img = np.hstack([img,img])
    cv2.imwrite(folderPath3+'test2/'+str(counter)+'_.png',img)

### Prepare data from two videos given corresponding frame indices 

#### Initialize variables 

In [None]:
# Root folder of the dataset; NOTE: this rebinds folderPath used by earlier cells
folderPath = "../../dataset/slamData/surfers-paradise/"

# The two videos to pair up (file names relative to folderPath)
vid1File = 'day.avi'
vid2File = 'night.avi'

# Ground-truth index files belonging to each video
originalGTindices1File = 'gt-surfers-paradise-day.txt'
originalGTindices2File = 'gt-surfers-paradise-night.txt'

# Interpolated ground-truth file
interpGTFile = 'gt-surfers-paradise.txt'

#### Read data 

In [None]:
# Matching pairs taken from the original, manually selected ground truth:
# row k pairs column 0 of the first file with column 0 of the second file
# (presumably frame indices -- confirm against the ground-truth files).
originalGTindices1 = np.loadtxt(folderPath+originalGTindices1File,dtype=int)
originalGTindices2 = np.loadtxt(folderPath+originalGTindices2File,dtype=int)

indicesPair = np.column_stack((originalGTindices1[:,0],originalGTindices2[:,0]))
print(indicesPair.shape)


# Additional pairs: numPairs rows drawn at random (repeats possible) from the
# interpolated ground truth, keeping its first two columns.
# NOTE: numPairs is rebound here and no longer holds the earlier pair count.
interGT = np.loadtxt(folderPath+interpGTFile,dtype=int)
numPairs = 100
randomRows = np.random.randint(0,interGT.shape[0],numPairs)
randomGT = interGT[randomRows]
indicesPairRandom = np.column_stack((randomGT[:,0],randomGT[:,1]))
print(indicesPairRandom.shape)

#### Read images 

In [None]:
def getImagesFromVid(video,indices,imgOutPath):
    """Save the requested frames of a video as 256x256 PNG files.

    video      -- object whose read() returns (ret, img), e.g. cv2.VideoCapture;
                  frames are read sequentially from its current position.
    indices    -- frame numbers to save; duplicates are ignored.
    imgOutPath -- path prefix for the output; frame n is written to
                  imgOutPath + str(n) + '.png'.

    Reading stops after the largest requested frame, or earlier when the video
    has no more frames (a message with the number of frames read is printed).
    Returns None.
    """
    indices = np.unique(indices)
    print(indices.shape)
    if indices.size == 0:
        # Nothing requested; indices[-1] below would raise IndexError
        return

    # Set membership instead of a moving cursor: an index that can never be
    # reached (e.g. a negative one) no longer blocks all the following ones.
    wanted = set(indices.tolist())
    for frameIdx in range(int(indices[-1])+1):
        ret, img = video.read()
        if not ret:
            print("read %d images, no more images..."%frameIdx)
            break
        if frameIdx in wanted:
            img = cv2.resize(img,(256,256))
            cv2.imwrite(imgOutPath+str(frameIdx)+'.png',img)

In [None]:
# Extract the needed frames of the first (day) video into images/day/
imgOutPath1 = folderPath + 'images/day/'
# cv2.imwrite does not create missing folders (it just reports failure through
# its return value), so make sure the output folder exists first.
if not os.path.isdir(imgOutPath1):
    os.makedirs(imgOutPath1)
cap1 = cv2.VideoCapture(folderPath+vid1File)
# Frames needed: manually selected ones plus the randomly drawn ones
indices1 = np.concatenate([originalGTindices1[:,0],randomGT[:,0]])
try:
    getImagesFromVid(cap1,indices1,imgOutPath1)
finally:
    # Release the video file handle even if extraction fails
    cap1.release()

In [None]:
# Extract the needed frames of the second (night) video into images/night/
imgOutPath2 = folderPath + 'images/night/'
# cv2.imwrite does not create missing folders (it just reports failure through
# its return value), so make sure the output folder exists first.
if not os.path.isdir(imgOutPath2):
    os.makedirs(imgOutPath2)
cap2 = cv2.VideoCapture(folderPath+vid2File)
# Frames needed: manually selected ones plus the randomly drawn ones
indices2 = np.concatenate([originalGTindices2[:,0],randomGT[:,1]])
try:
    getImagesFromVid(cap2,indices2,imgOutPath2)
finally:
    # Release the video file handle even if extraction fails
    cap2.release()

In [None]:
# Stack the extracted frame of the first video next to the matching frame of
# the second video for every pair and write the result to images/trainData/
# as <counter>.jpg.
allPairs = np.vstack([indicesPair,indicesPairRandom])

trainDataPath = folderPath+'images/trainData/'
# cv2.imwrite does not create missing folders, so make sure it exists
if not os.path.isdir(trainDataPath):
    os.makedirs(trainDataPath)

counter = 0
for pair in allPairs:
    img1 = cv2.imread(imgOutPath1+str(pair[0])+'.png')
    img2 = cv2.imread(imgOutPath2+str(pair[1])+'.png')

    # cv2.imread returns None for a frame that was never extracted (e.g. an
    # index beyond the end of the video); skip it instead of stacking None.
    if img1 is None or img2 is None:
        print("WARNING: missing extracted frame for pair, skipping", pair)
        continue

    imgPair = np.hstack([img1,img2])

    cv2.imwrite(trainDataPath+str(counter)+'.jpg',imgPair)

    counter += 1