## Implementation using VLAD Encoding

In [16]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
import scipy.spatial.distance as spdistance
import os
from sklearn import mixture
import math
import copy
import gzip
import pickle

In [4]:
### First step is to extract the features using ROOT_SIFT
class FeatureEx():
    
    def __init__(self):
        self.sift = cv2.xfeatures2d.SIFT_create()
        
    def extract(self, img):
        keypoints, img_features = self.sift.detectAndCompute(img, None)
        return keypoints, img_features
    
    #### For computing ROOT_SIFT feature
    def compute(self, img):
        img = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
        if img is None:
            return "No image"
        
        kpts, descriptors = self.extract(img)
        
        ## Hellinger normalization
        descriptors += np.finfo(np.float32).eps
        descriptors /= np.sum(descriptors, axis=1)[:, np.newaxis]
        descriptors = np.sqrt(descriptors)
        
        return (kpts, descriptors)

In [5]:
def get_feature_matrix(path, max_desc=150000, max_desc_per_file=150):
    'path denotes the path to the image files'
    fEx = FeatureEx() #creating Feature extraction object
    
    directory = path
    
    # features is a matrix of features(row-wise)
    features = np.asarray(())
    
    for file in os.listdir(directory):
        print (file)
        if file.endswith(".tif"): 
            print(os.path.join(directory, file))
            filePath = os.path.join(directory, file)
            
            kpts, descriptors = fEx.compute(filePath)
            if descriptors.shape[0] > max_desc_per_file:
                'select the random max_desc_per_file from the descriptors'
                idx = np.random.randint(descriptors.shape[0], size=max_desc_per_file)
                descriptors = descriptors[idx, :]
                
            if features.size == 0:
                features = descriptors
            else:
                if features.shape[0] + descriptors.shape[0] <= max_desc:
                    features = np.vstack((features, descriptors))
                else:
                    break
        else:
            continue
    
    return features


In [6]:
def compute_gmm_params(features):
    'features is a matrix of features(row-wise) to train gmm model'
    
    ## using default number of clusters = 100 as is used in paper
    gmm = mixture.GaussianMixture(n_components=100, covariance_type='diag')
    gmm.fit(features)
    
    return gmm

In [7]:
# Let max_desc = 150000 for training ubm
# And max_desc_per_file = max_desc/no_of_files = 150000/1000 in this case
features = get_feature_matrix('../icdar2013_benchmarking_dataset/')
gmm = compute_gmm_params(features)

139_4.tif
../icdar2013_benchmarking_dataset/139_4.tif
148_4.tif
../icdar2013_benchmarking_dataset/148_4.tif
163_3.tif
../icdar2013_benchmarking_dataset/163_3.tif
105_1.tif
../icdar2013_benchmarking_dataset/105_1.tif
140_4.tif
../icdar2013_benchmarking_dataset/140_4.tif
207_1.tif
../icdar2013_benchmarking_dataset/207_1.tif
040_3.tif
../icdar2013_benchmarking_dataset/040_3.tif
125_2.tif
../icdar2013_benchmarking_dataset/125_2.tif
012_1.tif
../icdar2013_benchmarking_dataset/012_1.tif
001_3.tif
../icdar2013_benchmarking_dataset/001_3.tif
182_2.tif
../icdar2013_benchmarking_dataset/182_2.tif
092_2.tif
../icdar2013_benchmarking_dataset/092_2.tif
175_2.tif
../icdar2013_benchmarking_dataset/175_2.tif
054_1.tif
../icdar2013_benchmarking_dataset/054_1.tif
099_4.tif
../icdar2013_benchmarking_dataset/099_4.tif
184_4.tif
../icdar2013_benchmarking_dataset/184_4.tif
106_3.tif
../icdar2013_benchmarking_dataset/106_3.tif
070_1.tif
../icdar2013_benchmarking_dataset/070_1.tif
050_2.tif
../icdar2013_bench

005_2.tif
../icdar2013_benchmarking_dataset/005_2.tif
109_2.tif
../icdar2013_benchmarking_dataset/109_2.tif
150_4.tif
../icdar2013_benchmarking_dataset/150_4.tif
194_4.tif
../icdar2013_benchmarking_dataset/194_4.tif
248_3.tif
../icdar2013_benchmarking_dataset/248_3.tif
160_3.tif
../icdar2013_benchmarking_dataset/160_3.tif
139_1.tif
../icdar2013_benchmarking_dataset/139_1.tif
067_2.tif
../icdar2013_benchmarking_dataset/067_2.tif
128_3.tif
../icdar2013_benchmarking_dataset/128_3.tif
212_2.tif
../icdar2013_benchmarking_dataset/212_2.tif
127_2.tif
../icdar2013_benchmarking_dataset/127_2.tif
236_3.tif
../icdar2013_benchmarking_dataset/236_3.tif
231_4.tif
../icdar2013_benchmarking_dataset/231_4.tif
029_2.tif
../icdar2013_benchmarking_dataset/029_2.tif
220_2.tif
../icdar2013_benchmarking_dataset/220_2.tif
189_1.tif
../icdar2013_benchmarking_dataset/189_1.tif
110_1.tif
../icdar2013_benchmarking_dataset/110_1.tif
059_4.tif
../icdar2013_benchmarking_dataset/059_4.tif
205_1.tif
../icdar2013_bench

065_4.tif
../icdar2013_benchmarking_dataset/065_4.tif
226_2.tif
../icdar2013_benchmarking_dataset/226_2.tif
040_2.tif
../icdar2013_benchmarking_dataset/040_2.tif
004_2.tif
../icdar2013_benchmarking_dataset/004_2.tif
130_3.tif
../icdar2013_benchmarking_dataset/130_3.tif
036_3.tif
../icdar2013_benchmarking_dataset/036_3.tif
120_1.tif
../icdar2013_benchmarking_dataset/120_1.tif
019_1.tif
../icdar2013_benchmarking_dataset/019_1.tif
026_4.tif
../icdar2013_benchmarking_dataset/026_4.tif
088_3.tif
../icdar2013_benchmarking_dataset/088_3.tif
003_3.tif
../icdar2013_benchmarking_dataset/003_3.tif
219_2.tif
../icdar2013_benchmarking_dataset/219_2.tif
225_2.tif
../icdar2013_benchmarking_dataset/225_2.tif
160_2.tif
../icdar2013_benchmarking_dataset/160_2.tif
018_3.tif
../icdar2013_benchmarking_dataset/018_3.tif
158_4.tif
../icdar2013_benchmarking_dataset/158_4.tif
072_2.tif
../icdar2013_benchmarking_dataset/072_2.tif
102_1.tif
../icdar2013_benchmarking_dataset/102_1.tif
113_1.tif
../icdar2013_bench

141_4.tif
../icdar2013_benchmarking_dataset/141_4.tif
210_1.tif
../icdar2013_benchmarking_dataset/210_1.tif
209_4.tif
../icdar2013_benchmarking_dataset/209_4.tif
133_4.tif
../icdar2013_benchmarking_dataset/133_4.tif
197_1.tif
../icdar2013_benchmarking_dataset/197_1.tif
005_3.tif
../icdar2013_benchmarking_dataset/005_3.tif
222_2.tif
../icdar2013_benchmarking_dataset/222_2.tif
177_4.tif
../icdar2013_benchmarking_dataset/177_4.tif
005_4.tif
../icdar2013_benchmarking_dataset/005_4.tif
043_3.tif
../icdar2013_benchmarking_dataset/043_3.tif
082_2.tif
../icdar2013_benchmarking_dataset/082_2.tif
032_1.tif
../icdar2013_benchmarking_dataset/032_1.tif
161_2.tif
../icdar2013_benchmarking_dataset/161_2.tif
232_2.tif
../icdar2013_benchmarking_dataset/232_2.tif
240_4.tif
../icdar2013_benchmarking_dataset/240_4.tif
191_4.tif
../icdar2013_benchmarking_dataset/191_4.tif
182_1.tif
../icdar2013_benchmarking_dataset/182_1.tif
080_3.tif
../icdar2013_benchmarking_dataset/080_3.tif
249_4.tif
../icdar2013_bench

136_2.tif
../icdar2013_benchmarking_dataset/136_2.tif
017_3.tif
../icdar2013_benchmarking_dataset/017_3.tif
085_3.tif
../icdar2013_benchmarking_dataset/085_3.tif
016_3.tif
../icdar2013_benchmarking_dataset/016_3.tif
233_3.tif
../icdar2013_benchmarking_dataset/233_3.tif
182_4.tif
../icdar2013_benchmarking_dataset/182_4.tif
151_1.tif
../icdar2013_benchmarking_dataset/151_1.tif
114_4.tif
../icdar2013_benchmarking_dataset/114_4.tif
051_1.tif
../icdar2013_benchmarking_dataset/051_1.tif
233_4.tif
../icdar2013_benchmarking_dataset/233_4.tif
108_1.tif
../icdar2013_benchmarking_dataset/108_1.tif
053_1.tif
../icdar2013_benchmarking_dataset/053_1.tif
030_3.tif
../icdar2013_benchmarking_dataset/030_3.tif
119_3.tif
../icdar2013_benchmarking_dataset/119_3.tif
049_2.tif
../icdar2013_benchmarking_dataset/049_2.tif
196_1.tif
../icdar2013_benchmarking_dataset/196_1.tif
079_2.tif
../icdar2013_benchmarking_dataset/079_2.tif
237_1.tif
../icdar2013_benchmarking_dataset/237_1.tif
116_2.tif
../icdar2013_bench

014_2.tif
../icdar2013_benchmarking_dataset/014_2.tif
146_3.tif
../icdar2013_benchmarking_dataset/146_3.tif
128_2.tif
../icdar2013_benchmarking_dataset/128_2.tif
231_2.tif
../icdar2013_benchmarking_dataset/231_2.tif
197_4.tif
../icdar2013_benchmarking_dataset/197_4.tif
213_2.tif
../icdar2013_benchmarking_dataset/213_2.tif
204_4.tif
../icdar2013_benchmarking_dataset/204_4.tif
137_3.tif
../icdar2013_benchmarking_dataset/137_3.tif
195_1.tif
../icdar2013_benchmarking_dataset/195_1.tif
157_4.tif
../icdar2013_benchmarking_dataset/157_4.tif
095_4.tif
../icdar2013_benchmarking_dataset/095_4.tif
100_2.tif
../icdar2013_benchmarking_dataset/100_2.tif
085_1.tif
../icdar2013_benchmarking_dataset/085_1.tif
097_4.tif
../icdar2013_benchmarking_dataset/097_4.tif
202_2.tif
../icdar2013_benchmarking_dataset/202_2.tif
040_4.tif
../icdar2013_benchmarking_dataset/040_4.tif
245_1.tif
../icdar2013_benchmarking_dataset/245_1.tif
093_3.tif
../icdar2013_benchmarking_dataset/093_3.tif
092_1.tif
../icdar2013_bench

014_3.tif
../icdar2013_benchmarking_dataset/014_3.tif
245_3.tif
../icdar2013_benchmarking_dataset/245_3.tif
047_4.tif
../icdar2013_benchmarking_dataset/047_4.tif
116_4.tif
../icdar2013_benchmarking_dataset/116_4.tif
158_3.tif
../icdar2013_benchmarking_dataset/158_3.tif
239_3.tif
../icdar2013_benchmarking_dataset/239_3.tif
010_2.tif
../icdar2013_benchmarking_dataset/010_2.tif
054_4.tif
../icdar2013_benchmarking_dataset/054_4.tif
002_4.tif
../icdar2013_benchmarking_dataset/002_4.tif
055_3.tif
../icdar2013_benchmarking_dataset/055_3.tif
061_1.tif
../icdar2013_benchmarking_dataset/061_1.tif
083_3.tif
../icdar2013_benchmarking_dataset/083_3.tif
021_4.tif
../icdar2013_benchmarking_dataset/021_4.tif
148_2.tif
../icdar2013_benchmarking_dataset/148_2.tif
010_4.tif
../icdar2013_benchmarking_dataset/010_4.tif
014_1.tif
../icdar2013_benchmarking_dataset/014_1.tif
076_4.tif
../icdar2013_benchmarking_dataset/076_4.tif
225_4.tif
../icdar2013_benchmarking_dataset/225_4.tif
244_3.tif
../icdar2013_bench



In [8]:
def vlad(data, means, assignments, components, normalize=['l2c']):
    def encode(k):
        uk_ = assignments[:,k].T.dot(data)        

        clustermass = assignments[:,k].sum()
        if clustermass > 0:
            uk_ -= clustermass * means[k]

        if 'l2c' in normalize:
            n = max(math.sqrt(np.sum(uk_ * uk_)), 1e-12)
            uk_ /= n

        return uk_

    uk = list(map(encode, range(components)))
    fin_enc = np.concatenate(uk, axis=0).reshape(1,-1)
    fin_enc = np.sign(fin_enc) * np.sqrt(np.abs(fin_enc))
    fin_enc = preprocessing.normalize(fin_enc)

    return fin_enc

In [9]:
fEx = FeatureEx()
filePath = '../icdar2013_benchmarking_dataset/001_1.tif'
kpts, data = fEx.compute(filePath)

posteriors = gmm.predict_proba(data)
enc = vlad(data, gmm.means_, posteriors, gmm.means_.shape[0]) 
print(enc)

[[-0.01270613 -0.00752948 -0.00349255 ...  0.00822349  0.00864072
  -0.00536022]]


In [10]:
def ubm_adaptation(path, outDir, gmm):
    'path denotes the path to the image files'
    fEx = FeatureEx() # creating Feature extraction object
    
    directory = path

    for file in os.listdir(directory):
        print (file)
        if file.endswith(".tif"): 
            print(os.path.join(directory, file))
            fp = os.path.join(directory, file)
            print(fp)
            kpts, data = fEx.compute(fp)
            
            posteriors = gmm.predict_proba(data)
            enc = vlad(data, gmm.means_, posteriors, gmm.means_.shape[0]) 
            print(enc)
            
            fileName, file_ext = os.path.splitext(file)
            
            if not os.path.exists(outDir):
                os.makedirs(outDir)
            
            filepath = os.path.join(outDir, fileName + '.pkl.gz')
            with gzip.open(filepath, 'wb') as f:
                pickle.dump(enc, f, -1)
        else:
            continue

In [11]:
ubm_adaptation('../experimental_dataset_2013/', '../outTestVoc', gmm)

105_1.tif
../experimental_dataset_2013/105_1.tif
../experimental_dataset_2013/105_1.tif
[[ 0.01289462 -0.01035107 -0.0075466  ... -0.00462059  0.01308311
   0.01194765]]
040_3.tif
../experimental_dataset_2013/040_3.tif
../experimental_dataset_2013/040_3.tif
[[ 0.01497958  0.00602547 -0.01159657 ...  0.00486668 -0.01165884
  -0.01502499]]
125_2.tif
../experimental_dataset_2013/125_2.tif
../experimental_dataset_2013/125_2.tif
[[-0.01364407 -0.01391162 -0.00968708 ...  0.00600871 -0.00324186
   0.00241186]]
092_2.tif
../experimental_dataset_2013/092_2.tif
../experimental_dataset_2013/092_2.tif
[[-0.014017   -0.00994662  0.00292597 ... -0.01283861 -0.01445962
  -0.01306218]]
054_1.tif
../experimental_dataset_2013/054_1.tif
../experimental_dataset_2013/054_1.tif
[[ 0.00771045 -0.00937594 -0.01153465 ...  0.01547084  0.01048596
  -0.01479227]]
099_4.tif
../experimental_dataset_2013/099_4.tif
../experimental_dataset_2013/099_4.tif
[[-0.01168906 -0.01250762 -0.00602511 ... -0.01681031 -0.01064

[[-0.00312483  0.01046438 -0.0120128  ... -0.01015697  0.00633915
   0.0027868 ]]
096_3.tif
../experimental_dataset_2013/096_3.tif
../experimental_dataset_2013/096_3.tif
[[ 0.00897749  0.0104112  -0.00950987 ... -0.01336717 -0.01214613
   0.00839025]]
073_3.tif
../experimental_dataset_2013/073_3.tif
../experimental_dataset_2013/073_3.tif
[[ 0.00812246 -0.00643859 -0.01282678 ... -0.01628999 -0.01365116
  -0.00850957]]
101_4.tif
../experimental_dataset_2013/101_4.tif
../experimental_dataset_2013/101_4.tif
[[-0.00867182 -0.00947182  0.00707213 ...  0.00515221  0.00936491
   0.01423039]]
038_3.tif
../experimental_dataset_2013/038_3.tif
../experimental_dataset_2013/038_3.tif
[[-0.00306076  0.01056806 -0.00826599 ...  0.0074407   0.00817699
  -0.01239585]]
077_1.tif
../experimental_dataset_2013/077_1.tif
../experimental_dataset_2013/077_1.tif
[[-0.01495004  0.01401075  0.01634741 ...  0.00858826  0.01931991
   0.00686051]]
102_3.tif
../experimental_dataset_2013/102_3.tif
../experimental_dat

[[-0.01521551  0.00609195  0.01262874 ... -0.01137483  0.01225807
   0.0192488 ]]
039_1.tif
../experimental_dataset_2013/039_1.tif
../experimental_dataset_2013/039_1.tif
[[-0.01451746 -0.00964363  0.00773394 ...  0.00751418  0.00925029
   0.01566455]]
088_4.tif
../experimental_dataset_2013/088_4.tif
../experimental_dataset_2013/088_4.tif
[[-0.01546538  0.01125002  0.01409346 ... -0.01165982 -0.00763183
  -0.0099877 ]]
088_1.tif
../experimental_dataset_2013/088_1.tif
../experimental_dataset_2013/088_1.tif
[[-0.01212149 -0.00700573  0.01003642 ...  0.00940101  0.01639769
   0.00415586]]
062_4.tif
../experimental_dataset_2013/062_4.tif
../experimental_dataset_2013/062_4.tif
[[-0.01193977 -0.00312802  0.0133824  ... -0.01087961 -0.01198023
  -0.00692033]]
055_4.tif
../experimental_dataset_2013/055_4.tif
../experimental_dataset_2013/055_4.tif
[[-0.01551998 -0.00711147 -0.00907343 ...  0.00491634 -0.00671417
  -0.01466877]]
068_3.tif
../experimental_dataset_2013/068_3.tif
../experimental_dat

[[-0.0086783  -0.00458342  0.01135412 ... -0.00698896 -0.00596265
  -0.01293623]]
058_3.tif
../experimental_dataset_2013/058_3.tif
../experimental_dataset_2013/058_3.tif
[[-0.00916142 -0.00914477  0.00794531 ...  0.00850499  0.01390649
   0.00547475]]
053_4.tif
../experimental_dataset_2013/053_4.tif
../experimental_dataset_2013/053_4.tif
[[-0.01045693  0.01040791 -0.00884076 ... -0.01325409 -0.00800381
   0.01053982]]
103_3.tif
../experimental_dataset_2013/103_3.tif
../experimental_dataset_2013/103_3.tif
[[ 0.00543728  0.01135783 -0.00661588 ...  0.00575099 -0.01384317
  -0.01250778]]
064_4.tif
../experimental_dataset_2013/064_4.tif
../experimental_dataset_2013/064_4.tif
[[-0.01618651 -0.01247457 -0.00884139 ...  0.00610477  0.01456371
   0.01001237]]
086_1.tif
../experimental_dataset_2013/086_1.tif
../experimental_dataset_2013/086_1.tif
[[-0.00892658 -0.01217842 -0.00443324 ...  0.0079357   0.01185864
  -0.0138923 ]]
079_3.tif
../experimental_dataset_2013/079_3.tif
../experimental_dat

[[-0.01207373 -0.00957434  0.01229974 ...  0.01012747 -0.00936625
  -0.01193123]]
125_4.tif
../experimental_dataset_2013/125_4.tif
../experimental_dataset_2013/125_4.tif
[[-0.01286945 -0.01025218 -0.00957036 ... -0.00748094 -0.00912804
   0.00730849]]
094_1.tif
../experimental_dataset_2013/094_1.tif
../experimental_dataset_2013/094_1.tif
[[-0.00640893  0.00935676  0.0102822  ... -0.00925623 -0.00615427
   0.00799044]]
029_3.tif
../experimental_dataset_2013/029_3.tif
../experimental_dataset_2013/029_3.tif
[[-0.00943334 -0.01012896  0.01495236 ... -0.00350206  0.00862163
   0.01424249]]
083_1.tif
../experimental_dataset_2013/083_1.tif
../experimental_dataset_2013/083_1.tif
[[-0.00519183 -0.0083417   0.01201486 ...  0.014031   -0.0033002
  -0.00710303]]
085_4.tif
../experimental_dataset_2013/085_4.tif
../experimental_dataset_2013/085_4.tif
[[-0.00558921  0.00353731 -0.00893947 ... -0.00314129 -0.00468353
  -0.01060212]]
046_4.tif
../experimental_dataset_2013/046_4.tif
../experimental_data

[[-0.01800252 -0.0153366   0.01176696 ...  0.00623794  0.01892048
   0.00886235]]
085_3.tif
../experimental_dataset_2013/085_3.tif
../experimental_dataset_2013/085_3.tif
[[-0.01182444  0.0080435   0.00774934 ... -0.00981116 -0.00448905
  -0.00885921]]
114_4.tif
../experimental_dataset_2013/114_4.tif
../experimental_dataset_2013/114_4.tif
[[-0.01216411 -0.01537856  0.00932019 ... -0.01318959 -0.01464432
  -0.01311232]]
051_1.tif
../experimental_dataset_2013/051_1.tif
../experimental_dataset_2013/051_1.tif
[[ 0.01489239 -0.00281715  0.00238738 ...  0.00630254 -0.01142889
  -0.01002664]]
108_1.tif
../experimental_dataset_2013/108_1.tif
../experimental_dataset_2013/108_1.tif
[[-0.00427369 -0.01210824  0.00072605 ...  0.01159076 -0.00667261
  -0.01317638]]
053_1.tif
../experimental_dataset_2013/053_1.tif
../experimental_dataset_2013/053_1.tif
[[-0.01771241  0.00943712  0.01499226 ... -0.00820553  0.01279526
   0.01042412]]
030_3.tif
../experimental_dataset_2013/030_3.tif
../experimental_dat

[[ 0.001588    0.00841331  0.01469514 ...  0.00597209 -0.00738803
  -0.0095075 ]]
073_4.tif
../experimental_dataset_2013/073_4.tif
../experimental_dataset_2013/073_4.tif
[[-0.00778672 -0.01456016 -0.01088731 ... -0.00878765  0.00429915
  -0.00962179]]
118_4.tif
../experimental_dataset_2013/118_4.tif
../experimental_dataset_2013/118_4.tif
[[-0.01382078  0.00855103  0.00865972 ... -0.00866717 -0.01201692
  -0.00633893]]
076_3.tif
../experimental_dataset_2013/076_3.tif
../experimental_dataset_2013/076_3.tif
[[-0.01390023  0.00570496  0.01348467 ...  0.01241977  0.00824502
  -0.01189696]]
119_1.tif
../experimental_dataset_2013/119_1.tif
../experimental_dataset_2013/119_1.tif
[[ 0.00732161  0.00588537 -0.00750229 ... -0.00887032 -0.00985756
   0.00626754]]
068_4.tif
../experimental_dataset_2013/068_4.tif
../experimental_dataset_2013/068_4.tif
[[-0.01388471 -0.00939379  0.01598413 ... -0.01010328  0.01384444
   0.01581437]]
107_1.tif
../experimental_dataset_2013/107_1.tif
../experimental_dat

[[-0.01156595  0.01145009  0.01892066 ...  0.00737757  0.01802158
   0.01636709]]
103_1.tif
../experimental_dataset_2013/103_1.tif
../experimental_dataset_2013/103_1.tif
[[ 0.01348739  0.01584417  0.01104673 ... -0.01230496 -0.00829375
  -0.01471716]]
071_3.tif
../experimental_dataset_2013/071_3.tif
../experimental_dataset_2013/071_3.tif
[[-0.00962595 -0.01266006  0.01345792 ...  0.00887073  0.01239968
   0.01435224]]
102_2.tif
../experimental_dataset_2013/102_2.tif
../experimental_dataset_2013/102_2.tif
[[ 0.01157592  0.01068852 -0.00531928 ...  0.00441137 -0.00792815
  -0.00556129]]
112_4.tif
../experimental_dataset_2013/112_4.tif
../experimental_dataset_2013/112_4.tif
[[-0.01042796  0.00616132  0.01303832 ... -0.01192343 -0.01306738
  -0.00632812]]
092_4.tif
../experimental_dataset_2013/092_4.tif
../experimental_dataset_2013/092_4.tif
[[-0.01421699 -0.01487747 -0.00654953 ...  0.01105962  0.00616861
  -0.0062341 ]]
057_3.tif
../experimental_dataset_2013/057_3.tif
../experimental_dat

[[-0.01016842  0.0099708   0.01996087 ... -0.00595968  0.01090156
   0.01479348]]
080_4.tif
../experimental_dataset_2013/080_4.tif
../experimental_dataset_2013/080_4.tif
[[ 0.00771342  0.00067451 -0.00903971 ...  0.00798063  0.00341398
  -0.00811974]]
081_4.tif
../experimental_dataset_2013/081_4.tif
../experimental_dataset_2013/081_4.tif
[[-0.01275691 -0.00895425  0.01444866 ...  0.01381844  0.00865658
  -0.01300507]]
112_1.tif
../experimental_dataset_2013/112_1.tif
../experimental_dataset_2013/112_1.tif
[[-0.01655499 -0.01576037 -0.00746449 ...  0.0099202  -0.00734861
  -0.00962346]]
031_4.tif
../experimental_dataset_2013/031_4.tif
../experimental_dataset_2013/031_4.tif
[[ 0.00985034  0.01262454  0.003802   ... -0.001649   -0.00371427
  -0.01111184]]
063_3.tif
../experimental_dataset_2013/063_3.tif
../experimental_dataset_2013/063_3.tif
[[-0.01306003 -0.00610024  0.01001142 ...  0.01013856  0.00502854
  -0.00394871]]
044_1.tif
../experimental_dataset_2013/044_1.tif
../experimental_dat

In [12]:
enc = np.array([1, 2, 3])
enc = enc.reshape(1, 3)
directory = '../outVoc'
if not os.path.exists(directory):
    os.makedirs(directory)
filePath = '../outVoc/file.pkl.gz'
with gzip.open(filePath, 'wb') as f:
    pickle.dump(enc, f, -1)

In [13]:
ubm_adaptation('../icdar2013_benchmarking_dataset/', '../outTrainVoc', gmm)

139_4.tif
../icdar2013_benchmarking_dataset/139_4.tif
../icdar2013_benchmarking_dataset/139_4.tif
[[-0.01625872 -0.01330348 -0.00570675 ... -0.00909988 -0.00740817
  -0.00419516]]
148_4.tif
../icdar2013_benchmarking_dataset/148_4.tif
../icdar2013_benchmarking_dataset/148_4.tif
[[-0.0139771  -0.01473832 -0.00969678 ... -0.00785636 -0.0136693
  -0.01050402]]
163_3.tif
../icdar2013_benchmarking_dataset/163_3.tif
../icdar2013_benchmarking_dataset/163_3.tif
[[-0.01320421 -0.00444787  0.01497272 ... -0.01022958 -0.00766311
   0.00431748]]
105_1.tif
../icdar2013_benchmarking_dataset/105_1.tif
../icdar2013_benchmarking_dataset/105_1.tif
[[-0.01939105 -0.00929082 -0.00570609 ... -0.00586701  0.01095674
   0.00335065]]
140_4.tif
../icdar2013_benchmarking_dataset/140_4.tif
../icdar2013_benchmarking_dataset/140_4.tif
[[-0.01192809 -0.00398105  0.01529813 ... -0.00728612 -0.00946698
  -0.0075454 ]]
207_1.tif
../icdar2013_benchmarking_dataset/207_1.tif
../icdar2013_benchmarking_dataset/207_1.tif
[[-

[[-0.01434801  0.00867003  0.01939859 ...  0.00675616 -0.00924044
  -0.00978882]]
112_3.tif
../icdar2013_benchmarking_dataset/112_3.tif
../icdar2013_benchmarking_dataset/112_3.tif
[[-0.01112702 -0.0109464   0.01215346 ... -0.00865522 -0.00751229
   0.00308086]]
188_1.tif
../icdar2013_benchmarking_dataset/188_1.tif
../icdar2013_benchmarking_dataset/188_1.tif
[[ 0.01468033 -0.01431172 -0.00764655 ...  0.01491055 -0.00196211
  -0.00892819]]
148_1.tif
../icdar2013_benchmarking_dataset/148_1.tif
../icdar2013_benchmarking_dataset/148_1.tif
[[-0.01042052 -0.01224605  0.01028489 ...  0.0114924   0.01353825
  -0.0089806 ]]
111_3.tif
../icdar2013_benchmarking_dataset/111_3.tif
../icdar2013_benchmarking_dataset/111_3.tif
[[ 0.01458958  0.00159187 -0.00398124 ...  0.01123908 -0.01097713
  -0.01250272]]
169_1.tif
../icdar2013_benchmarking_dataset/169_1.tif
../icdar2013_benchmarking_dataset/169_1.tif
[[-0.00782925 -0.00952323 -0.00868409 ... -0.00379334 -0.00888187
   0.00916779]]
144_3.tif
../icdar

[[-0.00670678 -0.00649461  0.00696694 ...  0.00974219  0.00195925
  -0.00579608]]
069_3.tif
../icdar2013_benchmarking_dataset/069_3.tif
../icdar2013_benchmarking_dataset/069_3.tif
[[-0.01382432 -0.00862824  0.01815375 ...  0.00564495  0.00892504
   0.01009221]]
037_2.tif
../icdar2013_benchmarking_dataset/037_2.tif
../icdar2013_benchmarking_dataset/037_2.tif
[[ 0.00941306 -0.00541722  0.01042686 ... -0.00382889  0.00587764
  -0.01056662]]
077_2.tif
../icdar2013_benchmarking_dataset/077_2.tif
../icdar2013_benchmarking_dataset/077_2.tif
[[ 0.01757819  0.00433968 -0.01536324 ...  0.00869829 -0.00642485
  -0.01032371]]
087_4.tif
../icdar2013_benchmarking_dataset/087_4.tif
../icdar2013_benchmarking_dataset/087_4.tif
[[-0.00978512 -0.01309013  0.00729961 ... -0.01206926 -0.00949033
  -0.00976681]]
169_4.tif
../icdar2013_benchmarking_dataset/169_4.tif
../icdar2013_benchmarking_dataset/169_4.tif
[[ 0.00067934 -0.0133725  -0.00127257 ... -0.01520721 -0.00377141
  -0.00351573]]
088_2.tif
../icdar

[[-0.01407377 -0.01741626 -0.01218703 ...  0.0101585   0.01913413
   0.00842505]]
115_2.tif
../icdar2013_benchmarking_dataset/115_2.tif
../icdar2013_benchmarking_dataset/115_2.tif
[[-0.01039006  0.00898178  0.00932868 ...  0.00387103 -0.01463334
  -0.01725647]]
172_3.tif
../icdar2013_benchmarking_dataset/172_3.tif
../icdar2013_benchmarking_dataset/172_3.tif
[[ 0.0176709  -0.00557702 -0.01584649 ...  0.00609814 -0.0090274
  -0.01020139]]
105_2.tif
../icdar2013_benchmarking_dataset/105_2.tif
../icdar2013_benchmarking_dataset/105_2.tif
[[-0.01814793 -0.01099294  0.00891986 ... -0.01600477 -0.0114558
   0.00804543]]
064_3.tif
../icdar2013_benchmarking_dataset/064_3.tif
../icdar2013_benchmarking_dataset/064_3.tif
[[-0.0137196  -0.0146321  -0.01415529 ...  0.00973758 -0.01185125
  -0.01082616]]
183_2.tif
../icdar2013_benchmarking_dataset/183_2.tif
../icdar2013_benchmarking_dataset/183_2.tif
[[-0.01654852  0.00243906  0.01488094 ... -0.01149355 -0.01410645
  -0.01332706]]
084_2.tif
../icdar20

[[ 0.00323363 -0.00585611  0.0084087  ...  0.018558    0.01232678
  -0.00737761]]
098_3.tif
../icdar2013_benchmarking_dataset/098_3.tif
../icdar2013_benchmarking_dataset/098_3.tif
[[-0.01179016  0.00426398  0.01539927 ...  0.0056121   0.00898082
   0.00760055]]
171_2.tif
../icdar2013_benchmarking_dataset/171_2.tif
../icdar2013_benchmarking_dataset/171_2.tif
[[-0.01026213 -0.01040348 -0.00683396 ... -0.01053982  0.00579973
   0.01638348]]
193_4.tif
../icdar2013_benchmarking_dataset/193_4.tif
../icdar2013_benchmarking_dataset/193_4.tif
[[ 0.00275247 -0.0098412   0.01395814 ... -0.01263306  0.01350493
   0.00934908]]
090_2.tif
../icdar2013_benchmarking_dataset/090_2.tif
../icdar2013_benchmarking_dataset/090_2.tif
[[ 0.01466858  0.0029951  -0.01277294 ... -0.0039311  -0.01086885
   0.00297526]]
041_4.tif
../icdar2013_benchmarking_dataset/041_4.tif
../icdar2013_benchmarking_dataset/041_4.tif
[[-0.01454675 -0.01322877  0.00513072 ... -0.01107551 -0.00981707
  -0.00395029]]
119_4.tif
../icdar

[[-0.0150136  -0.0011421  -0.01380425 ... -0.01422511 -0.0145431
  -0.01469076]]
185_3.tif
../icdar2013_benchmarking_dataset/185_3.tif
../icdar2013_benchmarking_dataset/185_3.tif
[[ 0.00954126 -0.01292415 -0.00755973 ... -0.01393642 -0.00278071
   0.01309669]]
062_4.tif
../icdar2013_benchmarking_dataset/062_4.tif
../icdar2013_benchmarking_dataset/062_4.tif
[[-0.01576551  0.00931427 -0.00738098 ... -0.01066445 -0.01046928
   0.00213564]]
201_4.tif
../icdar2013_benchmarking_dataset/201_4.tif
../icdar2013_benchmarking_dataset/201_4.tif
[[-0.01156008 -0.00864605  0.00428385 ...  0.00605844  0.00559595
   0.01023581]]
055_4.tif
../icdar2013_benchmarking_dataset/055_4.tif
../icdar2013_benchmarking_dataset/055_4.tif
[[-0.01183484 -0.00677069  0.00900276 ... -0.00698649  0.00945743
  -0.0090038 ]]
022_2.tif
../icdar2013_benchmarking_dataset/022_2.tif
../icdar2013_benchmarking_dataset/022_2.tif
[[ 0.01922377  0.00483825 -0.01415947 ...  0.01551602  0.00802249
  -0.00931428]]
213_4.tif
../icdar2

[[-0.01051297 -0.00188118 -0.00733107 ... -0.01016204 -0.00558393
  -0.0064163 ]]
190_2.tif
../icdar2013_benchmarking_dataset/190_2.tif
../icdar2013_benchmarking_dataset/190_2.tif
[[-0.00451436 -0.01146317 -0.01456394 ... -0.01001927  0.01665109
   0.01423978]]
104_4.tif
../icdar2013_benchmarking_dataset/104_4.tif
../icdar2013_benchmarking_dataset/104_4.tif
[[ 0.01261654  0.01128939  0.01075401 ...  0.00363355 -0.01424958
  -0.01686324]]
179_1.tif
../icdar2013_benchmarking_dataset/179_1.tif
../icdar2013_benchmarking_dataset/179_1.tif
[[ 0.01586489  0.01386265 -0.01225503 ...  0.01191367 -0.01387915
  -0.01094552]]
091_2.tif
../icdar2013_benchmarking_dataset/091_2.tif
../icdar2013_benchmarking_dataset/091_2.tif
[[0.00614297 0.01034281 0.01219972 ... 0.00829334 0.01343123 0.01018939]]
213_3.tif
../icdar2013_benchmarking_dataset/213_3.tif
../icdar2013_benchmarking_dataset/213_3.tif
[[ 0.01041587 -0.01016447 -0.01048795 ... -0.0103519  -0.01085277
   0.00632574]]
022_1.tif
../icdar2013_ben

[[-0.01313279 -0.01077929 -0.00255482 ...  0.00848065 -0.0068793
  -0.0137067 ]]
082_3.tif
../icdar2013_benchmarking_dataset/082_3.tif
../icdar2013_benchmarking_dataset/082_3.tif
[[ 0.0127776  -0.01041554 -0.01719674 ...  0.00817919 -0.01023336
  -0.01320115]]
117_1.tif
../icdar2013_benchmarking_dataset/117_1.tif
../icdar2013_benchmarking_dataset/117_1.tif
[[ 0.00825577  0.01592655 -0.00590945 ... -0.00120486 -0.01588832
  -0.01322324]]
228_1.tif
../icdar2013_benchmarking_dataset/228_1.tif
../icdar2013_benchmarking_dataset/228_1.tif
[[-0.00330334 -0.01249994 -0.00635748 ... -0.01325784 -0.01197129
  -0.00814707]]
031_2.tif
../icdar2013_benchmarking_dataset/031_2.tif
../icdar2013_benchmarking_dataset/031_2.tif
[[ 0.01620582 -0.01102846 -0.01535891 ...  0.00074848 -0.01293643
  -0.01291899]]
120_3.tif
../icdar2013_benchmarking_dataset/120_3.tif
../icdar2013_benchmarking_dataset/120_3.tif
[[-0.01019635 -0.01185356 -0.01483271 ... -0.00516248  0.01664715
   0.01341762]]
216_2.tif
../icdar2

[[-0.01189624 -0.0150928  -0.00994959 ...  0.00974118  0.01229884
   0.01064947]]
231_1.tif
../icdar2013_benchmarking_dataset/231_1.tif
../icdar2013_benchmarking_dataset/231_1.tif
[[-0.00574273 -0.01370327  0.0018643  ...  0.01056686  0.01385652
   0.01039138]]
096_2.tif
../icdar2013_benchmarking_dataset/096_2.tif
../icdar2013_benchmarking_dataset/096_2.tif
[[-0.00663662 -0.01244483 -0.01116272 ...  0.0089234   0.01804376
   0.00594436]]
248_4.tif
../icdar2013_benchmarking_dataset/248_4.tif
../icdar2013_benchmarking_dataset/248_4.tif
[[ 0.0089897   0.00955056  0.00804265 ...  0.0128986  -0.00317082
  -0.00523362]]
135_2.tif
../icdar2013_benchmarking_dataset/135_2.tif
../icdar2013_benchmarking_dataset/135_2.tif
[[-0.00530199 -0.01258439 -0.01091411 ... -0.00814305 -0.00346989
   0.0113427 ]]
085_2.tif
../icdar2013_benchmarking_dataset/085_2.tif
../icdar2013_benchmarking_dataset/085_2.tif
[[-0.00319429  0.00619931 -0.0133131  ... -0.0073574  -0.00403708
   0.01330969]]
016_1.tif
../icdar

[[-0.0159257  -0.00973568  0.00361124 ...  0.00706903  0.00313351
   0.0084087 ]]
232_1.tif
../icdar2013_benchmarking_dataset/232_1.tif
../icdar2013_benchmarking_dataset/232_1.tif
[[-0.01256186 -0.01113703 -0.01259193 ... -0.00911359  0.00880785
   0.01052056]]
244_2.tif
../icdar2013_benchmarking_dataset/244_2.tif
../icdar2013_benchmarking_dataset/244_2.tif
[[-0.0112584   0.01254064  0.01269697 ... -0.01017529 -0.01057889
   0.00508351]]
039_3.tif
../icdar2013_benchmarking_dataset/039_3.tif
../icdar2013_benchmarking_dataset/039_3.tif
[[-0.01580167 -0.01062169  0.01160712 ... -0.01002269  0.01215612
  -0.00466693]]
010_3.tif
../icdar2013_benchmarking_dataset/010_3.tif
../icdar2013_benchmarking_dataset/010_3.tif
[[-0.01453879 -0.01543474  0.00161611 ...  0.00789243  0.00432162
  -0.00949265]]
095_3.tif
../icdar2013_benchmarking_dataset/095_3.tif
../icdar2013_benchmarking_dataset/095_3.tif
[[ 0.01433522 -0.00667961 -0.00968742 ... -0.01061316 -0.01170703
  -0.01203895]]
203_4.tif
../icdar

[[-0.00577997 -0.00711635  0.0147006  ... -0.00674941 -0.0091537
  -0.00935648]]
005_3.tif
../icdar2013_benchmarking_dataset/005_3.tif
../icdar2013_benchmarking_dataset/005_3.tif
[[-0.00234744 -0.01156414 -0.01339853 ...  0.00528228  0.01670406
   0.00514902]]
222_2.tif
../icdar2013_benchmarking_dataset/222_2.tif
../icdar2013_benchmarking_dataset/222_2.tif
[[-0.01797366 -0.00917966  0.01198783 ... -0.01427962 -0.01834436
  -0.00915297]]
177_4.tif
../icdar2013_benchmarking_dataset/177_4.tif
../icdar2013_benchmarking_dataset/177_4.tif
[[ 0.00696385 -0.006633   -0.0123772  ... -0.00952353 -0.012044
  -0.01283298]]
005_4.tif
../icdar2013_benchmarking_dataset/005_4.tif
../icdar2013_benchmarking_dataset/005_4.tif
[[-0.01199328 -0.00768381 -0.00513836 ... -0.01522497  0.00634599
  -0.01110829]]
043_3.tif
../icdar2013_benchmarking_dataset/043_3.tif
../icdar2013_benchmarking_dataset/043_3.tif
[[-0.01476685 -0.01486043  0.00984014 ... -0.00813846  0.01098878
   0.0056437 ]]
082_2.tif
../icdar201

[[-0.01679026 -0.01499182 -0.00782198 ... -0.01035356 -0.01488475
  -0.00177703]]
011_4.tif
../icdar2013_benchmarking_dataset/011_4.tif
../icdar2013_benchmarking_dataset/011_4.tif
[[-0.01640081 -0.00660476  0.00909022 ... -0.00782469  0.00985119
  -0.00292627]]
039_2.tif
../icdar2013_benchmarking_dataset/039_2.tif
../icdar2013_benchmarking_dataset/039_2.tif
[[-0.01471203 -0.0120706  -0.01307282 ...  0.01113197  0.00857806
  -0.01009547]]
071_2.tif
../icdar2013_benchmarking_dataset/071_2.tif
../icdar2013_benchmarking_dataset/071_2.tif
[[-0.00750978 -0.01409497 -0.01086216 ... -0.00984381 -0.01459393
  -0.01174174]]
035_4.tif
../icdar2013_benchmarking_dataset/035_4.tif
../icdar2013_benchmarking_dataset/035_4.tif
[[-0.0084349  -0.01290324 -0.00935492 ... -0.01369149 -0.01096981
   0.0057989 ]]
157_3.tif
../icdar2013_benchmarking_dataset/157_3.tif
../icdar2013_benchmarking_dataset/157_3.tif
[[-0.00865094 -0.01103782 -0.01180265 ... -0.01059078  0.0012818
  -0.01177887]]
124_4.tif
../icdar2

[[-0.01486139 -0.00668557 -0.00835518 ... -0.00887803 -0.0100101
  -0.00765785]]
187_4.tif
../icdar2013_benchmarking_dataset/187_4.tif
../icdar2013_benchmarking_dataset/187_4.tif
[[ 0.00653788  0.00502155  0.00458306 ... -0.01339562  0.00980602
   0.01225041]]
009_1.tif
../icdar2013_benchmarking_dataset/009_1.tif
../icdar2013_benchmarking_dataset/009_1.tif
[[-0.01640024 -0.00958338  0.01558257 ...  0.01029896  0.01449242
   0.00701587]]
229_2.tif
../icdar2013_benchmarking_dataset/229_2.tif
../icdar2013_benchmarking_dataset/229_2.tif
[[-0.01088346 -0.00987801  0.00698079 ...  0.01118874  0.01964394
   0.00969021]]
129_3.tif
../icdar2013_benchmarking_dataset/129_3.tif
../icdar2013_benchmarking_dataset/129_3.tif
[[ 0.01094983  0.00263446 -0.00573199 ...  0.00516069 -0.01637935
  -0.01089002]]
108_4.tif
../icdar2013_benchmarking_dataset/108_4.tif
../icdar2013_benchmarking_dataset/108_4.tif
[[-0.0153716  -0.00959929  0.01484315 ...  0.00697718 -0.00124202
   0.00426932]]
074_2.tif
../icdar2

[[-0.01173115 -0.00878871  0.01045597 ... -0.01004935  0.01558976
   0.01499962]]
033_1.tif
../icdar2013_benchmarking_dataset/033_1.tif
../icdar2013_benchmarking_dataset/033_1.tif
[[ 0.01159451  0.010392   -0.01397745 ...  0.00480546 -0.00649484
  -0.01175174]]
059_2.tif
../icdar2013_benchmarking_dataset/059_2.tif
../icdar2013_benchmarking_dataset/059_2.tif
[[-0.01015051  0.00475139  0.01270925 ...  0.0179948   0.01592968
  -0.00589394]]
235_4.tif
../icdar2013_benchmarking_dataset/235_4.tif
../icdar2013_benchmarking_dataset/235_4.tif
[[ 0.0090995  -0.01216946  0.00343175 ... -0.00403595  0.00645886
   0.00923818]]
002_3.tif
../icdar2013_benchmarking_dataset/002_3.tif
../icdar2013_benchmarking_dataset/002_3.tif
[[ 0.00814268 -0.00818821  0.00548635 ... -0.00847844 -0.00798518
  -0.01562188]]
145_1.tif
../icdar2013_benchmarking_dataset/145_1.tif
../icdar2013_benchmarking_dataset/145_1.tif
[[-0.01197014 -0.01138688 -0.01322255 ... -0.00200201 -0.00522045
  -0.00468185]]
189_3.tif
../icdar

[[-0.00725031  0.00145987  0.00820864 ... -0.00195143  0.00819524
   0.00995599]]
187_2.tif
../icdar2013_benchmarking_dataset/187_2.tif
../icdar2013_benchmarking_dataset/187_2.tif
[[-0.01732877 -0.01597482 -0.01501182 ... -0.01116665  0.00785293
   0.00854288]]
238_4.tif
../icdar2013_benchmarking_dataset/238_4.tif
../icdar2013_benchmarking_dataset/238_4.tif
[[ 0.00778711 -0.00828272 -0.00753228 ...  0.00886387 -0.00986864
  -0.01319011]]
063_2.tif
../icdar2013_benchmarking_dataset/063_2.tif
../icdar2013_benchmarking_dataset/063_2.tif
[[-0.01080599 -0.0059548   0.01786586 ... -0.00518104  0.0074361
   0.01834831]]
131_1.tif
../icdar2013_benchmarking_dataset/131_1.tif
../icdar2013_benchmarking_dataset/131_1.tif
[[-0.00561963 -0.00644552  0.01403265 ... -0.01102255 -0.00987203
  -0.00813238]]
142_4.tif
../icdar2013_benchmarking_dataset/142_4.tif
../icdar2013_benchmarking_dataset/142_4.tif
[[-0.0115658  -0.00292897  0.00716404 ... -0.01421238 -0.0160173
  -0.01187942]]
138_2.tif
../icdar20

[[-0.00632694 -0.0105336  -0.00980763 ... -0.00900447 -0.0101814
  -0.00671457]]
111_4.tif
../icdar2013_benchmarking_dataset/111_4.tif
../icdar2013_benchmarking_dataset/111_4.tif
[[ 0.00279375  0.00185525  0.00691808 ...  0.01468285 -0.00463055
  -0.00864432]]
094_3.tif
../icdar2013_benchmarking_dataset/094_3.tif
../icdar2013_benchmarking_dataset/094_3.tif
[[-0.01243553  0.00613891  0.01240249 ... -0.00989187  0.01194298
   0.01265678]]
094_4.tif
../icdar2013_benchmarking_dataset/094_4.tif
../icdar2013_benchmarking_dataset/094_4.tif
[[-0.00800688 -0.0074069   0.00306036 ...  0.00396374  0.00678091
   0.00786394]]
247_1.tif
../icdar2013_benchmarking_dataset/247_1.tif
../icdar2013_benchmarking_dataset/247_1.tif
[[ 0.00644961  0.01072758  0.01039457 ... -0.00907184 -0.01017149
   0.00605768]]
132_4.tif
../icdar2013_benchmarking_dataset/132_4.tif
../icdar2013_benchmarking_dataset/132_4.tif
[[-0.0102203  -0.01340608 -0.01400954 ... -0.01139099 -0.01143048
  -0.00352841]]
224_4.tif
../icdar2

[[-0.01521337  0.00319101  0.00829855 ...  0.01234055 -0.00617655
  -0.01338916]]
195_4.tif
../icdar2013_benchmarking_dataset/195_4.tif
../icdar2013_benchmarking_dataset/195_4.tif
[[-0.01641045 -0.01677769  0.01148951 ... -0.0050332   0.0100732
   0.01487907]]
073_1.tif
../icdar2013_benchmarking_dataset/073_1.tif
../icdar2013_benchmarking_dataset/073_1.tif
[[-0.01622697  0.00391953  0.00551255 ... -0.01089682  0.01372792
   0.00851442]]
091_4.tif
../icdar2013_benchmarking_dataset/091_4.tif
../icdar2013_benchmarking_dataset/091_4.tif
[[-0.01293617  0.00270859 -0.00534465 ... -0.01590365 -0.0041577
   0.01629421]]
069_4.tif
../icdar2013_benchmarking_dataset/069_4.tif
../icdar2013_benchmarking_dataset/069_4.tif
[[-0.01187277  0.00712485  0.02048655 ...  0.01037868  0.01174763
   0.00542648]]
192_2.tif
../icdar2013_benchmarking_dataset/192_2.tif
../icdar2013_benchmarking_dataset/192_2.tif
[[ 0.0138527   0.00978714 -0.01155363 ... -0.00808695  0.00941204
   0.01542034]]
118_3.tif
../icdar20

[[ 0.01112199  0.01361379  0.01155035 ... -0.00161523  0.01613133
   0.01424196]]
241_4.tif
../icdar2013_benchmarking_dataset/241_4.tif
../icdar2013_benchmarking_dataset/241_4.tif
[[-0.01213637 -0.00980703  0.01177696 ... -0.00430921 -0.00385019
  -0.01342496]]
115_4.tif
../icdar2013_benchmarking_dataset/115_4.tif
../icdar2013_benchmarking_dataset/115_4.tif
[[ 0.00425487 -0.00503086 -0.01177971 ... -0.01281553 -0.00473979
   0.01309308]]
078_2.tif
../icdar2013_benchmarking_dataset/078_2.tif
../icdar2013_benchmarking_dataset/078_2.tif
[[ 0.00539627  0.00855741  0.0088901  ...  0.01041556  0.00272814
  -0.00645454]]
042_2.tif
../icdar2013_benchmarking_dataset/042_2.tif
../icdar2013_benchmarking_dataset/042_2.tif
[[-0.01213194 -0.01281599 -0.00615949 ...  0.01649803  0.01092458
  -0.00404617]]
122_3.tif
../icdar2013_benchmarking_dataset/122_3.tif
../icdar2013_benchmarking_dataset/122_3.tif
[[-0.01449958 -0.01203199  0.01324513 ... -0.00681335 -0.00629871
  -0.00291404]]
030_4.tif
../icdar

[[-0.01753654 -0.00796955  0.00869386 ...  0.00200871  0.01505868
   0.01550653]]
112_4.tif
../icdar2013_benchmarking_dataset/112_4.tif
../icdar2013_benchmarking_dataset/112_4.tif
[[-0.01555296 -0.0153032   0.00744386 ... -0.01115707  0.01125229
   0.00842107]]
092_4.tif
../icdar2013_benchmarking_dataset/092_4.tif
../icdar2013_benchmarking_dataset/092_4.tif
[[ 0.01354371  0.00845734 -0.01463955 ...  0.01033705 -0.00381881
  -0.01101411]]
225_3.tif
../icdar2013_benchmarking_dataset/225_3.tif
../icdar2013_benchmarking_dataset/225_3.tif
[[-0.01328109  0.00725891  0.01065218 ...  0.0120537   0.00995351
   0.00592998]]
240_1.tif
../icdar2013_benchmarking_dataset/240_1.tif
../icdar2013_benchmarking_dataset/240_1.tif
[[-0.01292774 -0.00943744  0.01654986 ...  0.01740525  0.01159726
  -0.00356001]]
057_3.tif
../icdar2013_benchmarking_dataset/057_3.tif
../icdar2013_benchmarking_dataset/057_3.tif
[[-0.01286983 -0.01606781  0.00749909 ...  0.00757888 -0.00649723
  -0.00894216]]
026_1.tif
../icdar

[[-0.01098034 -0.0125998   0.01461913 ... -0.00919085  0.01143159
  -0.00498777]]
038_4.tif
../icdar2013_benchmarking_dataset/038_4.tif
../icdar2013_benchmarking_dataset/038_4.tif
[[ 0.01072152  0.01089092  0.00813324 ...  0.00843898  0.00930041
  -0.01485584]]
195_2.tif
../icdar2013_benchmarking_dataset/195_2.tif
../icdar2013_benchmarking_dataset/195_2.tif
[[-0.01148287 -0.00803417  0.01029868 ... -0.0033079   0.00192256
   0.01412127]]
190_1.tif
../icdar2013_benchmarking_dataset/190_1.tif
../icdar2013_benchmarking_dataset/190_1.tif
[[-0.01171347 -0.0149943  -0.01132664 ... -0.01292985  0.00990274
   0.01693934]]
047_2.tif
../icdar2013_benchmarking_dataset/047_2.tif
../icdar2013_benchmarking_dataset/047_2.tif
[[0.00051956 0.00759532 0.01462489 ... 0.0069781  0.00336393 0.00458283]]
098_1.tif
../icdar2013_benchmarking_dataset/098_1.tif
../icdar2013_benchmarking_dataset/098_1.tif
[[-0.01388012 -0.0109595   0.00412097 ...  0.01099122  0.01103249
   0.0059366 ]]
164_3.tif
../icdar2013_ben

[[-0.01391178 -0.0033287   0.01306126 ...  0.01076179 -0.00775488
  -0.01032121]]
055_3.tif
../icdar2013_benchmarking_dataset/055_3.tif
../icdar2013_benchmarking_dataset/055_3.tif
[[-0.00780308 -0.00443508  0.00631923 ...  0.00478619  0.00447319
  -0.00813074]]
061_1.tif
../icdar2013_benchmarking_dataset/061_1.tif
../icdar2013_benchmarking_dataset/061_1.tif
[[-0.01002001 -0.00829516  0.00755701 ...  0.0069474   0.00827652
   0.01109412]]
083_3.tif
../icdar2013_benchmarking_dataset/083_3.tif
../icdar2013_benchmarking_dataset/083_3.tif
[[ 0.00905205 -0.00467386 -0.01213219 ... -0.01470807 -0.01440566
  -0.00813002]]
021_4.tif
../icdar2013_benchmarking_dataset/021_4.tif
../icdar2013_benchmarking_dataset/021_4.tif
[[-0.01008934 -0.01039494  0.0068965  ... -0.00538594 -0.01252382
  -0.01277655]]
148_2.tif
../icdar2013_benchmarking_dataset/148_2.tif
../icdar2013_benchmarking_dataset/148_2.tif
[[ 0.00982564 -0.00682682 -0.0099919  ...  0.01129215  0.00369816
  -0.01229668]]
010_4.tif
../icdar

[[ 0.0168768  -0.01026377 -0.01450149 ... -0.00732243 -0.00475021
  -0.00296494]]
249_3.tif
../icdar2013_benchmarking_dataset/249_3.tif
../icdar2013_benchmarking_dataset/249_3.tif
[[-0.01294865 -0.00555526  0.01126333 ... -0.00421758  0.01113483
   0.01368396]]
187_3.tif
../icdar2013_benchmarking_dataset/187_3.tif
../icdar2013_benchmarking_dataset/187_3.tif
[[ 0.00841001 -0.01105592 -0.013821   ... -0.01474887  0.00902256
   0.00762857]]
001_1.tif
../icdar2013_benchmarking_dataset/001_1.tif
../icdar2013_benchmarking_dataset/001_1.tif
[[-0.01270613 -0.00752948 -0.00349255 ...  0.00822349  0.00864072
  -0.00536022]]
081_4.tif
../icdar2013_benchmarking_dataset/081_4.tif
../icdar2013_benchmarking_dataset/081_4.tif
[[-0.01709487 -0.00791267  0.01443432 ... -0.00941683  0.00723848
   0.01402195]]
170_2.tif
../icdar2013_benchmarking_dataset/170_2.tif
../icdar2013_benchmarking_dataset/170_2.tif
[[-0.01257639 -0.00802024  0.00635622 ... -0.00610373 -0.00791305
  -0.01516838]]
216_1.tif
../icdar

In [14]:
def load_pickle(fileName):
    'Returns the object stored inside the pickle file'
    if fileName.endswith('pkl.gz'):
        with gzip.open(fileName, 'rb') as f:
            desc = pickle.load(f)
    
    return desc

In [17]:
## Before calling this function
## verify that supervectors for train and test have been computed and are present as pickle files in- 
## -folder ../outTrainVoc and ../outTestVoc
def evaluate():
    'We evaluate using TOP-1 evaluation matrix'
    # X is the supervector belonging to test document
    # Y is the supervector belonging to train document
    testDir = '../outTestVoc/'
    Dir = '../outTestVoc/'
    
    total_test_files = 0
    correctly_classified1 = 0 # Denoted total no. of correctly classified test files
    correctly_classified2 = 0 # Denoted total no. of correctly classified test files
    correctly_classified3 = 0 # Denoted total no. of correctly classified test files
    mAP = 0
    
    for testFile in os.listdir(testDir):

        label_test = testFile[0:3] # First three letters denote the writer-ID
        # print ('testfile label: %s' %(label_test))
        # X is the supervector belonging to test document
        cos_dist = []
        if testFile.endswith('pkl.gz'):
            X = load_pickle(os.path.join(testDir, testFile))
            total_test_files += 1
        else:
            continue
        for file in os.listdir(Dir):
            # Y is the supervector belonging to train document
            if file.endswith('pkl.gz'):
                Y = load_pickle(os.path.join(Dir, file))
            else:
                continue
            
            # Computing cosine distance between X and Y supervectors
            dist = spdistance.cosine(X, Y)
            cos_dist.append((dist, file))
        
        # sorting cos_dist on the basis of distance
        list.sort(cos_dist)
        # Extracting the v
        print ("Top-3 Matches: ")
        
        ## Top-3 matches
        top3 = []
        top3.append(cos_dist[1])
        if testFile[4] == '1' or testFile[4] == '2': 
            for i in cos_dist:
                if i[1][4] != '1' and i[1][4] != '2':
                    top3.append(i)
                if len(top3) == 3:
                    break
        
        if testFile[4] == '3' or testFile[4] == '4':
            for i in cos_dist:
                if i[1][4] != '3' and i[1][4] != '4':
                    top3.append(i)
                if len(top3) == 3:
                    break
        
        print(top3)
        
        ### End: Top-3 matches 
        
        
        pred_label0 = top3[0][1] # looking at the 2nd nearest as 1st one is the same document
        pred_label0 = pred_label0[0:3] # First three letters denote the writer-ID
        
        pred_label1 = top3[1][1] # looking at the 2nd nearest as 1st one is the same document
        pred_label1 = pred_label1[0:3] # First three letters denote the writer-ID
        
        pred_label2 = top3[2][1] # looking at the 2nd nearest as 1st one is the same document
        pred_label2 = pred_label2[0:3] # First three letters denote the writer-ID
        
        ## mAP
        mAP += (pred_label0 == label_test) + (pred_label1 == label_test) + (pred_label2 == label_test)
        
        if (pred_label0 == label_test):
            correctly_classified1 += 1
            
        if (pred_label0 == label_test and pred_label1 == label_test):
            correctly_classified2 += 1
            
        if (pred_label0 == label_test and pred_label1 == label_test and pred_label2 == label_test):
            correctly_classified3 += 1
        
    return [correctly_classified1/total_test_files, correctly_classified2/total_test_files, correctly_classified3/total_test_files, mAP/(3*total_test_files)]

In [18]:
Top_k_accuracy = evaluate()

Top-3 Matches: 
[(0.6789374108715956, '113_3.pkl.gz'), (0.7445699766293976, '113_2.pkl.gz'), (0.777562655476001, '085_2.pkl.gz')]
Top-3 Matches: 
[(0.7322979737166315, '033_3.pkl.gz'), (0.8022030819868572, '033_2.pkl.gz'), (0.8199855734521605, '033_1.pkl.gz')]
Top-3 Matches: 
[(0.6758015850242798, '072_4.pkl.gz'), (0.7165034191016091, '055_2.pkl.gz'), (0.735809173678621, '072_2.pkl.gz')]
Top-3 Matches: 
[(0.5261239516696924, '039_4.pkl.gz'), (0.5667928454461845, '039_1.pkl.gz'), (0.5885481848914512, '039_2.pkl.gz')]
Top-3 Matches: 
[(0.6477490598671553, '114_1.pkl.gz'), (0.7611102229856639, '114_3.pkl.gz'), (0.7800110465017227, '081_3.pkl.gz')]
Top-3 Matches: 
[(0.5520942790641358, '053_2.pkl.gz'), (0.6036629401616778, '053_3.pkl.gz'), (0.6427450489347749, '053_4.pkl.gz')]
Top-3 Matches: 
[(0.6452268220380959, '085_3.pkl.gz'), (0.6452268220380959, '085_3.pkl.gz'), (0.7028531523730459, '088_3.pkl.gz')]
Top-3 Matches: 
[(0.7394258076536104, '075_4.pkl.gz'), (0.8018162849427847, '075_2.pk

Top-3 Matches: 
[(0.6280919182480109, '123_3.pkl.gz'), (0.6870470183765929, '123_2.pkl.gz'), (0.6971589594388146, '123_1.pkl.gz')]
Top-3 Matches: 
[(0.7989328981343882, '059_1.pkl.gz'), (0.8617108878617229, '104_3.pkl.gz'), (0.8617209699219267, '059_3.pkl.gz')]
Top-3 Matches: 
[(0.6578519212473706, '069_4.pkl.gz'), (0.7177247949306054, '069_2.pkl.gz'), (0.7570317258657115, '069_1.pkl.gz')]
Top-3 Matches: 
[(0.6622439972332763, '079_2.pkl.gz'), (0.6622439972332763, '079_2.pkl.gz'), (0.7415999648126503, '085_2.pkl.gz')]
Top-3 Matches: 
[(0.5832647561195068, '091_2.pkl.gz'), (0.6019559325360953, '091_3.pkl.gz'), (0.6371300408244647, '091_4.pkl.gz')]
Top-3 Matches: 
[(0.6851967010044693, '094_1.pkl.gz'), (0.7167855720479546, '097_3.pkl.gz'), (0.7653945821949751, '111_3.pkl.gz')]
Top-3 Matches: 
[(0.623151785689619, '037_2.pkl.gz'), (0.623151785689619, '037_2.pkl.gz'), (0.6837318570673833, '037_1.pkl.gz')]
Top-3 Matches: 
[(0.7993125212686598, '101_1.pkl.gz'), (0.8583081529372314, '090_3.pk

Top-3 Matches: 
[(0.6600531700405952, '055_1.pkl.gz'), (0.6942771631599661, '055_4.pkl.gz'), (0.7065405366894288, '120_4.pkl.gz')]
Top-3 Matches: 
[(0.7325114298149016, '060_1.pkl.gz'), (0.7737456585936122, '060_3.pkl.gz'), (0.7986737910939851, '044_4.pkl.gz')]
Top-3 Matches: 
[(0.5878099690892564, '095_2.pkl.gz'), (0.6612060933741097, '091_3.pkl.gz'), (0.689915851234183, '043_3.pkl.gz')]
Top-3 Matches: 
[(0.683085575566623, '073_1.pkl.gz'), (0.7561044926459768, '070_4.pkl.gz'), (0.7831147530287105, '073_4.pkl.gz')]
Top-3 Matches: 
[(0.7122909438308773, '086_3.pkl.gz'), (0.7728061242802412, '098_2.pkl.gz'), (0.7789142883701937, '074_2.pkl.gz')]
Top-3 Matches: 
[(0.6854245469713983, '111_3.pkl.gz'), (0.7108983911788032, '119_2.pkl.gz'), (0.739043673342235, '119_1.pkl.gz')]
Top-3 Matches: 
[(0.6822770876555087, '038_2.pkl.gz'), (0.6822770876555087, '038_2.pkl.gz'), (0.7886072082318196, '054_2.pkl.gz')]
Top-3 Matches: 
[(0.5733156260767246, '065_3.pkl.gz'), (0.6878979927688793, '065_1.pkl

Top-3 Matches: 
[(0.7983192580494543, '060_3.pkl.gz'), (0.8254793034738503, '060_1.pkl.gz'), (0.8363311711414108, '060_2.pkl.gz')]
Top-3 Matches: 
[(0.522248655183911, '065_1.pkl.gz'), (0.6891176592667225, '065_3.pkl.gz'), (0.6985093777082456, '065_4.pkl.gz')]
Top-3 Matches: 
[(0.623603671869627, '088_3.pkl.gz'), (0.7119771268867299, '038_2.pkl.gz'), (0.7128206294648218, '085_2.pkl.gz')]
Top-3 Matches: 
[(0.6843236186617317, '085_2.pkl.gz'), (0.7612425956109032, '085_3.pkl.gz'), (0.7911961182462783, '118_4.pkl.gz')]
Top-3 Matches: 
[(0.5178169967885227, '058_1.pkl.gz'), (0.6439265066579012, '058_3.pkl.gz'), (0.6587047904932822, '058_4.pkl.gz')]
Top-3 Matches: 
[(0.706570559782832, '063_4.pkl.gz'), (0.7739574466376267, '063_1.pkl.gz'), (0.781571763472281, '063_2.pkl.gz')]
Top-3 Matches: 
[(0.6623208270177865, '079_3.pkl.gz'), (0.7094072636153297, '079_2.pkl.gz'), (0.737308199192766, '085_2.pkl.gz')]
Top-3 Matches: 
[(0.7239695273776241, '122_3.pkl.gz'), (0.8569338417695389, '122_1.pkl.g

Top-3 Matches: 
[(0.6270195404469698, '052_4.pkl.gz'), (0.6327256921239236, '052_2.pkl.gz'), (0.671728987580182, '091_1.pkl.gz')]
Top-3 Matches: 
[(0.5824483176239812, '050_1.pkl.gz'), (0.728630880545013, '062_3.pkl.gz'), (0.7593791561367307, '094_3.pkl.gz')]
Top-3 Matches: 
[(0.7230333966631322, '047_3.pkl.gz'), (0.8299776403933299, '048_2.pkl.gz'), (0.8351423899983952, '078_1.pkl.gz')]
Top-3 Matches: 
[(0.515278740246252, '053_3.pkl.gz'), (0.607419674953868, '074_2.pkl.gz'), (0.6335543012261344, '074_1.pkl.gz')]
Top-3 Matches: 
[(0.7064895839070724, '114_4.pkl.gz'), (0.7611102229856639, '114_2.pkl.gz'), (0.7908538248386743, '095_2.pkl.gz')]
Top-3 Matches: 
[(0.7276997596112537, '108_4.pkl.gz'), (0.833142232432678, '116_1.pkl.gz'), (0.8379840405147234, '116_2.pkl.gz')]
Top-3 Matches: 
[(0.4248062275097094, '096_3.pkl.gz'), (0.4398303772886961, '096_2.pkl.gz'), (0.48055224881067404, '096_1.pkl.gz')]
Top-3 Matches: 
[(0.6762261983587081, '109_2.pkl.gz'), (0.7891851449631955, '109_4.pkl.

Top-3 Matches: 
[(0.7257225053232302, '029_4.pkl.gz'), (0.8218696197173059, '029_2.pkl.gz'), (0.8642169124204003, '068_2.pkl.gz')]
Top-3 Matches: 
[(0.5459174136332672, '090_4.pkl.gz'), (0.5683489720269017, '090_1.pkl.gz'), (0.5982959049994847, '090_2.pkl.gz')]
Top-3 Matches: 
[(0.644885942338016, '061_3.pkl.gz'), (0.698143741619995, '061_2.pkl.gz'), (0.7214608507955682, '061_1.pkl.gz')]
Top-3 Matches: 
[(0.6380032915046786, '078_2.pkl.gz'), (0.7573419320855023, '078_3.pkl.gz'), (0.7585296408753086, '054_3.pkl.gz')]
Top-3 Matches: 
[(0.6923012276131632, '082_1.pkl.gz'), (0.7877240629785853, '082_3.pkl.gz'), (0.7964020983680461, '044_3.pkl.gz')]
Top-3 Matches: 
[(0.7649860988224924, '110_4.pkl.gz'), (0.8799004491745952, '058_2.pkl.gz'), (0.8811600479597841, '110_1.pkl.gz')]
Top-3 Matches: 
[(0.6936613300007936, '077_1.pkl.gz'), (0.7771139447286364, '077_3.pkl.gz'), (0.7904909644698355, '097_3.pkl.gz')]
Top-3 Matches: 
[(0.6296599261909958, '040_4.pkl.gz'), (0.7183903380974239, '040_2.pk

Top-3 Matches: 
[(0.7276997596112537, '108_3.pkl.gz'), (0.8444039648574923, '116_2.pkl.gz'), (0.8462088635244278, '038_2.pkl.gz')]
Top-3 Matches: 
[(0.4443451100263256, '044_1.pkl.gz'), (0.5770306246337171, '044_3.pkl.gz'), (0.6968722325832046, '044_4.pkl.gz')]
Top-3 Matches: 
[(0.5832647561195068, '091_1.pkl.gz'), (0.5880506341127443, '091_3.pkl.gz'), (0.6188616320853058, '043_4.pkl.gz')]
Top-3 Matches: 
[(0.6365280653535276, '070_3.pkl.gz'), (0.727630669418522, '070_1.pkl.gz'), (0.7456487960193351, '070_2.pkl.gz')]
Top-3 Matches: 
[(0.6765812075026884, '083_3.pkl.gz'), (0.7388597736744823, '083_2.pkl.gz'), (0.7479346216158498, '083_1.pkl.gz')]
Top-3 Matches: 
[(0.6896561057252588, '031_4.pkl.gz'), (0.7725558307255342, '057_2.pkl.gz'), (0.7826142609941807, '031_1.pkl.gz')]
Top-3 Matches: 
[(0.683085575566623, '073_2.pkl.gz'), (0.8166362889600774, '073_4.pkl.gz'), (0.8180237159508545, '073_3.pkl.gz')]
Top-3 Matches: 
[(0.5799046429855358, '080_1.pkl.gz'), (0.7502734474543459, '075_4.pk

In [19]:
print ('TOP-1 Accuracy: %f' %(Top_k_accuracy[0]))
print ('TOP-2 Accuracy: %f' %(Top_k_accuracy[1]))
print ('TOP-3 Accuracy: %f' %(Top_k_accuracy[2]))
print ('mAP Accuracy: %f' %(Top_k_accuracy[3]))

TOP-1 Accuracy: 0.925000
TOP-2 Accuracy: 0.680000
TOP-3 Accuracy: 0.402500
mAP Accuracy: 0.712500
