## Implementation using VLAD

In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
import scipy.spatial.distance as spdistance
import os
from sklearn import mixture
import math
import copy
import gzip
import pickle

In [2]:
### First step is to extract the features using ROOT_SIFT
class FeatureEx():
    
    def __init__(self):
        self.sift = cv2.xfeatures2d.SIFT_create()
        
    def extract(self, img):
        keypoints, img_features = self.sift.detectAndCompute(img, None)
        return keypoints, img_features
    
    #### For computing ROOT_SIFT feature
    def compute(self, img):
        img = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
        if img is None:
            return "No image"
        
        kpts, descriptors = self.extract(img)
        
        ## Hellinger normalization
        descriptors += np.finfo(np.float32).eps
        descriptors /= np.sum(descriptors, axis=1)[:, np.newaxis]
        descriptors = np.sqrt(descriptors)
        
        return (kpts, descriptors)

In [3]:
def get_feature_matrix(path, max_desc=150000, max_desc_per_file=150):
    'path denotes the path to the image files'
    fEx = FeatureEx() #creating Feature extraction object
    
    directory = path
    
    # features is a matrix of features(row-wise)
    features = np.asarray(())
    
    for file in os.listdir(directory):
        print (file)
        if file.endswith(".tif"): 
            print(os.path.join(directory, file))
            filePath = os.path.join(directory, file)
            
            kpts, descriptors = fEx.compute(filePath)
            if descriptors.shape[0] > max_desc_per_file:
                'select the random max_desc_per_file from the descriptors'
                idx = np.random.randint(descriptors.shape[0], size=max_desc_per_file)
                descriptors = descriptors[idx, :]
                
            if features.size == 0:
                features = descriptors
            else:
                if features.shape[0] + descriptors.shape[0] <= max_desc:
                    features = np.vstack((features, descriptors))
                else:
                    break
        else:
            continue
    
    return features


In [4]:
def compute_gmm_params(features):
    'features is a matrix of features(row-wise) to train gmm model'
    
    ## using default number of clusters = 100 as is used in paper
    gmm = mixture.GaussianMixture(n_components=100, covariance_type='diag')
    gmm.fit(features)
    
    return gmm

In [5]:
# Let max_desc = 150000 for training ubm
# And max_desc_per_file = max_desc/no_of_files = 150000/1000 in this case
features = get_feature_matrix('../icdar2013_benchmarking_dataset/')
gmm = compute_gmm_params(features)

139_4.tif
../icdar2013_benchmarking_dataset/139_4.tif
148_4.tif
../icdar2013_benchmarking_dataset/148_4.tif
163_3.tif
../icdar2013_benchmarking_dataset/163_3.tif
105_1.tif
../icdar2013_benchmarking_dataset/105_1.tif
140_4.tif
../icdar2013_benchmarking_dataset/140_4.tif
207_1.tif
../icdar2013_benchmarking_dataset/207_1.tif
040_3.tif
../icdar2013_benchmarking_dataset/040_3.tif
125_2.tif
../icdar2013_benchmarking_dataset/125_2.tif
012_1.tif
../icdar2013_benchmarking_dataset/012_1.tif
001_3.tif
../icdar2013_benchmarking_dataset/001_3.tif
182_2.tif
../icdar2013_benchmarking_dataset/182_2.tif
092_2.tif
../icdar2013_benchmarking_dataset/092_2.tif
175_2.tif
../icdar2013_benchmarking_dataset/175_2.tif
054_1.tif
../icdar2013_benchmarking_dataset/054_1.tif
099_4.tif
../icdar2013_benchmarking_dataset/099_4.tif
184_4.tif
../icdar2013_benchmarking_dataset/184_4.tif
106_3.tif
../icdar2013_benchmarking_dataset/106_3.tif
070_1.tif
../icdar2013_benchmarking_dataset/070_1.tif
050_2.tif
../icdar2013_bench

005_2.tif
../icdar2013_benchmarking_dataset/005_2.tif
109_2.tif
../icdar2013_benchmarking_dataset/109_2.tif
150_4.tif
../icdar2013_benchmarking_dataset/150_4.tif
194_4.tif
../icdar2013_benchmarking_dataset/194_4.tif
248_3.tif
../icdar2013_benchmarking_dataset/248_3.tif
160_3.tif
../icdar2013_benchmarking_dataset/160_3.tif
139_1.tif
../icdar2013_benchmarking_dataset/139_1.tif
067_2.tif
../icdar2013_benchmarking_dataset/067_2.tif
128_3.tif
../icdar2013_benchmarking_dataset/128_3.tif
212_2.tif
../icdar2013_benchmarking_dataset/212_2.tif
127_2.tif
../icdar2013_benchmarking_dataset/127_2.tif
236_3.tif
../icdar2013_benchmarking_dataset/236_3.tif
231_4.tif
../icdar2013_benchmarking_dataset/231_4.tif
029_2.tif
../icdar2013_benchmarking_dataset/029_2.tif
220_2.tif
../icdar2013_benchmarking_dataset/220_2.tif
189_1.tif
../icdar2013_benchmarking_dataset/189_1.tif
110_1.tif
../icdar2013_benchmarking_dataset/110_1.tif
059_4.tif
../icdar2013_benchmarking_dataset/059_4.tif
205_1.tif
../icdar2013_bench

065_4.tif
../icdar2013_benchmarking_dataset/065_4.tif
226_2.tif
../icdar2013_benchmarking_dataset/226_2.tif
040_2.tif
../icdar2013_benchmarking_dataset/040_2.tif
004_2.tif
../icdar2013_benchmarking_dataset/004_2.tif
130_3.tif
../icdar2013_benchmarking_dataset/130_3.tif
036_3.tif
../icdar2013_benchmarking_dataset/036_3.tif
120_1.tif
../icdar2013_benchmarking_dataset/120_1.tif
019_1.tif
../icdar2013_benchmarking_dataset/019_1.tif
026_4.tif
../icdar2013_benchmarking_dataset/026_4.tif
088_3.tif
../icdar2013_benchmarking_dataset/088_3.tif
003_3.tif
../icdar2013_benchmarking_dataset/003_3.tif
219_2.tif
../icdar2013_benchmarking_dataset/219_2.tif
225_2.tif
../icdar2013_benchmarking_dataset/225_2.tif
160_2.tif
../icdar2013_benchmarking_dataset/160_2.tif
018_3.tif
../icdar2013_benchmarking_dataset/018_3.tif
158_4.tif
../icdar2013_benchmarking_dataset/158_4.tif
072_2.tif
../icdar2013_benchmarking_dataset/072_2.tif
102_1.tif
../icdar2013_benchmarking_dataset/102_1.tif
113_1.tif
../icdar2013_bench

141_4.tif
../icdar2013_benchmarking_dataset/141_4.tif
210_1.tif
../icdar2013_benchmarking_dataset/210_1.tif
209_4.tif
../icdar2013_benchmarking_dataset/209_4.tif
133_4.tif
../icdar2013_benchmarking_dataset/133_4.tif
197_1.tif
../icdar2013_benchmarking_dataset/197_1.tif
005_3.tif
../icdar2013_benchmarking_dataset/005_3.tif
222_2.tif
../icdar2013_benchmarking_dataset/222_2.tif
177_4.tif
../icdar2013_benchmarking_dataset/177_4.tif
005_4.tif
../icdar2013_benchmarking_dataset/005_4.tif
043_3.tif
../icdar2013_benchmarking_dataset/043_3.tif
082_2.tif
../icdar2013_benchmarking_dataset/082_2.tif
032_1.tif
../icdar2013_benchmarking_dataset/032_1.tif
161_2.tif
../icdar2013_benchmarking_dataset/161_2.tif
232_2.tif
../icdar2013_benchmarking_dataset/232_2.tif
240_4.tif
../icdar2013_benchmarking_dataset/240_4.tif
191_4.tif
../icdar2013_benchmarking_dataset/191_4.tif
182_1.tif
../icdar2013_benchmarking_dataset/182_1.tif
080_3.tif
../icdar2013_benchmarking_dataset/080_3.tif
249_4.tif
../icdar2013_bench

136_2.tif
../icdar2013_benchmarking_dataset/136_2.tif
017_3.tif
../icdar2013_benchmarking_dataset/017_3.tif
085_3.tif
../icdar2013_benchmarking_dataset/085_3.tif
016_3.tif
../icdar2013_benchmarking_dataset/016_3.tif
233_3.tif
../icdar2013_benchmarking_dataset/233_3.tif
182_4.tif
../icdar2013_benchmarking_dataset/182_4.tif
151_1.tif
../icdar2013_benchmarking_dataset/151_1.tif
114_4.tif
../icdar2013_benchmarking_dataset/114_4.tif
051_1.tif
../icdar2013_benchmarking_dataset/051_1.tif
233_4.tif
../icdar2013_benchmarking_dataset/233_4.tif
108_1.tif
../icdar2013_benchmarking_dataset/108_1.tif
053_1.tif
../icdar2013_benchmarking_dataset/053_1.tif
030_3.tif
../icdar2013_benchmarking_dataset/030_3.tif
119_3.tif
../icdar2013_benchmarking_dataset/119_3.tif
049_2.tif
../icdar2013_benchmarking_dataset/049_2.tif
196_1.tif
../icdar2013_benchmarking_dataset/196_1.tif
079_2.tif
../icdar2013_benchmarking_dataset/079_2.tif
237_1.tif
../icdar2013_benchmarking_dataset/237_1.tif
116_2.tif
../icdar2013_bench

014_2.tif
../icdar2013_benchmarking_dataset/014_2.tif
146_3.tif
../icdar2013_benchmarking_dataset/146_3.tif
128_2.tif
../icdar2013_benchmarking_dataset/128_2.tif
231_2.tif
../icdar2013_benchmarking_dataset/231_2.tif
197_4.tif
../icdar2013_benchmarking_dataset/197_4.tif
213_2.tif
../icdar2013_benchmarking_dataset/213_2.tif
204_4.tif
../icdar2013_benchmarking_dataset/204_4.tif
137_3.tif
../icdar2013_benchmarking_dataset/137_3.tif
195_1.tif
../icdar2013_benchmarking_dataset/195_1.tif
157_4.tif
../icdar2013_benchmarking_dataset/157_4.tif
095_4.tif
../icdar2013_benchmarking_dataset/095_4.tif
100_2.tif
../icdar2013_benchmarking_dataset/100_2.tif
085_1.tif
../icdar2013_benchmarking_dataset/085_1.tif
097_4.tif
../icdar2013_benchmarking_dataset/097_4.tif
202_2.tif
../icdar2013_benchmarking_dataset/202_2.tif
040_4.tif
../icdar2013_benchmarking_dataset/040_4.tif
245_1.tif
../icdar2013_benchmarking_dataset/245_1.tif
093_3.tif
../icdar2013_benchmarking_dataset/093_3.tif
092_1.tif
../icdar2013_bench

014_3.tif
../icdar2013_benchmarking_dataset/014_3.tif
245_3.tif
../icdar2013_benchmarking_dataset/245_3.tif
047_4.tif
../icdar2013_benchmarking_dataset/047_4.tif
116_4.tif
../icdar2013_benchmarking_dataset/116_4.tif
158_3.tif
../icdar2013_benchmarking_dataset/158_3.tif
239_3.tif
../icdar2013_benchmarking_dataset/239_3.tif
010_2.tif
../icdar2013_benchmarking_dataset/010_2.tif
054_4.tif
../icdar2013_benchmarking_dataset/054_4.tif
002_4.tif
../icdar2013_benchmarking_dataset/002_4.tif
055_3.tif
../icdar2013_benchmarking_dataset/055_3.tif
061_1.tif
../icdar2013_benchmarking_dataset/061_1.tif
083_3.tif
../icdar2013_benchmarking_dataset/083_3.tif
021_4.tif
../icdar2013_benchmarking_dataset/021_4.tif
148_2.tif
../icdar2013_benchmarking_dataset/148_2.tif
010_4.tif
../icdar2013_benchmarking_dataset/010_4.tif
014_1.tif
../icdar2013_benchmarking_dataset/014_1.tif
076_4.tif
../icdar2013_benchmarking_dataset/076_4.tif
225_4.tif
../icdar2013_benchmarking_dataset/225_4.tif
244_3.tif
../icdar2013_bench

In [26]:
def vlad(data, means, assignments, components, normalize=['l2c']):
    def encode(k):
        uk_ = assignments[:,k].T.dot(data)        

        clustermass = assignments[:,k].sum()
        if clustermass > 0:
            uk_ -= clustermass * means[k]

        if 'l2c' in normalize:
            n = max(math.sqrt(np.sum(uk_ * uk_)), 1e-12)
            uk_ /= n

        return uk_

    uk = list(map(encode, range(components)))
    fin_enc = np.concatenate(uk, axis=0).reshape(1,-1)
    fin_enc = np.sign(fin_enc) * np.sqrt(np.abs(fin_enc))
    fin_enc = preprocessing.normalize(fin_enc)

    return fin_enc

In [27]:
fEx = FeatureEx()
filePath = '../icdar2013_benchmarking_dataset/001_1.tif'
kpts, data = fEx.compute(filePath)

posteriors = gmm.predict_proba(data)
enc = vlad(data, gmm.means_, posteriors, gmm.means_.shape[0]) 
print(enc)

[[ 0.00802103  0.01612437  0.0087098  ...  0.00723737 -0.00999022
  -0.00748201]]


In [28]:
def ubm_adaptation(path, outDir, gmm):
    'path denotes the path to the image files'
    fEx = FeatureEx() # creating Feature extraction object
    
    directory = path
    
    # features is a matrix of features(row-wise)

    for file in os.listdir(directory):
        print (file)
        if file.endswith(".tif"): 
            print(os.path.join(directory, file))
            fp = os.path.join(directory, file)
            print(fp)
            kpts, data = fEx.compute(fp)
            
            posteriors = gmm.predict_proba(data)
            enc = vlad(data, gmm.means_, posteriors, gmm.means_.shape[0]) 
            print(enc)
            
            #### save the encoding in a folder outDir as pickle file
            fileName, file_ext = os.path.splitext(file)
            
            if not os.path.exists(outDir):
                os.makedirs(outDir)
            
            filepath = os.path.join(outDir, fileName + '.pkl.gz')
            with gzip.open(filepath, 'wb') as f:
                pickle.dump(enc, f, -1)
        else:
            continue

In [29]:
ubm_adaptation('../experimental_dataset_2013/', '../outTestVoc', gmm)

105_1.tif
../experimental_dataset_2013/105_1.tif
../experimental_dataset_2013/105_1.tif
[[ 0.00988293  0.01221394 -0.01103032 ... -0.00481159  0.00470582
   0.00635142]]
040_3.tif
../experimental_dataset_2013/040_3.tif
../experimental_dataset_2013/040_3.tif
[[-0.00456081 -0.01197508 -0.00383336 ...  0.00464625 -0.01551769
  -0.00610512]]
125_2.tif
../experimental_dataset_2013/125_2.tif
../experimental_dataset_2013/125_2.tif
[[ 0.01148533  0.0172935   0.01324252 ...  0.01598019 -0.00311084
   0.00874205]]
092_2.tif
../experimental_dataset_2013/092_2.tif
../experimental_dataset_2013/092_2.tif
[[-0.01128421 -0.01446774 -0.01060701 ...  0.0079537   0.01625519
   0.01031684]]
054_1.tif
../experimental_dataset_2013/054_1.tif
../experimental_dataset_2013/054_1.tif
[[-0.00728894  0.01187308  0.01128783 ...  0.00482617  0.00955679
  -0.0076997 ]]
099_4.tif
../experimental_dataset_2013/099_4.tif
../experimental_dataset_2013/099_4.tif
[[ 0.00780597 -0.00701124 -0.0165158  ...  0.00764546  0.01250

[[ 0.00776314  0.00905822 -0.01330062 ...  0.00583625  0.00891217
  -0.00777468]]
096_3.tif
../experimental_dataset_2013/096_3.tif
../experimental_dataset_2013/096_3.tif
[[ 0.01034491  0.00591457 -0.01423187 ...  0.01059495 -0.0131611
  -0.00539759]]
073_3.tif
../experimental_dataset_2013/073_3.tif
../experimental_dataset_2013/073_3.tif
[[-0.00913811 -0.01472872 -0.01413276 ... -0.01127996  0.00461563
  -0.00350338]]
101_4.tif
../experimental_dataset_2013/101_4.tif
../experimental_dataset_2013/101_4.tif
[[-0.00588214 -0.01502357 -0.02090895 ... -0.01271069  0.01186609
  -0.00654084]]
038_3.tif
../experimental_dataset_2013/038_3.tif
../experimental_dataset_2013/038_3.tif
[[ 0.00477268 -0.00868924  0.00818403 ...  0.00296261 -0.01433911
  -0.00594396]]
077_1.tif
../experimental_dataset_2013/077_1.tif
../experimental_dataset_2013/077_1.tif
[[-0.00882048  0.0099008  -0.01091215 ...  0.00673958 -0.00703009
  -0.0069092 ]]
102_3.tif
../experimental_dataset_2013/102_3.tif
../experimental_data

[[-0.00802692  0.00562595  0.00769828 ...  0.00540985  0.01286124
   0.00614989]]
039_1.tif
../experimental_dataset_2013/039_1.tif
../experimental_dataset_2013/039_1.tif
[[-0.00459992 -0.0068862  -0.00830284 ... -0.00622767  0.00973405
   0.01285772]]
088_4.tif
../experimental_dataset_2013/088_4.tif
../experimental_dataset_2013/088_4.tif
[[ 0.00679441  0.00780118 -0.01499765 ... -0.01293551 -0.00679576
   0.0034837 ]]
088_1.tif
../experimental_dataset_2013/088_1.tif
../experimental_dataset_2013/088_1.tif
[[ 0.0022647   0.00874591 -0.00513012 ...  0.00957686  0.01756587
  -0.00713878]]
062_4.tif
../experimental_dataset_2013/062_4.tif
../experimental_dataset_2013/062_4.tif
[[-0.01061012  0.01139828  0.01820983 ... -0.00655287 -0.00462242
  -0.00740339]]
055_4.tif
../experimental_dataset_2013/055_4.tif
../experimental_dataset_2013/055_4.tif
[[ 0.01041703  0.0178027  -0.00729103 ...  0.0077755   0.00430898
   0.0045417 ]]
068_3.tif
../experimental_dataset_2013/068_3.tif
../experimental_dat

[[-0.01256798 -0.00734057  0.01226444 ... -0.01007233 -0.01128463
  -0.00709361]]
058_3.tif
../experimental_dataset_2013/058_3.tif
../experimental_dataset_2013/058_3.tif
[[-0.01141845  0.01073043  0.01211658 ... -0.00758941  0.00983627
  -0.00642371]]
053_4.tif
../experimental_dataset_2013/053_4.tif
../experimental_dataset_2013/053_4.tif
[[-0.00950611 -0.0161156   0.00734667 ...  0.01101542 -0.01502237
   0.00750309]]
103_3.tif
../experimental_dataset_2013/103_3.tif
../experimental_dataset_2013/103_3.tif
[[-0.00834251  0.0134606   0.017831   ...  0.01332837 -0.01165402
  -0.00853078]]
064_4.tif
../experimental_dataset_2013/064_4.tif
../experimental_dataset_2013/064_4.tif
[[-0.00974248 -0.01659015 -0.01119632 ...  0.00624298  0.01489688
  -0.00845877]]
086_1.tif
../experimental_dataset_2013/086_1.tif
../experimental_dataset_2013/086_1.tif
[[-0.00536507  0.00283635 -0.01309251 ...  0.01109786  0.01016203
   0.00864587]]
079_3.tif
../experimental_dataset_2013/079_3.tif
../experimental_dat

[[-0.01217754 -0.01676883 -0.01610761 ...  0.01355148  0.00485267
  -0.00577746]]
125_4.tif
../experimental_dataset_2013/125_4.tif
../experimental_dataset_2013/125_4.tif
[[-0.00591852  0.01604515  0.00823349 ...  0.00734357 -0.00893039
  -0.00771493]]
094_1.tif
../experimental_dataset_2013/094_1.tif
../experimental_dataset_2013/094_1.tif
[[-0.00672114 -0.00594791  0.00477966 ...  0.01636275  0.00695313
  -0.00636726]]
029_3.tif
../experimental_dataset_2013/029_3.tif
../experimental_dataset_2013/029_3.tif
[[0.01011287 0.01716189 0.02129601 ... 0.01072344 0.01173942 0.00952708]]
083_1.tif
../experimental_dataset_2013/083_1.tif
../experimental_dataset_2013/083_1.tif
[[-0.00476852  0.00632744  0.01572124 ...  0.00630685 -0.0108994
  -0.00591101]]
085_4.tif
../experimental_dataset_2013/085_4.tif
../experimental_dataset_2013/085_4.tif
[[ 0.00435334  0.01014535 -0.00426192 ...  0.00894952 -0.00868356
  -0.00854689]]
046_4.tif
../experimental_dataset_2013/046_4.tif
../experimental_dataset_2013

[[-0.00652878  0.01613408  0.00710716 ...  0.01453083  0.01471676
   0.00655463]]
085_3.tif
../experimental_dataset_2013/085_3.tif
../experimental_dataset_2013/085_3.tif
[[ 0.01232821  0.01317457 -0.01242748 ... -0.01244397  0.00517118
  -0.0047973 ]]
114_4.tif
../experimental_dataset_2013/114_4.tif
../experimental_dataset_2013/114_4.tif
[[-0.01020575 -0.01767252  0.00425773 ...  0.00360893 -0.00396862
  -0.00521406]]
051_1.tif
../experimental_dataset_2013/051_1.tif
../experimental_dataset_2013/051_1.tif
[[-0.01238135 -0.01356403  0.01281323 ...  0.01546939  0.00664155
   0.00867996]]
108_1.tif
../experimental_dataset_2013/108_1.tif
../experimental_dataset_2013/108_1.tif
[[ 0.0141284   0.00730997 -0.01749586 ... -0.00511509  0.00647924
   0.0034941 ]]
053_1.tif
../experimental_dataset_2013/053_1.tif
../experimental_dataset_2013/053_1.tif
[[ 0.0090935   0.01695947  0.01031146 ...  0.00907339 -0.0081924
  -0.00505055]]
030_3.tif
../experimental_dataset_2013/030_3.tif
../experimental_data

[[-0.0110195  -0.02065646  0.00973033 ...  0.01300044 -0.00639041
  -0.00143299]]
073_4.tif
../experimental_dataset_2013/073_4.tif
../experimental_dataset_2013/073_4.tif
[[ 0.00782935  0.00813227 -0.01173226 ...  0.0142928  -0.00846222
  -0.00705673]]
118_4.tif
../experimental_dataset_2013/118_4.tif
../experimental_dataset_2013/118_4.tif
[[-0.01011661 -0.01341888  0.00290308 ...  0.00089365 -0.00922188
  -0.00538191]]
076_3.tif
../experimental_dataset_2013/076_3.tif
../experimental_dataset_2013/076_3.tif
[[-0.01190611 -0.01100776  0.01673042 ... -0.00439838 -0.01355486
  -0.00770226]]
119_1.tif
../experimental_dataset_2013/119_1.tif
../experimental_dataset_2013/119_1.tif
[[ 0.00587993 -0.0148457  -0.00907627 ...  0.0163886   0.00734011
   0.00329155]]
068_4.tif
../experimental_dataset_2013/068_4.tif
../experimental_dataset_2013/068_4.tif
[[-0.01208632 -0.01495787  0.01937908 ... -0.01211314 -0.00481432
   0.00523596]]
107_1.tif
../experimental_dataset_2013/107_1.tif
../experimental_dat

[[-0.01029701 -0.015177    0.01048643 ... -0.00936654 -0.00778017
  -0.00449017]]
103_1.tif
../experimental_dataset_2013/103_1.tif
../experimental_dataset_2013/103_1.tif
[[ 0.00913221  0.01516934  0.00224499 ... -0.00630536 -0.01311618
  -0.00591438]]
071_3.tif
../experimental_dataset_2013/071_3.tif
../experimental_dataset_2013/071_3.tif
[[-0.00157762 -0.00645625 -0.0122066  ...  0.01536532  0.01063952
   0.00507001]]
102_2.tif
../experimental_dataset_2013/102_2.tif
../experimental_dataset_2013/102_2.tif
[[-0.009455   -0.00339649 -0.00794596 ...  0.00727819  0.00649653
   0.00553453]]
112_4.tif
../experimental_dataset_2013/112_4.tif
../experimental_dataset_2013/112_4.tif
[[-0.01003284 -0.01025067  0.0053208  ... -0.01002156  0.00920678
   0.00998165]]
092_4.tif
../experimental_dataset_2013/092_4.tif
../experimental_dataset_2013/092_4.tif
[[-0.0113688  -0.01089362  0.00992879 ...  0.01015688 -0.00964125
   0.00282442]]
057_3.tif
../experimental_dataset_2013/057_3.tif
../experimental_dat

[[-0.00950126 -0.00556151  0.00062916 ...  0.01122766  0.01022532
  -0.00100917]]
080_4.tif
../experimental_dataset_2013/080_4.tif
../experimental_dataset_2013/080_4.tif
[[ 0.0032293   0.01210825  0.01766695 ...  0.01127163 -0.00874698
  -0.00558674]]
081_4.tif
../experimental_dataset_2013/081_4.tif
../experimental_dataset_2013/081_4.tif
[[-0.00622739  0.01387092  0.0179812  ... -0.01720659 -0.00311107
   0.00380992]]
112_1.tif
../experimental_dataset_2013/112_1.tif
../experimental_dataset_2013/112_1.tif
[[-0.00863202 -0.00625665  0.00490193 ... -0.00957671  0.00814906
   0.00307971]]
031_4.tif
../experimental_dataset_2013/031_4.tif
../experimental_dataset_2013/031_4.tif
[[-0.01745773 -0.01211013  0.01494019 ... -0.0104603   0.00231934
  -0.00671756]]
063_3.tif
../experimental_dataset_2013/063_3.tif
../experimental_dataset_2013/063_3.tif
[[-0.01239112  0.0046851   0.0180157  ...  0.00352253  0.01212807
   0.00617622]]
044_1.tif
../experimental_dataset_2013/044_1.tif
../experimental_dat

In [30]:
enc = np.array([1, 2, 3])
enc = enc.reshape(1, 3)
directory = '../outVoc'
if not os.path.exists(directory):
    os.makedirs(directory)
filePath = '../outVoc/file.pkl.gz'
with gzip.open(filePath, 'wb') as f:
    pickle.dump(enc, f, -1)

In [31]:
ubm_adaptation('../icdar2013_benchmarking_dataset/', '../outTrainVoc', gmm)

139_4.tif
../icdar2013_benchmarking_dataset/139_4.tif
../icdar2013_benchmarking_dataset/139_4.tif
[[ 0.00403503 -0.01667697 -0.01809048 ...  0.01114486  0.01950616
   0.00732818]]
148_4.tif
../icdar2013_benchmarking_dataset/148_4.tif
../icdar2013_benchmarking_dataset/148_4.tif
[[ 0.00527322  0.01108351 -0.00661623 ... -0.0020866  -0.00426312
  -0.00642019]]
163_3.tif
../icdar2013_benchmarking_dataset/163_3.tif
../icdar2013_benchmarking_dataset/163_3.tif
[[ 0.00811954  0.01021481  0.00978057 ...  0.01361026 -0.01109212
   0.00870054]]
105_1.tif
../icdar2013_benchmarking_dataset/105_1.tif
../icdar2013_benchmarking_dataset/105_1.tif
[[-0.00550618  0.01190028  0.01819749 ...  0.00267924 -0.00610406
  -0.0085121 ]]
140_4.tif
../icdar2013_benchmarking_dataset/140_4.tif
../icdar2013_benchmarking_dataset/140_4.tif
[[-0.01057721 -0.01185581 -0.00780538 ...  0.01170622  0.01003786
   0.00314396]]
207_1.tif
../icdar2013_benchmarking_dataset/207_1.tif
../icdar2013_benchmarking_dataset/207_1.tif
[[

[[-0.0069221   0.01507188  0.0165134  ... -0.01104244  0.01376996
  -0.0067277 ]]
112_3.tif
../icdar2013_benchmarking_dataset/112_3.tif
../icdar2013_benchmarking_dataset/112_3.tif
[[-0.008644    0.01389121  0.01092389 ...  0.00401742  0.01048318
  -0.00530458]]
188_1.tif
../icdar2013_benchmarking_dataset/188_1.tif
../icdar2013_benchmarking_dataset/188_1.tif
[[ 0.00869532  0.01218031  0.01696805 ... -0.01808634  0.00816354
   0.00491955]]
148_1.tif
../icdar2013_benchmarking_dataset/148_1.tif
../icdar2013_benchmarking_dataset/148_1.tif
[[ 0.01115524  0.01808936  0.01257413 ...  0.00446726 -0.00832265
  -0.00592443]]
111_3.tif
../icdar2013_benchmarking_dataset/111_3.tif
../icdar2013_benchmarking_dataset/111_3.tif
[[-0.00965465 -0.01530709 -0.00364921 ... -0.00768968 -0.01405241
  -0.0026946 ]]
169_1.tif
../icdar2013_benchmarking_dataset/169_1.tif
../icdar2013_benchmarking_dataset/169_1.tif
[[ 0.01538795  0.01389902 -0.01033158 ...  0.00744327 -0.00866907
   0.00861135]]
144_3.tif
../icdar

[[-0.00805147 -0.00497157  0.011764   ... -0.01110534  0.0127501
   0.01142639]]
069_3.tif
../icdar2013_benchmarking_dataset/069_3.tif
../icdar2013_benchmarking_dataset/069_3.tif
[[ 0.00646874  0.00857476  0.01586342 ... -0.00761973 -0.00200994
   0.00805076]]
037_2.tif
../icdar2013_benchmarking_dataset/037_2.tif
../icdar2013_benchmarking_dataset/037_2.tif
[[-0.01041551 -0.01637364 -0.01106107 ... -0.01456983 -0.00639849
  -0.00476527]]
077_2.tif
../icdar2013_benchmarking_dataset/077_2.tif
../icdar2013_benchmarking_dataset/077_2.tif
[[ 0.01305786  0.00681939 -0.01159719 ... -0.01245282 -0.01307172
  -0.00833933]]
087_4.tif
../icdar2013_benchmarking_dataset/087_4.tif
../icdar2013_benchmarking_dataset/087_4.tif
[[ 0.00861949 -0.0121697  -0.01352205 ...  0.00631144  0.00858507
  -0.00658181]]
169_4.tif
../icdar2013_benchmarking_dataset/169_4.tif
../icdar2013_benchmarking_dataset/169_4.tif
[[ 0.00397066 -0.01068034 -0.0144877  ... -0.0120402   0.00689728
   0.00985492]]
088_2.tif
../icdar2

[[-0.01268006 -0.01830967 -0.01532322 ... -0.01388881  0.01280088
   0.0058826 ]]
115_2.tif
../icdar2013_benchmarking_dataset/115_2.tif
../icdar2013_benchmarking_dataset/115_2.tif
[[ 0.00225266  0.0126786   0.01413734 ...  0.00464753 -0.00466284
  -0.00370039]]
172_3.tif
../icdar2013_benchmarking_dataset/172_3.tif
../icdar2013_benchmarking_dataset/172_3.tif
[[ 0.01564723  0.01572553 -0.00584703 ...  0.00609903 -0.01570229
  -0.00868031]]
105_2.tif
../icdar2013_benchmarking_dataset/105_2.tif
../icdar2013_benchmarking_dataset/105_2.tif
[[ 0.01100154  0.01305519  0.0055142  ... -0.00740725 -0.00875663
  -0.00782092]]
064_3.tif
../icdar2013_benchmarking_dataset/064_3.tif
../icdar2013_benchmarking_dataset/064_3.tif
[[ 0.00592961  0.00520007 -0.01375522 ... -0.01423927  0.00827496
  -0.00212346]]
183_2.tif
../icdar2013_benchmarking_dataset/183_2.tif
../icdar2013_benchmarking_dataset/183_2.tif
[[-0.01264517 -0.00572019  0.00725474 ... -0.01186194  0.01291874
   0.01266894]]
084_2.tif
../icdar

[[ 0.00562293 -0.00146047  0.00570449 ... -0.01513704  0.00877859
   0.00741612]]
098_3.tif
../icdar2013_benchmarking_dataset/098_3.tif
../icdar2013_benchmarking_dataset/098_3.tif
[[-0.00679766  0.00500339  0.01077706 ... -0.01078784  0.00949791
   0.00821845]]
171_2.tif
../icdar2013_benchmarking_dataset/171_2.tif
../icdar2013_benchmarking_dataset/171_2.tif
[[0.00730958 0.01178597 0.00677204 ... 0.01431857 0.0078277  0.00680664]]
193_4.tif
../icdar2013_benchmarking_dataset/193_4.tif
../icdar2013_benchmarking_dataset/193_4.tif
[[ 0.00191097 -0.01029947 -0.01752761 ... -0.01681311 -0.00306068
   0.00302743]]
090_2.tif
../icdar2013_benchmarking_dataset/090_2.tif
../icdar2013_benchmarking_dataset/090_2.tif
[[ 0.00598722  0.00782576  0.01457463 ... -0.01445134 -0.01359613
  -0.00823759]]
041_4.tif
../icdar2013_benchmarking_dataset/041_4.tif
../icdar2013_benchmarking_dataset/041_4.tif
[[-0.00792776 -0.01313729  0.00662726 ... -0.01252018 -0.00573115
  -0.00304087]]
119_4.tif
../icdar2013_ben

[[ 0.01092982  0.01445014  0.00546108 ...  0.00728512 -0.00937613
  -0.00670707]]
185_3.tif
../icdar2013_benchmarking_dataset/185_3.tif
../icdar2013_benchmarking_dataset/185_3.tif
[[-0.006566    0.00696134  0.00440739 ...  0.01844227  0.00743466
   0.0012635 ]]
062_4.tif
../icdar2013_benchmarking_dataset/062_4.tif
../icdar2013_benchmarking_dataset/062_4.tif
[[ 0.00116374 -0.01422969 -0.01474443 ...  0.00851383 -0.01064272
  -0.00746818]]
201_4.tif
../icdar2013_benchmarking_dataset/201_4.tif
../icdar2013_benchmarking_dataset/201_4.tif
[[ 0.0053623  -0.00302957  0.00885855 ...  0.00665265 -0.00525932
  -0.00718371]]
055_4.tif
../icdar2013_benchmarking_dataset/055_4.tif
../icdar2013_benchmarking_dataset/055_4.tif
[[-0.00872047 -0.01486769  0.01211551 ... -0.01619675 -0.01187718
  -0.00613945]]
022_2.tif
../icdar2013_benchmarking_dataset/022_2.tif
../icdar2013_benchmarking_dataset/022_2.tif
[[-0.0092681  -0.02113003 -0.00790999 ...  0.00743022  0.00254816
   0.00770521]]
213_4.tif
../icdar

[[-0.01196923 -0.01609355 -0.01560162 ... -0.0058098  -0.01210346
  -0.00723097]]
190_2.tif
../icdar2013_benchmarking_dataset/190_2.tif
../icdar2013_benchmarking_dataset/190_2.tif
[[ 0.00757693  0.0124771  -0.01360757 ...  0.01589804  0.00742806
  -0.00449748]]
104_4.tif
../icdar2013_benchmarking_dataset/104_4.tif
../icdar2013_benchmarking_dataset/104_4.tif
[[ 0.00510155 -0.0046567   0.01472505 ... -0.01478937 -0.00574786
   0.00760673]]
179_1.tif
../icdar2013_benchmarking_dataset/179_1.tif
../icdar2013_benchmarking_dataset/179_1.tif
[[-0.00858222 -0.01677495 -0.0137609  ...  0.0031083  -0.01446259
  -0.0071084 ]]
091_2.tif
../icdar2013_benchmarking_dataset/091_2.tif
../icdar2013_benchmarking_dataset/091_2.tif
[[-0.0126     -0.00561135 -0.00322648 ... -0.01229635  0.01746431
   0.00527047]]
213_3.tif
../icdar2013_benchmarking_dataset/213_3.tif
../icdar2013_benchmarking_dataset/213_3.tif
[[ 0.00594489  0.00690142 -0.01467122 ...  0.00469512 -0.01479439
  -0.00554651]]
022_1.tif
../icdar

[[ 0.00705748  0.01250168 -0.00443271 ... -0.0135724   0.01228643
   0.00271507]]
082_3.tif
../icdar2013_benchmarking_dataset/082_3.tif
../icdar2013_benchmarking_dataset/082_3.tif
[[ 0.01062687 -0.00277015  0.00588946 ...  0.01285166 -0.00582371
  -0.00804697]]
117_1.tif
../icdar2013_benchmarking_dataset/117_1.tif
../icdar2013_benchmarking_dataset/117_1.tif
[[-0.01108332 -0.01612375 -0.01120546 ... -0.00759647 -0.01083594
  -0.0069657 ]]
228_1.tif
../icdar2013_benchmarking_dataset/228_1.tif
../icdar2013_benchmarking_dataset/228_1.tif
[[ 0.01179615  0.00839752 -0.00481394 ...  0.00898406  0.0057463
   0.00187237]]
031_2.tif
../icdar2013_benchmarking_dataset/031_2.tif
../icdar2013_benchmarking_dataset/031_2.tif
[[ 0.01500693  0.02080591  0.0103762  ...  0.00874648 -0.00535171
   0.0040779 ]]
120_3.tif
../icdar2013_benchmarking_dataset/120_3.tif
../icdar2013_benchmarking_dataset/120_3.tif
[[ 0.01036006  0.01711593  0.01168507 ... -0.01633535 -0.0139881
   0.00138083]]
216_2.tif
../icdar20

[[ 0.01176846  0.0212803   0.0107793  ... -0.00848494  0.00509676
   0.00447807]]
231_1.tif
../icdar2013_benchmarking_dataset/231_1.tif
../icdar2013_benchmarking_dataset/231_1.tif
[[ 0.00823608  0.0096897  -0.0127635  ...  0.00617064  0.01247639
  -0.00606524]]
096_2.tif
../icdar2013_benchmarking_dataset/096_2.tif
../icdar2013_benchmarking_dataset/096_2.tif
[[-0.01224801 -0.01469294 -0.0156074  ...  0.00760541  0.00625186
  -0.00651634]]
248_4.tif
../icdar2013_benchmarking_dataset/248_4.tif
../icdar2013_benchmarking_dataset/248_4.tif
[[-0.01245892  0.00711849  0.00961379 ... -0.01727531 -0.01243483
   0.00434451]]
135_2.tif
../icdar2013_benchmarking_dataset/135_2.tif
../icdar2013_benchmarking_dataset/135_2.tif
[[-0.00435189  0.003549   -0.01145292 ...  0.01282783 -0.00687419
   0.00580301]]
085_2.tif
../icdar2013_benchmarking_dataset/085_2.tif
../icdar2013_benchmarking_dataset/085_2.tif
[[-0.00963236 -0.01619794 -0.0121928  ...  0.01512593 -0.00891399
  -0.00481258]]
016_1.tif
../icdar

[[ 0.01114937  0.01738074  0.01185743 ... -0.00871389  0.01010849
  -0.00513097]]
232_1.tif
../icdar2013_benchmarking_dataset/232_1.tif
../icdar2013_benchmarking_dataset/232_1.tif
[[ 0.01725407  0.01638945  0.00363641 ...  0.0047882   0.00496735
  -0.00648888]]
244_2.tif
../icdar2013_benchmarking_dataset/244_2.tif
../icdar2013_benchmarking_dataset/244_2.tif
[[0.00387721 0.0146814  0.01777907 ... 0.01306639 0.01426248 0.00433949]]
039_3.tif
../icdar2013_benchmarking_dataset/039_3.tif
../icdar2013_benchmarking_dataset/039_3.tif
[[-0.01141538 -0.0117438  -0.01107509 ... -0.00945396 -0.00467397
   0.00253082]]
010_3.tif
../icdar2013_benchmarking_dataset/010_3.tif
../icdar2013_benchmarking_dataset/010_3.tif
[[ 0.00835135  0.01513369  0.01839203 ... -0.0122069  -0.00740733
  -0.00581992]]
095_3.tif
../icdar2013_benchmarking_dataset/095_3.tif
../icdar2013_benchmarking_dataset/095_3.tif
[[-0.00682669  0.01247171  0.01074278 ...  0.01077836 -0.01065814
  -0.00298461]]
203_4.tif
../icdar2013_ben

[[-0.01090212  0.01092312  0.01645607 ...  0.00845285  0.01685521
   0.00880776]]
005_3.tif
../icdar2013_benchmarking_dataset/005_3.tif
../icdar2013_benchmarking_dataset/005_3.tif
[[-0.00705049  0.00781837  0.00948625 ... -0.00954743 -0.01064554
  -0.00627701]]
222_2.tif
../icdar2013_benchmarking_dataset/222_2.tif
../icdar2013_benchmarking_dataset/222_2.tif
[[-0.010009   -0.01161057  0.00598207 ...  0.00506164  0.01658253
   0.00487019]]
177_4.tif
../icdar2013_benchmarking_dataset/177_4.tif
../icdar2013_benchmarking_dataset/177_4.tif
[[-0.00762261 -0.01240616 -0.00327516 ...  0.00666133 -0.01185866
  -0.00618039]]
005_4.tif
../icdar2013_benchmarking_dataset/005_4.tif
../icdar2013_benchmarking_dataset/005_4.tif
[[ 0.00839745  0.01437501  0.01291947 ... -0.00565653  0.00800724
  -0.00784237]]
043_3.tif
../icdar2013_benchmarking_dataset/043_3.tif
../icdar2013_benchmarking_dataset/043_3.tif
[[-0.00758006  0.01190942 -0.0084142  ... -0.00856965  0.00706522
   0.00452963]]
082_2.tif
../icdar

[[-0.01107149 -0.00513898  0.0091391  ... -0.00800109  0.00797719
   0.00876682]]
011_4.tif
../icdar2013_benchmarking_dataset/011_4.tif
../icdar2013_benchmarking_dataset/011_4.tif
[[ 0.00776494 -0.00675459  0.0136592  ... -0.01224012 -0.00792069
  -0.00189853]]
039_2.tif
../icdar2013_benchmarking_dataset/039_2.tif
../icdar2013_benchmarking_dataset/039_2.tif
[[ 0.00388587 -0.00856503 -0.01185834 ... -0.01429743 -0.00364158
  -0.00422527]]
071_2.tif
../icdar2013_benchmarking_dataset/071_2.tif
../icdar2013_benchmarking_dataset/071_2.tif
[[ 0.01081145  0.01800099  0.01173339 ... -0.01288817 -0.00808188
  -0.00385079]]
035_4.tif
../icdar2013_benchmarking_dataset/035_4.tif
../icdar2013_benchmarking_dataset/035_4.tif
[[-0.01088012 -0.01448083 -0.00907726 ... -0.0045038   0.01412756
  -0.0069353 ]]
157_3.tif
../icdar2013_benchmarking_dataset/157_3.tif
../icdar2013_benchmarking_dataset/157_3.tif
[[ 0.00451871  0.00305711 -0.01143939 ... -0.00683467  0.00685352
  -0.00773655]]
124_4.tif
../icdar

[[ 0.00605322  0.00748659 -0.01114728 ... -0.01589106 -0.0046771
   0.00255079]]
187_4.tif
../icdar2013_benchmarking_dataset/187_4.tif
../icdar2013_benchmarking_dataset/187_4.tif
[[ 0.00664053 -0.00497223  0.00948964 ... -0.00938936  0.01276166
   0.00884103]]
009_1.tif
../icdar2013_benchmarking_dataset/009_1.tif
../icdar2013_benchmarking_dataset/009_1.tif
[[-0.01097488 -0.00900189  0.01295083 ... -0.01098912  0.01780793
   0.01345005]]
229_2.tif
../icdar2013_benchmarking_dataset/229_2.tif
../icdar2013_benchmarking_dataset/229_2.tif
[[ 0.0125918   0.01599318  0.00680713 ...  0.01378778  0.01203092
  -0.00494679]]
129_3.tif
../icdar2013_benchmarking_dataset/129_3.tif
../icdar2013_benchmarking_dataset/129_3.tif
[[ 0.01441462  0.01551108 -0.0139855  ...  0.00286109 -0.01273809
  -0.00524165]]
108_4.tif
../icdar2013_benchmarking_dataset/108_4.tif
../icdar2013_benchmarking_dataset/108_4.tif
[[-0.01154746 -0.00976032  0.00742937 ...  0.00990681 -0.00823559
  -0.00602996]]
074_2.tif
../icdar2

[[ 0.010643    0.01284821 -0.01387352 ...  0.01585445  0.01119754
   0.00416563]]
033_1.tif
../icdar2013_benchmarking_dataset/033_1.tif
../icdar2013_benchmarking_dataset/033_1.tif
[[ 0.0114803   0.01149316 -0.01652526 ... -0.01197651 -0.01042748
  -0.00203414]]
059_2.tif
../icdar2013_benchmarking_dataset/059_2.tif
../icdar2013_benchmarking_dataset/059_2.tif
[[-0.00256075  0.00916747  0.01711267 ... -0.01361339 -0.00509858
   0.00801964]]
235_4.tif
../icdar2013_benchmarking_dataset/235_4.tif
../icdar2013_benchmarking_dataset/235_4.tif
[[-0.00550366 -0.01376314 -0.01467426 ... -0.0051303  -0.00513271
   0.00679126]]
002_3.tif
../icdar2013_benchmarking_dataset/002_3.tif
../icdar2013_benchmarking_dataset/002_3.tif
[[-0.00662349  0.01027041  0.01310845 ...  0.00982939 -0.00876625
   0.0044827 ]]
145_1.tif
../icdar2013_benchmarking_dataset/145_1.tif
../icdar2013_benchmarking_dataset/145_1.tif
[[ 0.00961995 -0.00819963 -0.01332152 ...  0.00384269  0.00461562
  -0.00682265]]
189_3.tif
../icdar

[[-0.00642322 -0.01405439  0.00856338 ...  0.00406955  0.00805124
   0.00543727]]
187_2.tif
../icdar2013_benchmarking_dataset/187_2.tif
../icdar2013_benchmarking_dataset/187_2.tif
[[ 0.00954812  0.01023325 -0.01062161 ...  0.01359596 -0.00591502
  -0.00175551]]
238_4.tif
../icdar2013_benchmarking_dataset/238_4.tif
../icdar2013_benchmarking_dataset/238_4.tif
[[-0.00146228  0.01704102  0.01231523 ... -0.01538129 -0.01194536
  -0.00052395]]
063_2.tif
../icdar2013_benchmarking_dataset/063_2.tif
../icdar2013_benchmarking_dataset/063_2.tif
[[ 0.00529813  0.01810262  0.01505013 ... -0.01041144 -0.01271006
   0.00608071]]
131_1.tif
../icdar2013_benchmarking_dataset/131_1.tif
../icdar2013_benchmarking_dataset/131_1.tif
[[-0.00727065  0.01720171  0.01858032 ...  0.0096316   0.00484428
   0.00627201]]
142_4.tif
../icdar2013_benchmarking_dataset/142_4.tif
../icdar2013_benchmarking_dataset/142_4.tif
[[ 0.00730289  0.01081663  0.00962868 ...  0.00583182 -0.01274241
  -0.00721019]]
138_2.tif
../icdar

[[ 0.01048224  0.01231589 -0.01069555 ... -0.01458198  0.00429744
  -0.00521891]]
111_4.tif
../icdar2013_benchmarking_dataset/111_4.tif
../icdar2013_benchmarking_dataset/111_4.tif
[[-0.00857924 -0.01616523 -0.01747601 ... -0.01029303  0.00881139
   0.00570246]]
094_3.tif
../icdar2013_benchmarking_dataset/094_3.tif
../icdar2013_benchmarking_dataset/094_3.tif
[[-0.00640254 -0.01113691 -0.01354591 ...  0.01027756 -0.00490515
   0.00551209]]
094_4.tif
../icdar2013_benchmarking_dataset/094_4.tif
../icdar2013_benchmarking_dataset/094_4.tif
[[-0.00573014 -0.00828508  0.01032363 ...  0.01781642 -0.01125648
  -0.0050595 ]]
247_1.tif
../icdar2013_benchmarking_dataset/247_1.tif
../icdar2013_benchmarking_dataset/247_1.tif
[[ 0.0061664   0.00549257 -0.01148168 ...  0.01380174  0.0083982
   0.00380477]]
132_4.tif
../icdar2013_benchmarking_dataset/132_4.tif
../icdar2013_benchmarking_dataset/132_4.tif
[[ 0.01221417  0.00788655 -0.01435401 ...  0.01236013  0.00871426
   0.00474901]]
224_4.tif
../icdar2

[[-0.01090535  0.00678893  0.01965896 ... -0.01510226  0.01031908
   0.00579849]]
195_4.tif
../icdar2013_benchmarking_dataset/195_4.tif
../icdar2013_benchmarking_dataset/195_4.tif
[[ 0.0129026   0.01441785 -0.00517738 ... -0.01662016  0.00766577
   0.00787576]]
073_1.tif
../icdar2013_benchmarking_dataset/073_1.tif
../icdar2013_benchmarking_dataset/073_1.tif
[[ 0.00989538  0.00302418 -0.01293714 ...  0.00900562  0.00761673
  -0.00218383]]
091_4.tif
../icdar2013_benchmarking_dataset/091_4.tif
../icdar2013_benchmarking_dataset/091_4.tif
[[ 0.00919486 -0.00876935 -0.01518116 ... -0.01913494  0.01590865
   0.00202292]]
069_4.tif
../icdar2013_benchmarking_dataset/069_4.tif
../icdar2013_benchmarking_dataset/069_4.tif
[[ 0.0092412   0.00793421  0.01273331 ... -0.01208573  0.00826184
   0.00801786]]
192_2.tif
../icdar2013_benchmarking_dataset/192_2.tif
../icdar2013_benchmarking_dataset/192_2.tif
[[-0.00901686 -0.01525917 -0.01205089 ... -0.00819908  0.01652137
   0.00650981]]
118_3.tif
../icdar

[[ 0.00745502 -0.01671695 -0.02003024 ... -0.00342228  0.01119548
  -0.0048497 ]]
241_4.tif
../icdar2013_benchmarking_dataset/241_4.tif
../icdar2013_benchmarking_dataset/241_4.tif
[[-0.00977447 -0.01434147 -0.0149749  ...  0.01466731 -0.0140189
  -0.00845449]]
115_4.tif
../icdar2013_benchmarking_dataset/115_4.tif
../icdar2013_benchmarking_dataset/115_4.tif
[[-0.00321675 -0.00804274 -0.01436295 ...  0.01473265 -0.00473293
  -0.00481239]]
078_2.tif
../icdar2013_benchmarking_dataset/078_2.tif
../icdar2013_benchmarking_dataset/078_2.tif
[[-0.00337306 -0.0041974   0.01355418 ... -0.01675652  0.01051257
  -0.00338307]]
042_2.tif
../icdar2013_benchmarking_dataset/042_2.tif
../icdar2013_benchmarking_dataset/042_2.tif
[[ 0.01470541  0.01916886 -0.01470517 ... -0.00510645 -0.0096019
  -0.00301224]]
122_3.tif
../icdar2013_benchmarking_dataset/122_3.tif
../icdar2013_benchmarking_dataset/122_3.tif
[[ 0.00867107  0.00627905 -0.00834932 ...  0.005422    0.0049891
  -0.00684322]]
030_4.tif
../icdar201

[[ 0.00290497 -0.00695739  0.00848196 ...  0.00333775  0.01956025
   0.00225388]]
112_4.tif
../icdar2013_benchmarking_dataset/112_4.tif
../icdar2013_benchmarking_dataset/112_4.tif
[[ 0.01182456  0.01647966  0.01029048 ...  0.00556497 -0.0082911
   0.00298048]]
092_4.tif
../icdar2013_benchmarking_dataset/092_4.tif
../icdar2013_benchmarking_dataset/092_4.tif
[[-0.01027255 -0.01330915  0.01225669 ...  0.00013655 -0.00261492
  -0.00386216]]
225_3.tif
../icdar2013_benchmarking_dataset/225_3.tif
../icdar2013_benchmarking_dataset/225_3.tif
[[-0.00537265  0.0082852   0.01347584 ...  0.0099448   0.01318915
   0.00512432]]
240_1.tif
../icdar2013_benchmarking_dataset/240_1.tif
../icdar2013_benchmarking_dataset/240_1.tif
[[ 0.00566694  0.00631062 -0.00288834 ... -0.01159186  0.01693941
   0.01314369]]
057_3.tif
../icdar2013_benchmarking_dataset/057_3.tif
../icdar2013_benchmarking_dataset/057_3.tif
[[-0.01114757 -0.01623859  0.00428063 ...  0.01112336 -0.01140511
  -0.00438495]]
026_1.tif
../icdar2

[[-0.01486401 -0.01014885  0.01314856 ... -0.00993976  0.00311019
   0.0035354 ]]
038_4.tif
../icdar2013_benchmarking_dataset/038_4.tif
../icdar2013_benchmarking_dataset/038_4.tif
[[-0.00876304 -0.00713302  0.01256447 ... -0.01567972 -0.0077575
  -0.00716805]]
195_2.tif
../icdar2013_benchmarking_dataset/195_2.tif
../icdar2013_benchmarking_dataset/195_2.tif
[[ 0.00933966 -0.00696315 -0.00995997 ...  0.01170478  0.01429991
   0.00570501]]
190_1.tif
../icdar2013_benchmarking_dataset/190_1.tif
../icdar2013_benchmarking_dataset/190_1.tif
[[ 0.00782237 -0.0084418  -0.0191857  ...  0.01581807  0.00816024
  -0.00725862]]
047_2.tif
../icdar2013_benchmarking_dataset/047_2.tif
../icdar2013_benchmarking_dataset/047_2.tif
[[-0.00751082  0.01519959  0.01967248 ...  0.00621952  0.00695329
   0.00640834]]
098_1.tif
../icdar2013_benchmarking_dataset/098_1.tif
../icdar2013_benchmarking_dataset/098_1.tif
[[-0.00305233  0.01144803  0.01297029 ...  0.01039438  0.01184099
  -0.00576862]]
164_3.tif
../icdar2

[[-0.00636306 -0.01150787 -0.0066608  ... -0.01372786  0.01291725
   0.0113497 ]]
055_3.tif
../icdar2013_benchmarking_dataset/055_3.tif
../icdar2013_benchmarking_dataset/055_3.tif
[[-0.00248023  0.00464498  0.0081902  ... -0.00415134  0.00402064
   0.00761622]]
061_1.tif
../icdar2013_benchmarking_dataset/061_1.tif
../icdar2013_benchmarking_dataset/061_1.tif
[[ 0.00620453  0.01687686 -0.00623758 ... -0.00823466  0.01744875
   0.00468314]]
083_3.tif
../icdar2013_benchmarking_dataset/083_3.tif
../icdar2013_benchmarking_dataset/083_3.tif
[[-0.00667565 -0.0128919  -0.01214561 ... -0.00205734 -0.01157741
  -0.00839997]]
021_4.tif
../icdar2013_benchmarking_dataset/021_4.tif
../icdar2013_benchmarking_dataset/021_4.tif
[[-0.0094084  -0.01283954  0.01133037 ... -0.01492098  0.00978619
   0.0069407 ]]
148_2.tif
../icdar2013_benchmarking_dataset/148_2.tif
../icdar2013_benchmarking_dataset/148_2.tif
[[ 0.0170524   0.01555125  0.00491839 ... -0.00801374 -0.01099553
  -0.00751494]]
010_4.tif
../icdar

[[-0.00946876 -0.01273566 -0.00699425 ... -0.01242012  0.01025909
   0.00236718]]
249_3.tif
../icdar2013_benchmarking_dataset/249_3.tif
../icdar2013_benchmarking_dataset/249_3.tif
[[-0.01282575 -0.01477232 -0.00697694 ... -0.01832747  0.01213375
   0.00521448]]
187_3.tif
../icdar2013_benchmarking_dataset/187_3.tif
../icdar2013_benchmarking_dataset/187_3.tif
[[-0.00470567 -0.00776847 -0.00901858 ... -0.01728144 -0.00926593
   0.0048244 ]]
001_1.tif
../icdar2013_benchmarking_dataset/001_1.tif
../icdar2013_benchmarking_dataset/001_1.tif
[[ 0.00802103  0.01612437  0.0087098  ...  0.00723737 -0.00999022
  -0.00748201]]
081_4.tif
../icdar2013_benchmarking_dataset/081_4.tif
../icdar2013_benchmarking_dataset/081_4.tif
[[ 0.00864919  0.01802005  0.01300132 ...  0.00496451 -0.00588492
  -0.00618345]]
170_2.tif
../icdar2013_benchmarking_dataset/170_2.tif
../icdar2013_benchmarking_dataset/170_2.tif
[[-0.00721577 -0.01267621 -0.01851964 ...  0.01004321 -0.00712621
  -0.00786917]]
216_1.tif
../icdar

In [32]:
def load_pickle(fileName):
    'Returns the object stored inside the pickle file'
    if fileName.endswith('pkl.gz'):
        with gzip.open(fileName, 'rb') as f:
            desc = pickle.load(f)
    
    return desc

In [37]:
## Before calling this function
## verify that supervectors for train and test have been computed and are present as pickle files in- 
## -folder ../outTrainVoc and ../outTestVoc
def evaluate():
    'We evaluate using TOP-1 evaluation matrix'
    # X is the supervector belonging to test document
    # Y is the supervector belonging to train document
    testDir = '../outTestVoc/'
    Dir = '../outTestVoc/'
    
    total_test_files = 0
    correctly_classified = 0 # Denoted total no. of correctly classified test files
    
    for testFile in os.listdir(testDir):

        label_test = testFile[0:3] # First three letters denote the writer-ID
        # print ('testfile label: %s' %(label_test))
        # X is the supervector belonging to test document
        cos_dist = []
        if testFile.endswith('pkl.gz'):
            X = load_pickle(os.path.join(testDir, testFile))
            total_test_files += 1
        else:
            continue
        for file in os.listdir(Dir):
            # Y is the supervector belonging to train document
            if file.endswith('pkl.gz'):
                Y = load_pickle(os.path.join(Dir, file))
            else:
                continue
            
            # Computing cosine distance between X and Y supervectors
            dist = spdistance.cosine(X, Y)
            cos_dist.append((dist, file))
        
        # sorting cos_dist on the basis of distance
        list.sort(cos_dist)
        # Extracting the v
        # print ("Top-6 Matches: ")
        # print (cos_dist[0:7])
        
        pred_label = cos_dist[1][1] # looking at the 2nd nearest as 1st one is the same document
        pred_label = pred_label[0:3] # First three letters denote the writer-ID
        
        if (pred_label == label_test):
            correctly_classified += 1
        
    return correctly_classified/total_test_files

In [38]:
Top_1_accuracy = evaluate()
print ('TOP-1 Accuracy: %f' %(Top_1_accuracy))

TOP-1 Accuracy: 0.915000
