## Implementation using Fisher Encoding

In [15]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
import scipy.spatial.distance as spdistance
import os
from sklearn import mixture
import math
import copy
import gzip
import pickle

In [2]:
### First step is to extract the features using ROOT_SIFT
class FeatureEx():
    
    def __init__(self):
        self.sift = cv2.xfeatures2d.SIFT_create()
        
    def extract(self, img):
        keypoints, img_features = self.sift.detectAndCompute(img, None)
        return keypoints, img_features
    
    #### For computing ROOT_SIFT feature
    def compute(self, img):
        img = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
        if img is None:
            return "No image"
        
        kpts, descriptors = self.extract(img)
        
        ## Hellinger normalization
        descriptors += np.finfo(np.float32).eps
        descriptors /= np.sum(descriptors, axis=1)[:, np.newaxis]
        descriptors = np.sqrt(descriptors)
        
        return (kpts, descriptors)

In [3]:
def get_feature_matrix(path, max_desc=150000, max_desc_per_file=150):
    'path denotes the path to the image files'
    fEx = FeatureEx() #creating Feature extraction object
    
    directory = path
    
    # features is a matrix of features(row-wise)
    features = np.asarray(())
    
    for file in os.listdir(directory):
        print (file)
        if file.endswith(".tif"): 
            print(os.path.join(directory, file))
            filePath = os.path.join(directory, file)
            
            kpts, descriptors = fEx.compute(filePath)
            if descriptors.shape[0] > max_desc_per_file:
                'select the random max_desc_per_file from the descriptors'
                idx = np.random.randint(descriptors.shape[0], size=max_desc_per_file)
                descriptors = descriptors[idx, :]
                
            if features.size == 0:
                features = descriptors
            else:
                if features.shape[0] + descriptors.shape[0] <= max_desc:
                    features = np.vstack((features, descriptors))
                else:
                    break
        else:
            continue
    
    return features


In [4]:
def compute_gmm_params(features):
    'features is a matrix of features(row-wise) to train gmm model'
    
    ## using default number of clusters = 100 as is used in paper
    gmm = mixture.GaussianMixture(n_components=100, covariance_type='diag')
    gmm.fit(features)
    
    return gmm

In [5]:
# Let max_desc = 150000 for training ubm
# And max_desc_per_file = max_desc/no_of_files = 150000/1000 in this case
features = get_feature_matrix('../icdar2013_benchmarking_dataset/')
gmm = compute_gmm_params(features)

139_4.tif
../icdar2013_benchmarking_dataset/139_4.tif
148_4.tif
../icdar2013_benchmarking_dataset/148_4.tif
163_3.tif
../icdar2013_benchmarking_dataset/163_3.tif
105_1.tif
../icdar2013_benchmarking_dataset/105_1.tif
140_4.tif
../icdar2013_benchmarking_dataset/140_4.tif
207_1.tif
../icdar2013_benchmarking_dataset/207_1.tif
040_3.tif
../icdar2013_benchmarking_dataset/040_3.tif
125_2.tif
../icdar2013_benchmarking_dataset/125_2.tif
012_1.tif
../icdar2013_benchmarking_dataset/012_1.tif
001_3.tif
../icdar2013_benchmarking_dataset/001_3.tif
182_2.tif
../icdar2013_benchmarking_dataset/182_2.tif
092_2.tif
../icdar2013_benchmarking_dataset/092_2.tif
175_2.tif
../icdar2013_benchmarking_dataset/175_2.tif
054_1.tif
../icdar2013_benchmarking_dataset/054_1.tif
099_4.tif
../icdar2013_benchmarking_dataset/099_4.tif
184_4.tif
../icdar2013_benchmarking_dataset/184_4.tif
106_3.tif
../icdar2013_benchmarking_dataset/106_3.tif
070_1.tif
../icdar2013_benchmarking_dataset/070_1.tif
050_2.tif
../icdar2013_bench

005_2.tif
../icdar2013_benchmarking_dataset/005_2.tif
109_2.tif
../icdar2013_benchmarking_dataset/109_2.tif
150_4.tif
../icdar2013_benchmarking_dataset/150_4.tif
194_4.tif
../icdar2013_benchmarking_dataset/194_4.tif
248_3.tif
../icdar2013_benchmarking_dataset/248_3.tif
160_3.tif
../icdar2013_benchmarking_dataset/160_3.tif
139_1.tif
../icdar2013_benchmarking_dataset/139_1.tif
067_2.tif
../icdar2013_benchmarking_dataset/067_2.tif
128_3.tif
../icdar2013_benchmarking_dataset/128_3.tif
212_2.tif
../icdar2013_benchmarking_dataset/212_2.tif
127_2.tif
../icdar2013_benchmarking_dataset/127_2.tif
236_3.tif
../icdar2013_benchmarking_dataset/236_3.tif
231_4.tif
../icdar2013_benchmarking_dataset/231_4.tif
029_2.tif
../icdar2013_benchmarking_dataset/029_2.tif
220_2.tif
../icdar2013_benchmarking_dataset/220_2.tif
189_1.tif
../icdar2013_benchmarking_dataset/189_1.tif
110_1.tif
../icdar2013_benchmarking_dataset/110_1.tif
059_4.tif
../icdar2013_benchmarking_dataset/059_4.tif
205_1.tif
../icdar2013_bench

065_4.tif
../icdar2013_benchmarking_dataset/065_4.tif
226_2.tif
../icdar2013_benchmarking_dataset/226_2.tif
040_2.tif
../icdar2013_benchmarking_dataset/040_2.tif
004_2.tif
../icdar2013_benchmarking_dataset/004_2.tif
130_3.tif
../icdar2013_benchmarking_dataset/130_3.tif
036_3.tif
../icdar2013_benchmarking_dataset/036_3.tif
120_1.tif
../icdar2013_benchmarking_dataset/120_1.tif
019_1.tif
../icdar2013_benchmarking_dataset/019_1.tif
026_4.tif
../icdar2013_benchmarking_dataset/026_4.tif
088_3.tif
../icdar2013_benchmarking_dataset/088_3.tif
003_3.tif
../icdar2013_benchmarking_dataset/003_3.tif
219_2.tif
../icdar2013_benchmarking_dataset/219_2.tif
225_2.tif
../icdar2013_benchmarking_dataset/225_2.tif
160_2.tif
../icdar2013_benchmarking_dataset/160_2.tif
018_3.tif
../icdar2013_benchmarking_dataset/018_3.tif
158_4.tif
../icdar2013_benchmarking_dataset/158_4.tif
072_2.tif
../icdar2013_benchmarking_dataset/072_2.tif
102_1.tif
../icdar2013_benchmarking_dataset/102_1.tif
113_1.tif
../icdar2013_bench

141_4.tif
../icdar2013_benchmarking_dataset/141_4.tif
210_1.tif
../icdar2013_benchmarking_dataset/210_1.tif
209_4.tif
../icdar2013_benchmarking_dataset/209_4.tif
133_4.tif
../icdar2013_benchmarking_dataset/133_4.tif
197_1.tif
../icdar2013_benchmarking_dataset/197_1.tif
005_3.tif
../icdar2013_benchmarking_dataset/005_3.tif
222_2.tif
../icdar2013_benchmarking_dataset/222_2.tif
177_4.tif
../icdar2013_benchmarking_dataset/177_4.tif
005_4.tif
../icdar2013_benchmarking_dataset/005_4.tif
043_3.tif
../icdar2013_benchmarking_dataset/043_3.tif
082_2.tif
../icdar2013_benchmarking_dataset/082_2.tif
032_1.tif
../icdar2013_benchmarking_dataset/032_1.tif
161_2.tif
../icdar2013_benchmarking_dataset/161_2.tif
232_2.tif
../icdar2013_benchmarking_dataset/232_2.tif
240_4.tif
../icdar2013_benchmarking_dataset/240_4.tif
191_4.tif
../icdar2013_benchmarking_dataset/191_4.tif
182_1.tif
../icdar2013_benchmarking_dataset/182_1.tif
080_3.tif
../icdar2013_benchmarking_dataset/080_3.tif
249_4.tif
../icdar2013_bench

136_2.tif
../icdar2013_benchmarking_dataset/136_2.tif
017_3.tif
../icdar2013_benchmarking_dataset/017_3.tif
085_3.tif
../icdar2013_benchmarking_dataset/085_3.tif
016_3.tif
../icdar2013_benchmarking_dataset/016_3.tif
233_3.tif
../icdar2013_benchmarking_dataset/233_3.tif
182_4.tif
../icdar2013_benchmarking_dataset/182_4.tif
151_1.tif
../icdar2013_benchmarking_dataset/151_1.tif
114_4.tif
../icdar2013_benchmarking_dataset/114_4.tif
051_1.tif
../icdar2013_benchmarking_dataset/051_1.tif
233_4.tif
../icdar2013_benchmarking_dataset/233_4.tif
108_1.tif
../icdar2013_benchmarking_dataset/108_1.tif
053_1.tif
../icdar2013_benchmarking_dataset/053_1.tif
030_3.tif
../icdar2013_benchmarking_dataset/030_3.tif
119_3.tif
../icdar2013_benchmarking_dataset/119_3.tif
049_2.tif
../icdar2013_benchmarking_dataset/049_2.tif
196_1.tif
../icdar2013_benchmarking_dataset/196_1.tif
079_2.tif
../icdar2013_benchmarking_dataset/079_2.tif
237_1.tif
../icdar2013_benchmarking_dataset/237_1.tif
116_2.tif
../icdar2013_bench

014_2.tif
../icdar2013_benchmarking_dataset/014_2.tif
146_3.tif
../icdar2013_benchmarking_dataset/146_3.tif
128_2.tif
../icdar2013_benchmarking_dataset/128_2.tif
231_2.tif
../icdar2013_benchmarking_dataset/231_2.tif
197_4.tif
../icdar2013_benchmarking_dataset/197_4.tif
213_2.tif
../icdar2013_benchmarking_dataset/213_2.tif
204_4.tif
../icdar2013_benchmarking_dataset/204_4.tif
137_3.tif
../icdar2013_benchmarking_dataset/137_3.tif
195_1.tif
../icdar2013_benchmarking_dataset/195_1.tif
157_4.tif
../icdar2013_benchmarking_dataset/157_4.tif
095_4.tif
../icdar2013_benchmarking_dataset/095_4.tif
100_2.tif
../icdar2013_benchmarking_dataset/100_2.tif
085_1.tif
../icdar2013_benchmarking_dataset/085_1.tif
097_4.tif
../icdar2013_benchmarking_dataset/097_4.tif
202_2.tif
../icdar2013_benchmarking_dataset/202_2.tif
040_4.tif
../icdar2013_benchmarking_dataset/040_4.tif
245_1.tif
../icdar2013_benchmarking_dataset/245_1.tif
093_3.tif
../icdar2013_benchmarking_dataset/093_3.tif
092_1.tif
../icdar2013_bench

014_3.tif
../icdar2013_benchmarking_dataset/014_3.tif
245_3.tif
../icdar2013_benchmarking_dataset/245_3.tif
047_4.tif
../icdar2013_benchmarking_dataset/047_4.tif
116_4.tif
../icdar2013_benchmarking_dataset/116_4.tif
158_3.tif
../icdar2013_benchmarking_dataset/158_3.tif
239_3.tif
../icdar2013_benchmarking_dataset/239_3.tif
010_2.tif
../icdar2013_benchmarking_dataset/010_2.tif
054_4.tif
../icdar2013_benchmarking_dataset/054_4.tif
002_4.tif
../icdar2013_benchmarking_dataset/002_4.tif
055_3.tif
../icdar2013_benchmarking_dataset/055_3.tif
061_1.tif
../icdar2013_benchmarking_dataset/061_1.tif
083_3.tif
../icdar2013_benchmarking_dataset/083_3.tif
021_4.tif
../icdar2013_benchmarking_dataset/021_4.tif
148_2.tif
../icdar2013_benchmarking_dataset/148_2.tif
010_4.tif
../icdar2013_benchmarking_dataset/010_4.tif
014_1.tif
../icdar2013_benchmarking_dataset/014_1.tif
076_4.tif
../icdar2013_benchmarking_dataset/076_4.tif
225_4.tif
../icdar2013_benchmarking_dataset/225_4.tif
244_3.tif
../icdar2013_bench



In [6]:
def fisher(data, means, weights, posteriors, inv_sqrt_cov): 
    
    components, fd = means.shape

    def encode(i):
        if weights[i] < 1e-6:
            return np.zeros( (fd), dtype=means.dtype),\
                   np.zeros( (fd), dtype=means.dtype) 

        diff = (data - means[i]) * inv_sqrt_cov[i]
        weights_ = np.sum(posteriors[:,i] - weights[i])
        means_ = posteriors[:,i].T.dot( diff )
        covs_ = posteriors[:,i].T.dot( diff*diff - 1 )

        weights_ /= ( len(data) * math.sqrt(weights[i]) )
        means_ /= ( len(data) * math.sqrt(weights[i]) )
        covs_ /= ( len(data) * math.sqrt(2.0*weights[i]) )
        return weights_, means_, covs_

    wk_, uk_, vk_ = zip( *map(encode, range(components)) )
    
    return wk_, uk_, vk_

In [7]:
def fisherEncode(gmm, data, posteriors, fv_components='wmc'):

    inv_sqrt_cov = np.sqrt(1.0 / (gmm.covariances_ + np.finfo(np.float32).eps))
    wk_, uk_, vk_ = fisher( data, gmm.means_, gmm.weights_, posteriors, inv_sqrt_cov)

    wk_ = np.array(wk_)
    uk_ = np.array(uk_)
    vk_ = np.array(vk_)
    components, fd = gmm.means_.shape
    
    enc = []
    if 'w' in fv_components: 
        enc.append(wk_.reshape(1,-1))
    if 'm' in fv_components:
        enc.append(uk_.reshape(1,-1))
    if 'c' in fv_components:
        enc.append(vk_.reshape(1,-1))

    fv = np.concatenate(enc, axis=1) 
    fv = np.sign(fv) * np.sqrt(np.abs(fv))
    fv = preprocessing.normalize(fv)

    return fv

In [8]:
fEx = FeatureEx()
filePath = '../icdar2013_benchmarking_dataset/001_1.tif'
kpts, data = fEx.compute(filePath)

posteriors = gmm.predict_proba(data)
enc = fisherEncode(gmm, data, posteriors)
print(enc)

[[-0.0035409   0.00773942 -0.01005935 ... -0.01015558 -0.01015558
  -0.00152195]]


In [9]:
def ubm_adaptation(path, outDir, gmm):
    'path denotes the path to the image files'
    fEx = FeatureEx() # creating Feature extraction object
    
    directory = path
    
    for file in os.listdir(directory):
        print (file)
        if file.endswith(".tif"): 
            print(os.path.join(directory, file))
            fp = os.path.join(directory, file)
            print(fp)
            kpts, data = fEx.compute(fp)
            
            posteriors = gmm.predict_proba(data)
            enc = fisherEncode(gmm, data, posteriors)
            print(enc)
            
            fileName, file_ext = os.path.splitext(file)
            
            if not os.path.exists(outDir):
                os.makedirs(outDir)
            
            filepath = os.path.join(outDir, fileName + '.pkl.gz')
            with gzip.open(filepath, 'wb') as f:
                pickle.dump(enc, f, -1)
        else:
            continue

In [10]:
ubm_adaptation('../experimental_dataset_2013/', '../outTestVoc', gmm)

105_1.tif
../experimental_dataset_2013/105_1.tif
../experimental_dataset_2013/105_1.tif
[[ 0.00840831  0.00369087 -0.01049119 ... -0.01132745 -0.01132745
   0.00427053]]
040_3.tif
../experimental_dataset_2013/040_3.tif
../experimental_dataset_2013/040_3.tif
[[ 0.00333118 -0.00724851  0.00581523 ... -0.01025803 -0.01025803
   0.00333199]]
125_2.tif
../experimental_dataset_2013/125_2.tif
../experimental_dataset_2013/125_2.tif
[[-0.00953991 -0.00355107 -0.00089571 ... -0.01179086 -0.01179086
   0.00551918]]
092_2.tif
../experimental_dataset_2013/092_2.tif
../experimental_dataset_2013/092_2.tif
[[ 0.0045721  -0.00442019  0.00204728 ... -0.01201011 -0.01201011
  -0.00778345]]
054_1.tif
../experimental_dataset_2013/054_1.tif
../experimental_dataset_2013/054_1.tif
[[ 0.00860515 -0.01022357  0.00989705 ... -0.01198122 -0.01198122
  -0.00681177]]
099_4.tif
../experimental_dataset_2013/099_4.tif
../experimental_dataset_2013/099_4.tif
[[ 0.00289119 -0.00457319  0.00235391 ... -0.01217972 -0.01217

[[-0.00415425 -0.00309706 -0.00263135 ... -0.01359883 -0.01359883
   0.00500963]]
096_3.tif
../experimental_dataset_2013/096_3.tif
../experimental_dataset_2013/096_3.tif
[[-0.01037826 -0.00895929  0.01543457 ... -0.00944426 -0.00944426
  -0.00650751]]
073_3.tif
../experimental_dataset_2013/073_3.tif
../experimental_dataset_2013/073_3.tif
[[-0.00522482  0.00795721  0.0122508  ... -0.01062578 -0.01062578
   0.00274471]]
101_4.tif
../experimental_dataset_2013/101_4.tif
../experimental_dataset_2013/101_4.tif
[[-0.00600068 -0.00617063  0.00111939 ... -0.01377278 -0.01377278
  -0.00651706]]
038_3.tif
../experimental_dataset_2013/038_3.tif
../experimental_dataset_2013/038_3.tif
[[ 0.00721439 -0.00967776  0.00650108 ... -0.01227904 -0.01227904
   0.00732664]]
077_1.tif
../experimental_dataset_2013/077_1.tif
../experimental_dataset_2013/077_1.tif
[[ 0.00492081  0.00421546  0.01041059 ... -0.0098333  -0.0098333
  -0.00088581]]
102_3.tif
../experimental_dataset_2013/102_3.tif
../experimental_data

[[-0.00984041 -0.00842526 -0.00456544 ... -0.0151026  -0.0151026
   0.0080729 ]]
039_1.tif
../experimental_dataset_2013/039_1.tif
../experimental_dataset_2013/039_1.tif
[[ 0.00918043  0.00869113 -0.00779629 ... -0.01045619 -0.01045619
   0.00321187]]
088_4.tif
../experimental_dataset_2013/088_4.tif
../experimental_dataset_2013/088_4.tif
[[-0.00437845 -0.00753612 -0.00672941 ... -0.01311313 -0.01311313
  -0.00178605]]
088_1.tif
../experimental_dataset_2013/088_1.tif
../experimental_dataset_2013/088_1.tif
[[ 0.0110686  -0.0066378  -0.00534122 ... -0.01187832 -0.01187832
  -0.00217317]]
062_4.tif
../experimental_dataset_2013/062_4.tif
../experimental_dataset_2013/062_4.tif
[[ 0.00358894  0.00365423 -0.009503   ... -0.01121968 -0.01121968
  -0.00249072]]
055_4.tif
../experimental_dataset_2013/055_4.tif
../experimental_dataset_2013/055_4.tif
[[ 0.00535664 -0.00277004 -0.00625846 ... -0.01078019 -0.01078019
   0.00934961]]
068_3.tif
../experimental_dataset_2013/068_3.tif
../experimental_data

[[ 0.0086868   0.00497521 -0.01051313 ... -0.01226288 -0.01226288
   0.0109098 ]]
058_3.tif
../experimental_dataset_2013/058_3.tif
../experimental_dataset_2013/058_3.tif
[[ 0.00058761  0.00766668 -0.00619596 ... -0.01036854 -0.01036854
  -0.00745032]]
053_4.tif
../experimental_dataset_2013/053_4.tif
../experimental_dataset_2013/053_4.tif
[[-0.00734259 -0.00558674  0.01803413 ... -0.01078264 -0.01078264
  -0.00323224]]
103_3.tif
../experimental_dataset_2013/103_3.tif
../experimental_dataset_2013/103_3.tif
[[ 0.01015847  0.00406719  0.00895972 ... -0.0138333  -0.0138333
   0.00702094]]
064_4.tif
../experimental_dataset_2013/064_4.tif
../experimental_dataset_2013/064_4.tif
[[-0.00968039 -0.0051559  -0.00585591 ... -0.00886774 -0.00886774
   0.00356085]]
086_1.tif
../experimental_dataset_2013/086_1.tif
../experimental_dataset_2013/086_1.tif
[[-0.00797205 -0.0052856   0.00649646 ... -0.01137513 -0.01137513
   0.00144124]]
079_3.tif
../experimental_dataset_2013/079_3.tif
../experimental_data

[[ 0.00615906  0.00038432 -0.01046516 ... -0.01132692 -0.01132692
   0.0026054 ]]
125_4.tif
../experimental_dataset_2013/125_4.tif
../experimental_dataset_2013/125_4.tif
[[-0.00944036 -0.00724854  0.00523408 ... -0.01286959 -0.01286959
   0.00636768]]
094_1.tif
../experimental_dataset_2013/094_1.tif
../experimental_dataset_2013/094_1.tif
[[ 0.00774515  0.00966871 -0.0116157  ... -0.0112146  -0.0112146
  -0.00166203]]
029_3.tif
../experimental_dataset_2013/029_3.tif
../experimental_dataset_2013/029_3.tif
[[-0.00698947 -0.00553289 -0.00651229 ... -0.01051309 -0.01051309
   0.00493973]]
083_1.tif
../experimental_dataset_2013/083_1.tif
../experimental_dataset_2013/083_1.tif
[[ 0.00801658 -0.00876264 -0.0068676  ... -0.0142542  -0.0142542
   0.0053105 ]]
085_4.tif
../experimental_dataset_2013/085_4.tif
../experimental_dataset_2013/085_4.tif
[[-0.00957033 -0.00877862  0.00841537 ... -0.0135159  -0.0135159
  -0.00175708]]
046_4.tif
../experimental_dataset_2013/046_4.tif
../experimental_datase

[[-0.00505091 -0.00660987  0.00603799 ... -0.01188726 -0.01188726
   0.00364746]]
085_3.tif
../experimental_dataset_2013/085_3.tif
../experimental_dataset_2013/085_3.tif
[[-0.00527527 -0.00601402  0.00659519 ... -0.01407773 -0.01407773
   0.01070415]]
114_4.tif
../experimental_dataset_2013/114_4.tif
../experimental_dataset_2013/114_4.tif
[[-0.00159492 -0.00632005  0.01182845 ... -0.01455459 -0.01455459
  -0.0096194 ]]
051_1.tif
../experimental_dataset_2013/051_1.tif
../experimental_dataset_2013/051_1.tif
[[ 0.00794762  0.00612609 -0.01070304 ... -0.00863033 -0.00863033
   0.00350506]]
108_1.tif
../experimental_dataset_2013/108_1.tif
../experimental_dataset_2013/108_1.tif
[[-0.00191992 -0.00817702 -0.00268749 ... -0.01087512 -0.01087512
  -0.0080773 ]]
053_1.tif
../experimental_dataset_2013/053_1.tif
../experimental_dataset_2013/053_1.tif
[[-0.00684533  0.00494966  0.01064027 ... -0.01346829 -0.01346829
  -0.00798181]]
030_3.tif
../experimental_dataset_2013/030_3.tif
../experimental_dat

[[ 0.00177238 -0.00555912 -0.00708083 ... -0.01088156 -0.01088156
  -0.00803279]]
073_4.tif
../experimental_dataset_2013/073_4.tif
../experimental_dataset_2013/073_4.tif
[[ 0.01091899  0.01341869 -0.00399869 ... -0.01188843 -0.01188843
  -0.00971484]]
118_4.tif
../experimental_dataset_2013/118_4.tif
../experimental_dataset_2013/118_4.tif
[[-0.00574824 -0.00866266 -0.00764652 ... -0.01422217 -0.01422217
   0.00732293]]
076_3.tif
../experimental_dataset_2013/076_3.tif
../experimental_dataset_2013/076_3.tif
[[ 0.01116293  0.00275678 -0.01107087 ... -0.01243958 -0.01243958
   0.00719667]]
119_1.tif
../experimental_dataset_2013/119_1.tif
../experimental_dataset_2013/119_1.tif
[[-0.00720298  0.00176101 -0.00824779 ... -0.01492754 -0.01492754
  -0.00952894]]
068_4.tif
../experimental_dataset_2013/068_4.tif
../experimental_dataset_2013/068_4.tif
[[-0.00438381 -0.00846959 -0.00958012 ... -0.01305062 -0.01305062
  -0.00761034]]
107_1.tif
../experimental_dataset_2013/107_1.tif
../experimental_dat

[[ 0.00733256 -0.00681496 -0.01111447 ... -0.01065724 -0.01065724
   0.00703491]]
103_1.tif
../experimental_dataset_2013/103_1.tif
../experimental_dataset_2013/103_1.tif
[[ 0.00911287  0.00455189  0.00686816 ... -0.01372485 -0.01372485
   0.005681  ]]
071_3.tif
../experimental_dataset_2013/071_3.tif
../experimental_dataset_2013/071_3.tif
[[ 0.00136145  0.01279479 -0.00761785 ... -0.0099063  -0.0099063
  -0.00285491]]
102_2.tif
../experimental_dataset_2013/102_2.tif
../experimental_dataset_2013/102_2.tif
[[ 0.00640017  0.00881132 -0.00043747 ... -0.00913499 -0.00913499
  -0.00326466]]
112_4.tif
../experimental_dataset_2013/112_4.tif
../experimental_dataset_2013/112_4.tif
[[ 0.00408902  0.00759006 -0.00893326 ... -0.01237631 -0.01237631
   0.00993259]]
092_4.tif
../experimental_dataset_2013/092_4.tif
../experimental_dataset_2013/092_4.tif
[[-0.00360609 -0.00597874  0.00480882 ... -0.01199368 -0.01199368
  -0.00406514]]
057_3.tif
../experimental_dataset_2013/057_3.tif
../experimental_data

[[ 0.00590094  0.00445145 -0.01056823 ... -0.01102336 -0.01102336
   0.00777399]]
080_4.tif
../experimental_dataset_2013/080_4.tif
../experimental_dataset_2013/080_4.tif
[[-0.00856271 -0.00760035 -0.01006128 ... -0.0109251  -0.0109251
  -0.0012077 ]]
081_4.tif
../experimental_dataset_2013/081_4.tif
../experimental_dataset_2013/081_4.tif
[[ 0.0027267   0.00458915 -0.00991748 ... -0.01349327 -0.01349327
   0.00671909]]
112_1.tif
../experimental_dataset_2013/112_1.tif
../experimental_dataset_2013/112_1.tif
[[ 0.00473306 -0.00690129 -0.00660821 ... -0.01259143 -0.01259143
  -0.00766158]]
031_4.tif
../experimental_dataset_2013/031_4.tif
../experimental_dataset_2013/031_4.tif
[[ 0.0108125   0.00698081 -0.00928294 ... -0.01164431 -0.01164431
   0.00514352]]
063_3.tif
../experimental_dataset_2013/063_3.tif
../experimental_dataset_2013/063_3.tif
[[ 0.00144256  0.0090405  -0.01156934 ... -0.0138309  -0.0138309
  -0.00708428]]
044_1.tif
../experimental_dataset_2013/044_1.tif
../experimental_datas

In [11]:
enc = np.array([1, 2, 3])
enc = enc.reshape(1, 3)
directory = '../outVoc'
if not os.path.exists(directory):
    os.makedirs(directory)
filePath = '../outVoc/file.pkl.gz'
with gzip.open(filePath, 'wb') as f:
    pickle.dump(enc, f, -1)

In [12]:
ubm_adaptation('../icdar2013_benchmarking_dataset/', '../outTrainVoc', gmm)

139_4.tif
../icdar2013_benchmarking_dataset/139_4.tif
../icdar2013_benchmarking_dataset/139_4.tif
[[-0.00795373 -0.00625767 -0.00916919 ... -0.01787285 -0.01787285
  -0.0107931 ]]
148_4.tif
../icdar2013_benchmarking_dataset/148_4.tif
../icdar2013_benchmarking_dataset/148_4.tif
[[ 0.00514727  0.01348585  0.00586502 ... -0.01167209 -0.01167209
  -0.00371999]]
163_3.tif
../icdar2013_benchmarking_dataset/163_3.tif
../icdar2013_benchmarking_dataset/163_3.tif
[[-0.00386321 -0.00236984 -0.00758848 ... -0.01258279 -0.01258279
   0.01142516]]
105_1.tif
../icdar2013_benchmarking_dataset/105_1.tif
../icdar2013_benchmarking_dataset/105_1.tif
[[-0.00408164 -0.00720445  0.01051476 ... -0.00997892 -0.00997892
  -0.00523678]]
140_4.tif
../icdar2013_benchmarking_dataset/140_4.tif
../icdar2013_benchmarking_dataset/140_4.tif
[[ 0.0091166   0.01126902 -0.01149082 ... -0.01186596 -0.01186596
   0.00805487]]
207_1.tif
../icdar2013_benchmarking_dataset/207_1.tif
../icdar2013_benchmarking_dataset/207_1.tif
[[

[[-0.0025182  -0.00732392 -0.00700115 ... -0.0127936  -0.0127936
  -0.00774569]]
112_3.tif
../icdar2013_benchmarking_dataset/112_3.tif
../icdar2013_benchmarking_dataset/112_3.tif
[[-0.00916784 -0.00641937 -0.00610711 ... -0.01688574 -0.01688574
   0.01110558]]
188_1.tif
../icdar2013_benchmarking_dataset/188_1.tif
../icdar2013_benchmarking_dataset/188_1.tif
[[ 0.00594996 -0.00627069 -0.01125744 ... -0.00973068 -0.00973068
  -0.00627455]]
148_1.tif
../icdar2013_benchmarking_dataset/148_1.tif
../icdar2013_benchmarking_dataset/148_1.tif
[[ 0.0079882   0.00998818 -0.00728312 ... -0.01357736 -0.01357736
  -0.00503205]]
111_3.tif
../icdar2013_benchmarking_dataset/111_3.tif
../icdar2013_benchmarking_dataset/111_3.tif
[[-0.00188446 -0.00637048 -0.00816933 ... -0.0100156  -0.0100156
   0.00674323]]
169_1.tif
../icdar2013_benchmarking_dataset/169_1.tif
../icdar2013_benchmarking_dataset/169_1.tif
[[ 0.00561529 -0.00751005 -0.00571607 ... -0.01049199 -0.01049199
   0.00799882]]
144_3.tif
../icdar20

[[ 0.01407384  0.00256732 -0.01086221 ... -0.0134777  -0.0134777
   0.00321324]]
069_3.tif
../icdar2013_benchmarking_dataset/069_3.tif
../icdar2013_benchmarking_dataset/069_3.tif
[[ 0.00428255  0.00339838 -0.01018559 ... -0.01364827 -0.01364827
  -0.007359  ]]
037_2.tif
../icdar2013_benchmarking_dataset/037_2.tif
../icdar2013_benchmarking_dataset/037_2.tif
[[ 0.01172496  0.0053365  -0.01098296 ... -0.01195763 -0.01195763
   0.00443179]]
077_2.tif
../icdar2013_benchmarking_dataset/077_2.tif
../icdar2013_benchmarking_dataset/077_2.tif
[[-0.00476153  0.00554194 -0.00541748 ... -0.01065084 -0.01065084
   0.00504118]]
087_4.tif
../icdar2013_benchmarking_dataset/087_4.tif
../icdar2013_benchmarking_dataset/087_4.tif
[[-0.0094782   0.0059826  -0.00888335 ... -0.01053066 -0.01053066
  -0.00313415]]
169_4.tif
../icdar2013_benchmarking_dataset/169_4.tif
../icdar2013_benchmarking_dataset/169_4.tif
[[ 0.00419773  0.00438008  0.00702    ... -0.01317184 -0.01317184
   0.00745248]]
088_2.tif
../icdar2

[[ 0.00270653 -0.00695238 -0.00930646 ... -0.01218497 -0.01218497
   0.00464299]]
115_2.tif
../icdar2013_benchmarking_dataset/115_2.tif
../icdar2013_benchmarking_dataset/115_2.tif
[[-0.00700923  0.01221661 -0.00324831 ... -0.01079675 -0.01079675
   0.00257838]]
172_3.tif
../icdar2013_benchmarking_dataset/172_3.tif
../icdar2013_benchmarking_dataset/172_3.tif
[[-0.00606355  0.0038586  -0.00612062 ... -0.01190977 -0.01190977
   0.00351792]]
105_2.tif
../icdar2013_benchmarking_dataset/105_2.tif
../icdar2013_benchmarking_dataset/105_2.tif
[[-0.00646561 -0.008617   -0.00323381 ... -0.01200645 -0.01200645
  -0.0073796 ]]
064_3.tif
../icdar2013_benchmarking_dataset/064_3.tif
../icdar2013_benchmarking_dataset/064_3.tif
[[-0.00730376  0.00752254  0.00595607 ... -0.00922391 -0.00922391
   0.0006836 ]]
183_2.tif
../icdar2013_benchmarking_dataset/183_2.tif
../icdar2013_benchmarking_dataset/183_2.tif
[[-0.00422856 -0.0080077  -0.00952374 ... -0.01592196 -0.01592196
   0.01001475]]
084_2.tif
../icdar

[[ 0.01152635 -0.00195851 -0.00819855 ... -0.01069961 -0.01069961
   0.0023075 ]]
098_3.tif
../icdar2013_benchmarking_dataset/098_3.tif
../icdar2013_benchmarking_dataset/098_3.tif
[[-0.00494224  0.004362   -0.01001283 ... -0.01408226 -0.01408226
  -0.01004733]]
171_2.tif
../icdar2013_benchmarking_dataset/171_2.tif
../icdar2013_benchmarking_dataset/171_2.tif
[[-0.00446402  0.00708159 -0.00642054 ... -0.01057795 -0.01057795
   0.00199785]]
193_4.tif
../icdar2013_benchmarking_dataset/193_4.tif
../icdar2013_benchmarking_dataset/193_4.tif
[[-0.00423327 -0.00275469 -0.00258091 ... -0.01197345 -0.01197345
   0.00509318]]
090_2.tif
../icdar2013_benchmarking_dataset/090_2.tif
../icdar2013_benchmarking_dataset/090_2.tif
[[-0.0039824   0.00748666  0.01541357 ... -0.01137866 -0.01137866
   0.0019193 ]]
041_4.tif
../icdar2013_benchmarking_dataset/041_4.tif
../icdar2013_benchmarking_dataset/041_4.tif
[[ 0.00503968 -0.00834393  0.01011406 ... -0.01429602 -0.01429602
  -0.00513585]]
119_4.tif
../icdar

[[-0.01087939 -0.00502349  0.00768883 ... -0.00933973 -0.00933973
  -0.00662676]]
185_3.tif
../icdar2013_benchmarking_dataset/185_3.tif
../icdar2013_benchmarking_dataset/185_3.tif
[[ 0.00363018  0.00813364  0.00570937 ... -0.01288092 -0.01288092
   0.00950769]]
062_4.tif
../icdar2013_benchmarking_dataset/062_4.tif
../icdar2013_benchmarking_dataset/062_4.tif
[[-0.00783318 -0.00614963  0.00938595 ... -0.00905541 -0.00905541
   0.00174705]]
201_4.tif
../icdar2013_benchmarking_dataset/201_4.tif
../icdar2013_benchmarking_dataset/201_4.tif
[[-0.01071326 -0.00679413  0.01163224 ... -0.01437444 -0.01437444
   0.00676074]]
055_4.tif
../icdar2013_benchmarking_dataset/055_4.tif
../icdar2013_benchmarking_dataset/055_4.tif
[[-0.00596344  0.00775955  0.00357195 ... -0.01152525 -0.01152525
  -0.00240516]]
022_2.tif
../icdar2013_benchmarking_dataset/022_2.tif
../icdar2013_benchmarking_dataset/022_2.tif
[[-0.00377299 -0.00711782 -0.005851   ... -0.01182836 -0.01182836
   0.00652769]]
213_4.tif
../icdar

[[ 0.00273857 -0.00516155 -0.00615721 ... -0.01074692 -0.01074692
   0.00518892]]
190_2.tif
../icdar2013_benchmarking_dataset/190_2.tif
../icdar2013_benchmarking_dataset/190_2.tif
[[-0.0074872   0.00238644  0.01506313 ... -0.01096154 -0.01096154
   0.00109461]]
104_4.tif
../icdar2013_benchmarking_dataset/104_4.tif
../icdar2013_benchmarking_dataset/104_4.tif
[[-0.00829236 -0.0072531  -0.00946937 ... -0.01198363 -0.01198363
  -0.00588292]]
179_1.tif
../icdar2013_benchmarking_dataset/179_1.tif
../icdar2013_benchmarking_dataset/179_1.tif
[[-0.00686277 -0.00346205  0.01409949 ... -0.01106436 -0.01106436
  -0.0038516 ]]
091_2.tif
../icdar2013_benchmarking_dataset/091_2.tif
../icdar2013_benchmarking_dataset/091_2.tif
[[ 0.00908826  0.00691496 -0.00985558 ... -0.01238232 -0.01238232
   0.00861948]]
213_3.tif
../icdar2013_benchmarking_dataset/213_3.tif
../icdar2013_benchmarking_dataset/213_3.tif
[[-0.00216749 -0.00772496  0.00809638 ... -0.01133489 -0.01133489
  -0.00680574]]
022_1.tif
../icdar

[[ 0.00401636 -0.00998652 -0.00959861 ... -0.00860364 -0.00860364
   0.00915691]]
082_3.tif
../icdar2013_benchmarking_dataset/082_3.tif
../icdar2013_benchmarking_dataset/082_3.tif
[[-0.01086272  0.00639353  0.0059186  ... -0.01723874 -0.01723874
  -0.00381997]]
117_1.tif
../icdar2013_benchmarking_dataset/117_1.tif
../icdar2013_benchmarking_dataset/117_1.tif
[[-0.00384065 -0.0077949   0.00939597 ... -0.00994803 -0.00994803
   0.00635065]]
228_1.tif
../icdar2013_benchmarking_dataset/228_1.tif
../icdar2013_benchmarking_dataset/228_1.tif
[[ 0.00962421 -0.00720223 -0.00633599 ... -0.01000021 -0.01000021
  -0.00098362]]
031_2.tif
../icdar2013_benchmarking_dataset/031_2.tif
../icdar2013_benchmarking_dataset/031_2.tif
[[-0.00344167  0.00258535  0.01132723 ... -0.01206711 -0.01206711
  -0.00655805]]
120_3.tif
../icdar2013_benchmarking_dataset/120_3.tif
../icdar2013_benchmarking_dataset/120_3.tif
[[-0.00177644  0.01355809  0.00512912 ... -0.01322703 -0.01322703
  -0.00696978]]
216_2.tif
../icdar

[[-0.00591611  0.00610892  0.00323418 ... -0.01129792 -0.01129792
  -0.00571809]]
231_1.tif
../icdar2013_benchmarking_dataset/231_1.tif
../icdar2013_benchmarking_dataset/231_1.tif
[[ 0.01202295  0.00407928 -0.01038669 ... -0.01148038 -0.01148038
  -0.00298971]]
096_2.tif
../icdar2013_benchmarking_dataset/096_2.tif
../icdar2013_benchmarking_dataset/096_2.tif
[[-0.00692155 -0.00753939  0.00891991 ... -0.01505293 -0.01505293
   0.00567285]]
248_4.tif
../icdar2013_benchmarking_dataset/248_4.tif
../icdar2013_benchmarking_dataset/248_4.tif
[[ 0.00213993  0.00501316 -0.01043253 ... -0.01169782 -0.01169782
   0.00856771]]
135_2.tif
../icdar2013_benchmarking_dataset/135_2.tif
../icdar2013_benchmarking_dataset/135_2.tif
[[ 0.00735715  0.00668107 -0.00203555 ... -0.0112822  -0.0112822
   0.00246246]]
085_2.tif
../icdar2013_benchmarking_dataset/085_2.tif
../icdar2013_benchmarking_dataset/085_2.tif
[[-0.0101367  -0.00259757  0.00545831 ... -0.01084013 -0.01084013
   0.00268889]]
016_1.tif
../icdar2

[[-0.00736777 -0.00881963  0.0164326  ... -0.01111419 -0.01111419
   0.00339148]]
232_1.tif
../icdar2013_benchmarking_dataset/232_1.tif
../icdar2013_benchmarking_dataset/232_1.tif
[[-0.00385153  0.00283966  0.01315155 ... -0.007737   -0.007737
  -0.00464144]]
244_2.tif
../icdar2013_benchmarking_dataset/244_2.tif
../icdar2013_benchmarking_dataset/244_2.tif
[[ 0.0068222   0.01116698 -0.00971843 ... -0.01273259 -0.01273259
  -0.00531642]]
039_3.tif
../icdar2013_benchmarking_dataset/039_3.tif
../icdar2013_benchmarking_dataset/039_3.tif
[[ 0.00320658 -0.00596274 -0.00813925 ... -0.01135236 -0.01135236
   0.00724549]]
010_3.tif
../icdar2013_benchmarking_dataset/010_3.tif
../icdar2013_benchmarking_dataset/010_3.tif
[[-0.01039113 -0.00754545 -0.00409277 ... -0.01419221 -0.01419221
   0.00820676]]
095_3.tif
../icdar2013_benchmarking_dataset/095_3.tif
../icdar2013_benchmarking_dataset/095_3.tif
[[-0.0103387  -0.00621885  0.01252429 ... -0.01333237 -0.01333237
   0.00695982]]
203_4.tif
../icdar20

[[ 0.01173668 -0.00528153 -0.01048318 ... -0.01164463 -0.01164463
   0.00732102]]
005_3.tif
../icdar2013_benchmarking_dataset/005_3.tif
../icdar2013_benchmarking_dataset/005_3.tif
[[ 0.0021776  -0.00397883 -0.0080265  ... -0.01151411 -0.01151411
   0.00432722]]
222_2.tif
../icdar2013_benchmarking_dataset/222_2.tif
../icdar2013_benchmarking_dataset/222_2.tif
[[ 0.01592273  0.00551413 -0.0105166  ... -0.01002102 -0.01002102
  -0.00658276]]
177_4.tif
../icdar2013_benchmarking_dataset/177_4.tif
../icdar2013_benchmarking_dataset/177_4.tif
[[ 0.00412932 -0.00647296  0.00785439 ... -0.01226187 -0.01226187
  -0.00449965]]
005_4.tif
../icdar2013_benchmarking_dataset/005_4.tif
../icdar2013_benchmarking_dataset/005_4.tif
[[ 0.00318023  0.00454373 -0.0048663  ... -0.0120644  -0.0120644
  -0.00549076]]
043_3.tif
../icdar2013_benchmarking_dataset/043_3.tif
../icdar2013_benchmarking_dataset/043_3.tif
[[-0.0030083   0.00926811 -0.00623154 ... -0.01171198 -0.01171198
  -0.00397074]]
082_2.tif
../icdar2

[[-0.00491343  0.00734346 -0.00803479 ... -0.0130209  -0.0130209
   0.00780469]]
011_4.tif
../icdar2013_benchmarking_dataset/011_4.tif
../icdar2013_benchmarking_dataset/011_4.tif
[[-0.00407118 -0.0079412   0.00710068 ... -0.01317614 -0.01317614
   0.00842459]]
039_2.tif
../icdar2013_benchmarking_dataset/039_2.tif
../icdar2013_benchmarking_dataset/039_2.tif
[[ 0.00699662 -0.00779209 -0.00812462 ... -0.01123573 -0.01123573
  -0.00560423]]
071_2.tif
../icdar2013_benchmarking_dataset/071_2.tif
../icdar2013_benchmarking_dataset/071_2.tif
[[ 0.00378068 -0.00822621 -0.00137011 ... -0.01234602 -0.01234602
  -0.00677723]]
035_4.tif
../icdar2013_benchmarking_dataset/035_4.tif
../icdar2013_benchmarking_dataset/035_4.tif
[[ 0.00682299  0.00661433  0.01193221 ... -0.00992968 -0.00992968
   0.00264943]]
157_3.tif
../icdar2013_benchmarking_dataset/157_3.tif
../icdar2013_benchmarking_dataset/157_3.tif
[[-0.00899392 -0.00519431  0.01235358 ... -0.01356555 -0.01356555
   0.00374895]]
124_4.tif
../icdar2

[[ 0.00088706  0.00657627  0.00776309 ... -0.01080543 -0.01080543
  -0.00356429]]
187_4.tif
../icdar2013_benchmarking_dataset/187_4.tif
../icdar2013_benchmarking_dataset/187_4.tif
[[-0.00671174 -0.0073032   0.00943631 ... -0.0094893  -0.0094893
   0.00435106]]
009_1.tif
../icdar2013_benchmarking_dataset/009_1.tif
../icdar2013_benchmarking_dataset/009_1.tif
[[ 0.00499764  0.00263422 -0.01091548 ... -0.01284416 -0.01284416
   0.00575896]]
229_2.tif
../icdar2013_benchmarking_dataset/229_2.tif
../icdar2013_benchmarking_dataset/229_2.tif
[[-0.0095958  -0.00444688  0.01020945 ... -0.01230785 -0.01230785
  -0.00798303]]
129_3.tif
../icdar2013_benchmarking_dataset/129_3.tif
../icdar2013_benchmarking_dataset/129_3.tif
[[-0.00771516 -0.00446749  0.00670368 ... -0.01252084 -0.01252084
   0.00411808]]
108_4.tif
../icdar2013_benchmarking_dataset/108_4.tif
../icdar2013_benchmarking_dataset/108_4.tif
[[-0.00466816  0.00646729 -0.01145319 ... -0.01292649 -0.01292649
  -0.00558887]]
074_2.tif
../icdar2

[[-0.00597372 -0.00847889  0.01060734 ... -0.010182   -0.010182
  -0.00235777]]
033_1.tif
../icdar2013_benchmarking_dataset/033_1.tif
../icdar2013_benchmarking_dataset/033_1.tif
[[ 0.00838019  0.00260342  0.01262644 ... -0.00981187 -0.00981187
   0.00820371]]
059_2.tif
../icdar2013_benchmarking_dataset/059_2.tif
../icdar2013_benchmarking_dataset/059_2.tif
[[ 0.01226414  0.00770432 -0.01137753 ... -0.01351551 -0.01351551
  -0.00367687]]
235_4.tif
../icdar2013_benchmarking_dataset/235_4.tif
../icdar2013_benchmarking_dataset/235_4.tif
[[-0.0084642   0.00493185 -0.00891697 ... -0.01276127 -0.01276127
   0.00234452]]
002_3.tif
../icdar2013_benchmarking_dataset/002_3.tif
../icdar2013_benchmarking_dataset/002_3.tif
[[ 0.00756398  0.0103377  -0.01066303 ... -0.01210511 -0.01210511
   0.00340217]]
145_1.tif
../icdar2013_benchmarking_dataset/145_1.tif
../icdar2013_benchmarking_dataset/145_1.tif
[[ 0.0026742  -0.00685187  0.00905519 ... -0.00898826 -0.00898826
  -0.00014755]]
189_3.tif
../icdar20

[[-0.00305325  0.00966685  0.00407406 ... -0.01167053 -0.01167053
   0.00819386]]
187_2.tif
../icdar2013_benchmarking_dataset/187_2.tif
../icdar2013_benchmarking_dataset/187_2.tif
[[-0.00827443 -0.00884004  0.00956832 ... -0.01111468 -0.01111468
  -0.00568476]]
238_4.tif
../icdar2013_benchmarking_dataset/238_4.tif
../icdar2013_benchmarking_dataset/238_4.tif
[[ 0.00612611 -0.00957304  0.0085494  ... -0.01084288 -0.01084288
   0.00262528]]
063_2.tif
../icdar2013_benchmarking_dataset/063_2.tif
../icdar2013_benchmarking_dataset/063_2.tif
[[ 0.00944077  0.00659506 -0.01007939 ... -0.01043297 -0.01043297
  -0.00385564]]
131_1.tif
../icdar2013_benchmarking_dataset/131_1.tif
../icdar2013_benchmarking_dataset/131_1.tif
[[-0.00638235  0.01084354 -0.01114784 ... -0.01516542 -0.01516542
   0.00459688]]
142_4.tif
../icdar2013_benchmarking_dataset/142_4.tif
../icdar2013_benchmarking_dataset/142_4.tif
[[-0.00524401 -0.00388757  0.01018804 ... -0.0121277  -0.0121277
   0.00827924]]
138_2.tif
../icdar2

[[-0.0054735   0.01069408  0.00372665 ... -0.01318777 -0.01318777
  -0.00482963]]
111_4.tif
../icdar2013_benchmarking_dataset/111_4.tif
../icdar2013_benchmarking_dataset/111_4.tif
[[ 0.00880821 -0.00899717 -0.0043608  ... -0.01104747 -0.01104747
  -0.0036679 ]]
094_3.tif
../icdar2013_benchmarking_dataset/094_3.tif
../icdar2013_benchmarking_dataset/094_3.tif
[[-0.00628088  0.00890706 -0.0062206  ... -0.01183578 -0.01183578
   0.0077361 ]]
094_4.tif
../icdar2013_benchmarking_dataset/094_4.tif
../icdar2013_benchmarking_dataset/094_4.tif
[[-0.00354911  0.00608032 -0.00479739 ... -0.01173631 -0.01173631
  -0.00551651]]
247_1.tif
../icdar2013_benchmarking_dataset/247_1.tif
../icdar2013_benchmarking_dataset/247_1.tif
[[ 0.00735558  0.01278713 -0.01176208 ... -0.01324073 -0.01324073
  -0.00663312]]
132_4.tif
../icdar2013_benchmarking_dataset/132_4.tif
../icdar2013_benchmarking_dataset/132_4.tif
[[-0.00990529 -0.00733068  0.01487438 ... -0.01265807 -0.01265807
  -0.0031439 ]]
224_4.tif
../icdar

[[ 0.00393042  0.00506468 -0.00761987 ... -0.0113825  -0.0113825
  -0.00563119]]
195_4.tif
../icdar2013_benchmarking_dataset/195_4.tif
../icdar2013_benchmarking_dataset/195_4.tif
[[-0.00682934 -0.00419175 -0.00630008 ... -0.01529847 -0.01529847
   0.00297591]]
073_1.tif
../icdar2013_benchmarking_dataset/073_1.tif
../icdar2013_benchmarking_dataset/073_1.tif
[[-0.00842292 -0.00906922  0.01042959 ... -0.01373247 -0.01373247
   0.00610579]]
091_4.tif
../icdar2013_benchmarking_dataset/091_4.tif
../icdar2013_benchmarking_dataset/091_4.tif
[[-0.00734666  0.00252216 -0.00561007 ... -0.01362316 -0.01362316
   0.00552793]]
069_4.tif
../icdar2013_benchmarking_dataset/069_4.tif
../icdar2013_benchmarking_dataset/069_4.tif
[[ 0.01169636  0.0060668  -0.01001901 ... -0.01447173 -0.01447173
  -0.01004581]]
192_2.tif
../icdar2013_benchmarking_dataset/192_2.tif
../icdar2013_benchmarking_dataset/192_2.tif
[[-0.00549197  0.00607002 -0.00318361 ... -0.01257692 -0.01257692
   0.00431518]]
118_3.tif
../icdar2

[[ 0.01148826  0.01078234 -0.00734445 ... -0.01158715 -0.01158715
   0.00877839]]
241_4.tif
../icdar2013_benchmarking_dataset/241_4.tif
../icdar2013_benchmarking_dataset/241_4.tif
[[-0.00259235 -0.00490097  0.00387485 ... -0.01332927 -0.01332927
  -0.00300646]]
115_4.tif
../icdar2013_benchmarking_dataset/115_4.tif
../icdar2013_benchmarking_dataset/115_4.tif
[[-0.00703403  0.00695803  0.01026284 ... -0.01224985 -0.01224985
   0.00443148]]
078_2.tif
../icdar2013_benchmarking_dataset/078_2.tif
../icdar2013_benchmarking_dataset/078_2.tif
[[ 0.00925968 -0.00748209 -0.00916721 ... -0.01336228 -0.01336228
  -0.00676864]]
042_2.tif
../icdar2013_benchmarking_dataset/042_2.tif
../icdar2013_benchmarking_dataset/042_2.tif
[[-0.00443481 -0.0096734  -0.00300312 ... -0.01255176 -0.01255176
   0.00718034]]
122_3.tif
../icdar2013_benchmarking_dataset/122_3.tif
../icdar2013_benchmarking_dataset/122_3.tif
[[-0.01036373  0.00954008  0.01162648 ... -0.01064483 -0.01064483
  -0.00472719]]
030_4.tif
../icdar

[[ 0.00783532  0.0081838  -0.01104573 ... -0.01219462 -0.01219462
   0.00553187]]
112_4.tif
../icdar2013_benchmarking_dataset/112_4.tif
../icdar2013_benchmarking_dataset/112_4.tif
[[-0.00163327  0.00486259 -0.00423908 ... -0.01497352 -0.01497352
  -0.00480683]]
092_4.tif
../icdar2013_benchmarking_dataset/092_4.tif
../icdar2013_benchmarking_dataset/092_4.tif
[[ 0.00410836  0.00690676 -0.00253922 ... -0.01441341 -0.01441341
  -0.00682193]]
225_3.tif
../icdar2013_benchmarking_dataset/225_3.tif
../icdar2013_benchmarking_dataset/225_3.tif
[[ 0.00960168  0.01539347 -0.01128744 ... -0.01516976 -0.01516976
  -0.00525849]]
240_1.tif
../icdar2013_benchmarking_dataset/240_1.tif
../icdar2013_benchmarking_dataset/240_1.tif
[[ 0.00192858 -0.00255452 -0.00924075 ... -0.01204752 -0.01204752
  -0.00737399]]
057_3.tif
../icdar2013_benchmarking_dataset/057_3.tif
../icdar2013_benchmarking_dataset/057_3.tif
[[ 0.00931059 -0.00976729 -0.0102257  ... -0.01514224 -0.01514224
  -0.01034321]]
026_1.tif
../icdar

[[ 0.00851788 -0.00361401 -0.00896842 ... -0.01403285 -0.01403285
  -0.00155853]]
038_4.tif
../icdar2013_benchmarking_dataset/038_4.tif
../icdar2013_benchmarking_dataset/038_4.tif
[[ 0.00947311  0.00134478  0.00318786 ... -0.01050821 -0.01050821
  -0.00624792]]
195_2.tif
../icdar2013_benchmarking_dataset/195_2.tif
../icdar2013_benchmarking_dataset/195_2.tif
[[ 0.00674748 -0.00715201 -0.00485216 ... -0.01405537 -0.01405537
   0.00228403]]
190_1.tif
../icdar2013_benchmarking_dataset/190_1.tif
../icdar2013_benchmarking_dataset/190_1.tif
[[ 0.00251445 -0.00245022  0.01439326 ... -0.009443   -0.009443
  -0.00290444]]
047_2.tif
../icdar2013_benchmarking_dataset/047_2.tif
../icdar2013_benchmarking_dataset/047_2.tif
[[ 0.01024113  0.00672464 -0.0111701  ... -0.01064993 -0.01064993
  -0.00497935]]
098_1.tif
../icdar2013_benchmarking_dataset/098_1.tif
../icdar2013_benchmarking_dataset/098_1.tif
[[ 0.01392544  0.00371332 -0.00994591 ... -0.01312829 -0.01312829
  -0.00808235]]
164_3.tif
../icdar20

[[ 0.01152499  0.00546129 -0.00948118 ... -0.01152151 -0.01152151
   0.00449239]]
055_3.tif
../icdar2013_benchmarking_dataset/055_3.tif
../icdar2013_benchmarking_dataset/055_3.tif
[[ 0.00250598  0.00631917  0.00968949 ... -0.01194583 -0.01194583
   0.0082534 ]]
061_1.tif
../icdar2013_benchmarking_dataset/061_1.tif
../icdar2013_benchmarking_dataset/061_1.tif
[[-0.0008848   0.00913561 -0.00885992 ... -0.01151457 -0.01151457
   0.00403876]]
083_3.tif
../icdar2013_benchmarking_dataset/083_3.tif
../icdar2013_benchmarking_dataset/083_3.tif
[[-0.00654116  0.00119809  0.01563492 ... -0.01170109 -0.01170109
   0.00336271]]
021_4.tif
../icdar2013_benchmarking_dataset/021_4.tif
../icdar2013_benchmarking_dataset/021_4.tif
[[ 0.00524898  0.00350144 -0.00519179 ... -0.01742598 -0.01742598
   0.00440876]]
148_2.tif
../icdar2013_benchmarking_dataset/148_2.tif
../icdar2013_benchmarking_dataset/148_2.tif
[[-0.00460695  0.00284628  0.00287731 ... -0.01057131 -0.01057131
  -0.00466906]]
010_4.tif
../icdar

[[ 0.005289    0.01227311  0.01265163 ... -0.01038496 -0.01038496
  -0.00474487]]
249_3.tif
../icdar2013_benchmarking_dataset/249_3.tif
../icdar2013_benchmarking_dataset/249_3.tif
[[-0.00650496  0.00733737 -0.00925316 ... -0.00981768 -0.00981768
  -0.00378217]]
187_3.tif
../icdar2013_benchmarking_dataset/187_3.tif
../icdar2013_benchmarking_dataset/187_3.tif
[[-0.01065884 -0.00653142  0.01734725 ... -0.01149174 -0.01149174
  -0.00809065]]
001_1.tif
../icdar2013_benchmarking_dataset/001_1.tif
../icdar2013_benchmarking_dataset/001_1.tif
[[-0.0035409   0.00773942 -0.01005935 ... -0.01015558 -0.01015558
  -0.00152195]]
081_4.tif
../icdar2013_benchmarking_dataset/081_4.tif
../icdar2013_benchmarking_dataset/081_4.tif
[[-0.00638784  0.00879751 -0.00459292 ... -0.01577924 -0.01577924
  -0.00338575]]
170_2.tif
../icdar2013_benchmarking_dataset/170_2.tif
../icdar2013_benchmarking_dataset/170_2.tif
[[-0.00840015 -0.00884989 -0.00297979 ... -0.00710572 -0.00710572
   0.0017118 ]]
216_1.tif
../icdar

In [13]:
def load_pickle(fileName):
    'Returns the object stored inside the pickle file'
    if fileName.endswith('pkl.gz'):
        with gzip.open(fileName, 'rb') as f:
            desc = pickle.load(f)
    
    return desc

In [14]:
## Before calling this function
## verify that supervectors for train and test have been computed and are present as pickle files in- 
## -folder ../outTrainVoc and ../outTestVoc
def evaluate():
    'We evaluate using TOP-1 evaluation matrix'
    # X is the supervector belonging to test document
    # Y is the supervector belonging to train document
    testDir = '../outTestVoc/'
    Dir = '../outTestVoc/'
    
    total_test_files = 0
    correctly_classified1 = 0 # Denoted total no. of correctly classified test files
    correctly_classified2 = 0 # Denoted total no. of correctly classified test files
    correctly_classified3 = 0 # Denoted total no. of correctly classified test files
    mAP = 0
    
    for testFile in os.listdir(testDir):

        label_test = testFile[0:3] # First three letters denote the writer-ID
        # print ('testfile label: %s' %(label_test))
        # X is the supervector belonging to test document
        cos_dist = []
        if testFile.endswith('pkl.gz'):
            X = load_pickle(os.path.join(testDir, testFile))
            total_test_files += 1
        else:
            continue
        for file in os.listdir(Dir):
            # Y is the supervector belonging to train document
            if file.endswith('pkl.gz'):
                Y = load_pickle(os.path.join(Dir, file))
            else:
                continue
            
            # Computing cosine distance between X and Y supervectors
            dist = spdistance.cosine(X, Y)
            cos_dist.append((dist, file))
        
        # sorting cos_dist on the basis of distance
        list.sort(cos_dist)
        # Extracting the v
        print ("Top-3 Matches: ")
        
        ## Top-3 matches
        top3 = []
        top3.append(cos_dist[1])
        if testFile[4] == '1' or testFile[4] == '2': 
            for i in cos_dist:
                if i[1][4] != '1' and i[1][4] != '2':
                    top3.append(i)
                if len(top3) == 3:
                    break
        
        if testFile[4] == '3' or testFile[4] == '4':
            for i in cos_dist:
                if i[1][4] != '3' and i[1][4] != '4':
                    top3.append(i)
                if len(top3) == 3:
                    break
        
        print(top3)
        
        ### End: Top-3 matches 
        
        
        pred_label0 = top3[0][1] # looking at the 2nd nearest as 1st one is the same document
        pred_label0 = pred_label0[0:3] # First three letters denote the writer-ID
        
        pred_label1 = top3[1][1] # looking at the 2nd nearest as 1st one is the same document
        pred_label1 = pred_label1[0:3] # First three letters denote the writer-ID
        
        pred_label2 = top3[2][1] # looking at the 2nd nearest as 1st one is the same document
        pred_label2 = pred_label2[0:3] # First three letters denote the writer-ID
        
        ## mAP
        mAP += (pred_label0 == label_test) + (pred_label1 == label_test) + (pred_label2 == label_test)
        
        if (pred_label0 == label_test):
            correctly_classified1 += 1
            
        if (pred_label0 == label_test and pred_label1 == label_test):
            correctly_classified2 += 1
            
        if (pred_label0 == label_test and pred_label1 == label_test and pred_label2 == label_test):
            correctly_classified3 += 1
        
    return [correctly_classified1/total_test_files, correctly_classified2/total_test_files, correctly_classified3/total_test_files, mAP/(3*total_test_files)]

In [16]:
evaluate()

Top-3 Matches: 
[(0.590063652994346, '113_3.pkl.gz'), (0.6600513857355619, '113_2.pkl.gz'), (0.6856403552582158, '113_1.pkl.gz')]
Top-3 Matches: 
[(0.6808680988715914, '033_3.pkl.gz'), (0.7198124927210034, '033_1.pkl.gz'), (0.74308753948149, '033_2.pkl.gz')]
Top-3 Matches: 
[(0.5995367975210679, '072_4.pkl.gz'), (0.6601808283597981, '055_2.pkl.gz'), (0.6674990862772741, '072_2.pkl.gz')]
Top-3 Matches: 
[(0.5020433479688526, '039_4.pkl.gz'), (0.5412740987970814, '039_1.pkl.gz'), (0.5494490674851675, '039_2.pkl.gz')]
Top-3 Matches: 
[(0.6022775555205857, '114_1.pkl.gz'), (0.6953983381294422, '114_3.pkl.gz'), (0.7059673002290506, '068_3.pkl.gz')]
Top-3 Matches: 
[(0.5217022679764038, '053_2.pkl.gz'), (0.5677953793299337, '053_3.pkl.gz'), (0.5941148112401364, '096_3.pkl.gz')]
Top-3 Matches: 
[(0.5969328038045305, '085_3.pkl.gz'), (0.5969328038045305, '085_3.pkl.gz'), (0.6304708625914321, '085_4.pkl.gz')]
Top-3 Matches: 
[(0.6704779765412927, '075_4.pkl.gz'), (0.7091083472463644, '038_2.pkl

Top-3 Matches: 
[(0.6361794495807194, '123_3.pkl.gz'), (0.6605296689763392, '123_1.pkl.gz'), (0.6606302889268916, '123_2.pkl.gz')]
Top-3 Matches: 
[(0.6900338895755349, '059_1.pkl.gz'), (0.7310186992548446, '059_3.pkl.gz'), (0.7496440762897599, '104_4.pkl.gz')]
Top-3 Matches: 
[(0.6024947477477458, '069_4.pkl.gz'), (0.6871302498418139, '069_2.pkl.gz'), (0.6878914601304349, '069_1.pkl.gz')]
Top-3 Matches: 
[(0.610180012972124, '079_4.pkl.gz'), (0.6234943221803138, '079_2.pkl.gz'), (0.669336041738843, '054_2.pkl.gz')]
Top-3 Matches: 
[(0.532037939715663, '091_2.pkl.gz'), (0.5680890293189808, '091_4.pkl.gz'), (0.5750436463949251, '091_3.pkl.gz')]
Top-3 Matches: 
[(0.6446806289222642, '097_2.pkl.gz'), (0.6890093038666958, '097_3.pkl.gz'), (0.7044442052444877, '111_3.pkl.gz')]
Top-3 Matches: 
[(0.5406259387171026, '037_2.pkl.gz'), (0.5406259387171026, '037_2.pkl.gz'), (0.6037865315742914, '037_1.pkl.gz')]
Top-3 Matches: 
[(0.7117518187725816, '101_1.pkl.gz'), (0.7472295464203289, '093_4.pkl

Top-3 Matches: 
[(0.5835031957007717, '055_1.pkl.gz'), (0.6258809611673031, '055_4.pkl.gz'), (0.6309143973040747, '120_4.pkl.gz')]
Top-3 Matches: 
[(0.6838071384122624, '060_1.pkl.gz'), (0.74445340884937, '060_3.pkl.gz'), (0.7498557250417546, '060_4.pkl.gz')]
Top-3 Matches: 
[(0.5455781042525594, '095_2.pkl.gz'), (0.6377235113853811, '071_4.pkl.gz'), (0.64501745277244, '095_3.pkl.gz')]
Top-3 Matches: 
[(0.6572812075108884, '055_1.pkl.gz'), (0.6838292119009106, '073_3.pkl.gz'), (0.6882248615724701, '073_4.pkl.gz')]
Top-3 Matches: 
[(0.6140811023930146, '086_3.pkl.gz'), (0.7149686707204688, '075_2.pkl.gz'), (0.7271918094457046, '098_1.pkl.gz')]
Top-3 Matches: 
[(0.6734870989581945, '062_3.pkl.gz'), (0.677597503040984, '119_1.pkl.gz'), (0.6925159897489934, '111_2.pkl.gz')]
Top-3 Matches: 
[(0.6148458276812521, '038_3.pkl.gz'), (0.6327919497763621, '038_2.pkl.gz'), (0.6974436087197446, '118_2.pkl.gz')]
Top-3 Matches: 
[(0.5576724257841457, '065_3.pkl.gz'), (0.6322361254128175, '065_1.pkl.g

Top-3 Matches: 
[(0.7314376127750133, '060_3.pkl.gz'), (0.7498110454829454, '060_1.pkl.gz'), (0.7498557250417546, '060_2.pkl.gz')]
Top-3 Matches: 
[(0.519106172515807, '065_1.pkl.gz'), (0.6172527506351413, '065_3.pkl.gz'), (0.6439321621261016, '065_4.pkl.gz')]
Top-3 Matches: 
[(0.549400868559569, '088_3.pkl.gz'), (0.6034143765102531, '088_2.pkl.gz'), (0.6510966032950821, '078_2.pkl.gz')]
Top-3 Matches: 
[(0.6300652092009827, '085_2.pkl.gz'), (0.6838297881272256, '085_3.pkl.gz'), (0.7140673352426781, '118_4.pkl.gz')]
Top-3 Matches: 
[(0.503511131936951, '058_1.pkl.gz'), (0.622798850921826, '058_4.pkl.gz'), (0.6236853547205052, '058_3.pkl.gz')]
Top-3 Matches: 
[(0.67396295605083, '063_4.pkl.gz'), (0.7057661084677704, '063_1.pkl.gz'), (0.7094361463028407, '063_2.pkl.gz')]
Top-3 Matches: 
[(0.610180012972124, '079_3.pkl.gz'), (0.6746224702712529, '079_2.pkl.gz'), (0.6915205873098835, '120_2.pkl.gz')]
Top-3 Matches: 
[(0.6828233469417597, '122_3.pkl.gz'), (0.7683949830926133, '030_1.pkl.gz'

Top-3 Matches: 
[(0.5717114859235154, '052_2.pkl.gz'), (0.5717114859235154, '052_2.pkl.gz'), (0.5900238617711742, '091_1.pkl.gz')]
Top-3 Matches: 
[(0.6234076220130689, '050_1.pkl.gz'), (0.7106890911286585, '050_3.pkl.gz'), (0.7305142255371684, '062_4.pkl.gz')]
Top-3 Matches: 
[(0.6631858318424562, '047_3.pkl.gz'), (0.7550642264366947, '120_2.pkl.gz'), (0.7554278294642979, '054_1.pkl.gz')]
Top-3 Matches: 
[(0.5067183599618247, '053_3.pkl.gz'), (0.580207194826694, '074_2.pkl.gz'), (0.5936076951687963, '074_1.pkl.gz')]
Top-3 Matches: 
[(0.6558497754917191, '114_4.pkl.gz'), (0.6953983381294422, '114_2.pkl.gz'), (0.7312319675158867, '112_2.pkl.gz')]
Top-3 Matches: 
[(0.665417679494586, '108_4.pkl.gz'), (0.7015777821007179, '116_2.pkl.gz'), (0.7071688687317578, '113_2.pkl.gz')]
Top-3 Matches: 
[(0.29071989384036667, '096_3.pkl.gz'), (0.37552117607857727, '096_2.pkl.gz'), (0.43657393034408276, '096_1.pkl.gz')]
Top-3 Matches: 
[(0.6269679057995459, '109_2.pkl.gz'), (0.7290007071215918, '111_3

Top-3 Matches: 
[(0.6120556456517694, '029_4.pkl.gz'), (0.7212449835474559, '029_2.pkl.gz'), (0.7555395499485356, '054_1.pkl.gz')]
Top-3 Matches: 
[(0.4619306686832616, '090_4.pkl.gz'), (0.5102537939433929, '090_1.pkl.gz'), (0.5136324627861932, '090_2.pkl.gz')]
Top-3 Matches: 
[(0.6535889722857874, '061_3.pkl.gz'), (0.6712609248064102, '061_1.pkl.gz'), (0.6724386678873482, '061_2.pkl.gz')]
Top-3 Matches: 
[(0.5716355768936641, '078_2.pkl.gz'), (0.6680906017143031, '078_3.pkl.gz'), (0.6765538218466183, '078_4.pkl.gz')]
Top-3 Matches: 
[(0.6489258720158473, '082_1.pkl.gz'), (0.7009459386526142, '082_4.pkl.gz'), (0.7207692626703941, '082_3.pkl.gz')]
Top-3 Matches: 
[(0.6834413701648199, '110_4.pkl.gz'), (0.7358793437171198, '110_2.pkl.gz'), (0.7502727542472822, '110_1.pkl.gz')]
Top-3 Matches: 
[(0.6027952370738217, '077_1.pkl.gz'), (0.6953257879731145, '077_3.pkl.gz'), (0.7388494203280793, '077_4.pkl.gz')]
Top-3 Matches: 
[(0.5995271220403195, '040_4.pkl.gz'), (0.6688052409344027, '040_2.

Top-3 Matches: 
[(0.665417679494586, '108_3.pkl.gz'), (0.742929494343874, '116_2.pkl.gz'), (0.7536880947310909, '054_2.pkl.gz')]
Top-3 Matches: 
[(0.45948416286516536, '044_1.pkl.gz'), (0.5641087551822409, '044_3.pkl.gz'), (0.6491076261156992, '044_4.pkl.gz')]
Top-3 Matches: 
[(0.532037939715663, '091_1.pkl.gz'), (0.5673801752933973, '091_3.pkl.gz'), (0.5747424071910567, '091_4.pkl.gz')]
Top-3 Matches: 
[(0.6233786841843683, '070_3.pkl.gz'), (0.6851477826206597, '070_1.pkl.gz'), (0.6896941261101147, '070_2.pkl.gz')]
Top-3 Matches: 
[(0.639291381185826, '083_3.pkl.gz'), (0.6485997368116675, '083_2.pkl.gz'), (0.6759125276644318, '083_1.pkl.gz')]
Top-3 Matches: 
[(0.6354523582141213, '031_4.pkl.gz'), (0.6938747646407832, '057_2.pkl.gz'), (0.7167399106307182, '031_2.pkl.gz')]
Top-3 Matches: 
[(0.650926597360376, '055_1.pkl.gz'), (0.6784391617669502, '073_4.pkl.gz'), (0.7173226479186074, '073_3.pkl.gz')]
Top-3 Matches: 
[(0.5290499079822008, '080_1.pkl.gz'), (0.658951776264499, '074_3.pkl.g

[0.94, 0.72, 0.475, 0.7483333333333333]

In [1]:
Top_k_accuracy = evaluate()
print ('TOP-1 Accuracy: %f' %(Top_k_accuracy[0]))
print ('TOP-2 Accuracy: %f' %(Top_k_accuracy[1]))
print ('TOP-3 Accuracy: %f' %(Top_k_accuracy[2]))
print ('mAP Accuracy: %f' %(Top_k_accuracy[3]))

#### TOP-1 Accuracy: 0.94
#### TOP-2 Accuracy: 0.72
#### TOP-3 Accuracy: 0.475
#### mAP Accuracy: 0.7483333333333333