In [1]:
import matplotlib.pyplot as plt
from skimage.feature import local_binary_pattern
import numpy as np
import os
import cv2
import time
import pandas as pd

In [2]:
def load_images(root=None):
    """Load the cheque ink samples from disk and convert them to YCrCb.

    Files are looked up at ``<root>/Cheque<k>/<i>/<i>_<j>.tif`` for ``k`` in
    1..112, ``i`` in ``'1'``/``'2'`` and ``j`` in ``'1'``..``'8'``.  For a
    given ``(k, i)`` the scan stops at the first ``j`` that cannot be read, so
    files numbered after a gap are not picked up.

    Parameters
    ----------
    root : str, optional
        Dataset directory.  When omitted, the original location
        ``/home/mohit/Desktop/dataset`` is used.

    Returns
    -------
    dict
        Maps ``'c<k>_<i>_<j>'`` to the image converted with
        ``cv2.COLOR_BGR2YCrCb``.  OpenCV orders the channels of that colour
        space as Y, Cr, Cb (index 0, 1, 2).  Unreadable files get no entry.
    """
    if root is None:
        root = os.path.join(os.sep, 'home', 'mohit', 'Desktop', 'dataset')

    images = {}
    for k in range(1, 113):
        for i in ['1', '2']:
            for j in ['1', '2', '3', '4', '5', '6', '7', '8']:
                path = os.path.join(root, 'Cheque' + str(k), i, i + '_' + j + '.tif')
                bgr = cv2.imread(path)
                if bgr is None:
                    # cv2.imread reports a missing/unreadable file by returning
                    # None; stop here instead of storing the None and relying
                    # on cvtColor to raise.
                    break
                images['c' + str(k) + '_' + i + '_' + j] = cv2.cvtColor(bgr, cv2.COLOR_BGR2YCrCb)
    return images

In [3]:
def n_size(images):
    """Count the ink pixels of every loaded image.

    A pixel is treated as ink when its value in channel 0 (the Y channel of
    the images produced by ``load_images``) differs from 255.

    Parameters
    ----------
    images : dict
        Maps ``'c<k>_<i>_<j>'`` to an image array with at least three
        dimensions' worth of indexing (``image[:, :, 0]`` must be valid).

    Returns
    -------
    dict
        Maps each image key to its ink-pixel count (a Python ``int``).  For a
        given ``(k, i)`` the scan stops at the first ``j`` whose image is
        missing (or ``None``), mirroring how the images are loaded.
    """
    size = {}
    for k in range(1, 113):
        for i in ['1', '2']:
            for j in ['1', '2', '3', '4', '5', '6', '7', '8']:
                key = 'c' + str(k) + '_' + i + '_' + j
                image = images.get(key)
                if image is None:
                    break
                # Vectorised replacement for the per-pixel Python loop.
                size[key] = int(np.count_nonzero(image[:, :, 0] != 255))
    return size

In [5]:
def mask(image):
    """Return the element-wise result of comparing ``image`` with 255.

    For an array input this is a boolean array that is True exactly where the
    value equals 255.
    """
    return image == 255


In [19]:
def calc_k(images, dim, sizes, n_points=8, radius=1, coverage=0.8):
    """Estimate how many dominant LBP bins are needed per image, on average.

    For every image the rotation-invariant (``method='ror'``) LBP codes of
    channel ``dim`` are histogrammed over the bins ``0..256``.  Pixels whose
    channel-0 value is 255 are pushed out of the histogram range by adding
    1000 to their code.  The bins are sorted by decreasing frequency and the
    smallest number of leading bins whose cumulative share reaches
    ``coverage`` is recorded.

    Parameters
    ----------
    images : dict
        Image key -> image array (indexed as ``image[:, :, channel]``).
    dim : int
        Channel index the LBP is computed on.
    sizes : dict
        Image key -> ink-pixel count, used to normalise the histogram.
    n_points, radius : int
        Passed to ``local_binary_pattern``.
    coverage : float, optional
        Cumulative histogram share that the dominant bins must reach
        (default 0.8, the value that used to be hard-coded).

    Returns
    -------
    int
        ``floor(mean bins per image) + 1``.  The mean is taken over the images
        actually processed rather than over a fixed count of 854.  Returns 1
        when no image could be processed.
    """
    total_k = 0
    n_used = 0

    for k in range(1, 113):
        for i in ['1', '2']:
            for j in ['1', '2', '3', '4', '5', '6', '7', '8']:
                key = 'c' + str(k) + '_' + i + '_' + j
                image = images.get(key)
                if image is None or key not in sizes:
                    # Stop at the first missing sample of this (k, i) group.
                    break
                ink = sizes[key]
                if ink == 0:
                    # No ink pixels: the normalised histogram would be 0/0.
                    continue

                codes = local_binary_pattern(image[:, :, dim], n_points, radius, method='ror')
                codes += mask(image[:, :, 0]) * 1000

                counts, _ = np.histogram(codes.ravel(), range(0, 257))
                ordered = sorted(counts / ink, reverse=True)

                total = sum(ordered)
                running = 0
                for rank, value in enumerate(ordered, start=1):
                    running += value
                    if running / total >= coverage:
                        break
                # If the threshold is never reached, rank ends at len(ordered).
                total_k += rank
                n_used += 1

    if n_used == 0:
        return 1
    return total_k // n_used + 1

In [20]:
def dominant_lbp(images, dim, sizes, l, n_points=8, radius=1):
    """Build the dominant-LBP feature vector of every image.

    For each image the rotation-invariant (``method='ror'``) LBP codes of
    channel ``dim`` are histogrammed over the bins ``0..256``.  Pixels whose
    channel-0 value is 255 are moved out of range by adding 1000 to their
    code.  The histogram is divided by the image's ink-pixel count, sorted in
    decreasing order, and its first ``l`` values are kept.

    Parameters
    ----------
    images : dict
        Image key -> image array (indexed as ``image[:, :, channel]``).
    dim : int
        Channel index the LBP is computed on.
    sizes : dict
        Image key -> ink-pixel count.
    l : int
        Number of leading (most frequent) bins to keep.
    n_points, radius : int
        Passed to ``local_binary_pattern``.

    Returns
    -------
    dict
        Image key -> 1-D ``numpy`` array of the ``l`` largest normalised bin
        frequencies.  Images with an ink-pixel count of 0 are left out, because
        their normalised histogram is undefined.  For a given ``(k, i)`` the
        scan stops at the first ``j`` missing from ``images`` or ``sizes``.
    """
    hist = {}
    for k in range(1, 113):
        for i in ['1', '2']:
            for j in ['1', '2', '3', '4', '5', '6', '7', '8']:
                key = 'c' + str(k) + '_' + i + '_' + j
                image = images.get(key)
                if image is None or key not in sizes:
                    break
                ink = sizes[key]
                if ink == 0:
                    # Avoid an all-NaN feature vector from 0/0.
                    continue

                codes = local_binary_pattern(image[:, :, dim], n_points, radius, method='ror')
                codes += mask(image[:, :, 0]) * 1000

                counts, _ = np.histogram(codes.ravel(), range(0, 257))
                ordered = sorted(counts / ink, reverse=True)

                hist[key] = np.array(ordered[0:l])

    return hist

In [8]:
def compute_dist(param1,param2,n1,n2):
    """Chi-square-style distance between two equally long feature vectors.

    Sums ``(a - b)**2 / (a + b)`` over matching positions, passing each term
    through ``np.nan_to_num`` so that a 0/0 term contributes 0.  ``n1`` and
    ``n2`` are accepted but not used.  Returns the integer 0 for empty input.
    """
    total = 0
    for idx, left in enumerate(param1):
        right = param2[idx]
        gap = left - right
        total += np.nan_to_num((gap**2)/(left+right))
    return total

def dist(param1,param2,n1,n2):
    """Separation between two samples as a standardised mean difference.

    Computes ``|mean1 - mean2| / sqrt(var1/n1 + var2/n2)`` using ``np.mean``
    and ``np.var`` (population variance) of the two inputs; ``n1`` and ``n2``
    are the sample counts supplied by the caller.
    """
    gap = np.abs(np.mean(param1) - np.mean(param2))
    spread = (np.var(param1)/n1 + np.var(param2)/n2)**0.5
    return gap/spread

In [9]:
def distance_distribution(features,sizes):
    """Collect pairwise feature distances within each cheque.

    For every cheque ``k`` each unordered pair of distinct samples is compared
    once with ``compute_dist``.  Pairs sharing the same ``i`` go to the first
    dictionary, pairs with different ``i`` to the second.

    Parameters
    ----------
    features : dict
        ``'c<k>_<i>_<j>'`` -> feature vector.
    sizes : dict
        ``'c<k>_<i>_<j>'`` -> ink-pixel count (forwarded to ``compute_dist``).

    Returns
    -------
    tuple of dict
        ``(same_pens, diff_pens)``, each mapping
        ``'c_<k>_<i>_<j>::<n>_<m>'`` to the distance.  Pairs for which either
        sample is missing from ``features`` or ``sizes`` are skipped.  Any
        other error raised while computing a distance now propagates instead
        of being silently discarded.
    """
    samples = ['1','2','3','4','5','6','7','8']
    same_pens = {}
    diff_pens = {}

    for k in range(1,113):
        for i in ['1','2']:
            for n in ['1','2']:
                # Same i -> same pen bucket, different i -> different pen bucket.
                bucket = same_pens if i == n else diff_pens
                for j in samples:
                    for m in samples:
                        if i == n and j == m:
                            continue
                        # Skip a pair whose mirror image was already stored.
                        mirrored = 'c_'+str(k)+'_'+n+'_'+m+'::'+i+'_'+j
                        if mirrored in bucket:
                            continue
                        key_a = 'c'+str(k)+'_'+i+'_'+j
                        key_b = 'c'+str(k)+'_'+n+'_'+m
                        if (key_a not in features or key_b not in features
                                or key_a not in sizes or key_b not in sizes):
                            continue
                        bucket['c_'+str(k)+'_'+i+'_'+j+'::'+n+'_'+m] = np.nan_to_num(
                            compute_dist(features[key_a], features[key_b], sizes[key_a], sizes[key_b]))

    print('Lengths same pens-{} different pens-{}'.format(len(same_pens),len(diff_pens)))
    return same_pens,diff_pens

In [10]:
# Read the dataset from disk; load_images returns a dict of YCrCb-converted images.
images = load_images()

In [11]:
# Per-image count of pixels whose channel-0 value is not 255 (used as the ink-pixel count).
sizes = n_size(images)

In [21]:
# Number of dominant LBP bins to keep, computed separately for channel index 0, 1 and 2.
# NOTE(review): load_images converts with cv2.COLOR_BGR2YCrCb, whose channel order
# is Y, Cr, Cb -- index 1 would then be Cr and index 2 Cb, so the Cb/Cr names below
# look swapped.  Confirm before relying on the labels.
k_Y = calc_k(images,0,sizes)
k_Cb = calc_k(images,1,sizes)
k_Cr = calc_k(images,2,sizes)

In [22]:
# Display the three per-channel bin counts as a tuple.
k_Y,k_Cb,k_Cr

(7, 10, 9)

In [23]:
# Dominant-LBP feature vectors per image, one dict per channel index, each
# truncated to the bin count computed for that same index.
# NOTE(review): images come from cv2.COLOR_BGR2YCrCb (channel order Y, Cr, Cb),
# so index 1 is presumably Cr and index 2 Cb -- the Cb/Cr names may be swapped; confirm.
hist_Y  = dominant_lbp(images,0,sizes,int(k_Y))
hist_Cb = dominant_lbp(images,1,sizes,int(k_Cb))
hist_Cr = dominant_lbp(images,2,sizes,int(k_Cr))

In [24]:
# For each channel: collect the same-pen and different-pen pairwise distances,
# keep only the distance values, then score how well the two groups separate.
same_pens_Y, diff_pens_Y = distance_distribution(hist_Y, sizes)
same_pens_Y = list(same_pens_Y.values())
diff_pens_Y = list(diff_pens_Y.values())

same_pens_Cb, diff_pens_Cb = distance_distribution(hist_Cb, sizes)
same_pens_Cb = list(same_pens_Cb.values())
diff_pens_Cb = list(diff_pens_Cb.values())

same_pens_Cr, diff_pens_Cr = distance_distribution(hist_Cr, sizes)
same_pens_Cr = list(same_pens_Cr.values())
diff_pens_Cr = list(diff_pens_Cr.values())

# Sample counts are taken from the data instead of being copied from the
# printed output of an earlier run (1465 / 1456).
dy = dist(same_pens_Y, diff_pens_Y, len(same_pens_Y), len(diff_pens_Y))
dcb = dist(same_pens_Cb, diff_pens_Cb, len(same_pens_Cb), len(diff_pens_Cb))
dcr = dist(same_pens_Cr, diff_pens_Cr, len(same_pens_Cr), len(diff_pens_Cr))

print(dy,dcb,dcr)

Lengths same pens-1465 different pens-1456
Lengths same pens-1465 different pens-1456
Lengths same pens-1465 different pens-1456
24.799480420153735 29.549359804761757 23.676738465016378


In [25]:
# Turn each per-channel feature dict into a table (one column per image key, one
# row per retained histogram bin), stack the three tables vertically and save as CSV.
# NOTE(review): the row index restarts for every channel, so rows in the CSV are
# only distinguishable by position (Y rows first, then Cb, then Cr); the output
# path is specific to the original author's machine.
Y = pd.DataFrame(hist_Y)
Cb = pd.DataFrame(hist_Cb)
Cr = pd.DataFrame(hist_Cr)
features_all  = pd.concat([Y,Cb,Cr])
features_all.to_csv('/home/mohit/Desktop/featuresUpdated/Dominant_LBP.csv', sep=',')

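## Dominant LBP (80% cumulative histogram coverage), radius = 1, n_points = 8

| Colour channel | Distance | No. of features |
|----------------|----------|-----------------|
| Y              | 24.80    | 7               |
| Cb             | 29.55    | 10              |
| Cr             | 23.68    | 9               |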