In [2]:
#imports 
import numpy as np
import matplotlib.pyplot as plt
import cv2
import png
import os
import PIL
from PIL import Image
%matplotlib inline

In [3]:
#Functions
def adjacentPixels(img, P):
    '''Return the black (value 0) neighbours of pixel P, looking up and sideways.

    Only five of the eight neighbours are examined, always in this order:
    upper-left, up, upper-right, left, right. The three neighbours below P
    are never considered.

    img : 2-D indexable image (list of rows or array), indexed img[row][col].
          Rows are assumed to all have the same length as img[0].
    P   : (row, col) of the centre pixel.

    Returns a list of (row, col) tuples. Neighbours that fall outside the
    image are skipped, so a pixel on the border neither wraps around to the
    opposite edge (negative index) nor raises IndexError.
    '''
    row, col = P[0], P[1]
    height = len(img)
    width = len(img[0]) if height else 0

    # (d_row, d_col) offsets; the order fixes the order of the result.
    offsets = ((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1))

    res = []
    for d_row, d_col in offsets:
        r = row + d_row
        c = col + d_col
        # Bounds are tested first so img is never indexed with -1 or width.
        if 0 <= r < height and 0 <= c < width and img[r][c] == 0:
            res.append((r, c))
    return res

def pixelsAtDistanceL(img, P, l):
    '''Collect the end pixels of every walk of exactly l steps starting at P.

    Each step moves to one of the pixels returned by adjacentPixels for the
    current position. The result holds one entry per walk, so the same pixel
    appears several times when several walks end on it, and a walk is allowed
    to step back onto a pixel it has already visited.

    For l == 0 the result is just [P].
    '''
    if l != 0:
        reached = []
        for neighbour in adjacentPixels(img, P):
            # Every neighbour contributes all walks of the remaining length.
            reached += pixelsAtDistanceL(img, neighbour, l - 1)
        return reached
    return [P]

def featureList(n):
    '''Build the list of relative (row, col) offsets used as features.

    With reach = n - 1 the offsets trace three sides of a square around the
    origin: up the left side (column -reach), across the top (row -reach)
    from left to right, then down the right side (column +reach) back to
    row 0. For n >= 2 this gives 5 + 4(n-2) offsets.
    '''
    reach = n - 1

    # Left side: row 0 upwards, stopping just below the top-left corner.
    left_side = [(d_row, -reach) for d_row in range(0, -reach, -1)]
    # Top side: both corners included.
    top_side = [(-reach, d_col) for d_col in range(-reach, reach + 1)]
    # Right side: from just below the top-right corner down to row 0.
    right_side = [(d_row, reach) for d_row in range(-reach + 1, 1)]

    return left_side + top_side + right_side

def edgeBasedDirectionalFeature(img, n, row_step=4):
    '''Build the edge-direction feature vector of a binary image.

    img      : 2-D image matrix (should have binary pixel values 0 and 255),
               indexed img[row][col]. Pixels equal to 0 are the ones counted.
    n        : edge length; walks of n-1 steps are traced from each start pixel.
    row_step : stride between the rows used as start rows. The default of 4
               keeps the stride this function has always used; pass 1 to scan
               every row.

    Every black pixel in the scanned rows (columns n-1 .. width-n) is used as
    a start point. For each walk of n-1 steps returned by pixelsAtDistanceL,
    the offset of its end pixel relative to the start is looked up among the
    offsets of featureList(n) and, if present, that feature is incremented.

    Returns a list of counts in the same order as featureList(n).
    Raises ValueError if n < 1 or row_step < 1.
    '''
    if n < 1:
        raise ValueError("n must be a positive integer")
    if row_step < 1:
        raise ValueError("row_step must be a positive integer")

    features = featureList(n)
    counts = dict.fromkeys(features, 0)

    height = len(img)
    # An empty image has no first row to measure; it yields all-zero counts.
    width = len(img[0]) if height else 0

    for row in range(n - 1, height, row_step):
        for col in range(n - 1, width - n + 1):
            if img[row][col] != 0:
                continue
            for pix in pixelsAtDistanceL(img, (row, col), n - 1):
                offset = (pix[0] - row, pix[1] - col)
                # Dict lookup instead of scanning the feature list; the dict
                # keys are exactly the feature offsets.
                if offset in counts:
                    counts[offset] += 1

    return [counts[f] for f in features]
    

In [4]:
#class
class preprocessing:
    '''File-based pipeline: compress images, detect edges, extract features.

    The data directory is expected to contain one sub-folder per class, each
    holding the images of that class. Intermediate results are written to a
    sibling directory of the data directory named 'tempEBDF' + code:

        <data_path>/../tempEBDF<code>/comp_data/<folder>/<file>
        <data_path>/../tempEBDF<code>/edge_detected_data/<folder>/<file>

    Only files whose name ends with '.png' are processed.

    After featureExtraction():
        data   - numpy array with one feature vector per image
        target - numpy array with int(folder name) - 1 per image
    '''

    def __init__(self,data_path, code=''):
        '''Set up paths, create the temp directory and index the data folder.

        data_path : directory holding one sub-folder per class.
        code      : suffix appended to the temp directory name so that several
                    runs can keep separate intermediate data.
        '''
        self.dataFolderList = None
        self.folderFileDict = {}
        self.datapath = data_path
        self.temp_data_path = os.path.join(self.datapath,'../tempEBDF'+code)
        # os.mkdir (not makedirs) on purpose: a missing parent is reported
        # instead of being silently created.
        if(not os.path.exists(self.temp_data_path)):
            os.mkdir(self.temp_data_path)
        self.comp_data_path = os.path.join(self.temp_data_path,'./comp_data')
        self.edge_detected_data_path = os.path.join(self.temp_data_path,'./edge_detected_data')
        self.data = []
        self.target = []
        self.folderFile()

    def folderFile(self):
        '''Create dictionary with key as folder and items their respective files'''
        self.dataFolderList = [f for f in os.listdir(self.datapath) if os.path.isdir(os.path.join(self.datapath, f))]
        for folder in self.dataFolderList:
            path = os.path.join(self.datapath, folder)
            fileList = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
            self.folderFileDict[folder] = fileList
        print("Folder File dictionary created.")

    def compress(self,baseheight = 64):
        '''Compress all files with given height while maintaining the aspect ratio

        Reads from the data directory and writes the resized copies, under the
        same folder and file names, to the comp_data directory.
        '''
        os.makedirs(self.comp_data_path, exist_ok=True)

        for fol, fnames in self.folderFileDict.items():

            folpath = os.path.join(self.datapath, fol)
            tempfolpath = os.path.join(self.comp_data_path, fol)
            os.makedirs(tempfolpath, exist_ok=True)

            for fname in fnames:
                if(not fname.endswith('.png')):
                    continue
                # The context manager closes the source file handle.
                with Image.open(os.path.join(folpath, fname)) as img:
                    hpercent = (baseheight / float(img.size[1]))
                    wsize = int((float(img.size[0]) * float(hpercent)))
                    # Image.LANCZOS is the filter formerly also exposed as
                    # ANTIALIAS; the ANTIALIAS name was removed in Pillow 10.
                    resized = img.resize((wsize, baseheight), Image.LANCZOS)
                    resized.save(os.path.join(tempfolpath, fname))
        print("Compression success!")

    def detectEdge(self,x=300,y=300):
        '''Detect edge in each image

        Reads the compressed images as grayscale, runs cv2.Canny with the two
        thresholds x and y, inverts the result (so edge pixels become 0) and
        writes it to the edge_detected_data directory.
        '''
        os.makedirs(self.edge_detected_data_path, exist_ok=True)

        for fol, fnames in self.folderFileDict.items():

            folpath = os.path.join(self.comp_data_path, fol)
            tempfolpath = os.path.join(self.edge_detected_data_path, fol)
            os.makedirs(tempfolpath, exist_ok=True)

            for fname in fnames:
                if(not fname.endswith('.png')):
                    continue
                img = cv2.imread(os.path.join(folpath, fname),0)
                edge = cv2.Canny(img,x,y)
                edgeinv = cv2.bitwise_not(edge)
                png.from_array(edgeinv,'L').save(os.path.join(tempfolpath, fname))
        print("Edges detected successfully")

    def featureExtraction(self,length=4):
        '''Generate feature vector for each image with edge length => length

        Reads every edge-detected image and stores its feature vector in
        self.data and its label in self.target, both as numpy arrays. The
        label is int(folder name) - 1, so folder names must be integers.

        Returns None. If the edge_detected_data directory does not exist,
        a message is printed and nothing is changed.
        '''
        if(not os.path.exists(self.edge_detected_data_path)):
            print("Edge data not available")
            return None

        # Collected locally and assigned at the end, so the method can be run
        # again after self.data / self.target have become numpy arrays.
        data = []
        target = []
        for fol, fnames in self.folderFileDict.items():

            folpath = os.path.join(self.edge_detected_data_path, fol)
            for fname in fnames:
                if(not fname.endswith('.png')):
                    continue
                img = cv2.imread(os.path.join(folpath, fname),0)
                # Pass the 'length' argument (previously an undefined name
                # was passed here, raising NameError).
                data.append(edgeBasedDirectionalFeature(img,length))
                target.append(int(fol)-1)
        self.data = np.array(data)
        self.target = np.array(target)
        print("Feature extraction done!!")

In [None]:
# Run the whole pipeline for one dataset: index the class folders, shrink the
# images to a height of 64, detect edges (Canny thresholds 300/300) and build
# the feature vectors for edge length 4.
m = preprocessing("D:\\dataset\\exp\\5_1", code='_5_1_b')
m.compress(baseheight=64)
m.detectEdge(x=300, y=300)
m.featureExtraction(length=4)

Folder File dictionary created.
Compression success!
Edges detected successfully


In [None]:
#Saving data to npy
# np.save does not create missing directories, so make sure the output
# folder exists before writing.
os.makedirs('./npy', exist_ok=True)
np.save('./npy/data_5_1_b',m.data)
np.save('./npy/target_5_1_b',m.target)
print(m.data.shape)