In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# List the entries of the ../input directory.
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# We need to preprocess all images at once to get the max and min needed to normalize them

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import PIL
from skimage.feature import local_binary_pattern, greycomatrix, greycoprops
from skimage.filters import gabor
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter
from scipy.stats import kurtosis, skew
from scipy.ndimage import laplace, sobel
from skimage import img_as_float
from skimage.morphology import reconstruction

class IMGFeatures:
    """Texture-feature extractor for a single image file.

    Usage: call ``load_image(path)`` and then ``calculate_features()``.
    The resulting 15-element vector is stored in ``concat_feat`` in this
    order: LBP energy, LBP entropy, GLCM contrast, dissimilarity,
    homogeneity, energy, correlation, Gabor energy, Gabor entropy,
    Sobel std/max/mean, Laplace std/max/mean.
    """

    def __init__(self):
        # Scalar features, filled in by calculate_features().
        self.lbp_energy = 0
        self.lbp_entropy = 0
        self.contrast = 0
        self.dissimilarity = 0
        self.homogeneity = 0
        self.energy = 0
        self.correlation = 0
        self.g_energy = 0
        self.g_entropy = 0
        # Image data (set by load_image) and feature vectors (set by
        # calculate_features); initialised here so they always exist.
        self.img = None
        self.img_gray = None
        self.img_arr = None
        self.feat_glcm = None
        self.concat_feat = None

    @staticmethod
    def _hist_energy_entropy(values, bins=8):
        """Return ``(energy, entropy)`` of the ``bins``-bin histogram of ``values``.

        Energy is the sum of squared bin probabilities and entropy is the
        base-2 Shannon entropy of those probabilities. Empty bins are left
        out of the entropy sum: evaluating ``0 * log2(0)`` gives NaN and
        would turn the whole feature into NaN.
        """
        counts, _ = np.histogram(values, bins)
        counts = counts.astype(float)
        total = counts.sum()
        if total == 0:
            # No samples at all: nothing to normalise by.
            return 0.0, 0.0
        prob = counts / total
        energy = np.sum(prob ** 2)
        nonzero = prob[prob > 0]
        # "+ 0.0" turns a possible -0.0 (single occupied bin) into 0.0.
        entropy = -np.sum(nonzero * np.log2(nonzero)) + 0.0
        return energy, entropy

    # Isolation function.
    def iso(self, arr):
        """Smooth ``arr`` and subtract its morphological reconstruction by dilation.

        The image is reshaped to 2-D from its first two dimensions, blurred
        with a Gaussian (sigma 2.5), and reconstructed from a seed that
        equals the image on the border and the image minimum inside.
        Returns the smoothed image minus that reconstruction.
        """
        image = img_as_float(arr.reshape(arr.shape[0], arr.shape[1]))
        image = gaussian_filter(image, 2.5)
        seed = np.copy(image)
        seed[1:-1, 1:-1] = image.min()
        mask = image
        dilated = reconstruction(seed, mask, method='dilation')
        return image - dilated

    # Standard deviation for sobel filter
    def sobelstd(self, arr, axis=0):
        """Return ``[std, max, mean]`` of the Sobel response of ``arr`` along ``axis``."""
        image = img_as_float(arr.reshape(arr.shape[0], arr.shape[1]))
        sobelstd = sobel(image, axis=axis, mode='reflect', cval=0.0).ravel()
        return [sobelstd.std(), sobelstd.max(), sobelstd.mean()]

    # Standard deviation for laplace filter
    def lapacestd(self, arr):
        """Return ``[std, max, mean]`` of the Laplace response of ``arr``."""
        image = img_as_float(arr.reshape(arr.shape[0], arr.shape[1]))
        lapacestd = laplace(image, mode='reflect', cval=0.0).ravel()
        return [lapacestd.std(), lapacestd.max(), lapacestd.mean()]

    def volume(self, arr):
        """Return the sum of all elements of ``arr``."""
        return np.sum(arr)

    def load_image(self, path):
        """Open the image at ``path`` and keep it as PIL image, grayscale image and array.

        Sets ``self.img``, ``self.img_gray`` (mode ``'L'``) and ``self.img_arr``.
        """
        # Import the submodule explicitly: a bare ``import PIL`` only exposes
        # ``PIL.Image`` if some other library has already imported it.
        from PIL import Image

        self.img = Image.open(path)
        self.img_gray = self.img.convert('L')  # Converting to grayscale
        self.img_arr = np.array(self.img_gray)  # Converting to array

    def calculate_features(self):
        """Compute all features of the loaded image and store them on the instance.

        Requires ``load_image`` to have been called first (reads
        ``self.img_arr``). Fills the scalar feature attributes,
        ``self.feat_glcm`` and the combined vector ``self.concat_feat``.
        """
        # Local binary pattern: radius = 1, no. of neighbours = 8.
        feat_lbp = local_binary_pattern(self.img_arr, P=8, R=1, method='uniform')
        feat_lbp = np.uint8((feat_lbp / feat_lbp.max()) * 255)  # Converting to uint8
        self.lbp_energy, self.lbp_entropy = self._hist_energy_entropy(feat_lbp)

        # Grey-level co-occurrence matrix (distance 2, angle 0, 256 levels).
        gCoMat = greycomatrix(self.img_arr, [2], [0], levels=256, symmetric=True, normed=True)
        self.contrast = greycoprops(gCoMat, prop='contrast')
        self.dissimilarity = greycoprops(gCoMat, prop='dissimilarity')
        self.homogeneity = greycoprops(gCoMat, prop='homogeneity')
        self.energy = greycoprops(gCoMat, prop='energy')
        self.correlation = greycoprops(gCoMat, prop='correlation')
        self.feat_glcm = np.array([
            self.contrast[0][0],
            self.dissimilarity[0][0],
            self.homogeneity[0][0],
            self.energy[0][0],
            self.correlation[0][0],
        ])

        # Gabor filter
        gaborFilt_real, gaborFilt_imag = gabor(self.img_arr, frequency=0.6)
        # NOTE(review): floor-division by 2 is kept exactly as before. If a
        # magnitude (square root) was intended this should be ``** 0.5``;
        # confirm before changing, since it alters the feature values.
        # NOTE(review): img_arr is presumably uint8; if gabor() returns
        # integer arrays for it, the squares below can overflow - confirm.
        gaborFilt = (gaborFilt_real**2 + gaborFilt_imag**2) // 2
        self.g_energy, self.g_entropy = self._hist_energy_entropy(gaborFilt)

        self.concat_feat = np.concatenate(
            (
                [self.lbp_energy, self.lbp_entropy],
                self.feat_glcm,
                [self.g_energy, self.g_entropy],
                self.sobelstd(self.img_arr),
                self.lapacestd(self.img_arr),
            ),
            axis=0,
        )
print("DONE")

In [None]:
# We have 8 output files
# ----> 4 files with features calculated, no normalization, one for all images (train and test)

# Read the training index table, then preview its first five Ids and its shape.
train_df = pd.read_csv(filepath_or_buffer='../input/train.csv')
print(train_df.head(5).Id)
# Uncomment to work on a 5-row subset: train_df=train_df[0:5]
print(train_df.shape)




In [None]:
# Show the type of train_df (the object returned by pd.read_csv above).
type(train_df)

In [None]:
# Calculate all features for the training set: one row per (sample, channel)
# image, for all four colour channels.
label = []
colnames=("name","lbp_energy","lbp_entropy","contrast","dissimilarity","homogeneity","energy","correlation","g_energy","g_entropy","so_std","so_max","so_mean","la_std","la_max","la_mean")

chanels=["_red.png","_green.png","_blue.png","_yellow.png"]

# Collect plain rows and build the DataFrame once at the end. Appending to a
# DataFrame inside the loop copies the whole table every time (quadratic), and
# DataFrame.append no longer exists in pandas >= 2.0.
trainRows = []
# The first column of train_df holds the image identifier; read it once
# instead of materialising train_df.values on every iteration.
for sample_id in train_df.iloc[:, 0]:
    for suffix in chanels:
        full_image_path = '../input/train/' + sample_id + suffix
        IMG = IMGFeatures()
        IMG.load_image(full_image_path)
        IMG.calculate_features()
        # concat_feat carries the 15 feature values, in colnames[1:] order.
        trainRows.append([full_image_path, *IMG.concat_feat[:15]])
        del IMG

allFeats = pd.DataFrame(trainRows, columns=colnames)
print("DONE")
len(allFeats)

In [None]:
# Now compute the same features for every file in the test directory and add
# them to allFeats.
# NOTE: this cell extends allFeats, so re-running it without re-running the
# training cell first will add the test rows a second time.
from os import listdir
from os.path import isfile, join
mypath="../input/test/"
# Sorted so the row order does not depend on the arbitrary order of listdir().
onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))
#onlyfiles=onlyfiles[0:5]

testRows = []
# Iterate over the test files themselves. Looping over range(len(train_df))
# would raise IndexError when there are fewer test files than training rows
# and silently skip test files when there are more.
for file_name in onlyfiles:
    IMG = IMGFeatures()
    full_image_path = mypath + file_name
    IMG.load_image(full_image_path)
    IMG.calculate_features()
    # concat_feat carries the 15 feature values, in colnames[1:] order.
    testRows.append([full_image_path, *IMG.concat_feat[:15]])
    del IMG

# Build the test table once and concatenate; DataFrame.append in a loop is
# quadratic and was removed in pandas 2.0.
allFeats = pd.concat(
    [allFeats, pd.DataFrame(testRows, columns=colnames)],
    ignore_index=True,
)
allFeats

In [None]:
# Write the combined feature table to CSV without the row index, then list
# the contents of the working directory.
allFeats.to_csv(path_or_buf="allfeats.csv", index=False)
print(os.listdir(os.curdir))