In [115]:
import cv2
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.spatial.distance import pdist, squareform
from scipy.stats import iqr, skew
import os

In [116]:
# Output CSV that the feature-extraction loop appends one row per image to.
fileout = 'labels_cropped.txt'
# When True, features are computed on the k-means cluster of longest segments
# only; when False, on all detected segments.
do_clustering = True

# Column names, in the same order as the values written for every image.
header = "%s,%s,%s,%s,%s,%s,%s,%s\n" % ("ImgID","Label","ClusterSize","AvgLength","MedLength","StdAngle",\
                                       "IqrAngle","IqrangleTop")

# The file is opened in append mode so that rows from several runs (e.g. one
# per label) accumulate. Only write the header when the file is new or empty;
# otherwise re-running this cell would inject a second header line in the
# middle of the data.
if not os.path.isfile(fileout) or os.path.getsize(fileout) == 0:
    with open(fileout,'a') as f:
        f.write(header)

In [119]:
# Which labelled image set to process. Images from the 'yes' set are written
# with class 1, any other label with class 0.
label = 'yes'
label_out = int(label == 'yes')

# Input folder for the chosen label. Other variants of the data set differ
# only in the suffix that follows the label:
#   '/', '_good/', '_clean/', '_new/'
directory = ''.join(['../data/fields_raw_', label, '_cropped/'])



In [120]:
# Feature extraction: for every PNG in `directory`, detect line segments and
# append one CSV row of segment statistics to `fileout`.
#
# Uses `directory`, `label_out`, `fileout` and `do_clustering` defined in the
# cells above.

# Tunable parameters.
CROP_FRAC = 0.05        # fraction of the image trimmed from each border
SATURATION_PCT = 5      # percentile clipped at both ends of the grey scale
HOUGH_THRESHOLD = 10    # accumulator threshold for the Hough transform
MIN_LINE_LENGTH = 20    # shortest segment kept, in pixels
MAX_LINE_GAP = 1        # largest gap bridged inside one segment, in pixels
KMEANS_SEED = 0         # fixed seed so the clustering is reproducible

# sorted(): os.listdir returns entries in arbitrary order; sorting makes the
# row order of the output file reproducible.
for filein in sorted(os.listdir(directory)):
    if not filein.endswith(".png"):
        continue

    # Load raw image, convert to grayscale
    img = cv2.imread(os.path.join(directory, filein))
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        print("Skipping %s: could not be read as an image" % filein)
        continue
    gray_full = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

    # Crop a CROP_FRAC-wide margin off every side
    n_rows, n_cols = gray_full.shape[:2]
    row_margin = int(CROP_FRAC*n_rows)
    col_margin = int(CROP_FRAC*n_cols)
    gray_raw = gray_full[row_margin:n_rows - row_margin, col_margin:n_cols - col_margin]

    # Saturate scale based on percentiles. Pixels equal to the image minimum
    # are left out of the percentile estimate.
    gray_vec = gray_raw[gray_raw != gray_raw.min()]
    if gray_vec.size == 0:
        # Constant image: nothing differs from the minimum, so fall back to
        # all pixels rather than taking percentiles of an empty array.
        gray_vec = gray_raw.ravel()
    b, t = np.percentile(gray_vec, [SATURATION_PCT, 100 - SATURATION_PCT])

    gray_sat = gray_raw.copy()
    gray_sat[gray_sat < b] = b
    gray_sat[gray_sat > t] = t

    # Gaussian blur
    blur = cv2.GaussianBlur(gray_sat,(3,3),0)

    # Adaptive thresholding on saturated + Gaussian blur
    thr_ad_blur = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY,5,0)

    # Length of the image diagonal, used to normalise segment lengths
    diag = np.sqrt(thr_ad_blur.shape[0]**2 + thr_ad_blur.shape[1]**2)

    # Probabilistic Hough transform (detects line segments).
    # minLineLength / maxLineGap MUST be passed by keyword: the fifth
    # positional parameter of cv2.HoughLinesP is the optional output array
    # `lines`, so passing them positionally shifts both values by one slot.
    lines_pr = cv2.HoughLinesP(thr_ad_blur, 1, np.pi/180, HOUGH_THRESHOLD,
                               minLineLength=MIN_LINE_LENGTH,
                               maxLineGap=MAX_LINE_GAP)
    if lines_pr is None:
        # No segment found at all; there is nothing to compute features from.
        print("Skipping %s: no line segments detected" % filein)
        continue

    # reshape (not squeeze) so that a single detected segment still yields a
    # 2-D (1, 4) array of [x1, y1, x2, y2] rows.
    segments = lines_pr.reshape(-1, 4).astype(float)

    # Get angles (degrees, absolute value) and lengths (pixels)
    dx = segments[:, 2] - segments[:, 0]
    dy = segments[:, 3] - segments[:, 1]
    thetas_pr = np.abs(np.arctan2(dy, dx)*180/np.pi)
    lengths_pr = np.sqrt(dx**2 + dy**2)

    # Feature for detecting if the image contains patterns at all:
    # spread in angle among line segments with similar length.
    if do_clustering and len(lengths_pr) >= 2:
        # k-means on segment length; keep the cluster that contains the
        # longest segment.
        feat = lengths_pr.reshape(-1, 1)
        kmeans = KMeans(n_clusters=2, random_state=KMEANS_SEED).fit(feat)
        labels = kmeans.labels_
        good = labels[np.argmax(lengths_pr)]
        selected = labels == good
    else:
        # Clustering disabled, or too few segments to form two clusters:
        # use every segment.
        selected = np.ones(len(lengths_pr), dtype=bool)

    cluster_size = int(np.count_nonzero(selected))
    avglength = np.mean(lengths_pr[selected])/diag
    medlength = np.median(lengths_pr[selected])/diag
    stdangle = np.std(thetas_pr[selected])
    iqrangle = iqr(thetas_pr[selected])

    # Angle spread among the longest 1% of all segments
    top = np.percentile(lengths_pr,99)
    iqrangle_top = iqr(thetas_pr[lengths_pr>=top])

    # Append immediately so rows already computed survive a later failure.
    with open(fileout,'a') as f:
        line = "%s,%i,%i,%f,%f,%f,%f,%f\n" % \
            (filein,label_out,cluster_size,avglength,medlength,stdangle,iqrangle,iqrangle_top)
        f.write(line)


#         with open('labels_clean.txt','a') as f:
#             line = "%i,%i,%f,%f,%f,%f,%f\n" % (label_out,cluster_size,avglength,medlength,stdangle,iqrangle,iqrangle_top)
#             f.write(line)

#         with open('labels_skew.txt','a') as f:
#             line = "%i,%i,%f,%f,%f,%f,%f\n" % (label_out,cluster_size,avglength,medlength,stdangle,iqrangle,skewness)
#             f.write(line)


#         with open('labels_top.txt','a') as f:
#             line = "%i,%i,%f,%f,%f,%f,%f\n" % (label_out,cluster_size,avglength,medlength,stdangle,iqrangle,iqrangle_top)
#             f.write(line)