In [1]:
## Import packages and set up environemnt
from os import listdir
from os.path import isfile, join
from PIL import Image, ImageEnhance
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
from sklearnex import patch_sklearn
patch_sklearn()
from sklearn.cluster import KMeans
from scipy.spatial.distance import pdist
import pandas as pd
from IPython.display import clear_output
import time
import faiss
import dask.array as da

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [81]:
## define functions
def loadImages(directory, file_list, sample, reduction, enchanced, dim):
    ## Function takes file list and if samples loads sample, otherwise loads all images in file list, reducing by the reduction factor
    timeStamp = []
    index = 0
    if not sample:  # if sample does not exist create a list of numbers to load
        sample = range(len(file_list))  # make sample the entire file list

    if not dim:  # open an image and get the image dimensions if no cropping is supplied
        image = np.array(Image.open(directory+file_list[0]).reduce(int((reduction))))  # load image to array
        h, w, d = orig_shape = tuple(image.shape)
        dim = [0, h, 0, w,]

    pixcels = np.empty((dim[1] - dim[0], dim[3] - dim[2], len(sample),3),
                       dtype='float32')  # Create empty NP array for pixcels

    for sam in sample:  # loop over each file in file_list
        clear_output(wait=True)  # clear output
        file = file_list[sam]  # set file as the sam'th entry in file list
        file_path = directory + file  # define file path
        ts = Image.open(file_path)._getexif()[36867]  # import timestamp as tS

        if enchanced:  # if an enchancments is call
            #open image and create enchancer
            enchancer = ImageEnhance.Contrast(Image.open(file_path).reduce(int((reduction))))
            image = np.array(enchancer.enhance(enchanced))  # load image and convert to array

        else: # if no enchancement is called
            image = np.array(Image.open(file_path).reduce(int((reduction))))  # load image into array
        
        pixcels[:, :, index, :] = image[dim[0] : dim[1], dim[2] : dim[3], :]  # crop array and load into pixcels array
        timeStamp.append(ts)  # append time stamp to list
        index += 1
        print("importing " + file + " file # " + str(sam + 1) + " of " + str(len(file_list) + 1))
        print(ts)
    return (timeStamp, pixcels)

def plotSamples(imgArr, ticks, title, titleData):
    ## function takes image array and plots images over 2 columns and as many rows as required. Usere selects ticks and headings
    sampleSize = imgArr.shape[2]  # return the 3rd dimension of the array
    width = 2 # set number of columns to plot
    height = int(sampleSize/width) # determine how many rows to plot
    index = 0 # set a counter to 0
    plt.close()  # close previous plot
    f, axarr = plt.subplots(height, width)  # create enough rows to plot all samples
    for row in range(0,height):  # iterate over each image row
        for h in range(0,height): # iterare over each column
            axarr[h,row].imshow(imgArr[:, :, index, :].astype('uint8'))  # open image and plot
            if title:
                axarr[h, row].title.set_text(titleData[row])

            if not ticks:
                axarr[h, row].set_xticks([])
                axarr[h, row].set_yticks([])
            
            index += 1 # increase counter by 1

def clus2Image(clusArr, centers, recolour_dict):
    ## defien function to take an array of cluster labels and centers and return an image array, with specified clusters recoulured if required
    if recolour_dict: # if recolour provided
        for key in recolour_dict.keys(): # iterate over each key value pair
            centers[key] = np.asarray(recolour_dict[key]) # change the value of that center to the specified colour

    imgArr = centers[clussArr]

    return(imgArr)

def faissCluster(pixcels,frac, startClus, noIt):
    ## defien function to perfrom kmeans clustering using faiss
    no_clusters = startClus # set inital number of clusters
    no_iterations = noIt # set the no of iterations for each step of kmeans
    clusterMin = 1000 # set cluster min to enter while loop

    while clusterMin > frac:  # continue to increase number of clusters until the smallest cluster becomes sufficently small to just be the stripe
        print('clustering with '+str(no_clusters))
        kmeans = faiss.Kmeans(d, no_clusters, niter=no_iterations, verbose=True) # define faiss kmeans object
        kmeans.train(pixcels) # train kmeans object
        D, I = kmeans.index.search(pixcels, 1) # return kmeans
        pixInClus = np.unique(I, return_counts=True) # get counts # pixels in each cluster
        colour = np.where(pixInClus[1] == min(pixInClus[1])) # assigns the smallest cluster as the stripe colour
        clusterMin = min(pixInClus[1]) / (h*w*l) # Calcultes the fraction of the picture occupied the the stripeColour
        no_clusters += 1 # Increase number of clusters by 1
        print("Cluster Min was "+str(clusterMin))
              
    return(D, I, kmeans, colour)

def cleanIQR(upper, lower, singleValues):
    iqr = np.subtract(*np.percentile(singleValues, [upper, lower]))
    med = np.percentile(singleValues, 50)
    minus = med - iqr
    plus = med + iqr

    return(singleValues[(minus < singleValues) & (singleValues < plus)])

In [12]:
## define working directory and which files to sample
directory = '/mnt/veeringDL_storage/e22_MG_vSpSamples_1/' # define directory
sample = [10,75,950,1000] # Set which 4 images to sample
file_list = [f for f in listdir(directory) if isfile(join(directory, f))] # inspect directory and return list of files

## load sample images and plot
timeStamp, pixcels = loadImages(directory, file_list, sample, 4, 1, False)
plotSamples(pixcels, True, False, False)

(864, 1152, 4, 3)
[0, 864, 0, 1152]
in sam loop
(864, 1152, 3)
(864, 1152, 3)
importing 20220319_131634A.jpg file # 11 of 1405
2022:03:19 13:16:34
in sam loop
(864, 1152, 3)
(864, 1152, 3)
importing 20220319_134014A.jpg file # 76 of 1405
2022:03:19 13:40:14
in sam loop
(864, 1152, 3)
(864, 1152, 3)
importing 20220319_151920A.jpg file # 951 of 1405
2022:03:19 15:19:20
in sam loop
(864, 1152, 3)
(864, 1152, 3)
importing 20220319_165546A.jpg file # 1001 of 1405
2022:03:19 16:55:45


<IPython.core.display.Javascript object>

In [21]:
## Get user defined inputs
knownLength = 90 # known length of the stripe
offset = 158.49 # known zero offset
h1 = 30 # horizontal to start image at
h2 = 1050 # horizontal to end image at
v1 = 200 # vertical to start image at
v2 = 600 # vertical to end image at
stripeFrac = 0.04 # expected fraction of image to be the stripe

dim = [v1, v2, h1, h2] # make dim variable as list to pass to load images

In [None]:
## load all images and plot samples to check cropping
timeStamp, pixcels = loadImages(directory, file_list, False, 4, 1, dim)

In [24]:
plotSamples(pixcels[:,:,sample,:], True, False, False)

<IPython.core.display.Javascript object>

In [119]:
## use faiss kmeans clustering to find all stripe clusters
h, w, l, d = orig_shape = tuple(pixcels.shape) # get shape of pixcels array
## do i add x and y positions??
## do i need to scale if including x and y positions
pixcels = np.reshape(pixcels, (h*w*l, d)) # reshape and change data type of pixcdls to suit faiss kmeans
D, I, kmeans, stripeColour =  faissCluster(pixcels, 0.1, 3, 10) # perform faiss clustering looking for stripe
check = clus2Image(I, kmeans.centroids, {int(stripeColour[0][0]): [0,255,0]}) # get image array with stripe clusters changed to green
check = np.reshape(check, orig_shape) # reshape for plotting
check = check[:,:,sample,:] # reduce to just sample images
plotSamples(check, False, False, False) # plot sample images to check clustering

ValueError: not enough values to unpack (expected 4, got 2)

In [120]:
## use dask svd to find roation matrix and then calculayte stripe length and median
stripe_loc = np.array(np.where(np.reshape(I, (h,w,l)) == stripeColour)) # get stripe locations from reshaped labels array
x = da.from_array(stripe_loc[0:2,:].T) # convert x and y row to dask array
u, s, v = da.linalg.svd_compressed(x, k=10, compute=True) # perform svd
Vh = v.compute() # save rotation matrix
stripe_rot = stripe_loc[0:2,:].T @ Vh[:,0] # rotate stripe onto first principle component
stripe_rot = cleanIQR(75,25,stripe_rot) # remove outliers from stripe rot
stripeMedian = np.median(stripe_rot) # find median of the stripe
stripe_len = (abs(abs(np.min(stripe_rot)) - abs(np.max(stripe_rot)))) # fins the abs range of the stripe (known length)
scale = knownLength / stripe_len # calculate the pixcel to mm scale

In [1]:
## use faiss kmeans to find dot cluster
D, I, kmeans, stripeColour =  faissCluster(pixcels, 0.03, 5, 10) # perform faiss clustering looking for stripe
check = clus2Image(I, kmeans.centroids, {int(stripeColour[0][0]): [0,255,0]}) # get image array with stripe clusters changed to green
check = np.reshape(check, orig_shape) # reshape for plotting
check = check[:,:,sample,:] # reduce to just sample images
plotSamples(check, False, False, False) # plot sample images to check clustering

NameError: name 'faissCluster' is not defined

In [114]:
scale

0.2933833497285404

In [115]:
stripe_len

306.76587503440305