In [None]:
##### Visual String Pot
# This program takes an image and finds the centre of an orange stripe and a green dot. It then uses the length of the orange stripe to scale the distance between the clusters to calculate the length.
# It uses FAISS k-means clustering to find a cluster of the approximate size of the stripe and of the dot.
# The stripe and dot are then rotated onto the principal component of the stripe.
# The user defines a reduction and an enhancement factor, which reduce the number of pixels and enhance the colours when the images are imported.
# The user then defines the window to focus the clustering on and the approximate dimensions of the stripe and dot.
# The results are then exported as a CSV with index, fileName, timeStamp, measurement, dotMedian, stripeMedian, scale, dotLength.

In [None]:
#### In version 1.1
# image correction and scaling simplified
# plotting improved
# hyperparameter entry streamlined
# post calibration, offset and scaling
# improved layout to add option for other clustering methods later

In [None]:
## Import packages and set up environment
import time
from os import listdir
from os.path import isfile, join

import faiss
import matplotlib.pyplot as plt
%matplotlib notebook
import numpy as np
import pandas as pd
from IPython.display import clear_output
from PIL import Image, ImageEnhance
from sklearn.decomposition import PCA

import vSp_functions_V1_1
# The notebook calls these helpers unqualified, so they must be imported by name.
from vSp_functions_V1_1 import (GetStats, addInd, cleanIQR, faissCluster,
                                loadImages, plotSamples)

In [None]:
## Define the working directories and collect the image files to process
in_dir = '/mnt/veeringDL_storage/220420-mg/' # directory containing all gopro images
out_dir = '/mnt/home/9.0 Data Jobs/' # directory the results are written to
# listdir() returns names in arbitrary order. Sort them so that the positional
# indices used later (the `sample` list, and the per-image results merged back
# onto file_list by index) refer to the same files on every run.
file_list = sorted(f for f in listdir(in_dir) if isfile(join(in_dir, f)))
print(str(len(file_list))+' Images found in folder')

In [None]:
## User settings: which images to preview, the target size, and the enhancement factor and type
sample = [10, 2000, 5000, 7000]  # indices (into file_list) of the 4 images to sample
targetSize = 1200000  # target image size in pixels
enchancement, ench_type = 1.5, 'Bright'  # enhancement value and enhancement type

In [None]:
## Load one full-resolution image to work out the reduction, then load and plot the sample images
# Positional arguments of loadImages as used in this notebook:
#   directory, file list, image indices, reduction, enhancement, crop window, enhancement type.
# Here: image 0 only, reduction 1 (full resolution), no enhancement, no crop.
timeStamp, pixcels = loadImages(in_dir, file_list, [0], 1, False, False, False)
h, w, l, d = orig_shape = tuple(pixcels.shape) # dimensions of the original image
# Reduction needed to bring h*w down to roughly targetSize pixels. Clamp it to 1
# (the value used above for full resolution) so an image that is already smaller
# than targetSize does not give a reduction factor of 0.
reduction = max(1, int((h*w)/targetSize))
print('A reduction factor of '+str(reduction)+' was adopted')
# Reload only the sample images, reduced and enhanced but not cropped, and plot them.
timeStamp, pixcels = loadImages(in_dir, file_list, sample, reduction, enchancement, False, ench_type)
plotSamples(pixcels, True, False, False)

In [None]:
## Crop window chosen by the user
h1, h2 = 130, 325  # horizontal positions to start / end the image at
v1, v2 = 130, 225  # vertical positions to start / end the image at

In [None]:
## Reload the sample images cropped to the chosen window and plot them to check the crop
dim = [v1, v2, h1, h2] # crop window as a list, in the order passed to loadImages
timeStamp, pixcels = loadImages(in_dir, file_list, sample, reduction, enchancement, dim, ench_type)
plotSamples(pixcels, True, False, False)

In [None]:
## Approximate bounds of the stripe and dot, and the fraction of the cropped image each should cover
def _box_area(a_start, a_end, b_start, b_end):
    """Area in pixels of the box spanned by two pairs of bounds (the order within a pair does not matter)."""
    return abs(a_end - a_start) * abs(b_end - b_start)

stripe_left, stripe_right = 95, 185  # left / right bounds of the stripe
stripe_top, stripe_bottom = 72, 92   # top / bottom bounds of the stripe
dot_left, dot_right = 13, 43         # left / right bounds of the dot
dot_top, dot_bottom = 17, 36         # top / bottom bounds of the dot
area_factor = 1.1                    # factor the approximated areas are multiplied by
stepAfterStripeCluss = 1             # added to the stripe cluster count when starting the dot clustering

stripe_area = _box_area(stripe_left, stripe_right, stripe_top, stripe_bottom)  # stripe area in pixels
dot_area = _box_area(dot_left, dot_right, dot_top, dot_bottom)                 # dot area in pixels
fig_area = _box_area(v1, v2, h1, h2)                                           # area of the cropped figure
stripeFrac = area_factor * stripe_area / fig_area  # expected fraction of the image taken up by the stripe
dotFrac = area_factor * dot_area / fig_area        # expected fraction of the image taken up by the dot

print("The stripe fraction used is "+str(stripeFrac))
print("the dot fraction used is "+str(dotFrac))

In [None]:
## Settings for faiss k-means: number of iterations and number of clusters to start with
it, stripe_startNo = 10, 3  # iterations to perform in clustering, initial number of clusters

In [None]:
## Load all images, keep the unscaled RGB array for plotting, then standardise and flatten for clustering
# False in the image-selection position is used here to load every image in file_list.
timeStamp, pixcels = loadImages(in_dir, file_list, False, reduction, enchancement, dim, ench_type)
allImages = pixcels # original RGB pixels for plotting later (the next line rebinds pixcels to a new array)
# Standardise over the first three axes, leaving the last axis (size d) separate.
pixcels = (pixcels - pixcels.mean(axis=(0,1,2), keepdims=True )) / pixcels.std(axis = (0,1,2))
h, w, l, d = orig_shape = tuple(pixcels.shape) # array dimensions, needed to rebuild the (h, w, l) layout later
## Open question from the author: should x and y positions be added as clustering features?
## If they are, they must be scaled too; unscaled they differ by nearly an order of magnitude.
# Flatten to one row per pixel with d columns for faiss k-means.
# NOTE(review): no dtype conversion happens here; assumes faissCluster handles any conversion faiss needs - TODO confirm.
pixcels = np.reshape(pixcels, (h*w*l, d))

In [None]:
## Initial faiss k-means run, looking for the stripe cluster
D, I, kmeans, stripeColour = faissCluster(
    pixcels,         # flattened pixel array
    stripeFrac,      # expected fraction of the image taken up by the stripe
    stripe_startNo,  # number of clusters to start with
    it,              # number of iterations
)

In [None]:
## First pass at locating the stripe. The stated aim of this cell is to scale the images so that
## all stripes have the same intensity / shade; that scaling is not implemented yet (see TODO below).
# Indices of every pixel labelled as the stripe cluster, taken from the labels reshaped to (h, w, l).
stripe_loc = np.array(np.nonzero(np.reshape(I, (h, w, l)) == stripeColour))
# Stripe statistics and the cleaned stripe locations, using fixed 25 / 75 cleaning bounds.
stripeMedian, stripe_len, Vh, stripe_loc = GetStats(stripe_loc, 25, 75, 25, 75)

# TODO:
#  - find the average stripe location, then the pixel intensity at these locations
#  - find the average RGB value at these locations
#  - scale all RGB values by the factor that makes this match the cluster centroid
#  - add x and y components
#  - repeat faiss k-means

In [None]:
# Cleaning parameters (min / max bounds) used when cleaning the stripe locations
perpClean_min, perpClean_max = 25, 75
rotClean_min, rotClean_max = 25, 75

In [None]:
## Find the stripe statistics (median, length, rotation Vh) in two cleaning passes
# Start from every pixel labelled as the stripe cluster in the reshaped labels array.
stripe_loc = np.array(np.nonzero(np.reshape(I, (h, w, l)) == stripeColour))
# The first pass uses the user-defined cleaning bounds; the second repeats GetStats
# on the already cleaned locations with 1 / 99 bounds.
user_bounds = (perpClean_min, perpClean_max, rotClean_min, rotClean_max)
final_bounds = (1, 99, 1, 99)
for bounds in (user_bounds, final_bounds):
    stripeMedian, stripe_len, Vh, stripe_loc = GetStats(stripe_loc, *bounds)

In [None]:
# Cleaning parameters (min / max bounds) used when cleaning the dot locations
perpClean_min, perpClean_max = 15, 85
rotClean_min, rotClean_max = 10, 90

In [None]:
## Cluster again to find the dot, clean the dot pixels along both stripe axes, then take the per-image median
def _as_row_mask(flags, n_rows=3):
    """Repeat a per-column keep flag over n_rows rows so it can index an (n_rows, N) array."""
    return np.repeat(np.reshape(flags, (1, -1)), n_rows, axis=0)

# Start the dot search from the number of clusters already found plus the chosen step.
dot_startNo = len(kmeans.centroids) + stepAfterStripeCluss
# Add the positions projected onto the 2nd principal component to help find the dot cluster.
dot_search = addInd(w, h, l, pixcels, Vh)
D, I, kmeans, dotColour = faissCluster(dot_search, dotFrac, dot_startNo, it)
# Indices of the dot pixels from the reshaped labels array; the third row is the image number.
dot_loc = np.array(np.nonzero(np.reshape(I, (h, w, l)) == dotColour))

# 1) Project onto the second principal component of the stripe and drop the outliers.
dot_perp = dot_loc[0:2, :].T @ Vh[:, 1]
perp_keep = _as_row_mask(cleanIQR(perpClean_max, perpClean_min, dot_perp, False, True))
dot_loc = dot_loc[perp_keep].reshape((3, -1))

# 2) Project the remaining pixels onto the first principal component and drop the outliers.
dot_rot = dot_loc[0:2, :].T @ Vh[:, 0]
rot_keep = _as_row_mask(cleanIQR(rotClean_max, rotClean_min, dot_rot, False, True))
dot_loc = dot_loc[rot_keep].reshape((3, -1))

# 3) Pair each surviving pixel's position along the stripe with its image number,
#    then take the median position per image.
dot_rot = np.vstack((dot_loc[0:2, :].T @ Vh[:, 0], dot_loc[2, :]))
dotsDF = pd.DataFrame(dot_rot.T, columns=['Dot', 'Pic'])
dataOut = dotsDF.groupby('Pic').median()

In [None]:
# Known length of the stripe; negative if the measure should get larger as the dot gets closer to the stripe.
knownLength = -87
# Known zero offset, subtracted from the scaled distance.
offset = -127.5
# Pixel to mm scale, from the known stripe length and its measured length.
scale = knownLength / stripe_len
scale_message = 'The scale used is '+str(scale)+' mm/pixcel'
print(scale_message)
c = 0 # linear correction: constant added to the measure
m = 1.17 # linear correction: factor the measure is multiplied by
minDotA = 0.6 # min fraction of the expected dot area still accepted as a dot
# Max fraction of the expected dot area still accepted as a dot, symmetric about 1.
# This must use minDotA: the builtin `min` is a function and cannot be subtracted.
maxDotA = 1 + (1 - minDotA)

In [None]:
## Assemble the per-image results, filter out implausible dots, convert to a measurement, then save and plot
# Rebuild the summary from dotsDF rather than updating dataOut in place, so this
# cell gives the same result when it is re-run.
dots_by_pic = dotsDF.groupby('Pic')
dataOut = dots_by_pic.median().merge(dots_by_pic.count(), left_index = True, right_index = True)
dataOut = dataOut.merge(pd.DataFrame(file_list), left_index = True, right_index = True)
dataOut = dataOut.merge(pd.DataFrame(timeStamp), left_index = True, right_index = True)
# The merges suffix the clashing column names with _x / _y; give them meaningful names.
dataOut = dataOut.rename(columns = {'Dot_x' : 'Dot', 'Dot_y': 'Count', '0_x': 'file', '0_y': 'timeStamp'})
# Keep only images whose dot pixel count lies strictly inside the accepted range.
# .copy() makes the filtered frame independent, so the column assignments below
# do not trigger SettingWithCopyWarning.
enough_points = dataOut['Count'] > minDotA*dot_area # drop dots that do not contain enough points
not_too_many = dataOut['Count'] < maxDotA*dot_area # drop dots that contain too many points
dataOut = dataOut.loc[enough_points & not_too_many].copy()
# Distance between stripe median and dot median, scaled and corrected for the zero offset.
dataOut['measure'] = ((abs(stripeMedian - dataOut.Dot) * scale) - offset)
dataOut['measure'] = dataOut.measure * m + c # apply linear correction
dataOut = dataOut.sort_values('timeStamp')
# Put the date in the output file name, and report the path that was actually written.
out_path = out_dir+'vSp_'+time.strftime('%Y%m%d')+'.csv'
dataOut.to_csv(out_path)
print("dataOut saved in "+out_path+'\n\n')
print(str(dataOut.head())+'\n\n')
print(dataOut.describe())
plt.close()
plt.plot(dataOut.timeStamp, dataOut.measure)
plt.xlabel('timeStamp')
plt.ylabel('measure (mm)');