In [None]:
import csv
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import random

In [None]:
class DataPrep:
    """Turn a folder of sample photographs listed in a CSV into normalized training images.

    Each image is thresholded, its largest bright region is isolated, intensity is
    normalized inside that region, the region is centered on its centroid in a square
    canvas, and the result is resized to ``image_size`` x ``image_size``.

    Args:
        threshold: grayscale cut-off passed to ``cv2.threshold`` (binary mode).
        min_comp_size: stored on the instance; not used by any method in this class.
        image_size: edge length in pixels of every output image.
        grid_size: number of tiles per side in the ``adjustThreshold`` preview.
        camera_h_fov: horizontal field of view of the camera (presumably degrees -- confirm).
        camera_h_res: horizontal resolution of the camera images in pixels.
        folder: directory holding the images and the CSV files.
        in_csv: input CSV name inside ``folder``; column 0 is the image filename.
        out_csv: output CSV name inside ``folder``.
        prefix: string prepended to every processed image filename.

    The input CSV is read immediately on construction.
    """

    def __init__(self, threshold, min_comp_size=0.001, image_size=100, grid_size=10,
                 camera_h_fov=69, camera_h_res=2592,
                 folder='.', in_csv='data.csv', out_csv='final_data.csv', prefix='final_'):
        self.threshold = threshold
        self.min_comp_size = min_comp_size
        self.image_size = image_size
        self.grid_size = grid_size
        # field of view covered by a single pixel, used to derive SAMPLE_FOV
        self.camera_fov_per_pixel = camera_h_fov/camera_h_res
        self.folder = folder
        self.csv_in_path = os.path.join(folder, in_csv)
        self.csv_out_path = os.path.join(folder, out_csv)
        self.prefix = prefix
        self.headers = []
        self.in_data = []
        self.out_data = []
        self._readData()
        # structuring element for erosion (not referenced by the methods below)
        self.se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))

    def _readData(self):
        """Load the input CSV: the first row becomes ``self.headers``, the rest ``self.in_data``."""
        self.headers = []
        self.in_data = []
        # newline='' is what the csv module expects so quoted line breaks survive
        with open(self.csv_in_path, 'r', newline='') as file:
            csv_reader = csv.reader(file)
            # an empty file leaves headers as [] so _writeData can still extend it
            self.headers = next(csv_reader, [])
            # blank lines parse as [] and would fail later on row[0]; drop them here
            self.in_data = [row for row in csv_reader if row]

    def _writeData(self):
        """Write ``self.out_data`` to the output CSV with an extra SAMPLE_FOV header column."""
        with open(self.csv_out_path, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(self.headers + ['SAMPLE_FOV'])
            writer.writerows(self.out_data)

    @staticmethod
    def _centerOnCentroid(image, left, top, width, height, center_x, center_y):
        """Paste the bounding-box crop of ``image`` into a square canvas centered on the centroid.

        Args:
            image: 2-D uint8 array to crop from.
            left, top, width, height: integer bounding box of the region.
            center_x, center_y: centroid of the region in image coordinates.

        Returns:
            ``(canvas, size)`` where ``canvas`` is a ``size`` x ``size`` uint8 array
            (zeros outside the pasted crop).
        """
        right = left + width
        bottom = top + height
        # twice the largest centroid-to-edge distance is enough to hold the whole box
        size = int(2 * max(center_y - top, bottom - center_y,
                           center_x - left, right - center_x))
        canvas = np.zeros((size, size), dtype=np.uint8)
        crop = image[top:bottom, left:right]
        # shift the crop so the centroid lands in the middle of the canvas
        offset_x = int(left - (center_x - size/2))
        offset_y = int(top - (center_y - size/2))
        canvas[offset_y:offset_y+crop.shape[0],
               offset_x:offset_x+crop.shape[1]] = crop
        return canvas, size

    def _processImageAlt(self, image_color, highlight=False):
        """Isolate, normalize, center and resize the main bright region of one image.

        Args:
            image_color: BGR image as returned by ``cv2.imread`` (``None`` is tolerated).
            highlight: when true, return a 0/255 image marking the pixels that are
                exactly 0 in the processed result instead of the result itself.

        Returns:
            ``(image, sample_fov)``; ``(None, None)`` when the image is missing, no
            contour is found, or the region has no intensity range to normalize.
        """
        # cv2.imread signals an unreadable file by returning None
        if image_color is None:
            return None, None
        # convert the image to grayscale
        image_gray = cv2.cvtColor(image_color, cv2.COLOR_BGR2GRAY)
        # apply threshold
        image_thresh = cv2.threshold(image_gray, self.threshold, 255, cv2.THRESH_BINARY)[1]
        # external contours only; [-2] picks the contour list on both OpenCV 3 and 4
        contours = cv2.findContours(image_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
        if len(contours) < 1:
            return None, None
        # keep the region with the longest perimeter, assume the rest is noise
        # (CHAIN_APPROX_NONE stores every boundary point, so len() is the perimeter)
        largest = max(contours, key=len)
        # create a filled mask of that region
        mask = np.zeros(image_thresh.shape[:2], dtype=np.uint8)
        cv2.drawContours(mask, [largest], -1, 255, -1)
        # normalize the image intensity using the range found inside the mask
        min_val, max_val, _, _ = cv2.minMaxLoc(image_gray, mask=mask)
        if max_val - min_val < 1:
            return None, None
        # real data values should be 1 to 255
        scale = 254/(max_val - min_val)
        image_norm = np.clip(scale*(image_gray.astype(np.float32) - min_val) + 1, 0, 255).astype(np.uint8)
        # background will have value 0
        image_masked = cv2.bitwise_and(image_norm, image_norm, mask=mask)
        # find the connected components in the mask to get the centroid and extents
        numLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(mask, 4, cv2.CV_32S)
        if numLabels < 2:
            # only the background label exists, so there is nothing to center
            return None, None
        if numLabels > 2:
            print("WARNING: more than one non-background region found")
        left, top, width, height = (int(v) for v in stats[1][:4])
        center_x, center_y = centroids[1]
        image_centered, size = self._centerOnCentroid(image_masked, left, top, width, height,
                                                      center_x, center_y)
        # resize the masked, centered image (the one SAMPLE_FOV is computed for)
        image_resize = cv2.resize(image_centered, (self.image_size, self.image_size))
        # determine the actual fov for the sample
        sample_fov = self.camera_fov_per_pixel * size
        if not highlight:
            return image_resize, sample_fov
        else:
            return (image_resize == 0).astype("uint8") * 255, sample_fov

    def adjustThreshold(self, highlight=False):
        """Interactively tune ``self.threshold`` on a random grid of processed samples.

        Keys in the OpenCV window: ``a``/``s`` lower/raise the threshold by the current
        step, ``e``/``d`` double/halve the step, ``q`` quits. The final threshold is
        printed and left on the instance.

        Args:
            highlight: forwarded to ``_processImageAlt``.
        """
        # randomly sample images from data set to fill the grid (fewer if the set is small)
        cell_count = self.grid_size**2
        rows = random.sample(self.in_data, min(cell_count, len(self.in_data)))
        # decode each file once instead of on every refresh of the loop below;
        # this keeps all sampled images in memory for the duration of the tuning
        images = [cv2.imread(os.path.join(self.folder, row[0])) for row in rows]
        # Create a blank grid image
        grid_pixel_size = self.image_size * self.grid_size
        image_grid = np.zeros((grid_pixel_size, grid_pixel_size), dtype=np.uint8)
        blank_tile = np.zeros((self.image_size, self.image_size), dtype=np.uint8)
        # initial threshold value step size of 16
        step = 16
        while True:
            # construct the grid of processed images
            for i, image_color in enumerate(images):
                image_proc, _ = self._processImageAlt(image_color, highlight)
                if image_proc is None:
                    image_proc = blank_tile
                grid_row, grid_col = divmod(i, self.grid_size)
                image_grid[grid_row*self.image_size:(grid_row+1)*self.image_size,
                           grid_col*self.image_size:(grid_col+1)*self.image_size] = image_proc
            # display the image grid
            cv2.imshow('frame', image_grid)
            key = cv2.waitKey(20) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('a'):
                # keep the threshold inside the 8-bit grayscale range
                self.threshold = max(0, self.threshold - step)
            elif key == ord('s'):
                self.threshold = min(255, self.threshold + step)
            elif key == ord('e'):
                step *= 2
            elif key == ord('d'):
                # integer halving keeps step (and the threshold) an int, never below 1
                step = max(1, step // 2)
        cv2.destroyAllWindows()
        print(self.threshold)

    def processData(self):
        """Process every input row, save the images, and write the output CSV.

        Each saved image is named ``prefix + filename`` inside ``folder``; its CSV row
        repeats the remaining input columns and appends the sample field of view.
        Rows whose image cannot be read or yields no usable region are skipped with
        a printed message.
        """
        self.out_data = []
        for row in self.in_data:
            filename = row[0]
            image_color = cv2.imread(os.path.join(self.folder, filename))
            if image_color is None:
                print(f'Skipping {filename}, image could not be read')
                continue
            image_proc, sample_fov = self._processImageAlt(image_color)
            if image_proc is None:
                print(f'Skipping {filename}, no non-background, non-noise components were found')
                continue
            out_name = self.prefix + filename
            cv2.imwrite(os.path.join(self.folder, out_name), image_proc)
            self.out_data.append([out_name] + row[1:] + [sample_fov])
        self._writeData()

In [None]:
# Build the preprocessing helper. The constructor immediately reads
# data_collection/data.csv (the default in_csv inside `folder`).
# camera_h_fov / camera_h_res set the field of view per pixel used for SAMPLE_FOV
# (presumably degrees and pixels -- confirm against the camera that took the images).
dp = DataPrep(threshold=158, min_comp_size=0.001, 
              camera_h_fov=69, camera_h_res=640,
              folder='data_collection')

In [None]:
# Interactive tuning pass with highlight=True: pixels that are exactly 0 in each
# processed tile are drawn white, everything else black.
# Keys in the OpenCV window: a/s lower/raise the threshold, e/d double/halve the step, q quits.
dp.adjustThreshold(highlight=True)

In [None]:
# Interactive tuning pass with highlight=False: the grid shows the processed tiles themselves.
# Keys in the OpenCV window: a/s lower/raise the threshold, e/d double/halve the step, q quits.
dp.adjustThreshold(highlight=False)

In [None]:
# Display the threshold currently stored on the helper (as left by the tuning passes).
dp.threshold

In [None]:
# Process every row of the input CSV: each processed image is saved in the folder under
# the default 'final_' prefix, and final_data.csv is written with an added SAMPLE_FOV column.
# Rows for which processing returns no image are skipped with a printed message.
dp.processData()