## ECG Automated Cropping Script

### Imports

In [None]:
import os
from PIL import Image, ImageFilter
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import find_peaks

### Variables

In [None]:
# Crop setting shared by the cells below.
# Presumably the index of the horizontal gridline used as the bottom crop
# edge -- TODO confirm against the cropping cells.
# gridline_to_crop = 13  # For 2 rhythm strip images
gridline_to_crop = 20

# Folder holding the scanned ECG images, and the folder that receives the
# cropped copies (it must already exist unless the mkdir below is enabled).
input_dir = "//data//Vandenberg-Lab//Internal//Rui//ECGRejectionScannedSamples//"
output_dir = "//data//Vandenberg-Lab//Internal//Rui//ECGRejectionScannedSamples//Autocropped//"
# os.mkdir(output_dir)

# Alternative dataset (swap with the pair above to use it):
# input_dir = "//data//Vandenberg-Lab//Internal//Rui//Transplant Cohort Samples//Originals//"
# output_dir = "//data//Vandenberg-Lab//Internal//Rui//Transplant Cohort Samples//Autocropped//"


### Main Working Version

In [None]:
# Crop every PNG in input_dir to the outermost detected gridlines and show the
# original next to the cropped result. Nothing is written to disk here.
#
# os.walk yields (dirpath, dirnames, filenames); next() keeps only the top
# level, so sub-folders are not visited.
path, folder, file_list = next(os.walk(input_dir))
print(file_list)

for file_name in file_list:

    # Check to make sure the file is an image ('.png' extension)
    if not file_name.endswith('.png'):
        continue

    # if file_name[-5] != 'A':
    #     continue

    # Read original image, get width and height
    originalImage = Image.open(os.path.join(input_dir, file_name))
    w, h = originalImage.size

    # Split image into colour channels, and a greyscale version.
    # Converting to RGB first gives exactly three channels whatever the source
    # mode is, which replaces the old bare try/except around split().
    red, green, blue = originalImage.convert('RGB').split()
    grey = originalImage.convert('L')

    # Perform image subtraction to eliminate signal from grey image but retain gridlines.
    # NOTE(review): both operands are presumably uint8, so negative differences
    # wrap modulo 256 -- the < 200 cut below appears to rely on that; confirm
    # before changing the dtype.
    channel_subtract = np.asarray(grey) - np.asarray(red)
    # plt.imshow(Image.fromarray(channel_subtract), cmap='gray')
    # plt.show()

    # Isolate major gridlines in image subtraction (values >= 200 are zeroed)
    threshold = np.where(channel_subtract < 200, channel_subtract, 0.0)
    # plt.imshow(Image.fromarray(threshold[:,:150]), cmap='gray')
    # plt.show()

    # Project the thresholded image onto each axis; gridlines show up as peaks
    # in the column sums (vertical lines) and the row sums (horizontal lines).
    # This step was missing from the cell, so the crop below used whatever
    # peaks another cell had left in the notebook namespace.
    vert_sum = np.sum(threshold, axis=0)
    horz_sum = np.sum(threshold, axis=1)
    vert_gridlines = find_peaks(vert_sum, prominence=10000)
    horz_gridlines = find_peaks(horz_sum, prominence=10000)

    # plt.plot(vert_sum)
    # plt.plot(vert_gridlines[0], vert_sum[vert_gridlines[0]], 'or')
    # plt.show()

    # plt.plot(horz_sum)
    # plt.plot(horz_gridlines[0], horz_sum[horz_gridlines[0]], 'or')
    # plt.show()

    # A crop box needs two distinct gridlines on each axis
    if len(vert_gridlines[0]) < 2 or len(horz_gridlines[0]) < 2:
        print('Skipping ' + file_name + ': not enough gridlines detected to crop')
        continue

    # (left, upper, right, lower) taken from the first/last detected gridlines
    cropCoords = (
        vert_gridlines[0][0],
        horz_gridlines[0][0],
        vert_gridlines[0][-1],
        horz_gridlines[0][-1]  # gridline_to_crop
    )

    cropImage = originalImage.crop(cropCoords)  # 13 for double rhythm strip
    print('Original Image')
    plt.imshow(originalImage)
    plt.show()
    print('Cropped Image')
    plt.imshow(cropImage)
    plt.show()

### Main Testing Versions

In [None]:
# Experimental cropping loop: for every PNG in input_dir, search for a
# threshold that yields roughly the expected number of horizontal gridlines,
# crop to the detected grid and save the result into output_dir.
#
# os.walk yields (dirpath, dirnames, filenames); next() keeps only the top
# level, so sub-folders (including output_dir) are not visited.
path, folder, file_list = next(os.walk(input_dir))
print(file_list)

# Saving fails if the target folder is missing, so create it up front
os.makedirs(output_dir, exist_ok=True)

for file_name in file_list:

    # Check to make sure the file is an image ('.png' extension)
    if not file_name.endswith('.png'):
        continue

    # if file_name[-5] == 'A':
    horizontal_grid_size = 27  # USING THIS FOR SCANNED DOCUMENTS
    grid_cut = 21
    # elif file_name[-5] == 'B':
    # horizontal_grid_size = 22
    # grid_cut = 13
    # else:
    #     continue

    originalImage = Image.open(os.path.join(input_dir, file_name))
    w, h = originalImage.size
    print('Original')
    plt.imshow(originalImage)
    plt.show()

    # Converting to RGB first gives exactly three channels whatever the source
    # mode is, which replaces the old bare try/except around split().
    red, green, blue = originalImage.convert('RGB').split()
    grey = originalImage.convert('L')

    # plt.imshow(red, cmap='Reds_r')
    # plt.show()

    # plt.imshow(blue, cmap='Blues_r')
    # plt.show()

    # plt.imshow(green, cmap='Greens_r')
    # plt.show()

    # Greyscale minus one colour channel each.
    # NOTE(review): operands are presumably uint8, so negative differences
    # wrap modulo 256 -- the thresholds below appear to rely on that; confirm
    # before changing the dtype.
    yllw = np.asarray(grey) - np.asarray(red)
    prpl = np.asarray(grey) - np.asarray(green)
    brwn = np.asarray(grey) - np.asarray(blue)

    # Difference image used for gridline detection in this run
    selection = prpl

    plt.imshow(Image.fromarray(selection), cmap='gray')
    plt.show()

    # Original 3/4 crop method
    # croppedImage = originalImage.crop((0, 0, w, .75*h))
    # print('Cropped')
    # plt.imshow(croppedImage)
    # plt.show()

    # Using Edge Detection and Grid-Approx.
    # (the kernels are only referenced by the commented-out filter experiment below)
    # kernel = (
    #     -1,0,1,
    #     -2,0,2,
    #     -1,0,1
    # )
    vert_kernel = (
        -1, 0, 1,
        -2, 0, 2,
        -1, 0, 1
    )
    horz_kernel = (
        -1, -2, -1,
         0,  0,  0,
         1,  2,  1
    )
    # corn_kernel = (
    #     -2, -2, 0,
    #     -2, 0, 2,
    #      0, 2, 2
    # )
    corn_kernel = (
        -2, -2, -1,  0,  0,
        -2, -1,  0,  0,  0,
        -1,  0,  0,  0,  1,
         0,  0,  0,  1,  2,
         0,  0,  1,  2,  2
    )
    # kernel = (
    #     -2, -1, 0, -1, -2,
    #     -1, -1, 2, -1, -1,
    #      0,  2, 2,  2,  0,
    #     -1, -1, 2, -1, -1,
    #     -2, -1, 0, -1, -2,
    # )
    # kernel = (
    #     -10, -10, -10, -10, -10,
    #      -5,  -5,  -5,  -5,  -5,
    #      -5,  -5,  0,  5,  5,
    #      5,  5,  5,  5,  5,
    #      10,  10,  10,  10,  10,
    # )
    # scale = .5
    # greyImage = originalImage.convert('L')
    # edges = greyImage.filter(ImageFilter.FIND_EDGES)
    # plt.imshow(edges, cmap='gray')
    # plt.show()
    # vert = greyImage.filter(ImageFilter.Kernel(size=(3, 3), kernel=vert_kernel, scale=scale))
    # horz = greyImage.filter(ImageFilter.Kernel(size=(3, 3), kernel=horz_kernel, scale=scale))
    # corner = greyImage.filter(ImageFilter.Kernel(size=(5, 5), kernel=corn_kernel, scale=scale))
    # plt.imshow(vert, cmap='gray')
    # plt.show()
    # plt.imshow(horz, cmap='gray')
    # plt.show()
    # plt.imshow(corner, cmap='gray')
    # plt.show()

    # plt.close()

    # Using height and expected number of gridlines to estimate the distance
    # between gridlines; 0.9 leaves some slack. The same spacing is used for
    # both axes (presumably because the grid cells are square -- TODO confirm).
    min_peak_distance = (h / horizontal_grid_size) * 0.9

    proceed = False

    # Raise the cut-off step by step until the number of horizontal gridlines
    # found is within one of the expected count.
    for threshold in np.arange(150, 250, 10):  # np.arange(50,300,10): #
        thresholdedImage = np.where(selection < threshold, selection, 0.0)

        # Column sums peak at vertical gridlines, row sums at horizontal ones
        vert_sum = np.sum(thresholdedImage, axis=0)
        horz_sum = np.sum(thresholdedImage, axis=1)

        try:
            vert_gridlines = find_peaks(vert_sum, prominence=3000, distance=min_peak_distance)  # 10000)#
            horz_gridlines = find_peaks(horz_sum, prominence=3000, distance=min_peak_distance)  # 10000)#
        except ValueError:
            # find_peaks rejects invalid arguments (e.g. distance < 1 on very
            # small images); treat that as "no usable result" for this threshold
            continue

        print(len(horz_gridlines[0]))

        if (horizontal_grid_size - 2) < len(horz_gridlines[0]) < (horizontal_grid_size + 2):
            proceed = True
            break

    # No threshold produced a plausible grid: leave this file untouched
    if not proceed:
        continue

    # The crop box needs a left and a right edge
    if len(vert_gridlines[0]) < 2:
        print('Skipping ' + file_name + ': not enough vertical gridlines detected to crop')
        continue

    plt.plot(vert_sum)
    plt.plot(vert_gridlines[0], vert_sum[vert_gridlines[0]], 'or')
    plt.show()

    plt.plot(horz_sum)
    plt.plot(horz_gridlines[0], horz_sum[horz_gridlines[0]], 'or')
    plt.show()

    # (left, upper, right, lower): outermost vertical gridlines, first
    # horizontal gridline, and the horizontal gridline at index grid_cut.
    # The count check above guarantees index grid_cut (21) exists.
    cropImage = originalImage.crop((
        vert_gridlines[0][0],
        horz_gridlines[0][0],
        vert_gridlines[0][-1],
        horz_gridlines[0][grid_cut],
    ))  # 13 for double rhythm strip

    plt.imshow(cropImage)
    plt.show()
    cropImage.save(os.path.join(output_dir, file_name))

In [None]:
# Scratch cell: re-threshold the grey-minus-red image left over from the last
# loop iteration and preview its left-most 150 columns.
yllw.min()  # result is discarded here (not the cell's last expression)
threshold = np.where(yllw >= 200, 0.0, yllw)  # zero out everything at or above 200
plt.imshow(Image.fromarray(threshold[:, :150]), cmap='gray')
plt.show()

In [None]:
# Next: crop image to only the grid.
# Sum the thresholded image along each axis; peaks in the column sums and the
# row sums mark the positions of the gridlines.
vert_sum = threshold.sum(axis=0)
horz_sum = threshold.sum(axis=1)
# find_peaks returns (peak_indices, properties) for each profile
vert_gridlines, horz_gridlines = (
    find_peaks(profile, prominence=10000) for profile in (vert_sum, horz_sum)
)
print(horz_gridlines)

In [None]:
# Plot each projection with its detected gridline peaks marked as red dots:
# first the column sums, then the row sums.
for _profile, _peaks in (
    (vert_sum, vert_gridlines[0]),
    (horz_sum, horz_gridlines[0]),
):
    plt.plot(_profile)
    plt.plot(_peaks, _profile[_peaks], 'or')
    plt.show()

In [None]:
# Crop the last-loaded image to the detected grid and show it after the original.
_cols = vert_gridlines[0]  # x positions of vertical gridlines
_rows = horz_gridlines[0]  # y positions of horizontal gridlines
# Box is (left, upper, right, lower); row index 13 for double rhythm strip
cropImage = originalImage.crop((_cols[0], _rows[0], _cols[-1], _rows[13]))
for _img in (originalImage, cropImage):
    plt.imshow(_img)
    plt.show()

In [None]:
# Scratch cell: flatten a ragged list of label groups into one 1-D array by
# appending each group in turn (np.append returns a new array every time).
labels = [[0, 0, 1], [1, 0]]
label_list = []

for _group in labels:
    label_list = np.append(label_list, _group)

label_list