### Imports

In [None]:
#import relevant libraries

import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.signal
import seaborn as sns
from skimage import morphology, measure, draw, io, exposure

### Set parameters and directories

In [None]:
# Experimental parameters

START_IMG_INDEX = 0  # Index of the first image to process (inclusive)
END_IMG_INDEX = 50  # Index to stop at (exclusive: used as the end of a slice)
# Density troughs at or below this pixel intensity are ignored when picking the
# threshold. 150 used for all implants in our publication
MINIMUM_INTENSITY = 150
# Which of the troughs above MINIMUM_INTENSITY to use as the threshold when more
# than one is found. -2 used for all implants in our publication
TROUGH_INDEX = -2

# Specify directories

INPUT_DIR_IMAGES = ""  # Where the input radiographs are stored
OUTPUT_DIR_MASKS = ""  # Where you want your image masks to save to

# Fail early, with an actionable message, if the placeholders were not filled in
if not INPUT_DIR_IMAGES or not OUTPUT_DIR_MASKS:
    raise ValueError(
        "Set both INPUT_DIR_IMAGES and OUTPUT_DIR_MASKS before running this cell"
    )

# Resolve both directories to absolute paths BEFORE changing the working
# directory; otherwise relative paths would afterwards be interpreted
# relative to the input folder instead of the folder the notebook started in.
INPUT_DIR_IMAGES = os.path.abspath(INPUT_DIR_IMAGES)
OUTPUT_DIR_MASKS = os.path.abspath(OUTPUT_DIR_MASKS)

# Images are later read by bare filename, so work from inside the input folder
os.chdir(INPUT_DIR_IMAGES)

### Count and display images selected for mask generation

In [None]:
# Make a list of all files in the input directory for which you wish to try to
# auto generate masks. os.listdir returns names in arbitrary order, so the list
# is sorted to make the START_IMG_INDEX:END_IMG_INDEX selection reproducible.
pngs = sorted(os.listdir(INPUT_DIR_IMAGES))

# Print a list of pngs to be processed
selected_pngs = pngs[START_IMG_INDEX:END_IMG_INDEX]
print(f"Number of images to be processed: {len(selected_pngs)}")
print("\nImage filenames:")
for png in selected_pngs:
    print(png)

### Auto generate putative image masks for all selected images

#### Image masks are generated by thresholding at a certain pixel intensity. The threshold value is automatically selected by computing a kernel density estimate of the image's pixel intensities and detecting its minima (troughs). Once auto-thresholded, only the largest object is retained for hips, or the largest two objects for knees. Holes in the image mask are then filled. Masks are then saved in the specified output directory.

#### In order to identify radiograph-mask pairs that are suitable for segmentation network training/testing, the generated masks have to be visually inspected and each image classified as having a high- or low-quality mask. Only high-quality masks should then be used for segmentation network training and testing. In our study we simply made an Excel sheet ("segmentation_dataset.xlsx") containing the png names for all the high-quality masks in the first column ("filenames") and the model class label for each png in the second column ("labels").

In [None]:
# For every selected image: pick an intensity threshold from the troughs of the
# pixel-intensity density curve, threshold the image, keep only the largest
# connected object(s), fill holes, then display and save the resulting mask.
# A failure on one image is reported and the loop moves on to the next image.
for png in pngs[START_IMG_INDEX:END_IMG_INDEX]:
    try:
        print(f"Processing image: {png}")

        # Read image as numpy array and display it
        image = io.imread(png)
        plt.imshow(image, cmap='gray')
        plt.axis('off')
        plt.show()

        # Calculate kernel density values for pixel intensities in the image.
        # The plot is drawn on its own, freshly created axes so that lines[0]
        # is always the KDE curve of THIS image (never a stale curve left on a
        # reused axes) and so the mask shown later is not drawn over this plot.
        # NOTE(review): distplot is deprecated in recent seaborn releases;
        # consider kdeplot when upgrading.
        kde_fig, kde_ax = plt.subplots()
        sns.distplot(image.ravel(), hist=True, kde=True,
                     kde_kws={'linewidth': 3}, ax=kde_ax)
        kde_line = kde_ax.lines[0]
        pixel_values = kde_line.get_xdata()
        density = kde_line.get_ydata()
        plt.show()
        plt.close(kde_fig)

        # Find the troughs in the pixel intensity density plot (peaks of the
        # negated curve), and select the relevant one as the intensity threshold
        troughs, _ = scipy.signal.find_peaks(-density, distance=20, height=(-0.1, -0.001))
        trough_intensities = pixel_values[troughs]
        troughs_above_min = [trough_intensity for trough_intensity in trough_intensities
                             if trough_intensity > MINIMUM_INTENSITY]
        print(f"All troughs identified : {trough_intensities}")
        print (f"Number of troughs above minimum of {MINIMUM_INTENSITY}: {len(troughs_above_min)}")
        if len(troughs_above_min) > 1:
            THRESHOLD = troughs_above_min[TROUGH_INDEX]
        elif len(troughs_above_min) == 1:
            # Take the single value itself, not the one-element list
            THRESHOLD = troughs_above_min[0]
        else:
            if trough_intensities.size == 0:
                raise ValueError("no troughs found in the pixel intensity density")
            # Fall back to the highest-intensity trough found below the minimum
            THRESHOLD = trough_intensities[-1]
            print(f"CAUTION: NO TROUGHS IN RANGE ABOVE {MINIMUM_INTENSITY}")
        print(f"Autothreshold intensity chosen: {THRESHOLD}")

        # Apply pixel intensity threshold to generate boolean array (image mask)
        image_bool = image >= THRESHOLD

        # Retain only largest object in array (hips) or largest two objects (knees) in the mask array
        # Hopefully these remaining areas will correspond to the metallic implant for a reasonable proportion of images
        image_labelled = measure.label(image_bool)
        areas = sorted(region.area for region in measure.regionprops(image_labelled))
        if not areas:
            raise ValueError("thresholded image contains no objects")
        # (<19 is hips in our labelling system): hips keep 1 object, knees keep 2
        n_objects_to_keep = 1 if int(png[8:10]) < 19 else 2
        # Size of the smallest object to keep; if fewer objects exist than
        # requested, keep all of them instead of failing on a missing index
        min_size = areas[-min(n_objects_to_keep, len(areas))]
        image_minus_smallobjects = morphology.remove_small_objects(image_labelled,
                                                                   min_size=min_size)

        # remove any holes in the mask (a boolean array is passed because
        # remove_small_holes treats any labelled input as boolean anyway)
        image_implant_only = (morphology.remove_small_holes(image_minus_smallobjects > 0,
                                                            area_threshold=4000)).astype(int)

        # display auto-generated mask
        plt.imshow(image_implant_only, cmap='gray')
        plt.axis('off')
        plt.show()

        # save mask as png, named after the input file without its extension
        io.imsave(f"{OUTPUT_DIR_MASKS}/{os.path.splitext(png)[0]}_MASK.png",
                  image_implant_only)

    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C / kernel interrupt still stops
        # the loop; the reason is printed so failures can be diagnosed.
        print(f"UNABLE TO PROCESS IMAGE WITH FILENAME: {png}")
        print(f"    Reason: {type(exc).__name__}: {exc}")