In [1]:
## This scripts serves as the extractor for all features used in this project
## it will iterate through each image in the dataset and extract features


In [2]:
# import modules
import pandas as pd
import numpy as np
import cv2 as cv
from radiomics import featureextractor
from IPython.display import clear_output
from iteration_utilities import flatten, deepflatten



In [3]:
# Load training data csv. The original file contains the images names and patient data
training_data_csv = pd.read_csv('../merged_training_data.csv')
training_data_csv.head()

Unnamed: 0,image_id,age_approximate,sex,melanoma,seborrheic_keratosis
0,ISIC_0000000,55,female,0.0,0.0
1,ISIC_0000001,30,female,0.0,0.0
2,ISIC_0000002,60,female,1.0,0.0
3,ISIC_0000003,30,male,0.0,0.0
4,ISIC_0000004,80,male,1.0,0.0


In [4]:
## image and mask loading

def img_mask_load(img_name_load):
    
    ''' 
    
    Function that takes the image name 
    from the Dataframe, searches this name in the image directory
    and loads it, along with the corresponding mask. Also, applies
    the mask to perform the segmentation along with histogram normalization
    
    '''
    
    im_path = '../../ISIC-2017_Training_Data/' ## CHANGE THIS VARIABLE WITH THE PATH WHERE YOU HAVE THE IMAGES
    mask_path = '../../ISIC-2017_Training_Part1_GroundTruth/' ## CHANGE THIS VARIABLE WITH THE PATH WHERE THE MAKS ARE
    img = cv.imread(im_path + img_name_load + '.jpg',-1)
    img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    mask = cv.imread(mask_path + img_name_load + '_segmentation.png', -1)
    output_array = np.zeros((img.shape[0],img.shape[1])) # Array to store normalized image
    img_segmented = cv.normalize(cv.bitwise_and(img_rgb, img_rgb, mask=mask),  output_array, 0, 255, cv.NORM_MINMAX)
    return img_segmented


In [5]:
## function for RGB features extraction

def features_rgb(img_name): 
    segmented = img_mask_load(img_name)
    
    # Tuple with each color channel
    color = ('r','g','b')
    
    
    #  Declare list to store the most repeated pixel intensity in each channel
    mode_color_channel = []
    bp = []
    medians = []
    iqrs = []
    

    # Iterate through each color channel and obtain histogram
    for i,col in enumerate(color):   
        histr = cv.calcHist([segmented],[i],None,[256],[1,256]) # the range should be [1,256] to exclude black background    
        mode_color_channel.append(list(histr.flatten()).index(max(histr.flatten()))) # append mode to mode list
        bp.append(segmented[:,:,i].flatten()[segmented[:,:,i].flatten().nonzero()])
        
    for i in range(len(bp)):
        medians.append(round(np.median(bp[i]))) # get median
        q3, q1 = np.percentile(bp[i], [75,25]) # get 3rd quartile and 1st quartile
        iqr = round(q3 - q1) # calculate quartile diference
        iqrs.append(iqr) # append to interquartile range array
                    
    return mode_color_channel, medians, iqrs


In [6]:
## function for Radiomics features extraction

def features_radiomics(img_name):
    return None

In [7]:
## function to iterate through the images, extract the features and append them to the dataframe

def feature_extraction():

    columns = ['image_id','red_mode', 'green_mode', 'blue_mode', 'red_median','green_median', 'blue_median', 'red_iqr', 'green_iqr', 'blue_iqr']
    features_dataframe = pd.DataFrame(columns = columns)
    
    for index, image in enumerate(training_data_csv['image_id'][:100]):
        
        feat_rgb = features_rgb(image)   
        print(feat_rgb)
        feat_radio = features_radiomics(image)
        features_dataframe = features_dataframe.append(pd.DataFrame([deepflatten(feat_rgb)], columns = columns[1:], index=[index]))
        features_dataframe.at[index, columns[0]] = image
        
        print(str(index+1) + '/' + str(len(training_data_csv['image_id']))+ ' images processed...')
        clear_output(wait=True)
    return features_dataframe

In [8]:
## Hit it! Extract the features
f = feature_extraction()
f

Unnamed: 0,image_id,red_mode,green_mode,blue_mode,red_median,green_median,blue_median,red_iqr,green_iqr,blue_iqr
0,ISIC_0000000,65,51,57,94,79,84,67,65,60
1,ISIC_0000001,87,53,46,116,72,55,86,58,40
2,ISIC_0000002,162,120,114,161,131,131,26,52,67
3,ISIC_0000003,182,128,51,161,110,71,60,57,48
4,ISIC_0000004,192,132,171,192,128,157,36,44,55
...,...,...,...,...,...,...,...,...,...,...
95,ISIC_0000104,99,44,30,101,50,36,28,26,25
96,ISIC_0000105,206,49,25,166,97,52,77,77,53
97,ISIC_0000107,218,48,11,178,95,44,76,92,73
98,ISIC_0000108,180,25,15,144,61,29,68,66,41
