Study various feature-extraction techniques for computing image descriptors:

1. HoG
2. LBP Texture
3. Color Histogram
4. Hu Moments and Zernike Moments
5. Haralick Textures

In [1]:
# importing the libraries
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
from PIL import Image
from skimage.io import imread
from skimage.transform import resize
from skimage.feature import hog
from skimage import exposure
import matplotlib.pyplot as plt
import cv2
import mahotas
from skimage import feature
import numpy as np
import mahotas as mt


In [2]:
# Read the HAM10000 metadata CSV into a DataFrame.
#
# The Windows path is written as a raw string (r"...") so every backslash is
# taken literally. In a normal string literal, sequences such as "\F", "\I",
# "\A" and "\H" are invalid escape sequences: they currently resolve to the
# same characters but trigger a warning on recent Python versions.
# NOTE(review): this is an absolute, machine-specific location; the image
# cell below uses a path relative to the notebook ("../Data") - consider
# doing the same here so the notebook runs on other machines.
METADATA_CSV = r"E:\Final_Year_Project\Implementation\Image-Text-NN-SC-Detection\Analysis\HAM10000_metadata.csv"

dataset = pd.read_csv(METADATA_CSV)
dataset.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern


In [3]:
# Lookup tables used to derive extra columns from the metadata.

# Diagnosis code (`dx`) -> human-readable lesion name.
# NOTE(review): the 'bkl' label ends with a trailing space; it is kept
# unchanged here because downstream code may compare against it.
lesion_type_dict = dict(
    nv='Melanocytic nevi',
    mel='Melanoma',
    bkl='Benign keratosis-like lesions ',
    bcc='Basal cell carcinoma',
    akiec='Actinic keratoses',
    vasc='Vascular lesions',
    df='Dermatofibroma',
)

# Body site (`localization`) -> numeric code.
# NOTE(review): the rationale behind these particular numbers is not visible
# in this notebook section - confirm before relying on their magnitudes.
localization_type_dict = dict([
    ('abdomen', 50),
    ('acral', 100),
    ('back', 150),
    ('chest', 200),
    ('ear', 250),
    ('face', 51),
    ('foot', 101),
    ('genital', 151),
    ('hand', 201),
    ('lower extremity', 251),
    ('neck', 52),
    ('scalp', 102),
    ('trunk', 152),
    ('unknown', 202),
    ('upper extremity', 252),
])

# Patient sex -> numeric code. Any value other than 'male' / 'female' has no
# entry, so `.get` yields None for it.
gender_type_dict = dict(male=25, female=50)

# Readable diagnosis name, plus its integer category code.
dataset['cell_type'] = dataset['dx'].map(lesion_type_dict.get)
dataset['cell_type_idx'] = pd.Categorical(dataset['cell_type']).codes

# Numeric encodings of body site and sex.
dataset['Feature_1'] = dataset['localization'].map(localization_type_dict.get)
dataset['Feature_2'] = dataset['sex'].map(gender_type_dict.get)

dataset.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,cell_type,cell_type_idx,Feature_1,Feature_2
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern,Benign keratosis-like lesions,2,250,25.0


In [4]:
# Impute missing ages with the column median; for a numeric variable such as
# age the median is less sensitive to extreme values than the mean.
#
# The filled column is assigned back to the frame rather than calling
# `fillna(..., inplace=True)` on `dataset["age"]`: that chained in-place form
# operates on an intermediate object, emits a FutureWarning on recent pandas
# and no longer updates `dataset` once Copy-on-Write is enabled.
dataset["age"] = dataset["age"].fillna(dataset["age"].median())

In [5]:
# Build an image-id -> file-path lookup and attach it to the metadata.

# Root folder that holds the image sub-folders, relative to the notebook.
base_skin_dir = os.path.join("..", "Data")

# Every *.jpg exactly one directory below the root is indexed by its file
# name without extension; if the same name appears twice, the later match
# returned by glob overwrites the earlier one.
imageid_path_dict = dict(
    (os.path.splitext(os.path.basename(jpg_path))[0], jpg_path)
    for jpg_path in glob(os.path.join(base_skin_dir, '*', '*.jpg'))
)

# Rows whose image_id has no matching file receive None from `.get`.
dataset['path'] = dataset['image_id'].map(imageid_path_dict.get)
dataset.head()


Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,cell_type,cell_type_idx,Feature_1,Feature_2,path
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0,..\Data\HAM10000_images_part_1\ISIC_0027419.jpg
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0,..\Data\HAM10000_images_part_1\ISIC_0025030.jpg
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0,..\Data\HAM10000_images_part_1\ISIC_0026769.jpg
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0,..\Data\HAM10000_images_part_1\ISIC_0025661.jpg
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern,Benign keratosis-like lesions,2,250,25.0,..\Data\HAM10000_images_part_2\ISIC_0031633.jpg


In [6]:
# Load the pixel data of every image into a new `image` column.
def _load_image_array(image_path):
    """Return the pixels of the image file at `image_path` as a NumPy array.

    The file is opened in a `with` block so its handle is closed as soon as
    the pixel data has been copied into the array, instead of depending on
    when the PIL image object happens to be released.
    """
    with Image.open(image_path) as img:
        return np.asarray(img)


# NOTE(review): no resizing is applied, so every decoded image is held in
# memory at once - confirm this fits the available RAM for the full dataset.
dataset['image'] = dataset['path'].map(_load_image_array)

In [7]:
# Preview the first rows to check that the new `image` column holds pixel arrays.
dataset.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,cell_type,cell_type_idx,Feature_1,Feature_2,path,image
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0,..\Data\HAM10000_images_part_1\ISIC_0027419.jpg,"[[[188, 147, 191], [186, 148, 189], [187, 150,..."
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0,..\Data\HAM10000_images_part_1\ISIC_0025030.jpg,"[[[25, 15, 23], [25, 14, 22], [25, 14, 22], [2..."
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0,..\Data\HAM10000_images_part_1\ISIC_0026769.jpg,"[[[186, 128, 140], [188, 128, 136], [183, 126,..."
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern,Benign keratosis-like lesions,2,102,25.0,..\Data\HAM10000_images_part_1\ISIC_0025661.jpg,"[[[24, 9, 16], [22, 11, 15], [23, 11, 15], [26..."
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern,Benign keratosis-like lesions,2,250,25.0,..\Data\HAM10000_images_part_2\ISIC_0031633.jpg,"[[[122, 80, 102], [124, 82, 104], [127, 83, 10..."
