## Importing Required Libraries

In [1]:
import pandas as pd
import numpy as np
from skimage.feature import graycomatrix, graycoprops
from skimage import io, color, img_as_ubyte
import os
import cv2

In [2]:
df = pd.DataFrame(
    columns=[
        "Contrast Feature",
        "Dissimilarity Feature",
        "Homogeneity Feature",
        "Energy Feature",
        "Correlation Feature",
        "ASM Feature",
    ]
)

In [3]:
matrix1 = []

### Matrix

In [4]:
list_of_images = []
name_of_images = []
for i in range(1,4):
    path_of_images = f"/kaggle/input/skin-cancer/imgs_part_{i}/imgs_part_{i}"
    images = os.listdir(path_of_images)
    name_of_images.extend(images)
    list_of_images.extend([os.path.join(path_of_images, img) for img in images])
    
for image in list_of_images:
    img = cv2.imread(image)
    gray = color.rgb2gray(img)
    image = img_as_ubyte(gray)  # Becomes 8-bit unsigned integer

    # This step is similar to data compression, because the 8-bit image contains 256 gray levels, which will cause the calculation of the gray level co-occurrence matrix to be too large, so it is compressed into 16 levels and the gray levels are divided
    bins = np.array(
        [0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 255]
    )  # 16-bit
    inds = np.digitize(
        image, bins
    )  # Returns a matrix with the same size as the image, but the matrix element represents the interval position of the element in the image in the bins, less than 0 is 0, 0-16 is 1, and so on

    max_value = inds.max() + 1
    matrix_coocurrence = graycomatrix(
        inds,  # Numpy matrix for co-occurrence matrix calculation
        [1],  # Step
        [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],  # Direction angle
        levels=max_value,  # Co-occurrence matrix order
        normed=False,
        symmetric=False,
    )
    # P[i,j,d,theta] returns a four-dimensional matrix, each dimension represents a different meaning
    matrix1.append(matrix_coocurrence)

In [5]:
CF =[]
DF =[]
HF =[]
EF =[]
COR = []
ASM = []
MF, VF, SD ,RMS = [], [], [],[]

# GLCM properties


In [6]:
def contrast_feature(matrix):
    return np.mean(graycoprops(matrix, 'contrast'))

def dissimilarity_feature(matrix):
    return np.mean(graycoprops(matrix, 'dissimilarity'))

def homogeneity_feature(matrix):
    return np.mean(graycoprops(matrix, 'homogeneity'))

def energy_feature(matrix):
    return np.mean(graycoprops(matrix, 'energy'))

def correlation_feature(matrix):
    return np.mean(graycoprops(matrix, 'correlation'))

def asm_feature(matrix):
    return np.mean(graycoprops(matrix, 'ASM'))
    
def mean_feature(matrix):
    return np.mean(matrix)

def variance_feature(matrix):
    return np.var(matrix)

def sd_feature(matrix):
    return np.std(matrix)
    
def rms_feature(matrix):
    return np.sqrt(np.mean(np.square(matrix)))
    
for matrix in matrix1:
    CF.append(contrast_feature(matrix))
    DF.append(dissimilarity_feature(matrix))
    HF.append(homogeneity_feature(matrix))
    EF.append(energy_feature(matrix))
    COR.append(correlation_feature(matrix))
    ASM.append(asm_feature(matrix))
    MF.append(mean_feature(matrix))
    VF.append(variance_feature(matrix))
    SD.append(sd_feature(matrix))
    RMS.append(rms_feature(matrix))

In [7]:
df["Contrast Feature"] = CF
df["Dissimilarity Feature"] = DF
df["Homogeneity Feature"] = HF
df["Energy Feature"] = EF
df["Correlation Feature"] = COR
df["ASM Feature"] = ASM
df["Mean Feature"] = MF
df["Variance Feature"] = VF
df["Standard Deviation Feature"] = SD
df["RMS Feature"] = RMS


In [8]:
df.index = name_of_images
df.index.name = "img_id"

In [9]:
df

Unnamed: 0_level_0,Contrast Feature,Dissimilarity Feature,Homogeneity Feature,Energy Feature,Correlation Feature,ASM Feature,Mean Feature,Variance Feature,Standard Deviation Feature,RMS Feature
img_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
PAT_492_937_958.png,0.164506,0.125747,0.940552,0.421009,0.969356,0.177292,6473.543210,2.365378e+09,48635.150801,7807.683949
PAT_13_21_350.png,0.140483,0.136070,0.932391,0.370503,0.976124,0.137326,4694.883136,4.895300e+08,22125.325771,9267.542269
PAT_456_887_499.png,0.162921,0.162699,0.918672,0.443865,0.920602,0.197050,2487.515306,2.328027e+08,15257.874258,6463.170524
PAT_15_1001_749.png,0.409445,0.334858,0.839908,0.365902,0.904654,0.134018,385.428201,5.606193e+06,2367.740112,2398.905612
PAT_456_888_961.png,0.331169,0.287508,0.860554,0.395208,0.867618,0.156368,2335.607422,2.129254e+08,14591.963476,5816.449441
...,...,...,...,...,...,...,...,...,...,...
PAT_1547_4231_928.png,0.209034,0.193727,0.904638,0.339363,0.964105,0.115229,2192.873047,1.370494e+08,11706.811477,6418.319963
PAT_1586_2624_615.png,0.198947,0.193567,0.903754,0.373707,0.953819,0.139728,461.272727,3.385286e+06,1839.914808,1896.855036
PAT_1312_1102_578.png,0.094519,0.089269,0.955808,0.463457,0.976365,0.214830,3608.337963,8.932632e+08,29887.509328,6428.423222
PAT_2061_4395_587.png,0.148005,0.146369,0.926962,0.298567,0.987974,0.089160,7586.921280,1.425668e+09,37758.025452,9868.991202


In [10]:
df.to_csv("Feature Extraction.csv")

In [11]:
dfMetaData = pd.read_csv("/kaggle/input/skin-cancer/metadata.csv")
# dfMetaData = dfMetaData.drop(['lesion_id', 'smoke', 'drink','background_father','background_mother','age','pesticide','gender','skin_cancer_history','cancer_history','has_piped_water','has_sewage_system','fitspatrick','region','diameter_1','diameter_2','itch','grew','hurt','changed','bleed','elevation','biopsed'], axis=1)
dfMerge = pd.merge(dfMetaData, df, on='img_id', how='inner')
columns = [col for col in dfMerge.columns if col != 'diagnostic']
dfMerge = dfMerge[columns + ['diagnostic']]
dfMerge.to_csv("Result.csv")
dfMerge



Unnamed: 0,patient_id,lesion_id,smoke,drink,background_father,background_mother,age,pesticide,gender,skin_cancer_history,...,Dissimilarity Feature,Homogeneity Feature,Energy Feature,Correlation Feature,ASM Feature,Mean Feature,Variance Feature,Standard Deviation Feature,RMS Feature,diagnostic
0,PAT_1516,1765,,,,,8,,,,...,0.089536,0.955315,0.530754,0.956334,0.281749,5263.502222,1.728619e+09,41576.669149,7577.974550,NEV
1,PAT_46,881,False,False,POMERANIA,POMERANIA,55,False,FEMALE,True,...,0.149374,0.925556,0.388357,0.960681,0.150869,7793.946667,2.001354e+09,44736.494035,9035.616256,BCC
2,PAT_1545,1867,,,,,77,,,,...,0.216749,0.892738,0.270093,0.981335,0.073005,1596.251953,4.507662e+07,6713.912399,6901.060788,ACK
3,PAT_1989,4061,,,,,75,,,,...,0.105238,0.947391,0.348523,0.984025,0.121485,13829.105710,7.336421e+09,85652.910980,10840.716365,ACK
4,PAT_684,1302,False,True,POMERANIA,POMERANIA,79,False,MALE,True,...,0.115666,0.942655,0.439068,0.955412,0.192829,3130.751953,4.740634e+08,21772.997361,6266.482098,BCC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2293,PAT_1708,3156,,,,,73,,,,...,0.128059,0.936109,0.335443,0.982671,0.112566,10114.212963,3.628699e+09,60238.684485,9428.184208,ACK
2294,PAT_46,880,False,False,POMERANIA,POMERANIA,55,False,FEMALE,True,...,0.139193,0.930454,0.401234,0.960288,0.161028,17903.395408,9.796110e+09,98975.298417,11861.837759,BCC
2295,PAT_1343,1217,,,,,74,,,,...,0.252948,0.878176,0.332964,0.939196,0.111002,2122.769896,1.400620e+08,11834.780144,6366.932020,SEK
2296,PAT_326,690,False,False,POMERANIA,POMERANIA,58,True,FEMALE,True,...,0.100068,0.949987,0.432574,0.965712,0.187167,1019.298443,5.516327e+07,7427.197754,5145.802048,BCC
