## Importing Required Libraries

In [1]:
import pandas as pd
import numpy as np
from skimage.feature import graycomatrix, graycoprops
from skimage import io, color, img_as_ubyte
import os
import cv2

In [2]:
df = pd.DataFrame(
    columns=[
        "Contrast Feature",
        "Dissimilarity Feature",
        "Homogeneity Feature",
        "Energy Feature",
        "Correlation Feature",
        "ASM Feature",
    ]
)

In [3]:
matrix1 = []

### Matrix

In [4]:
list_of_images = []
name_of_images = []
for i in range(1,4):
    path_of_images = f"/kaggle/input/skin-cancer/imgs_part_{i}/imgs_part_{i}"
    images = os.listdir(path_of_images)
    name_of_images.extend(images)
    list_of_images.extend([os.path.join(path_of_images, img) for img in images])
    
for image in list_of_images:
    img = cv2.imread(image)
    gray = color.rgb2gray(img)
    image = img_as_ubyte(gray)  # Becomes 8-bit unsigned integer

    # This step is similar to data compression, because the 8-bit image contains 256 gray levels, which will cause the calculation of the gray level co-occurrence matrix to be too large, so it is compressed into 16 levels and the gray levels are divided
    bins = np.array(
        [0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 255]
    )  # 16-bit
    inds = np.digitize(
        image, bins
    )  # Returns a matrix with the same size as the image, but the matrix element represents the interval position of the element in the image in the bins, less than 0 is 0, 0-16 is 1, and so on

    max_value = inds.max() + 1
    matrix_coocurrence = graycomatrix(
        inds,  # Numpy matrix for co-occurrence matrix calculation
        [1],  # Step
        [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],  # Direction angle
        levels=max_value,  # Co-occurrence matrix order
        normed=False,
        symmetric=False,
    )
    # P[i,j,d,theta] returns a four-dimensional matrix, each dimension represents a different meaning
    matrix1.append(matrix_coocurrence)

In [5]:
CF =[]
DF =[]
HF =[]
EF =[]
COR = []
ASM = []

# GLCM properties


In [6]:
def contrast_feature(matrix):
    return np.mean(graycoprops(matrix, 'contrast'))

def dissimilarity_feature(matrix):
    return np.mean(graycoprops(matrix, 'dissimilarity'))

def homogeneity_feature(matrix):
    return np.mean(graycoprops(matrix, 'homogeneity'))

def energy_feature(matrix):
    return np.mean(graycoprops(matrix, 'energy'))

def correlation_feature(matrix):
    return np.mean(graycoprops(matrix, 'correlation'))

def asm_feature(matrix):
    return np.mean(graycoprops(matrix, 'ASM'))

for matrix in matrix1:
    CF.append(contrast_feature(matrix))
    DF.append(dissimilarity_feature(matrix))
    HF.append(homogeneity_feature(matrix))
    EF.append(energy_feature(matrix))
    COR.append(correlation_feature(matrix))
    ASM.append(asm_feature(matrix))

In [7]:
df["Contrast Feature"] = CF
df["Dissimilarity Feature"] = DF
df["Homogeneity Feature"] = HF
df["Energy Feature"] = EF
df["Correlation Feature"] = COR
df["ASM Feature"] = ASM

In [8]:
df.index = name_of_images
df.index.name = "img_id"

In [9]:
df

Unnamed: 0_level_0,Contrast Feature,Dissimilarity Feature,Homogeneity Feature,Energy Feature,Correlation Feature,ASM Feature
img_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
PAT_492_937_958.png,0.164506,0.125747,0.940552,0.421009,0.969356,0.177292
PAT_13_21_350.png,0.140483,0.136070,0.932391,0.370503,0.976124,0.137326
PAT_456_887_499.png,0.162921,0.162699,0.918672,0.443865,0.920602,0.197050
PAT_15_1001_749.png,0.409445,0.334858,0.839908,0.365902,0.904654,0.134018
PAT_456_888_961.png,0.331169,0.287508,0.860554,0.395208,0.867618,0.156368
...,...,...,...,...,...,...
PAT_1547_4231_928.png,0.209034,0.193727,0.904638,0.339363,0.964105,0.115229
PAT_1586_2624_615.png,0.198947,0.193567,0.903754,0.373707,0.953819,0.139728
PAT_1312_1102_578.png,0.094519,0.089269,0.955808,0.463457,0.976365,0.214830
PAT_2061_4395_587.png,0.148005,0.146369,0.926962,0.298567,0.987974,0.089160


In [10]:
df.to_csv("Feature Extraction.csv")

In [11]:
dfMetaData = pd.read_csv("/kaggle/input/skin-cancer/metadata.csv")
dfMetaData = dfMetaData.drop(['lesion_id', 'smoke', 'drink','background_father','background_mother','age','pesticide','gender','skin_cancer_history','cancer_history','has_piped_water','has_sewage_system','fitspatrick','region','diameter_1','diameter_2','itch','grew','hurt','changed','bleed','elevation','biopsed'], axis=1)
dfMerge = pd.merge(dfMetaData, df, on='img_id', how='inner')
columns = [col for col in dfMerge.columns if col != 'diagnostic']
dfMerge = dfMerge[columns + ['diagnostic']]
dfMerge.to_csv("Result.csv")
dfMerge

meta_df = pd.read_csv("/kaggle/input/skin-cancer/metadata.csv")
meta_df = meta_df.drop('diagnostic', axis=1)
merged_df = pd.merge(meta_df, dfMerge, on="patient_id", how="inner")
merged_df.to_csv("Meta_result.csv")