In [83]:
import cv2
from pathlib import Path
import numpy as np
import pandas as pd
import os
from glob import glob
import natsort
import matplotlib.pyplot as plt
from scipy.stats import mode, skew, kurtosis, entropy

In [84]:
def calculate_hist_features(hist):

    feature_vector = []
    feature_vector.append(np.mean(hist)) # Mean
    feature_vector.append(mode(hist)[0][0]) # Mode
    feature_vector.append(np.std(hist)) # Standard deviation
    feature_vector.append(skew(hist)[0]) # Skewness
    feature_vector.append(np.sum(hist**2)) # Energy
    feature_vector.append(entropy(hist, base=2)[0]) # Entropy
    feature_vector.append(kurtosis(hist)[0]) # Kurtosis

    return np.array(feature_vector)

In [85]:
def extract_color_features(img):

    b, g, r = cv2.split(img) # Split the image into the B, G, R channels
    n_bins = 256 # Number of bins
    height, width, _ = img.shape
    N = height * width # Number of pixels

    # Calculate normalized histograms for each channel
    hist_r = (cv2.calcHist([r], [0], None, [n_bins], [0, 256]))/N
    hist_g = (cv2.calcHist([g], [0], None, [n_bins], [0, 256]))/N
    hist_b = (cv2.calcHist([b], [0], None, [n_bins], [0, 256]))/N

    color_features = np.concatenate((calculate_hist_features(hist_r), calculate_hist_features(hist_g), calculate_hist_features(hist_b)))

    return color_features

In [72]:
ROOT_PATH = Path(Path(os.getcwd())/"../challenge1")
TRAIN_PATH = ROOT_PATH/"train"
VAL_PATH = ROOT_PATH/"val"

train_nevus = sorted(glob(str(TRAIN_PATH/'nevus/*')))
train_others = sorted(glob(str(TRAIN_PATH/'others/*')))

In [91]:
feature_names = [
    'Mean_R', 'Mode_R', 'Std_Dev_R', 'Skewness_R', 'Energy_R', 'Entropy_R', 'Kurtosis_R',
    'Mean_G', 'Mode_G', 'Std_Dev_G', 'Skewness_G', 'Energy_G', 'Entropy_G', 'Kurtosis_G',
    'Mean_B', 'Mode_B', 'Std_Dev_B', 'Skewness_B', 'Energy_B', 'Entropy_B', 'Kurtosis_B'
]
# Create an empty list to store DataFrames
df_list = []

for index, name_image in enumerate(train_nevus): 
    img = cv2.imread(train_nevus[index])

    color_features = extract_color_features(img)
    color_features_df = pd.DataFrame([color_features], columns=feature_names)
    # Append the extracted features DataFrame to the list
    df_list.append(color_features_df)
    # Concatenate all DataFrames in the list into a single DataFrame
    combined_df = pd.concat(df_list, ignore_index=True)

    if index==10:
        break

    


In [92]:
combined_df

# example for 10 images

Unnamed: 0,Mean_R,Mode_R,Std_Dev_R,Skewness_R,Energy_R,Entropy_R,Kurtosis_R,Mean_G,Mode_G,Std_Dev_G,...,Energy_G,Entropy_G,Kurtosis_G,Mean_B,Mode_B,Std_Dev_B,Skewness_B,Energy_B,Entropy_B,Kurtosis_B
0,0.003906,0.0,0.010586,3.627673,0.032592,5.590849,12.962111,0.003906,0.0,0.009306,...,0.026076,5.809914,8.524517,0.003906,0.0,0.008911,2.926448,0.024234,5.93794,7.657293
1,0.003906,0.0,0.01717,9.834631,0.079377,5.261638,104.654684,0.003906,0.0,0.017139,...,0.079106,5.355121,105.431003,0.003906,0.0,0.017034,10.069123,0.078188,5.520373,108.156813
2,0.003906,0.0,0.012455,4.481783,0.043618,5.263,20.404949,0.003906,0.0,0.011278,...,0.036467,5.465151,13.894322,0.003906,0.0,0.010721,3.687276,0.033334,5.557831,12.969722
3,0.003906,0.0,0.009726,3.654938,0.028123,5.786762,13.165893,0.003906,0.0,0.007151,...,0.016998,6.516997,8.444512,0.003906,0.0,0.006646,2.732364,0.015214,6.681399,6.565824
4,0.003906,0.0,0.012468,4.113493,0.043701,5.079543,16.822559,0.003906,0.0,0.00925,...,0.025811,5.826128,8.428881,0.003906,0.0,0.008226,2.798865,0.021227,6.093673,7.003799
5,0.003906,0.0,0.015102,4.660933,0.062289,4.460245,21.659811,0.003906,0.0,0.009173,...,0.025445,5.64941,5.567214,0.003906,0.0,0.008553,2.526289,0.022633,5.877623,5.26123
6,0.003906,0.0,0.006445,2.040578,0.014541,6.485447,3.191341,0.003906,0.0,0.004928,...,0.010122,6.847938,0.255129,0.003906,0.0,0.004646,1.116098,0.009432,6.948098,0.215583
7,0.003906,0.0,0.011053,3.72776,0.035184,5.521027,13.06055,0.003906,0.0,0.009624,...,0.027617,5.834003,11.155057,0.003906,0.0,0.008026,2.617046,0.020397,6.161692,5.770213
8,0.003906,0.0,0.012996,4.315026,0.047144,4.970579,18.729089,0.003906,0.0,0.008596,...,0.022822,5.820418,5.04939,0.003906,0.0,0.007045,1.795975,0.016611,6.163183,1.835527
9,0.003906,0.0,0.007869,3.123218,0.01976,6.348449,9.851105,0.003906,0.0,0.005994,...,0.013103,6.775122,4.371304,0.003906,0.0,0.005205,1.597158,0.010842,6.927426,1.588672
