# Libraries

In [18]:
import os
import sys
import warnings
import pandas as pd
import cv2

import numpy as np
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from skimage.feature import graycomatrix, graycoprops

IMAGES_DIR = "./images"
 
# Ignore warnings
warnings.filterwarnings("ignore")

models = {
  'Random Forest': RandomForestClassifier(n_estimators=100, random_state=0),
  'Extra Trees': ExtraTreesClassifier(n_estimators=100, random_state=0),
  'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, random_state=0),
  'AdaBoost': AdaBoostClassifier(n_estimators=100, random_state=0),
  'SVM': SVC(kernel='rbf', C=1, random_state=0),
}

# Get General Characteristics

In [19]:
def get_general_characteristics(img):
  mean = np.mean(img)
  variance = np.var(img)
  std = np.std(img)
  median = np.median(img)
  skewness = (np.mean((img - mean)**3)) / (std**3)
  kurtosis = (np.mean((img - mean)**4)) / (std**4) - 3

  # Convertir el canal a enteros para calcular GLCM
  channel_int = img.astype(np.uint8)
  
  # Características de textura usando GLCM
  glcm = graycomatrix(channel_int, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)

  contrast = graycoprops(glcm, 'contrast')[0, 0]
  homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
  energy = graycoprops(glcm, 'energy')[0, 0]
  correlation = graycoprops(glcm, 'correlation')[0, 0]
  dissimilarity = graycoprops(glcm, 'dissimilarity')[0, 0]

  return [mean, variance, std, median, skewness, kurtosis, contrast, homogeneity, energy, correlation, dissimilarity]

In [23]:
features = ['mean', 'variance', 'std', 'median', 'skewness', 'kurtosis', 'contrast', 'homogeneity', 'energy', 'correlation', 'dissimilarity']
channels = ('b', 'g', 'r')

df = pd.DataFrame(columns=['image', 'class'] + [f'{channels[0]}_{i}' for i in features] + [f'{channels[1]}_{i}' for i in features] + [f'{channels[2]}_{i}' for i in features])

for _class in os.listdir(IMAGES_DIR):
  print("Class:", _class)
  total = len(os.listdir(os.path.join(IMAGES_DIR, _class)))
  for i, img in enumerate(os.listdir(os.path.join(IMAGES_DIR, _class))):
    # Load image
    image = cv2.imread(os.path.join(IMAGES_DIR, _class, img))

    characteristics = []
    for j, channel in enumerate(channels):
      single_chars = get_general_characteristics(image[:, :, j])
      characteristics.append(single_chars)

    df.loc[len(df.index)] = [os.path.join(IMAGES_DIR, _class, img), _class] + list(characteristics[0]) + list(characteristics[1]) + list(characteristics[2])

    sys.stdout.write(f"\rImage: {i + 1}/{total}")
    sys.stdout.flush()
  print()

Class: beach
Image: 1/538

Image: 62/538

KeyboardInterrupt: 

# Classify

In [21]:
df = df.drop('image', axis=1)
X = df.drop("class", axis=1)
y = df["class"]

In [22]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for name, model in models.items():
  accuracies = []
  for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    accuracies.append(accuracy_score(y_test, y_pred))

  print(f"{name}:", np.mean(accuracies))

Random Forest: 0.7751162790697674
Extra Trees: 0.7713910422049957
Gradient Boosting: 0.7732687338501292
AdaBoost: 0.7435314384151595
SVM: 0.6636046511627907
