# Libraries

In [15]:
import os
import sys
import warnings
import pandas as pd
import cv2

import numpy as np
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Root folder holding one sub-directory of images per class
IMAGES_DIR = "./images"

# Ignore warnings
# NOTE(review): this silences every warning category for the whole notebook,
# including convergence/deprecation notices from the libraries used below.
warnings.filterwarnings("ignore")

# Keyword arguments shared by every ensemble classifier compared below
_ensemble_kwargs = dict(n_estimators=100, random_state=0)

# Display name -> unfitted estimator; insertion order is the evaluation order
models = {
  'Random Forest': RandomForestClassifier(**_ensemble_kwargs),
  'Extra Trees': ExtraTreesClassifier(**_ensemble_kwargs),
  'Gradient Boosting': GradientBoostingClassifier(**_ensemble_kwargs),
  'AdaBoost': AdaBoostClassifier(**_ensemble_kwargs),
  'SVM': SVC(kernel='rbf', C=1, random_state=0),
}

# Get Histograms

In [16]:
# Build one row per image: file path, class label, and a 256-bin histogram
# for each of the three colour channels (768 feature columns in total).
channels = ('b', 'g', 'r')
hist_columns = [f'{color}_{i}' for color in channels for i in range(256)]

# Rows are collected in a plain list and converted to a DataFrame once at the
# end; appending with df.loc[len(df)] re-allocates the frame on every image.
records = []

# Sorted listings keep the row order (and therefore the seeded KFold splits)
# the same regardless of the order the filesystem returns entries in.
for _class in sorted(os.listdir(IMAGES_DIR)):
  class_dir = os.path.join(IMAGES_DIR, _class)

  # Skip stray files sitting next to the class folders
  if not os.path.isdir(class_dir):
    continue

  print("Class:", _class)
  filenames = sorted(os.listdir(class_dir))
  total = len(filenames)
  unreadable = []

  for i, img in enumerate(filenames):
    # Load image
    img_path = os.path.join(class_dir, img)
    image = cv2.imread(img_path)

    if image is None:
      # cv2.imread signals an unreadable/non-image file by returning None
      unreadable.append(img_path)
    else:
      # Get hist for every channel
      colorsHist = []
      for j, color in enumerate(channels):
        hist = cv2.calcHist([image], [j], None, [256], [0, 256])
        colorsHist.append(hist.flatten())

      records.append([img_path, _class] + list(np.concatenate(colorsHist)))

    sys.stdout.write(f"\rImage: {i + 1}/{total}")
    sys.stdout.flush()
  print()

  # Report skipped files after the progress line so it is not overwritten
  if unreadable:
    print(f"Skipped {len(unreadable)} unreadable file(s):", unreadable)

df = pd.DataFrame(records, columns=['image', 'class'] + hist_columns)

Class: beach
Image: 538/538
Class: mountain
Image: 538/538


# Classify

In [17]:
# Separate the histogram features from the class label.
# errors='ignore' keeps this cell re-runnable: df is overwritten here, so on a
# second execution the 'image' column is already gone and a plain drop would
# raise KeyError.
df = df.drop(columns='image', errors='ignore')
X = df.drop(columns="class")
y = df["class"]

In [18]:
# Shuffled 5-fold split with a fixed seed; the same splitter is reused for
# every model so all of them are scored on identical folds.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for name, model in models.items():
  accuracies = []

  for train_index, test_index in kf.split(X):
    # Positional row selection for this fold
    X_train = X.iloc[train_index]
    y_train = y.iloc[train_index]
    X_test = X.iloc[test_index]
    y_test = y.iloc[test_index]

    # Fit on the fold's training rows and predict the held-out rows
    y_pred = model.fit(X_train, y_train).predict(X_test)
    accuracies.append(accuracy_score(y_test, y_pred))

  # Mean accuracy over the five folds
  print(f"{name}:", np.mean(accuracies))

Random Forest: 0.7946080964685616
Extra Trees: 0.7964900947459087
Gradient Boosting: 0.7881352282515074
AdaBoost: 0.7611800172265288
SVM: 0.7611757105943152
