In [1]:
import os
import cv2
import numpy as np
import mahotas as mh
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from skimage.feature import hog

# -----------------------------
# توابع استخراج ویژگی
# -----------------------------

def extract_hu_moments(image):
    moments = cv2.moments(image)
    hu = cv2.HuMoments(moments).flatten()
    return np.log(1 + abs(hu))

def extract_haralick(image):
    try:
        return mh.features.haralick(image).mean(axis=0)
    except:
        return np.zeros((13,))

def extract_color_histogram(image, bins=16):
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    h_hist = cv2.calcHist([h], [0], None, [bins], [0, 180])
    s_hist = cv2.calcHist([s], [0], None, [bins], [0, 256])
    v_hist = cv2.calcHist([v], [0], None, [bins], [0, 256])
    return np.concatenate([cv2.normalize(h_hist, h_hist).flatten(),
                           cv2.normalize(s_hist, s_hist).flatten(),
                           cv2.normalize(v_hist, v_hist).flatten()])

def extract_hog(image):
    return hog(image, visualize=False, multichannel=False)

# -----------------------------
# پردازش تصویر و ساخت دیتاست
# -----------------------------

def build_dataset(feature_extractor, feature_name, root_folder):
    data = []
    labels = []

    for label, class_folder in enumerate(os.listdir(root_folder)):
        class_path = os.path.join(root_folder, class_folder)
        if not os.path.isdir(class_path):
            continue
        print(f"Processing {feature_name} | Class: {class_folder}")
        for image_file in os.listdir(class_path):
            if image_file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
                image_path = os.path.join(class_path, image_file)
                image = cv2.imread(image_path)
                if image is None:
                    continue

                # پردازش تصویر
                if feature_name == "Hu":
                    gray = cv2.cvtColor(cv2.resize(image, (256, 256)), cv2.COLOR_BGR2GRAY)
                    features = extract_hu_moments(gray)
                elif feature_name == "Haralick":
                    gray = cv2.cvtColor(cv2.resize(image, (256, 256)), cv2.COLOR_BGR2GRAY)
                    features = extract_haralick(gray)
                elif feature_name == "ColorHist":
                    features = extract_color_histogram(cv2.resize(image, (256, 256)))
                elif feature_name == "HOG":
                    gray = cv2.cvtColor(cv2.resize(image, (256, 256)), cv2.COLOR_BGR2GRAY)
                    features = extract_hog(gray)

                if features is not None:
                    data.append(features)
                    labels.append(class_folder)

    # Label Encoding
    le = LabelEncoder()
    y = le.fit_transform(labels)

    # Train/Test Split
    X = np.array(data)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # نرمال‌سازی
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

# -----------------------------
# آموزش و تست مدل
# -----------------------------

def train_and_evaluate(X_train, X_test, y_train, y_test):
    # تنظیمات جستجو
    param_grid = {
        'n_estimators': [50, 100],
        'max_depth': [None, 10],
        'min_samples_split': [2, 5],
        'class_weight': [None, 'balanced']
    }

    # Grid Search
    model = RandomForestClassifier(random_state=42)
    grid = GridSearchCV(model, param_grid, cv=3, scoring='accuracy', n_jobs=-1)
    grid.fit(X_train, y_train)

    # دقت
    acc = grid.score(X_test, y_test)
    return acc, grid.best_params_

# -----------------------------
# اجرای نهایی برای هر روش
# -----------------------------

if __name__ == "__main__":
    ROOT_FOLDER = "splitted_images/"  # پوشه داده‌ها
    results = {}

    # ۱. تست Hu Moments
    X_train, X_test, y_train, y_test = build_dataset(extract_hu_moments, "Hu", ROOT_FOLDER)
    acc, params = train_and_evaluate(X_train, X_test, y_train, y_test)
    results["Hu Moments"] = acc
    print(f"Hu Moments - Accuracy: {acc:.2f}, Best Params: {params}")

    # ۲. تست Haralick
    X_train, X_test, y_train, y_test = build_dataset(extract_haralick, "Haralick", ROOT_FOLDER)
    acc, params = train_and_evaluate(X_train, X_test, y_train, y_test)
    results["Haralick"] = acc
    print(f"Haralick - Accuracy: {acc:.2f}, Best Params: {params}")

    # ۳. تست هیستوگرام رنگی
    X_train, X_test, y_train, y_test = build_dataset(extract_color_histogram, "ColorHist", ROOT_FOLDER)
    acc, params = train_and_evaluate(X_train, X_test, y_train, y_test)
    results["Color Histogram"] = acc
    print(f"Color Histogram - Accuracy: {acc:.2f}, Best Params: {params}")

    # ۴. تست HOG (اختیاری)
    try:
        X_train, X_test, y_train, y_test = build_dataset(extract_hog, "HOG", ROOT_FOLDER)
        acc, params = train_and_evaluate(X_train, X_test, y_train, y_test)
        results["HOG"] = acc
        print(f"HOG - Accuracy: {acc:.2f}, Best Params: {params}")
    except Exception as e:
        print("HOG failed:", str(e))

    # چاپ نتایج نهایی
    print("\n✅ Final Results:")
    for method, acc in results.items():
        print(f"{method}: {acc:.2f}")

Processing Hu | Class: Daisy
Processing Hu | Class: Fritillary
Processing Hu | Class: Dandelion
Processing Hu | Class: Windflower
Processing Hu | Class: Tigerlily
Processing Hu | Class: Pansy
Processing Hu | Class: Iris
Processing Hu | Class: Crocus
Processing Hu | Class: Sunflower
Processing Hu | Class: Snowdrop
Processing Hu | Class: Cowslip
Processing Hu | Class: Bluebell
Processing Hu | Class: LilyValley
Processing Hu | Class: ColtsFoot
Processing Hu | Class: Daffodil
Processing Hu | Class: Tulip
Processing Hu | Class: Buttercup
Hu Moments - Accuracy: 0.17, Best Params: {'class_weight': 'balanced', 'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}
Processing Haralick | Class: Daisy
Processing Haralick | Class: Fritillary
Processing Haralick | Class: Dandelion
Processing Haralick | Class: Windflower
Processing Haralick | Class: Tigerlily
Processing Haralick | Class: Pansy
Processing Haralick | Class: Iris
Processing Haralick | Class: Crocus
Processing Haralick | Class: S

In [None]:
import os
import cv2
import numpy as np
import mahotas as mh
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# -----------------------------
# توابع استخراج ویژگی
# -----------------------------

def extract_hu_moments(image):
    """استخراج 7 گشتاور هو از تصویر سیاه‌سفید"""
    moments = cv2.moments(image)
    hu = cv2.HuMoments(moments).flatten()
    return np.log(1 + abs(hu))

def extract_haralick_features(image):
    """استخراج 13 ویژگی Haralick از تصویر سیاه‌سفید"""
    try:
        features = mh.features.haralick(image)
        return features.mean(axis=0)
    except:
        return np.zeros((13,))

def extract_color_histogram(image, bins=16):
    """استخراج هیستوگرام رنگی از کانال‌های HSV"""
    try:
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)

        # هیستوگرام برای هر کانال
        h_hist = cv2.calcHist([h], [0], None, [bins], [0, 180])
        s_hist = cv2.calcHist([s], [0], None, [bins], [0, 256])
        v_hist = cv2.calcHist([v], [0], None, [bins], [0, 256])

        # نرمال‌سازی
        h_hist = cv2.normalize(h_hist, h_hist).flatten()
        s_hist = cv2.normalize(s_hist, s_hist).flatten()
        v_hist = cv2.normalize(v_hist, v_hist).flatten()

        # ترکیب تمام هیستوگرام‌ها
        hist = np.concatenate([h_hist, s_hist, v_hist])
        return hist
    except:
        return np.zeros((bins * 3,))  # اگر خطا داد، صفر برگردان

# -----------------------------
# پردازش تصویر و ترکیب ویژگی‌ها
# -----------------------------

def process_combined_features(image_path):
    """پردازش یک تصویر و استخراج تمام ویژگی‌ها (Hu + Haralick + Color Histogram)"""
    try:
        # خواندن تصویر
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Cannot read image {image_path}")
        
        # تغییر اندازه
        image = cv2.resize(image, (256, 256))

        # بهبود کنتراست
        ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCrCb)
        ycrcb[:, :, 0] = cv2.equalizeHist(ycrcb[:, :, 0])
        enhanced = cv2.cvtColor(ycrcb, cv2.COLOR_YCrCb2BGR)

        # تبدیل به سیاه‌سفید برای Hu و Haralick
        gray = cv2.cvtColor(enhanced, cv2.COLOR_BGR2GRAY)

        # استخراج ویژگی‌ها
        hu_features = extract_hu_moments(gray)
        haralick_features = extract_haralick_features(gray)
        color_hist_features = extract_color_histogram(enhanced, bins=16)

        # ترکیب تمام ویژگی‌ها
        features = np.concatenate((hu_features, haralick_features, color_hist_features))
        return features
    except Exception as e:
        print(f"Error processing {image_path}: {str(e)}")
        return None

# -----------------------------
# ساخت دیتاست ترکیبی
# -----------------------------

def build_combined_dataset(root_folder):
    data = []
    labels = []

    for label, class_folder in enumerate(os.listdir(root_folder)):
        class_path = os.path.join(root_folder, class_folder)
        if not os.path.isdir(class_path):
            continue
        print(f"Processing class: {class_folder}")
        for image_file in os.listdir(class_path):
            if image_file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
                image_path = os.path.join(class_path, image_file)
                features = process_combined_features(image_path)
                if features is not None:
                    data.append(features)
                    labels.append(class_folder)

    # تعداد ویژگی‌ها
    num_hu = 7
    num_haralick = 13
    num_color_hist = 16 * 3  # 16 بین برای H, S, V

    # نام‌گذاری ستون‌ها
    columns = [f'Hu_{i}' for i in range(num_hu)]
    columns += [f'Haralick_{i}' for i in range(num_haralick)]
    columns += [f'ColorHist_{i}' for i in range(num_color_hist)]
    columns += ['Label']

    # ساخت DataFrame
    df = pd.DataFrame(data, columns=columns[:-1])
    label_encoder = LabelEncoder()
    df['Label'] = label_encoder.fit_transform(labels)

    # جدا کردن X و y
    X = df.drop('Label', axis=1).values
    y = df['Label'].values

    # نرمال‌سازی
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Train/Test Split
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

    return X_train, X_test, y_train, y_test, label_encoder, scaler

# -----------------------------
# جستجوی بهترین پارامترها
# -----------------------------

def tune_random_forest(X_train, y_train):
    # تنظیمات جستجو
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2],
        'max_features': ['sqrt', 'log2'],
        'bootstrap': [True, False],
        'class_weight': [None, 'balanced']
    }

    # ساخت مدل و جستجو
    model = RandomForestClassifier(random_state=42)
    grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=1, scoring='accuracy')
    grid_search.fit(X_train, y_train)

    # بهترین مدل
    best_model = grid_search.best_estimator_
    best_params = grid_search.best_params_
    best_score = grid_search.best_score_

    return best_model, best_params, best_score

# -----------------------------
# اجرای نهایی
# -----------------------------

if __name__ == "__main__":
    # تنظیمات
    ROOT_FOLDER = "splitted_images/"  # پوشه داده‌ها

    # ساخت دیتاست ترکیبی
    print("Building combined dataset (Hu + Haralick + Color Histogram)...")
    X_train, X_test, y_train, y_test, label_encoder, scaler = build_combined_dataset(ROOT_FOLDER)

    # جستجوی بهترین پارامترها
    print("Starting Grid Search for Random Forest...")
    best_model, best_params, best_cv_score = tune_random_forest(X_train, y_train)

    # پیش‌بینی و ارزیابی
    y_pred = best_model.predict(X_test)
    test_acc = accuracy_score(y_test, y_pred)

    # چاپ نتایج
    print("Best Parameters:", best_params)
    print("Best Cross-Validation Score:", best_cv_score)
    print("Test Accuracy with Best Model:", test_acc)

    # ذخیره مدل و ابزارها
    import joblib
    joblib.dump(best_model, 'best_combined_model.pkl')
    joblib.dump(label_encoder, 'label_encoder.pkl')
    joblib.dump(scaler, 'scaler.pkl')
    print("Best combined model saved.")

Building combined dataset (Hu + Haralick + Color Histogram)...
Processing class: Daisy
Processing class: Fritillary
Processing class: Dandelion
Processing class: Windflower
Processing class: Tigerlily
Processing class: Pansy
Processing class: Iris
Processing class: Crocus
Processing class: Sunflower
Processing class: Snowdrop
Processing class: Cowslip
Processing class: Bluebell
Processing class: LilyValley
Processing class: ColtsFoot
Processing class: Daffodil
Processing class: Tulip
Processing class: Buttercup
Starting Grid Search for Random Forest...
Fitting 5 folds for each of 288 candidates, totalling 1440 fits
