In [1]:
# Add necessary imports
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from skimage import io
import pandas as pd
import os
import numpy as np
from sklearn.metrics import confusion_matrix

# Flatten the images and masks to use them in Random Forest
def flatten_image(img):
    """Reshape an image array into a 2-D array with one row per pixel.

    Args:
        img: Array exposing ``ndim``, ``shape`` and ``reshape`` (e.g. a NumPy
            array). For inputs with three or more dimensions the last axis
            is kept as the column axis and all leading axes are merged into
            rows. A 2-D input is treated as a single-channel image.

    Returns:
        A 2-D array. For a 2-D input the shape is ``(img.size, 1)``;
        otherwise it is ``(-1, img.shape[-1])``.
    """
    if img.ndim == 2:
        # A (H, W) image has no trailing channel axis. Reshaping with
        # shape[-1] would return it unchanged as H rows of W "features",
        # which no longer lines up with the H * W labels of a flattened mask.
        return img.reshape(-1, 1)
    return img.reshape(-1, img.shape[-1])

def flatten_mask(mask):
    """Collapse a mask array into a one-dimensional array.

    Args:
        mask: Array exposing a ``reshape`` method (e.g. a NumPy array).

    Returns:
        The result of reshaping ``mask`` to a single axis that holds all
        of its elements.
    """
    one_dim_shape = (-1,)
    return mask.reshape(one_dim_shape)

def prepare_data(image_dir, mask_dir, image_files):
    """Load image/mask pairs and stack them into feature and label arrays.

    Only entries of ``image_files`` whose extension is exactly ``.jpg`` are
    used. The mask for each image is read from ``mask_dir`` under the same
    base name with a ``.png`` extension.

    Args:
        image_dir: Directory containing the images.
        mask_dir: Directory containing the masks.
        image_files: Iterable of file names located in ``image_dir``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is the ``np.vstack`` of every
        flattened image and ``y`` is the ``np.hstack`` of every flattened
        mask, in the order the files were given.

    Raises:
        ValueError: If no entry of ``image_files`` has a ``.jpg`` extension.
    """
    X = []
    y = []
    for img_file in image_files:
        # Split off the real extension: a bare endswith('jpg') also accepts
        # names such as 'archivejpg', and str.replace would rewrite every
        # '.jpg' occurrence in the name rather than just the suffix.
        stem, ext = os.path.splitext(img_file)
        if ext != '.jpg':
            continue

        img = io.imread(os.path.join(image_dir, img_file))
        mask = io.imread(os.path.join(mask_dir, stem + '.png'))

        X.append(flatten_image(img))
        y.append(flatten_mask(mask))

    if not X:
        # np.vstack([]) would raise a ValueError with an opaque message;
        # keep the exception type but say what actually went wrong.
        raise ValueError(
            f"No '.jpg' files found among the entries given for {image_dir!r}"
        )

    return np.vstack(X), np.hstack(y)

def calculate_iou(y_true, y_pred):
    """Return the mean per-class intersection-over-union of two labelings.

    Args:
        y_true: Reference labels, passed as the first argument to
            ``confusion_matrix``.
        y_pred: Predicted labels, passed as the second argument to
            ``confusion_matrix``.

    Returns:
        The mean, over the rows/columns of the confusion matrix, of
        ``diagonal / (row_sum + column_sum - diagonal)``.
    """
    conf_mat = confusion_matrix(y_true, y_pred)

    # The diagonal holds the samples on which both labelings agree.
    agreed = np.diag(conf_mat)

    # Row totals plus column totals count the agreed samples twice, so
    # subtract them once to obtain the size of the union for each class.
    row_plus_col = conf_mat.sum(axis=1) + conf_mat.sum(axis=0)
    union = row_plus_col - agreed

    per_class_iou = agreed / union.astype(np.float32)
    return np.mean(per_class_iou)

# Dataset locations, relative to the current working directory.
IMAGE_DIR = "../data/train_images"
MASK_DIR = "../data/train_masks"

# os.listdir returns names in arbitrary order; sort them so the pixel order
# in X/y, and therefore the seeded split below, is the same on every run.
image_files = sorted(os.listdir(IMAGE_DIR))
X, y = prepare_data(IMAGE_DIR, MASK_DIR, image_files)

# Split the data into training and validation sets
# NOTE(review): the split is over the rows of X (one row per pixel), so rows
# from the same image can land in both sets and the validation score may be
# optimistic; consider splitting by image instead.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
rf_model.fit(X_train, y_train)

# Evaluate the model using IoU
y_train_pred = rf_model.predict(X_train)
y_val_pred = rf_model.predict(X_val)

train_iou = calculate_iou(y_train, y_train_pred)
val_iou = calculate_iou(y_val, y_val_pred)

print(f"Train IoU: {train_iou:.4f}")
print(f"Validation IoU: {val_iou:.4f}")


Train IoU: 0.9116
Validation IoU: 0.7790
