In [10]:
import os
import cv2
import numpy as np
import pandas as pd
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

def read_image_paths(file_path):
    """Build a table of existing image files and their folder-derived labels.

    Every line of ``file_path`` is stripped of surrounding whitespace and
    treated as an image path.  Lines that do not name an existing regular
    file (including blank lines) are skipped.  The label of an image is the
    name of the directory that directly contains it.

    Args:
        file_path: Path of a text file with one image path per line.

    Returns:
        A ``pandas.DataFrame`` with two columns: ``'path'`` (the stripped
        path as written in the list) and ``'category'`` (the parent
        directory name), in the order the paths appear in the file.
    """
    with open(file_path, 'r') as listing:
        candidates = [line.strip() for line in listing]

    # Keep only entries that actually exist on disk.
    existing = [candidate for candidate in candidates if os.path.isfile(candidate)]
    # ".../<category>/<image>" -> "<category>"
    labels = [os.path.basename(os.path.dirname(found)) for found in existing]

    return pd.DataFrame({'path': existing, 'category': labels})

def compute_histogram(image_path):
    """Extract a colour-histogram feature vector from an image file.

    The image is loaded with ``cv2.imread``, resized to 128x128, and a
    256-bin histogram over the value range [0, 256) is computed for each of
    the three channels.  Each channel histogram is normalised with
    ``cv2.normalize`` (default arguments), flattened, and the three results
    are concatenated.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A 1-D NumPy array of 768 values (3 channels x 256 bins).

    Raises:
        ValueError: If OpenCV cannot read or decode ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # without this check the resize below fails with an opaque OpenCV
        # assertion that does not mention which file was at fault.
        raise ValueError(f"Could not read image: {image_path!r}")

    # Fixed size so every image contributes the same number of pixels.
    img = cv2.resize(img, (128, 128))

    hist_channels = [
        cv2.calcHist([img], [ch], None, [256], [0, 256]) for ch in range(3)
    ]

    # normalize() writes into the array passed as dst and returns it.
    histograms = [cv2.normalize(hist, hist).flatten() for hist in hist_channels]
    return np.hstack(histograms)

# ---- Training ----
# Image list for training: one path per line, label taken from the folder name.
train_list = "train.txt"
dataset = read_image_paths(train_list)

# One histogram feature row per listed image, in DataFrame order.
feature_data = np.array(list(map(compute_histogram, dataset['path'])))

# Map the string category names to integer class ids.
label_encoder = LabelEncoder()
target_labels = label_encoder.fit_transform(dataset['category'])

# Hold out 20% of the training list, stratified by label, for a quick check.
X_train, X_valid, y_train, y_valid = train_test_split(
    feature_data,
    target_labels,
    test_size=0.2,
    random_state=42,
    stratify=target_labels,
)

# Random forest with fixed hyper-parameters and a fixed seed.
classifier = RandomForestClassifier(
    n_estimators=300,
    max_depth=20,
    min_samples_split=4,
    min_samples_leaf=2,
    random_state=42,
)
classifier.fit(X_train, y_train)

# Accuracy on the held-out 20% split.
valid_predictions = classifier.predict(X_valid)
valid_accuracy = accuracy_score(y_valid, valid_predictions)
print(f"Validation Accuracy: {valid_accuracy:.4f}")

# Persist the fitted model and the label encoder (needed to decode class ids).
artifacts = (
    ("image_classifier.pkl", classifier),
    ("category_encoder.pkl", label_encoder),
)
for artifact_path, artifact in artifacts:
    with open(artifact_path, "wb") as sink:
        pickle.dump(artifact, sink)

print("Model training complete. Files saved.")

# ---- Evaluate the trained model on the images listed in val.txt ----
val_list = "val.txt"
val_dataset = read_image_paths(val_list)

# Same feature extraction as for training, then predict and decode class ids
# back to category names.
val_features = np.array(list(map(compute_histogram, val_dataset['path'])))
predictions = classifier.predict(val_features)
predicted_labels = label_encoder.inverse_transform(predictions)

# Attach the predictions to the table and show the first rows.
val_dataset['Predicted Category'] = predicted_labels
print("Validation Results:")
print(val_dataset.head())

# Per-image results: path, true category, predicted category.
val_dataset.to_csv("validation_results.csv", index=False)
print("Validation results saved to validation_results.csv")

# ---- Aggregate metrics against the folder-derived labels ----
true_labels = val_dataset['category']
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Test Accuracy: {accuracy:.4f}")

report = classification_report(true_labels, predicted_labels)
print("Classification Report:", report, sep="\n")

# NOTE(review): despite its name and the message below, this file receives
# the accuracy line plus the plain-text classification report, not CSV rows
# of predictions (those are in validation_results.csv).
with open("test_predictions.csv", "w") as report_file:
    report_file.write(f"Test Accuracy: {accuracy:.4f}\n\n" + report)
print("Predictions saved in test_predictions.csv")

Validation Accuracy: 0.8163
Model training complete. Files saved.
Validation Results:
                                            path        category  \
0  val/museum-outdoor/Places365_val_00010130.jpg  museum-outdoor   
1  val/museum-outdoor/Places365_val_00001310.jpg  museum-outdoor   
2  val/museum-outdoor/Places365_val_00004772.jpg  museum-outdoor   
3  val/museum-outdoor/Places365_val_00019233.jpg  museum-outdoor   
4  val/museum-outdoor/Places365_val_00010906.jpg  museum-outdoor   

  Predicted Category  
0      museum-indoor  
1     museum-outdoor  
2     museum-outdoor  
3     museum-outdoor  
4     museum-outdoor  
Validation results saved to validation_results.csv
Test Accuracy: 0.7950
Classification Report:
                precision    recall  f1-score   support

 museum-indoor       0.79      0.80      0.80       100
museum-outdoor       0.80      0.79      0.79       100

      accuracy                           0.80       200
     macro avg       0.80      0.80      0.79