In [1]:
def load_dataset(txt_file, root_dir=""):
    """Build a DataFrame of existing image files and their class labels.

    Each non-blank line of ``txt_file`` is treated as a '/'-separated image
    path. The label is the second path component of the listed path (for
    example ``val/museum-outdoor/img.jpg`` -> ``museum-outdoor``).

    Args:
        txt_file: Path to a text file listing one image path per line.
        root_dir: Optional directory prepended to every listed path before
            the existence check. The default ("") leaves listed paths
            unchanged.

    Returns:
        pandas.DataFrame with columns ``image_path`` (the path that was
        checked on disk) and ``label``. Entries that are blank, that have no
        second path component, or that do not exist on disk are omitted.
    """
    data, labels = [], []

    with open(txt_file, 'r') as file:
        # Iterate the handle directly instead of loading every line up front.
        for line in file:
            image_path = line.strip()
            if not image_path:
                continue

            parts = image_path.split('/')
            if len(parts) < 2:
                # No class directory in the path: there is no label to read,
                # so skip the entry rather than fail with IndexError.
                continue

            full_path = os.path.join(root_dir, image_path)
            if os.path.exists(full_path):
                data.append(full_path)
                labels.append(parts[1])

    return pd.DataFrame({'image_path': data, 'label': labels})

In [2]:
input_file = "train.txt"  # Listing to read
output_file = "train2.txt"  # Where the filtered listing is written

# Pull the whole listing into memory
with open(input_file, "r") as f:
    lines = f.readlines()

# Retain only entries mentioning one of the two museum classes,
# with surrounding whitespace removed
filtered_lines = []
for line in lines:
    if "museum-indoor" in line or "museum-outdoor" in line:
        filtered_lines.append(line.strip())

# Emit one retained entry per line
with open(output_file, "w") as f:
    f.writelines(entry + "\n" for entry in filtered_lines)

print("Filtering complete. Check", output_file)

Filtering complete. Check train2.txt


In [3]:
import os
import cv2
import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# ---- 1. Feature Extraction ----
def extract_features(image_path):
    """Turn an image file into a flat vector of grayscale pixel values.

    The image is read with OpenCV, resized to 64x64, converted from BGR to
    grayscale, and flattened to one dimension (64 * 64 = 4096 values).

    Args:
        image_path: Path of the image file to read.

    Returns:
        The flattened pixel array, or None (after printing an error message)
        when OpenCV cannot read the file.
    """
    bgr = cv2.imread(image_path)

    # imread signals failure by returning None; report it and let the caller skip
    if bgr is None:
        print(f"Error: Unable to read image {image_path}")
        return None

    resized = cv2.resize(bgr, (64, 64))  # Same size for every image
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)  # Single channel

    return gray.flatten()  # Shape: (4096,)

# ---- 2. Load & Process Dataset ----
train_txt = "train2.txt"  # Text file listing one training image path per line

dataset = load_dataset(train_txt)

# Extract features image by image, recording the label of every image that was
# actually read. extract_features returns None for unreadable images; skipping
# only the feature (and not its label) would leave the feature matrix and the
# label vector with different lengths and mismatched rows.
features, kept_labels = [], []
for path, label in zip(dataset['image_path'], dataset['label']):
    feature = extract_features(path)
    if feature is not None:
        features.append(feature)
        kept_labels.append(label)

if not features:
    raise ValueError(f"No readable training images were found via {train_txt}")

features = np.array(features)  # One row per readable image
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(kept_labels)

# ---- Train Random Forest Model ----
rf_model = RandomForestClassifier(n_estimators=100, max_depth=15, random_state=42)  # Adjust hyperparameters if needed
rf_model.fit(features, y_encoded)

# ---- Save the Model ----
joblib.dump(rf_model, "random_forest.pkl")
joblib.dump(encoder, "label_encoder.pkl")
print("Model saved as random_forest.pkl")


Model saved as random_forest.pkl


In [4]:
import joblib
import numpy as np
import pandas as pd
import cv2
import os
from sklearn.metrics import accuracy_score, classification_report

# ---- Load Trained Model & Label Encoder ----
# NOTE: joblib.load unpickles arbitrary objects; only load files written by a
# trusted run of the training cell.
rf_model = joblib.load("random_forest.pkl")
encoder = joblib.load("label_encoder.pkl")

# ---- Load Test Dataset ----
test_txt = "val.txt"  # Change to your test dataset file
test_dataset = load_dataset(test_txt)
print(test_dataset.head())

# ---- Extract Features ----
# extract_features returns None for unreadable images. Drop those rows from
# the frame as well, so features, predictions and true labels stay aligned.
extracted = [extract_features(path) for path in test_dataset['image_path']]
readable = np.array([feature is not None for feature in extracted], dtype=bool)

if not readable.any():
    raise ValueError(f"No readable test images were found via {test_txt}")
if not readable.all():
    test_dataset = test_dataset.loc[readable].reset_index(drop=True)

test_features = np.array([feature for feature in extracted if feature is not None])

# ---- Predict ----
predictions = rf_model.predict(test_features)
predicted_labels = encoder.inverse_transform(predictions)  # Convert back to original labels

# ---- Save Predictions ----
test_dataset['Predicted Label'] = predicted_labels
test_dataset.to_csv("test_predictions_rf.csv", index=False)
print("Predictions saved in test_predictions_rf.csv")

# ---- Compute Accuracy & Metrics ----
y_true = encoder.transform(test_dataset['label'])
accuracy = accuracy_score(y_true, predictions)
# Pass the encoded class ids explicitly so target_names always matches the
# number of reported classes, even if a class is missing from the test data.
report = classification_report(
    y_true,
    predictions,
    labels=np.arange(len(encoder.classes_)),
    target_names=encoder.classes_,
    digits=4,
)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:\n", report)


                                      image_path           label
0  val/museum-outdoor/Places365_val_00010130.jpg  museum-outdoor
1  val/museum-outdoor/Places365_val_00001310.jpg  museum-outdoor
2  val/museum-outdoor/Places365_val_00004772.jpg  museum-outdoor
3  val/museum-outdoor/Places365_val_00019233.jpg  museum-outdoor
4  val/museum-outdoor/Places365_val_00010906.jpg  museum-outdoor
Predictions saved in test_predictions_rf.csv
Accuracy: 0.7750
Classification Report:
                 precision    recall  f1-score   support

 museum-indoor     0.7619    0.8000    0.7805       100
museum-outdoor     0.7895    0.7500    0.7692       100

      accuracy                         0.7750       200
     macro avg     0.7757    0.7750    0.7749       200
  weighted avg     0.7757    0.7750    0.7749       200

