In [19]:
import os
import cv2
import numpy as np
import pandas as pd
import joblib
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [20]:
def create_unlabeled_data(source_file, destination_file):
    """Write an "unlabeled" dataset file holding the same lines as a labeled one.

    The source file is read in text mode and its content is written, unchanged
    and in the same order, to the destination file, which is created or
    overwritten. A confirmation message naming the destination is printed.

    Args:
        source_file: Path of the existing dataset listing to copy from.
        destination_file: Path of the listing file to write.
    """
    # Read everything first so the source is closed before the destination opens.
    with open(source_file, 'r') as src:
        content = src.read()
    with open(destination_file, 'w') as dst:
        dst.write(content)
    print(f"Unlabeled dataset created: {destination_file}")

In [21]:
def parse_image_dataset(file_path):
    """Build a table of image paths and labels from a dataset listing file.

    Each line of the listing is stripped of surrounding whitespace and treated
    as an image path. Paths that do not exist on disk (including blank lines)
    are silently skipped. The label of a kept path is its second
    '/'-separated component, i.e. ``path.split('/')[1]``.

    Args:
        file_path: Path of the text file with one image path per line.

    Returns:
        A DataFrame with columns 'image' (the path) and 'label' (the derived
        category), one row per existing path, in listing order.
    """
    with open(file_path, 'r') as listing:
        candidates = [raw.strip() for raw in listing]

    # Keep only entries that point at something present on disk.
    existing = [candidate for candidate in candidates if os.path.exists(candidate)]
    # The category is the path component right after the first one.
    categories = [kept.split('/')[1] for kept in existing]
    return pd.DataFrame({'image': existing, 'label': categories})

In [22]:
def compute_histogram_features(img_path):
    """Extracts per-channel histogram features from an image.

    The image is loaded with OpenCV, resized to 64x64 pixels, and a 256-bin
    histogram over the value range [0, 256) is computed for each of the three
    channels. The three histograms are concatenated in channel order.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A 1-D NumPy array of length 768 (3 channels x 256 bins).

    Raises:
        ValueError: If OpenCV cannot read or decode the file at ``img_path``.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque error in resize.
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")
    img = cv2.resize(img, (64, 64))
    hist_features = [cv2.calcHist([img], [i], None, [256], [0, 256]).flatten() for i in range(3)]
    return np.concatenate(hist_features)

In [23]:
# Prepare dataset: copy the training listing to the "unlabeled" file, then load
# the training images, extract their features and encode the string labels.
train_file = "train2.txt"
unlabeled_file = "unlabeled.txt"
# Writes unlabeled.txt with the same lines as train2.txt (a plain copy).
create_unlabeled_data(train_file, unlabeled_file)

# One row per listed image that exists on disk, with its path and label.
data = parse_image_dataset(train_file)
# Collect the per-image histogram vectors into one array, one entry per image.
features = np.array([compute_histogram_features(path) for path in data['image']])
# Map the string labels to integer class ids; the fitted encoder is kept so
# predicted ids can be converted back to label names later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(data['label'])

Unlabeled dataset created: unlabeled.txt


In [24]:
# Train Decision Tree Classifier on the histogram features and encoded labels.
# A fixed random_state is passed so the seed used by the estimator is explicit.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(features, y_encoded)

# Save trained model and encoder to disk; the prediction cell reloads both
# from these files instead of using the in-memory objects.
joblib.dump(dt_model, "decision_tree_model.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")
print("Model and encoder saved.")

Model and encoder saved.


In [25]:
# Load test dataset: the listing in val.txt is parsed and featurized with the
# same helpers as the training listing, so both feature arrays share a layout.
test_file = "val.txt"
test_data = parse_image_dataset(test_file)
test_features = np.array([compute_histogram_features(path) for path in test_data['image']])

In [26]:
# Make predictions using the model and encoder reloaded from disk.
# NOTE: joblib.load unpickles the file; only load artifacts from a trusted source.
loaded_model = joblib.load("decision_tree_model.pkl")
loaded_encoder = joblib.load("label_encoder.pkl")
# The model outputs integer class ids; convert them back to label names.
predictions = loaded_model.predict(test_features)
predicted_labels = loaded_encoder.inverse_transform(predictions)

# Adds a 'Predicted' column to test_data in place (next to the true 'label'
# column) and exports the whole table without the index.
test_data['Predicted'] = predicted_labels
test_data.to_csv("test_predictions.csv", index=False)

In [27]:
# Evaluate model
# Encode the ground-truth labels with the same encoder that produced the
# training targets so they are comparable with the integer predictions.
true_labels_encoded = loaded_encoder.transform(test_data['label'])
# Pin the class ids explicitly. Without `labels`, scikit-learn infers the
# classes from the evaluated data, so a class missing from the test set makes
# `target_names` mismatch (ValueError) and shrinks the confusion matrix.
class_ids = np.arange(len(loaded_encoder.classes_))
accuracy = accuracy_score(true_labels_encoded, predictions)
report = classification_report(
    true_labels_encoded,
    predictions,
    labels=class_ids,
    target_names=loaded_encoder.classes_,
)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:\n", report)
# ---- Confusion Matrix ----
# Rows are true classes and columns are predicted classes, in class_ids order.
conf_matrix = confusion_matrix(true_labels_encoded, predictions, labels=class_ids)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.7500
Classification Report:
                 precision    recall  f1-score   support

 museum-indoor       0.76      0.74      0.75       100
museum-outdoor       0.75      0.76      0.75       100

      accuracy                           0.75       200
     macro avg       0.75      0.75      0.75       200
  weighted avg       0.75      0.75      0.75       200

Confusion Matrix:
[[74 26]
 [24 76]]
