In [1]:
def load_dataset(txt_file):
    data, labels = [], []
    
    with open(txt_file, 'r') as file:
        for line in file.readlines():
            image_path = line.strip()
            full_path = os.path.join(image_path)
            if os.path.exists(full_path):
                label = image_path.split('/')[1] 
                data.append(full_path)
                labels.append(label)
    
    return pd.DataFrame({'image_path': data, 'label': labels})

In [2]:
import os
import cv2
import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ---- 1. Load Dataset ----
def extract_features(image_path):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Unable to read image {image_path}")
        return None
    
    image = cv2.resize(image, (64, 64))  
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  
    return image.flatten()  # Flatten pixels

# ---- 2. Load Train Dataset ----
train_txt = "train2.txt"
dataset = load_dataset(train_txt)

features = []
labels = []
for path, label in zip(dataset['image_path'], dataset['label']):
    feature = extract_features(path)
    if feature is not None:
        features.append(feature)
        labels.append(label)

features = np.array(features)
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(labels)

# ---- 3. Create Labeled & Unlabeled Data ----
# Mark some labels as -1 (unlabeled)
y_semi_supervised = np.copy(y_encoded)
y_semi_supervised[:len(y_encoded)//2] = -1  # Assume first half is unlabeled

# ---- 4. Train Semi-Supervised Model ----
dt_base = DecisionTreeClassifier(max_depth=10)
self_training_model = SelfTrainingClassifier(dt_base, criterion="threshold", threshold=0.8)
self_training_model.fit(features, y_semi_supervised)

# ---- 5. Save Model ----
joblib.dump(self_training_model, "semi_supervised_dt.pkl")
joblib.dump(encoder, "label_encoder.pkl")
print("Semi-Supervised Model saved as semi_supervised_dt.pkl")

Semi-Supervised Model saved as semi_supervised_dt.pkl


In [5]:
import os
import cv2
import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

test_txt = "val.txt"
test_dataset = load_dataset(test_txt)

test_features = np.array([extract_features(path) for path in test_dataset['image_path']])
y_true = encoder.transform(test_dataset['label'])

# ---- 7. Predict & Evaluate ----
predictions = self_training_model.predict(test_features)
predicted_labels = encoder.inverse_transform(predictions)

# ---- 8. Save Predictions ----
test_dataset['Predicted Label'] = predicted_labels
test_dataset.to_csv("semi_supervised_predictions.csv", index=False)
print("Predictions saved in semi_supervised_predictions.csv")

# ---- 9. Compute Accuracy & Confusion Matrix ----
accuracy = accuracy_score(y_true, predictions)
report = classification_report(y_true, predictions, target_names=encoder.classes_, digits=4)
conf_matrix = confusion_matrix(y_true, predictions)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)

Predictions saved in semi_supervised_predictions.csv
Accuracy: 0.5000
Classification Report:
                 precision    recall  f1-score   support

 museum-indoor     0.5000    1.0000    0.6667       100
museum-outdoor     0.0000    0.0000    0.0000       100

      accuracy                         0.5000       200
     macro avg     0.2500    0.5000    0.3333       200
  weighted avg     0.2500    0.5000    0.3333       200

Confusion Matrix:
 [[100   0]
 [100   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
