In [1]:
pip install xgboost


Collecting xgboost
  Downloading xgboost-2.1.4-py3-none-win_amd64.whl (124.9 MB)
Installing collected packages: xgboost
Successfully installed xgboost-2.1.4
Note: you may need to restart the kernel to use updated packages.


In [1]:
def load_dataset(txt_file):
    """Build a DataFrame of image paths and labels from a listing file.

    Each non-empty line of ``txt_file`` is treated as an image path. The
    label is the second '/'-separated component of that path, e.g.
    ``val/museum-outdoor/img.jpg`` -> ``museum-outdoor``.

    Entries are skipped when the path does not exist on disk, or when the
    path has no second component to use as a label (a warning is printed
    in the latter case instead of raising ``IndexError``).

    Args:
        txt_file: Path of the text file with one image path per line.

    Returns:
        pandas.DataFrame with columns ``image_path`` and ``label``; one row
        per usable entry, in file order.
    """
    data, labels = [], []

    with open(txt_file, 'r') as file:
        # Iterate lazily; there is no need to hold the whole listing in memory.
        for line in file:
            image_path = line.strip()
            if not image_path or not os.path.exists(image_path):
                continue

            parts = image_path.split('/')
            if len(parts) < 2:
                # No directory component -> no label can be derived.
                print(f"Warning: cannot derive label from {image_path}; skipping")
                continue

            data.append(image_path)
            labels.append(parts[1])

    return pd.DataFrame({'image_path': data, 'label': labels})

In [4]:
import os
import cv2
import numpy as np
import pandas as pd
import joblib
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ---- 2. Feature Extraction ----
def extract_features(image_path, size=(64, 64)):
    """Read an image and return its resized grayscale pixels as a flat vector.

    Args:
        image_path: Path of the image file to read with OpenCV.
        size: ``(width, height)`` passed to ``cv2.resize``. Defaults to
            ``(64, 64)``. The same size must be used at training and
            prediction time so the feature length matches.

    Returns:
        1-D numpy array with ``width * height`` values (4096 for the
        default size), or ``None`` if the image could not be read.
    """
    image = cv2.imread(image_path)

    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        print(f"Error: Unable to read image {image_path}")
        return None  # Caller is expected to skip this image

    image = cv2.resize(image, size)  # Resize for uniformity
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale

    # Flatten image pixels into a 1D feature vector
    return image.flatten()

# ---- 3. Load & Process Dataset ----
train_txt = "train2.txt"  # Text file listing one training image path per line

dataset = load_dataset(train_txt)

# Extract features for all images. Labels are collected in the same loop so
# that an unreadable image (extract_features returns None) drops its label as
# well; encoding the unfiltered label column would leave X and y with
# different lengths.
features, train_labels = [], []
for path, label in zip(dataset['image_path'], dataset['label']):
    feature = extract_features(path)
    if feature is not None:
        features.append(feature)
        train_labels.append(label)

if not features:
    raise ValueError(f"No readable training images found via {train_txt}")

features = np.array(features)  # Convert list to numpy array
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(train_labels)

# ---- Train XGBoost Model ----
xgb_model = xgb.XGBClassifier(n_estimators=100, max_depth=10, learning_rate=0.1, random_state=42)
xgb_model.fit(features, y_encoded)

# ---- Save the Model ----
# The encoder is saved alongside the model so predictions can be mapped back
# to the original label strings later.
joblib.dump(xgb_model, "xgboost_model.pkl")
joblib.dump(encoder, "label_encoder.pkl")
print("Model saved as xgboost_model.pkl")


Model saved as xgboost_model.pkl


In [7]:
import joblib
import numpy as np
import pandas as pd
import cv2
import os
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ---- Load Trained Model & Label Encoder ----
# NOTE: joblib.load unpickles the file, which can execute arbitrary code; only
# load model files that were produced by this notebook / a trusted source.
xgb_model = joblib.load("xgboost_model.pkl")
encoder = joblib.load("label_encoder.pkl")

# ---- Load Test Dataset ----
test_txt = "val.txt"  # Change to your test dataset file
test_dataset = load_dataset(test_txt)
print(test_dataset.head())

# Extract features. extract_features returns None for unreadable images, so
# those rows are dropped from both the feature matrix and the DataFrame to
# keep predictions, paths and true labels aligned.
extracted = [extract_features(path) for path in test_dataset['image_path']]
readable = np.array([feature is not None for feature in extracted], dtype=bool)
if not readable.any():
    raise ValueError(f"No readable test images found via {test_txt}")

test_dataset = test_dataset.loc[readable].reset_index(drop=True)
test_features = np.array([feature for feature in extracted if feature is not None])

# ---- Predict ----
predictions = xgb_model.predict(test_features)
predicted_labels = encoder.inverse_transform(predictions)  # Convert back to original labels

# ---- Save Predictions ----
test_dataset['Predicted Label'] = predicted_labels
test_dataset.to_csv("test_predictions_xgb.csv", index=False)
print("Predictions saved in test_predictions_xgb.csv")

# ---- Compute Accuracy & Metrics ----
y_true = encoder.transform(test_dataset['label'])
accuracy = accuracy_score(y_true, predictions)

# Pass the full set of encoded class ids explicitly: without `labels`, sklearn
# infers the classes from the data and raises if the test split lacks one of
# the classes named in `target_names`.
class_ids = np.arange(len(encoder.classes_))
report = classification_report(
    y_true,
    predictions,
    labels=class_ids,
    target_names=encoder.classes_,
    digits=4,
)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:\n", report)

# ---- Confusion Matrix ----
# Rows are true classes, columns are predicted classes, both in encoder order.
conf_matrix = confusion_matrix(y_true, predictions, labels=class_ids)
print("Confusion Matrix:")
print(conf_matrix)

                                      image_path           label
0  val/museum-outdoor/Places365_val_00010130.jpg  museum-outdoor
1  val/museum-outdoor/Places365_val_00001310.jpg  museum-outdoor
2  val/museum-outdoor/Places365_val_00004772.jpg  museum-outdoor
3  val/museum-outdoor/Places365_val_00019233.jpg  museum-outdoor
4  val/museum-outdoor/Places365_val_00010906.jpg  museum-outdoor
Predictions saved in test_predictions_xgb.csv
Accuracy: 0.7800
Classification Report:
                 precision    recall  f1-score   support

 museum-indoor     0.7593    0.8200    0.7885       100
museum-outdoor     0.8043    0.7400    0.7708       100

      accuracy                         0.7800       200
     macro avg     0.7818    0.7800    0.7796       200
  weighted avg     0.7818    0.7800    0.7796       200

Confusion Matrix:
[[82 18]
 [26 74]]
