In [4]:
!pip install xgboost




[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Evaluate a soft-voting ensemble (linear SVM + random forest + XGBoost) with
# stratified k-fold cross-validation and report weighted metrics.

# 0. Settings shared by the steps below. Keeping them in one place means the
#    seed and the fold count cannot drift apart between the models, the CV
#    splitter and the printed report heading.
# NOTE(review): DATA_PATH is an absolute local path; consider making it
# relative to a configurable data directory.
DATA_PATH = "D:/fy_project1/data1_updated.csv"
RANDOM_STATE = 42
N_SPLITS = 5

# 1. Read dataset
data = pd.read_csv(DATA_PATH)

# 2. Drop columns not needed or already removed
#    errors='ignore' makes this a no-op when 'image_name' is absent, and the
#    non-inplace form keeps the cell safe to re-run.
data = data.drop(columns=['image_name'], errors='ignore')

# 3. Separate features and labels
#    Fail early with a clear message if the target column is missing.
if 'category' not in data.columns:
    raise KeyError(f"Expected a 'category' target column in {DATA_PATH}")
X = data.drop(columns=['category'])

# 4. Encode target labels as integers
#    fit_transform already returns a 1-D integer array, so no further
#    flattening is required. `le` is kept so predictions can be mapped back
#    to the original labels with le.inverse_transform.
le = LabelEncoder()
y_encoded = le.fit_transform(data['category'].to_numpy())
y = y_encoded

# 5. Define base models
#    probability=True is required on SVC so it exposes predict_proba, which
#    soft voting relies on.
# NOTE(review): SVC is sensitive to feature scale; if the features are not
# already standardized, consider wrapping it in a Pipeline with a scaler so
# scaling is fitted inside each CV fold — confirm against the data.
svm_model = SVC(kernel='linear', C=1.0, probability=True, random_state=RANDOM_STATE)
rf_model = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)
xgb_model = XGBClassifier(
    n_estimators=100,
    objective='multi:softprob',
    random_state=RANDOM_STATE
)

# 6. Create the ensemble with soft voting (averages predicted probabilities)
ensemble = VotingClassifier(
    estimators=[
        ('svm', svm_model),
        ('rf', rf_model),
        ('xgb', xgb_model)
    ],
    voting='soft'
)

# 7. Cross-validation setup
#    Metrics to evaluate; the *_weighted scorers average per-class scores
#    weighted by class support.
scoring = {
    'accuracy': 'accuracy',
    'precision': 'precision_weighted',
    'recall': 'recall_weighted',
    'f1': 'f1_weighted'
}

# Stratified folds preserve the class proportions in every split.
cv = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_STATE)

# 8. Perform cross-validation
results = cross_validate(
    ensemble,
    X, y,
    cv=cv,
    scoring=scoring,
    return_train_score=False
)

# 9. Print average metrics across folds (mean ± standard deviation)
#    cross_validate stores each metric under the key 'test_<scoring name>'.
report_rows = [
    ('Accuracy', 'test_accuracy'),
    ('Precision', 'test_precision'),
    ('Recall', 'test_recall'),
    ('F1-score', 'test_f1'),
]

print(f"Cross-Validation Results ({N_SPLITS}-Fold):")
for label, result_key in report_rows:
    fold_scores = results[result_key]
    heading = f"{label}:"
    # Pad the heading to a fixed width so the numeric columns line up.
    print(f"  {heading:<11}{fold_scores.mean():.4f} ± {fold_scores.std():.4f}")


Cross-Validation Results (5-Fold):
  Accuracy:  0.9586 ± 0.0043
  Precision: 0.9594 ± 0.0042
  Recall:    0.9586 ± 0.0043
  F1-score:  0.9588 ± 0.0043
