In [1]:
# Import required libraries
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import pandas as pd
import numpy as np
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Load dataset with extracted features (Replace with your data path)
data = pd.read_csv('data1_updated.csv')
data.drop(columns=['image_name'],axis=1, inplace=True)
X = data.drop('category', axis=1)
y = data['category']
print(f"Label dtype: {y.dtype}")
print(f"Unique values: {np.unique(y)}")
print(f"Missing values: {y.isnull().sum()}")
print(X.dtypes)





Label dtype: object
Unique values: ['clear' 'synthetic_haze' 'synthetic_low_light' 'synthetic_rain']
Missing values: 0
brightness       float64
saturation       float64
noise_level      float64
blur_metric      float64
edge_strength    float64
lbp_mean_r2      float64
lbp_var_r2       float64
edges_var        float64
red_var          float64
green_var        float64
blue_var         float64
rg_ratio         float64
rb_ratio         float64
gb_ratio         float64
dtype: object


In [2]:
from sklearn.preprocessing import LabelEncoder

# Convert any string labels to integers
le = LabelEncoder()
y_encoded = le.fit_transform(y.values.ravel())  # Handle 2D arrays

# Verify encoding
assert set(y_encoded) == {0,1,2,3}, "Invalid class count"

In [3]:
y = np.asarray(y_encoded).ravel()  # Force 1D shape
print(f"Final shape: {y.shape}")   # Should be (n_samples,)


Final shape: (8268,)


In [5]:
# Split data according to paper's experimental protocol
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Initialize base models with paper-specified parameters
svm_model = SVC(
    kernel='linear', 
    C=1.0, 
    probability=True,  # Required for soft voting
    random_state=42
)

rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42
)

xgb_model = XGBClassifier(
    n_estimators=100,
    objective='multi:softprob',  # Essential for 4-class classification
    random_state=42
)

# Create ensemble classifier with soft voting
ensemble = VotingClassifier(
    estimators=[
        ('svm', svm_model),
        ('rf', rf_model),
        ('xgb', xgb_model)
    ],
    voting='soft'  # As specified in Section III.2[1]
)

# Train ensemble model
ensemble.fit(X_train, y_train)

# Generate predictions
y_pred = ensemble.predict(X_test)

# Calculate metrics
precision = precision_score(y_test, y_pred, average='weighted')  # For multi-class
recall = recall_score(y_test, y_pred, average='weighted')
accuracy = accuracy_score(y_test, y_pred)
# Calculate F1-score (weighted average for multiclass)
f1 = f1_score(y_test, y_pred, average='weighted')

print(f'''
Ensemble Accuracy:  {accuracy:.4f}
Ensemble Precision: {precision:.4f}
Ensemble Recall:    {recall:.4f}
Ensemble F1-score: {f1:.4f}
''')



Ensemble Accuracy:  0.9595
Ensemble Precision: 0.9599
Ensemble Recall:    0.9595
Ensemble F1-score: 0.9596

