In [1]:
import pickle
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.svm import SVC   # Support Vector Classifier for training the model
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler, LabelBinarizer   # For scaling features and binarizing labels
import matplotlib.pyplot as plt # For visualizations
import seaborn as sns   # For heatmap




In [4]:
def load_and_preprocess_image(path, target_size):
    """Load one image and return it as a flat, VGG16-preprocessed vector.

    Args:
        path: Location of the image file to read.
        target_size: Value forwarded to ``load_img`` as ``target_size``.

    Returns:
        The result of ``preprocess_input`` on the image array, flattened to 1-D.
    """
    # Read the file at the requested size and turn it into a numeric array
    pixels = img_to_array(load_img(path, target_size=target_size))
    # Apply the VGG16 input preprocessing, then collapse to a single dimension
    return preprocess_input(pixels).flatten()

def get_data_for_training(excel_path, image_size=(32, 32)):
    """Build feature and label arrays from a spreadsheet of image paths.

    The spreadsheet is read with ``pd.read_excel``. Rows without an
    ``image_path`` are discarded and the ``Dataset`` column is dropped when it
    exists. Every remaining column other than ``image_path`` is treated as a
    per-class score; a row's label is the name of the column holding its
    largest value.

    Args:
        excel_path: Path of the Excel file to read.
        image_size: Size forwarded to ``load_and_preprocess_image``.

    Returns:
        A tuple ``(x, y, image_paths)`` where ``x`` is an array with one
        flattened image per row, ``y`` is an array of label (column) names and
        ``image_paths`` is the list of image paths, all in spreadsheet order.
    """
    df = pd.read_excel(excel_path)
    df = df.dropna(subset=['image_path'])

    # errors='ignore' lets sheets that have no 'Dataset' column load as well
    df = df.drop(columns=['Dataset'], errors='ignore')

    image_paths = df['image_path'].tolist()

    # Resolve all labels in one vectorised pass. Casting to float keeps idxmax
    # on a numeric frame instead of the object-dtype rows that iterrows() yields.
    score_columns = df.drop(columns=['image_path'])
    labels = score_columns.astype(float).idxmax(axis=1)

    # Load and preprocess the images in the same order as the labels
    x = np.array([load_and_preprocess_image(path, image_size) for path in image_paths])
    y = np.array(labels.tolist())

    return x, y, image_paths

In [5]:
# --- Configuration ---
image_size = (32, 32)   # Reduced image size to save memory
batch_size = 4096       # NOTE(review): not referenced by any cell visible here
train_excel_path = "training/training_data.xlsx"
val_excel_path = "validation/validation_data.xlsx"

# --- Data loading ---
# Training split: the image paths are not needed afterwards, so they are discarded
X_train, y_train, _ = get_data_for_training(excel_path=train_excel_path, image_size=image_size)
print("train data loaded")
# Validation split: keep the image paths so predictions can be traced back to files
X_val, y_val, image_paths = get_data_for_training(excel_path=val_excel_path, image_size=image_size)
print("val data loaded")

train data loaded
val data loaded


In [6]:
# Standardize features to zero mean / unit variance.
# The statistics are learned from the training set only and then reused for
# the validation set, so no validation information leaks into the scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
print("scaling complete")

scaling complete


In [None]:
# Initialize and train the SVM model.
# probability=True makes SVC fit an additional probability model using an
# internal, randomised cross-validation; fixing random_state keeps the
# predict_proba output (and the ROC curves built from it) reproducible.
svm_model = SVC(kernel='rbf', probability=True, random_state=42)
svm_model.fit(X_train, y_train)
print("training complete")

In [None]:
# Predict on the validation set: hard class labels plus per-class probabilities
y_pred = svm_model.predict(X_val)
y_pred_proba = svm_model.predict_proba(X_val)

# Fraction of validation samples whose predicted label equals the true label
val_accuracy = accuracy_score(y_true=y_val, y_pred=y_pred)
print(f"Validation accuracy: {val_accuracy}")

In [None]:
# Write one row per validation image (path, true label, predicted label) to Excel
results_df = pd.DataFrame(
    dict(Image_path=image_paths, actual_class=y_val, predicted_class=y_pred)
)
results_df.to_excel("results.xlsx", index=False)

In [None]:
# Confusion matrix normalized over the true labels (each row sums to 1)
cm = confusion_matrix(y_true=y_val, y_pred=y_pred, normalize='true')
print("Confusion Matrix:", cm, sep="\n")

In [None]:
# Per-class text report for the validation predictions
report = classification_report(y_val, y_pred)
print("Classification Report:", report, sep="\n")

In [None]:
# Plot the normalized confusion matrix as a heatmap.
# confusion_matrix (called without `labels`) orders its rows/columns by the
# sorted union of the labels found in y_val and y_pred, so the tick labels
# must use that same union; np.unique(y_val) alone would be misaligned
# whenever the model predicts a class that is absent from y_val.
class_labels = np.union1d(y_val, y_pred)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt=".2f", cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Normalized Confusion Matrix')
plt.show()

In [None]:
# Plot one-vs-rest ROC curves with their AUC.
# predict_proba orders its columns by svm_model.classes_, so the labels are
# binarized against those same classes; fitting the binarizer on y_val would
# shift the columns whenever validation is missing one of the trained classes.
lb = LabelBinarizer()
lb.fit(svm_model.classes_)
y_val_bin = lb.transform(y_val)
if len(lb.classes_) == 2 and y_val_bin.shape[1] == 1:
    # For two classes LabelBinarizer emits a single column (the positive
    # class); expand it to one indicator column per class.
    y_val_bin = np.hstack([1 - y_val_bin, y_val_bin])

fpr = dict()
tpr = dict()
roc_auc = dict()

for i in range(len(lb.classes_)):
    # ROC/AUC are undefined unless the class has both positive and negative
    # validation samples; skip such classes instead of raising.
    if np.unique(y_val_bin[:, i]).size < 2:
        continue
    fpr[i], tpr[i], _ = roc_curve(y_val_bin[:, i], y_pred_proba[:, i])
    roc_auc[i] = roc_auc_score(y_val_bin[:, i], y_pred_proba[:, i])

plt.figure()
for i in fpr:
    plt.plot(fpr[i], tpr[i], label=f'Class {lb.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Diagonal reference line: the performance of a random classifier
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('AUC-ROC Curve')
plt.legend(loc='lower right')
plt.show()

### Code Contributed by Manya Joshi