In [1]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive/', force_remount=True)

!ls "/content/drive/My Drive/"
import os
os.chdir("/content/drive/My Drive/")

Mounted at /content/drive/
 car_detect	  explainer		      passport.jpg   'SZ10 project'
 detection_XAI	  kfold_results_summary.csv   som_10x10.pkl   test_block
'egost project'   msc_project		      SSL


In [2]:
#data
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as io
from detection_XAI.utils import utils
import cv2
from tensorflow.keras.models import load_model

# Raw signals: read the .mat file and keep the array stored under 'x_cluster'.
x_cluster = io.loadmat('./SSL/data/x_cluster.mat')['x_cluster']

# Labels: read the .mat file and keep the array stored under 'y_cluster'.
y_cluster = io.loadmat('./SSL/data/y_cluster.mat')['y_cluster']

# Pre-processing
from keras.utils import to_categorical

# One-hot encode the labels after shifting them down by one
# (presumably the stored labels are 1-based -- TODO confirm).
y_cluster_n = to_categorical(y_cluster - 1)

# Project helpers: norml (presumably normalisation -- confirm in
# detection_XAI.utils) followed by a batched STFT with nfft=128, overlap=0.7.
x_cluster_n = utils.norml(x_cluster)
x_cluster_stft = utils.batch_stft(x_cluster_n, nfft=128, overlap=0.7)

#reshape input to 64x64
def resize_stft(x_in):
    """Resize every item of a batch to 64x64 and return them as one float32 array.

    Args:
        x_in: batch indexed along its first axis; each ``x_in[i]`` is handed to
            ``cv2.resize``. The result of each resize must fit a
            ``(64, 64, 3)`` slot (assumes 3-channel items -- TODO confirm).

    Returns:
        ``np.ndarray`` of shape ``(x_in.shape[0], 64, 64, 3)`` and dtype
        ``float32`` holding the bilinearly resized items.
    """
    n_items = x_in.shape[0]

    # Pre-allocate the output; values are cast to float32 on assignment.
    out = np.zeros((n_items, 64, 64, 3), dtype=np.float32)

    for idx in range(n_items):
        out[idx] = cv2.resize(x_in[idx], (64, 64), interpolation=cv2.INTER_LINEAR)

    return out

# Bring the STFT batch to the 64x64x3 float32 layout produced by resize_stft.
resized_x = resize_stft(x_cluster_stft)

# Run the saved encoder over the batch, then flatten each sample's output
# into a single row so it can be fed to the classical classifiers below.
saved_encoder_model = load_model("./SSL/model/encoder_4x4x16_v2.h5")
encoded_features = saved_encoder_model.predict(resized_x)
reshaped_features = encoded_features.reshape(len(encoded_features), -1)



[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step


In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, StratifiedKFold

# Baseline classifiers for the encoder features; both receive random_state=42.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
svm_model = SVC(C=1.0, kernel='rbf', random_state=42)

# Disabled: 5-fold stratified cross-validation through cross_val_score.
#cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Disabled: Random Forest evaluation
#rf_scores = cross_val_score(rf_model, reshaped_features, y_cluster.ravel(), cv=cv, scoring='accuracy')
#print(f"Random Forest - Mean Accuracy: {rf_scores.mean():.4f}, Std: {rf_scores.std():.4f}")

# Disabled: SVM evaluation
#svm_scores = cross_val_score(svm_model, reshaped_features, y_cluster.ravel(), cv=cv, scoring='accuracy')
#print(f"SVM - Mean Accuracy: {svm_scores.mean():.4f}, Std: {svm_scores.std():.4f}")


In [None]:
# Element-wise mean and standard deviation of the per-fold confusion matrices.
# `confusion_matrices_list` is filled by the K-fold evaluation cell further
# down in this notebook, so that cell has to be executed before this one.
confusion_matrices_array = np.array(confusion_matrices_list)

# axis=0 runs over the folds, leaving one value per matrix entry.
mean_confusion_matrix = confusion_matrices_array.mean(axis=0)
std_confusion_matrix = confusion_matrices_array.std(axis=0)

print("Mean Confusion Matrix:", mean_confusion_matrix, sep="\n")
print("\nStandard Deviation Confusion Matrix:", std_confusion_matrix, sep="\n")

Mean Confusion Matrix:
[[108.8  22.    6.2  50.2]
 [ 17.6 255.   19.   19.6]
 [  4.6  19.8 287.2  10. ]
 [ 12.4   9.6  13.4 244.6]]

Standard Deviation Confusion Matrix:
[[ 9.06421535  5.4405882   2.22710575  8.81816307]
 [ 5.42586399 11.3137085   9.54986911  3.72021505]
 [ 2.33238076  5.6         7.83326241  2.28035085]
 [ 4.88262225  1.0198039   3.00665928  6.82934843]]


In [4]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_recall_fscore_support
import numpy as np

def resultshow(predictions, true_labels, labels=None, target_names=None):
    """Print a classification report and confusion matrix; return per-class metrics.

    Args:
        predictions: predicted class labels.
        true_labels: ground-truth class labels for the same samples.
        labels: optional list of class labels, forwarded to the scikit-learn
            metric functions. Supplying it fixes which classes are reported
            and in which order, so the returned arrays keep a constant length
            even when a class is missing from ``true_labels``/``predictions``.
            ``None`` (default) keeps scikit-learn's own default.
        target_names: optional display names for the report rows. Defaults to
            the four names used by the original implementation.

    Returns:
        Tuple ``(confusion_matrix, precision, recall, f1)``; the last three
        are the per-class arrays from ``precision_recall_fscore_support``.
    """
    if target_names is None:
        target_names = ['quake', 'earthquake', 'rockfall', 'environment noise']

    # Print classification report
    print(classification_report(true_labels, predictions,
                                labels=labels, target_names=target_names))

    # Compute and print the confusion matrix
    confusion_matrices = confusion_matrix(true_labels, predictions, labels=labels)
    print(confusion_matrices)

    # Per-class precision, recall and F1-score (support is not returned)
    precision, recall, f1, _support = precision_recall_fscore_support(
        true_labels, predictions, labels=labels)

    return confusion_matrices, precision, recall, f1


In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Stratified K-fold evaluation (5 folds) of a classifier on the encoder features.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
data = reshaped_features
labels = y_cluster.flatten()

# NOTE(review): the original loop unpacked each split as
#     X_test, X_train = data[train_index], data[test_index]
# i.e. the model is fitted on the single held-out fold and scored on the
# remaining k-1 folds (hence the per-fold support of 1100 in the recorded
# output). That behaviour is kept as the default, but it is now an explicit
# switch instead of a silent swap. Set it to False for conventional k-fold
# cross-validation (fit on k-1 folds, score on the held-out fold).
# TODO confirm which protocol is intended.
FIT_ON_HELD_OUT_FOLD = True

class_names = ['quake', 'earthquake', 'rockfall', 'noise']
precision_dicts = {class_name: [] for class_name in class_names}
recall_dicts = {class_name: [] for class_name in class_names}
f1_dicts = {class_name: [] for class_name in class_names}
confusion_matrices_list = []
accuracy_list = []

for train_index, test_index in kfold.split(data, labels):
    # Decide which side of the split is used for fitting and which for scoring.
    if FIT_ON_HELD_OUT_FOLD:
        fit_index, eval_index = test_index, train_index
    else:
        fit_index, eval_index = train_index, test_index

    X_train, X_test = data[fit_index], data[eval_index]
    y_train, y_test = labels[fit_index], labels[eval_index]

    # Initialize and train Random Forest or SVM Classifier
    #rf_model = RandomForestClassifier(n_estimators=170, random_state=42)
    #rf_model.fit(X_train, y_train)
    svm_model = SVC(kernel='rbf', C=1.0, random_state=42)
    svm_model.fit(X_train, y_train)

    # Make predictions
    #y_pred = rf_model.predict(X_test)
    y_pred = svm_model.predict(X_test)

    # Compute confusion matrix, precision, recall, and f1-score
    confusion_matrices, precision, recall, f1 = resultshow(y_pred, y_test)
    confusion_matrices_list.append(confusion_matrices)

    # Calculate accuracy for the current fold
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_list.append(accuracy)

    # Store the per-class metrics; position i of each array is recorded under
    # class_names[i] (assumes every class occurs in each fold -- TODO confirm).
    for class_index, class_name in enumerate(class_names):
        precision_dicts[class_name].append(precision[class_index])
        recall_dicts[class_name].append(recall[class_index])
        f1_dicts[class_name].append(f1[class_index])

# Calculate mean and std across folds for each class
for class_name in class_names:
    mean_precision = np.mean(precision_dicts[class_name])
    std_precision = np.std(precision_dicts[class_name])

    mean_recall = np.mean(recall_dicts[class_name])
    std_recall = np.std(recall_dicts[class_name])

    mean_f1 = np.mean(f1_dicts[class_name])
    std_f1 = np.std(f1_dicts[class_name])

    # Print mean and std for each class
    print(f"Class {class_name} - Mean Precision: {mean_precision:.3f} ± {std_precision:.3f}")
    print(f"Class {class_name} - Mean Recall: {mean_recall:.3f} ± {std_recall:.3f}")
    print(f"Class {class_name} - Mean F1-score: {mean_f1:.3f} ± {std_f1:.3f}")

# Calculate and print mean and std for accuracy
mean_accuracy = np.mean(accuracy_list)
std_accuracy = np.std(accuracy_list)

print(f"Mean Accuracy: {mean_accuracy:.3f} ± {std_accuracy:.3f}")


                   precision    recall  f1-score   support

            quake       0.79      0.57      0.66       187
       earthquake       0.85      0.84      0.85       311
         rockfall       0.92      0.91      0.92       322
environment noise       0.74      0.90      0.81       280

         accuracy                           0.83      1100
        macro avg       0.83      0.81      0.81      1100
     weighted avg       0.83      0.83      0.83      1100

[[106  21   6  54]
 [ 15 262  10  24]
 [  4  14 294  10]
 [  9  10   9 252]]
                   precision    recall  f1-score   support

            quake       0.68      0.53      0.60       187
       earthquake       0.82      0.75      0.79       311
         rockfall       0.84      0.92      0.88       322
environment noise       0.76      0.85      0.80       280

         accuracy                           0.79      1100
        macro avg       0.77      0.76      0.77      1100
     weighted avg       0.79     

In [17]:
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import numpy as np

# Class display names; position i corresponds to zero-based class id i.
class_names = ['quake', 'earthquake', 'rockfall', 'noise']

# Random Forest hyperparameter settings to compare
rf_param_grid = [
    {"n_estimators": 100, "max_depth": None, "random_state": 42},
    {"n_estimators": 100, "max_depth": 10, "random_state": 42},
    {"n_estimators": 125, "max_depth": 10, "random_state": 42},
    {"n_estimators": 150, "max_depth": 10, "random_state": 42},
    {"n_estimators": 175, "max_depth": 12, "random_state": 42},
    {"n_estimators": 200, "max_depth": 15, "random_state": 42},
    {"n_estimators": 225, "max_depth": 17, "random_state": 42},
    {"n_estimators": 250, "max_depth": 20, "random_state": 42},
    {"n_estimators": 300, "max_depth": 22, "random_state": 42},
    {"n_estimators": 350, "max_depth": 25, "random_state": 42},
    {"n_estimators": 375, "max_depth": 25, "random_state": 42},
    {"n_estimators": 400, "max_depth": 25, "random_state": 42},
    {"n_estimators": 425, "max_depth": 25, "random_state": 42},
    {"n_estimators": 450, "max_depth": 25, "random_state": 42},
    {"n_estimators": 475, "max_depth": 25, "random_state": 42},
    {"n_estimators": 500, "max_depth": 25, "random_state": 42},
    {"n_estimators": 500, "max_depth": 20, "random_state": 42},
    {"n_estimators": 500, "max_depth": 30, "random_state": 42},
]

# Fold counts to test
fold_list = [3, 5, 10, 20]

# NOTE(review): the original loop indexed the training data with `test_index`
# and the test data with `train_index`, i.e. each forest is fitted on the
# single held-out fold and scored on the remaining k-1 folds. That behaviour
# is kept as the default, but it is now an explicit switch instead of a silent
# swap. Set it to False for conventional k-fold cross-validation.
# TODO confirm which protocol is intended.
FIT_ON_HELD_OUT_FOLD = True

# Loop-invariant inputs, computed once instead of on every fold.
all_labels = y_cluster.flatten()
class_ids = range(len(class_names))

for n_splits in fold_list:
    print("="*50)
    print(f"Results for {n_splits}-Fold Cross Validation")
    print("="*50)

    # The splitter depends only on n_splits; with an integer random_state it
    # yields the same folds on every split() call, so build it once per fold count.
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    for params in rf_param_grid:
        print(f"Testing RF with parameters: {params}")

        # Per-class metric lists and per-fold accuracies for this setting
        precision_dicts = {class_name: [] for class_name in class_names}
        recall_dicts = {class_name: [] for class_name in class_names}
        f1_dicts = {class_name: [] for class_name in class_names}
        accuracy_list = []

        for train_index, test_index in kfold.split(reshaped_features, all_labels):
            # Decide which side of the split is used for fitting and which for scoring.
            if FIT_ON_HELD_OUT_FOLD:
                fit_index, eval_index = test_index, train_index
            else:
                fit_index, eval_index = train_index, test_index

            X_train, X_test = reshaped_features[fit_index], reshaped_features[eval_index]
            y_train, y_test = all_labels[fit_index], all_labels[eval_index]

            # The forest is trained on labels shifted down by one, so the
            # reference labels are shifted the same way before scoring.
            rf_model = RandomForestClassifier(**params)
            rf_model.fit(X_train, y_train - 1)

            y_pred = rf_model.predict(X_test)
            y_test_zero_based = y_test - 1

            # labels=class_ids pins the metric arrays to one entry per class,
            # in class_names order, even if a class is absent from a fold.
            precision, recall, f1, _ = precision_recall_fscore_support(
                y_test_zero_based, y_pred, labels=class_ids)
            acc = accuracy_score(y_test_zero_based, y_pred)

            # Store metrics
            accuracy_list.append(acc)
            for i, class_name in enumerate(class_names):
                precision_dicts[class_name].append(precision[i])
                recall_dicts[class_name].append(recall[i])
                f1_dicts[class_name].append(f1[i])

        # Print metrics averaged over the folds
        for class_name in class_names:
            mean_precision = np.mean(precision_dicts[class_name])
            mean_recall = np.mean(recall_dicts[class_name])
            mean_f1 = np.mean(f1_dicts[class_name])
            print(f"Class {class_name} - Precision: {mean_precision:.3f}, Recall: {mean_recall:.3f}, F1-score: {mean_f1:.3f}")

        mean_accuracy = np.mean(accuracy_list)
        std_accuracy = np.std(accuracy_list)
        print(f"Overall Accuracy: {mean_accuracy:.3f} ± {std_accuracy:.3f}")
        print("-"*50)


Results for 3-Fold Cross Validation
Testing RF with parameters: {'n_estimators': 100, 'max_depth': None, 'random_state': 42}
Class quake - Precision: 0.773, Recall: 0.618, F1-score: 0.686
Class earthquake - Precision: 0.865, Recall: 0.841, F1-score: 0.852
Class rockfall - Precision: 0.896, Recall: 0.898, F1-score: 0.897
Class noise - Precision: 0.762, Recall: 0.884, F1-score: 0.818
Overall Accuracy: 0.831 ± 0.007
--------------------------------------------------
Testing RF with parameters: {'n_estimators': 100, 'max_depth': 10, 'random_state': 42}
Class quake - Precision: 0.778, Recall: 0.605, F1-score: 0.680
Class earthquake - Precision: 0.861, Recall: 0.846, F1-score: 0.853
Class rockfall - Precision: 0.904, Recall: 0.903, F1-score: 0.904
Class noise - Precision: 0.756, Recall: 0.883, F1-score: 0.814
Overall Accuracy: 0.831 ± 0.005
--------------------------------------------------
Testing RF with parameters: {'n_estimators': 125, 'max_depth': 10, 'random_state': 42}
Class quake - P