In [3]:
from google.colab import drive

drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [1]:
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score

def metrics(y_test, y_pred):
    # Compute evaluation metrics
    f1 = f1_score(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    precison = precision_score(y_test, y_pred)

    # Compute confusion matrix
    tp =np.sum((y_test == 1) & (y_pred == 1))
    fp = np.sum((y_test == 0) & (y_pred == 1))
    tn = np.sum((y_test == 0) & (y_pred == 0))
    fn = np.sum((y_test == 1) & (y_pred == 0))
    omission = fn / (fn + tp)
    commission = fp / (fp + tn)

    # Print evaluation metrics
    print('F1-score:', f1)
    print('Accuracy:', accuracy)
    print('Omission:', omission)
    print('Commission:', commission)
    print('Recall:', recall)
    print('Precision:', precison)

    # Print confusion matrix
    print('Confusion matrix:')
    print('TP:', tp)
    print('FP:', fp)
    print('TN:', tn)
    print('FN:', fn)

    # macro F1-score
    print('Macro F1-score:', f1_score(y_test, y_pred, average='macro'))

#### Extract features from the preprocessed images using DL
* cnn (resnet) with logistic regression --> 93% 
* cnn concatenate 3 features (resnet, InceptionV3, Xception) with logistic regression --> 96% 
* cnn concatenate 3 features (resnet, InceptionV3, Xception) with SVM --> 56% 

cnn (resnet) with logistic regression

In [4]:
import tensorflow as tf
import numpy as np
from sklearn.linear_model import LogisticRegression

# Load the training set of images and labels
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/train_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the training set
train_dataset = train_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Load the validation set of images and labels
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/val_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the validation set
valid_dataset = valid_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Define the CNN architecture
model = tf.keras.applications.ResNet50(
    include_top=False,
    input_shape=(256, 256, 3),
    pooling='avg'
)

# Extract features from the images in the training set using the trained CNN
train_features = []
train_labels = []
for images, labels in train_dataset:
    features = model.predict(images)
    train_features.append(features)
    train_labels.append(labels)
train_features = np.concatenate(train_features, axis=0)
train_labels = np.concatenate(train_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
train_labels = np.argmax(train_labels, axis=1)

# Train a logistic regression model on the features and labels in the training set
logistic_model = LogisticRegression()
logistic_model.fit(train_features, train_labels)

# Extract features from the images in the validation set using the trained CNN
valid_features = []
valid_labels = []
for images, labels in valid_dataset:
    features = model.predict(images)
    valid_features.append(features)
    valid_labels.append(labels)
valid_features = np.concatenate(valid_features, axis=0)
valid_labels = np.concatenate(valid_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
valid_labels = np.argmax(valid_labels, axis=1)

# Make predictions on the validation set using the logistic regression model
y_pred = logistic_model.predict(valid_features)

# Evaluate the model using accuracy, F1-score, omission, and commission metrics on the validation set
metrics(valid_labels, y_pred)

Found 736 files belonging to 2 classes.
Found 186 files belonging to 2 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


F1-score: 0.9304812834224598
Accuracy: 0.9301075268817204
Omission: 0.06451612903225806
Commission: 0.07526881720430108
Recall: 0.9354838709677419
Precision: 0.925531914893617
Confusion matrix:
TP: 87
FP: 7
TN: 86
FN: 6
Macro F1-score: 0.9301055065760948


Multiple features

In [5]:
# Load the training set of images and labels
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/train_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the training set
train_dataset = train_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Load the validation set of images and labels
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/val_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the validation set
valid_dataset = valid_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Define the CNN architectures
models = [
    tf.keras.applications.ResNet50(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.InceptionV3(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.Xception(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    )
]

# Extract features from the images in the training set using the trained CNNs
train_features = []
train_labels = []
for images, labels in train_dataset:
    features = []
    for model in models:
        features.append(model.predict(images))
    train_features.append(np.concatenate(features, axis=1))
    train_labels.append(labels)
train_features = np.concatenate(train_features, axis=0)
train_labels = np.concatenate(train_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
train_labels = np.argmax(train_labels, axis=1)

# Train a logistic regression model on the features and labels in the training set
logistic_model = LogisticRegression()
logistic_model.fit(train_features, train_labels)

# Extract features from the images in the validation set using the trained CNNs
valid_features = []
valid_labels = []
for images, labels in valid_dataset:
    features = []
    for model in models:
        features.append(model.predict(images))
    valid_features.append(np.concatenate(features, axis=1))
    valid_labels.append(labels)
valid_features = np.concatenate(valid_features, axis=0)
valid_labels = np.concatenate(valid_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
valid_labels = np.argmax(valid_labels, axis=1)

# Make predictions on the validation set using the logistic regression model
y_pred = logistic_model.predict(valid_features)

# Evaluate the model using accuracy, F1-score, omission, and commission metrics on the validation set
metrics(valid_labels, y_pred)

Found 736 files belonging to 2 classes.
Found 186 files belonging to 2 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5






STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


F1-score: 0.968421052631579
Accuracy: 0.967741935483871
Omission: 0.010752688172043012
Commission: 0.053763440860215055
Recall: 0.989247311827957
Precision: 0.9484536082474226
Confusion matrix:
TP: 92
FP: 5
TN: 88
FN: 1
Macro F1-score: 0.967727009832273


SVM

In [6]:
import tensorflow as tf
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Load the training set of images and labels
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/train_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the training set
train_dataset = train_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Load the validation set of images and labels
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/val_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the validation set
valid_dataset = valid_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)


# Define the CNN architectures
models = [
    tf.keras.applications.ResNet50(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.InceptionV3(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.Xception(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    )
]

# Extract features from the images in the training set using the trained CNNs
train_features = []
train_labels = []
for images, labels in train_dataset:
    features = []
    for model in models:
        features.append(model.predict(images))
    train_features.append(np.concatenate(features, axis=1))
    train_labels.append(labels)
train_features = np.concatenate(train_features, axis=0)
train_labels = np.concatenate(train_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
train_labels = np.argmax(train_labels, axis=1)

# Define the SVM model
svm = SVC()

# Train the SVM model on the features and labels in the training set
svm.fit(train_features, train_labels)

# Extract features from the images in the validation set using the trained CNN
valid_features = []
valid_labels = []
for images, labels in valid_dataset:
    features = model.predict(images)
    valid_features.append(features)
    valid_labels.append(labels)
valid_features = np.concatenate(valid_features, axis=0)

# Concatenate the features with itself three times
valid_features = np.concatenate([valid_features, valid_features, valid_features], axis=1)

valid_labels = np.argmax(np.concatenate(valid_labels, axis=0), axis=1)

# Make predictions on the validation set using the SVM model
y_pred = svm.predict(valid_features)

# Evaluate the SVM model using accuracy, F1-score, omission, and commission metrics on the validation set
metrics(valid_labels, y_pred)

Found 736 files belonging to 2 classes.
Found 186 files belonging to 2 classes.




F1-score: 0.7530364372469636
Accuracy: 0.6720430107526881
Omission: 0.0
Commission: 0.6559139784946236
Recall: 1.0
Precision: 0.6038961038961039
Confusion matrix:
TP: 93
FP: 61
TN: 32
FN: 0
Macro F1-score: 0.6325182186234818


#### Extract features from the **Augmented** resized preprocessed images using DL
* cnn (resnet) with logistic regression --> 93.5% 
* cnn concatenate 3 features (resnet, InceptionV3, Xception) with logistic regression --> 98.9% 
* cnn concatenate 3 features (resnet, InceptionV3, Xception) with SVM --> 80% 
* cnn concatenate 3 features (resnet, InceptionV3, Xception) with adaboost --> 91%
* cnn concatenate 3 features (resnet, InceptionV3, Xception) with random forest 95.5%

cnn (resnet) with logistic regression

In [7]:
import tensorflow as tf
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Load the training set of images and labels
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/train_aug_resized_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the training set
train_dataset = train_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Load the validation set of images and labels
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/val_resized_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the validation set
valid_dataset = valid_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Define the CNN architecture
model = tf.keras.applications.ResNet50(
    include_top=False,
    input_shape=(256, 256, 3),
    pooling='avg'
)

# Extract features from the images in the training set using the trained CNN
train_features = []
train_labels = []
for images, labels in train_dataset:
    features = model.predict(images)
    train_features.append(features)
    train_labels.append(labels)
train_features = np.concatenate(train_features, axis=0)
train_labels = np.concatenate(train_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
train_labels = np.argmax(train_labels, axis=1)

# Train a logistic regression model on the features and labels in the training set
logistic_model = LogisticRegression()
logistic_model.fit(train_features, train_labels)

# Extract features from the images in the validation set using the trained CNN
valid_features = []
valid_labels = []
for images, labels in valid_dataset:
    features = model.predict(images)
    valid_features.append(features)
    valid_labels.append(labels)
valid_features = np.concatenate(valid_features, axis=0)
valid_labels = np.concatenate(valid_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
valid_labels = np.argmax(valid_labels, axis=1)

# Make predictions on the validation set using the logistic regression model
y_pred = logistic_model.predict(valid_features)

# Evaluate the model using accuracy, F1-score, omission, and commission metrics on the validation set
metrics(valid_labels, y_pred)

Found 3235 files belonging to 2 classes.
Found 186 files belonging to 2 classes.


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


F1-score: 0.9304812834224598
Accuracy: 0.9301075268817204
Omission: 0.06451612903225806
Commission: 0.07526881720430108
Recall: 0.9354838709677419
Precision: 0.925531914893617
Confusion matrix:
TP: 87
FP: 7
TN: 86
FN: 6
Macro F1-score: 0.9301055065760948


Multiple features

In [8]:
# Load the training set of images and labels
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/train_aug_resized_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the training set
train_dataset = train_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Load the validation set of images and labels
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/val_resized_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the validation set
valid_dataset = valid_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Define the CNN architectures
models = [
    tf.keras.applications.ResNet50(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.InceptionV3(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.Xception(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    )
]

# Extract features from the images in the training set using the trained CNNs
train_features = []
train_labels = []
for images, labels in train_dataset:
    features = []
    for model in models:
        features.append(model.predict(images))
    train_features.append(np.concatenate(features, axis=1))
    train_labels.append(labels)
train_features = np.concatenate(train_features, axis=0)
train_labels = np.concatenate(train_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
train_labels = np.argmax(train_labels, axis=1)

# Train a logistic regression model on the features and labels in the training set
logistic_model = LogisticRegression()
logistic_model.fit(train_features, train_labels)

# Save the weights of the trained models with their respective names
for i, model in enumerate(models):
    model_name = model.name.split('_')[0].lower()
    model.save_weights(f'{model_name}.h5')

# Extract features from the images in the validation set using the trained CNNs
valid_features = []
valid_labels = []
for images, labels in valid_dataset:
    features = []
    for model in models:
        features.append(model.predict(images))
    valid_features.append(np.concatenate(features, axis=1))
    valid_labels.append(labels)
valid_features = np.concatenate(valid_features, axis=0)
valid_labels = np.concatenate(valid_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
valid_labels = np.argmax(valid_labels, axis=1)

# Make predictions on the validation set using the logistic regression model
y_pred = logistic_model.predict(valid_features)

# Evaluate the model using accuracy, F1-score, omission, and commission metrics on the validation set
metrics(valid_labels, y_pred)


Found 3235 files belonging to 2 classes.
Found 186 files belonging to 2 classes.


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


F1-score: 0.989247311827957
Accuracy: 0.989247311827957
Omission: 0.010752688172043012
Commission: 0.010752688172043012
Recall: 0.989247311827957
Precision: 0.989247311827957
Confusion matrix:
TP: 92
FP: 1
TN: 92
FN: 1
Macro F1-score: 0.989247311827957


In [None]:
# # Load the saved weights of the trained models with their respective names
# models = [
#     tf.keras.applications.ResNet50(
#         include_top=False,
#         input_shape=(256, 256, 3),
#         pooling='avg'
#     ),
#     tf.keras.applications.InceptionV3(
#         include_top=False,
#         input_shape=(256, 256, 3),
#         pooling='avg'
#     ),
#     tf.keras.applications.Xception(
#         include_top=False,
#         input_shape=(256, 256, 3),
#         pooling='avg'
#     )
# ]

# for model in models:
#     model_name = model.name.split('_')[0].lower()
#     model.load_weights(f'{model_name}.h5')

In [9]:
import tensorflow as tf
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Load the training set of images and labels
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/train_aug_resized_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the training set
train_dataset = train_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Load the validation set of images and labels
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/val_resized_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the validation set
valid_dataset = valid_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)


# Define the CNN architectures
models = [
    tf.keras.applications.ResNet50(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.InceptionV3(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.Xception(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    )
]

# Extract features from the images in the training set using the trained CNNs
train_features = []
train_labels = []
for images, labels in train_dataset:
    features = []
    for model in models:
        features.append(model.predict(images))
    train_features.append(np.concatenate(features, axis=1))
    train_labels.append(labels)
train_features = np.concatenate(train_features, axis=0)
train_labels = np.concatenate(train_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
train_labels = np.argmax(train_labels, axis=1)

# Define the SVM model
svm = SVC()

# Train the SVM model on the features and labels in the training set
svm.fit(train_features, train_labels)

# Extract features from the images in the validation set using the trained CNN
valid_features = []
valid_labels = []
for images, labels in valid_dataset:
    features = model.predict(images)
    valid_features.append(features)
    valid_labels.append(labels)
valid_features = np.concatenate(valid_features, axis=0)

# Concatenate the features with itself three times
valid_features = np.concatenate([valid_features, valid_features, valid_features], axis=1)

valid_labels = np.argmax(np.concatenate(valid_labels, axis=0), axis=1)

# Make predictions on the validation set using the SVM model
y_pred = svm.predict(valid_features)

# Evaluate the SVM model using accuracy, F1-score, omission, and commission metrics on the validation set
metrics(valid_labels, y_pred)

Found 3235 files belonging to 2 classes.
Found 186 files belonging to 2 classes.
F1-score: 0.8303571428571428
Accuracy: 0.7956989247311828
Omission: 0.0
Commission: 0.40860215053763443
Recall: 1.0
Precision: 0.7099236641221374
Confusion matrix:
TP: 93
FP: 38
TN: 55
FN: 0
Macro F1-score: 0.7868001930501931


In [10]:
from sklearn.ensemble import AdaBoostClassifier

# Load the training set of images and labels
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/train_aug_resized_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the training set
train_dataset = train_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Load the validation set of images and labels
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/val_resized_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the validation set
valid_dataset = valid_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Define the CNN architectures
models = [
    tf.keras.applications.ResNet50(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.InceptionV3(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.Xception(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    )
]

# Extract features from the images in the training set using the trained CNNs
train_features = []
train_labels = []
for images, labels in train_dataset:
    features = []
    for model in models:
        features.append(model.predict(images))
    train_features.append(np.concatenate(features, axis=1))
    train_labels.append(labels)
train_features = np.concatenate(train_features, axis=0)
train_labels = np.concatenate(train_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
train_labels = np.argmax(train_labels, axis=1)

# Train adaboost model on the features and labels in the training set
clf = AdaBoostClassifier(n_estimators=10, random_state=0)
clf.fit(train_features, train_labels)

# Extract features from the images in the validation set using the trained CNNs
valid_features = []
valid_labels = []
for images, labels in valid_dataset:
    features = []
    for model in models:
        features.append(model.predict(images))
    valid_features.append(np.concatenate(features, axis=1))
    valid_labels.append(labels)
valid_features = np.concatenate(valid_features, axis=0)
valid_labels = np.concatenate(valid_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
valid_labels = np.argmax(valid_labels, axis=1)

# Make predictions on the validation set using the logistic regression model
y_pred = clf.predict(valid_features)

# Evaluate the model using accuracy, F1-score, omission, and commission metrics on the validation set
metrics(valid_labels, y_pred)

Found 3235 files belonging to 2 classes.
Found 186 files belonging to 2 classes.
F1-score: 0.923076923076923
Accuracy: 0.9193548387096774
Omission: 0.03225806451612903
Commission: 0.12903225806451613
Recall: 0.967741935483871
Precision: 0.8823529411764706
Confusion matrix:
TP: 90
FP: 12
TN: 81
FN: 3
Macro F1-score: 0.9191655801825294


In [11]:
from sklearn.ensemble import RandomForestClassifier

# Load the training set of images and labels
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/train_aug_resized_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the training set
train_dataset = train_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Load the validation set of images and labels
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/content/drive/MyDrive/SI_flood_dataset/dataset/SI_flood_dataset_split/val_resized_eq',
    image_size=(256, 256),
    batch_size=32,
    label_mode='categorical',
)

# Preprocess the images in the validation set
valid_dataset = valid_dataset.map(
    lambda x, y: (tf.image.per_image_standardization(x), y)
)

# Define the CNN architectures
models = [
    tf.keras.applications.ResNet50(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.InceptionV3(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    ),
    tf.keras.applications.Xception(
        include_top=False,
        input_shape=(256, 256, 3),
        pooling='avg'
    )
]

# Extract features from the images in the training set using the trained CNNs
train_features = []
train_labels = []
for images, labels in train_dataset:
    features = []
    for model in models:
        features.append(model.predict(images))
    train_features.append(np.concatenate(features, axis=1))
    train_labels.append(labels)
train_features = np.concatenate(train_features, axis=0)
train_labels = np.concatenate(train_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
train_labels = np.argmax(train_labels, axis=1)

# Train adaboost model on the features and labels in the training set
clf = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=0)
clf.fit(train_features, train_labels)

# Extract features from the images in the validation set using the trained CNNs
valid_features = []
valid_labels = []
for images, labels in valid_dataset:
    features = []
    for model in models:
        features.append(model.predict(images))
    valid_features.append(np.concatenate(features, axis=1))
    valid_labels.append(labels)
valid_features = np.concatenate(valid_features, axis=0)
valid_labels = np.concatenate(valid_labels, axis=0)

# Convert the 2-dimensional label matrix into a 1-dimensional label vector
valid_labels = np.argmax(valid_labels, axis=1)

# Make predictions on the validation set using the logistic regression model
y_pred = clf.predict(valid_features)

# Evaluate the model using accuracy, F1-score, omission, and commission metrics on the validation set
metrics(valid_labels, y_pred)

Found 3235 files belonging to 2 classes.
Found 186 files belonging to 2 classes.
F1-score: 0.967741935483871
Accuracy: 0.967741935483871
Omission: 0.03225806451612903
Commission: 0.03225806451612903
Recall: 0.967741935483871
Precision: 0.967741935483871
Confusion matrix:
TP: 90
FP: 3
TN: 90
FN: 3
Macro F1-score: 0.967741935483871
