In [1]:
from google.colab import drive

# Mount Google Drive at /content/drive; force_remount=True asks Colab to
# mount again even when a mount is already present.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


With texture and shape features only (approximate F1-score on the test set):
* Random forest -> 86%
* KNN -> 85%
* Logistic regression -> 75%
* SVM (RBF kernel) -> 54%
* AdaBoost -> 89%

In [15]:
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score

def metrics(y_test, y_pred):
    """Print F1-score, accuracy, omission and commission for binary predictions.

    Labels are expected to be encoded as 1 (positive class) and 0 (negative
    class); the confusion counts below test for exactly those two values.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, in the same order as ``y_test``.

    Returns
    -------
    None
        The four metrics are written to standard output.

    Notes
    -----
    Omission is ``fn / (fn + tp)`` and commission is ``fp / (fp + tn)``.
    A rate whose denominator is zero is reported as ``nan``.
    """
    # Local import: the function no longer relies on some other cell having
    # imported numpy before it is called.
    import numpy as np

    # Work on plain positional arrays. A Python list would otherwise be
    # compared as a whole (``[1, 0] == 1`` is False, giving all-zero counts)
    # and two pandas Series would be aligned on their index, not by position.
    y_true = np.asarray(y_test).ravel()
    y_hat = np.asarray(y_pred).ravel()

    # Compute evaluation metrics
    f1 = f1_score(y_true, y_hat)
    accuracy = accuracy_score(y_true, y_hat)

    # Compute confusion matrix counts as Python ints so the divisions below
    # follow ordinary Python semantics.
    tp = int(np.sum((y_true == 1) & (y_hat == 1)))
    fp = int(np.sum((y_true == 0) & (y_hat == 1)))
    tn = int(np.sum((y_true == 0) & (y_hat == 0)))
    fn = int(np.sum((y_true == 1) & (y_hat == 0)))

    # Guard the empty-class case explicitly instead of relying on numpy's
    # divide-by-zero warning to produce nan.
    undefined = float('nan')
    omission = fn / (fn + tp) if (fn + tp) else undefined
    commission = fp / (fp + tn) if (fp + tn) else undefined

    # Print evaluation metrics
    print('F1-score:', f1)
    print('Accuracy:', accuracy)
    print('Omission:', omission)
    print('Commission:', commission)

In [None]:
import numpy as np
import pandas as pd


def _map_cells(frame, func):
    """Apply ``func`` to every cell of ``frame`` and return the new frame.

    ``DataFrame.applymap`` was deprecated in pandas 2.1 in favour of
    ``DataFrame.map``; use whichever the installed pandas provides.
    """
    if hasattr(pd.DataFrame, 'map'):
        return frame.map(func)
    return frame.applymap(func)


def _load_labelled(csv_path, label):
    """Read one feature CSV, strip newlines from string cells and add ``label``."""
    frame = pd.read_csv(csv_path)
    # remove newline characters from all string cells
    frame = _map_cells(frame, lambda x: x.replace('\n', '') if isinstance(x, str) else x)
    frame['label'] = label
    return frame


def _to_float_matrix(frame):
    """Return ``frame`` as a 2D float array with non-numeric cells replaced by 0."""
    # numpy integer scalars are not subclasses of Python ``int``, so they are
    # listed explicitly; otherwise integer-valued features would be zeroed.
    numeric_types = (int, float, np.integer, np.floating)
    cleaned = _map_cells(frame, lambda val: val if isinstance(val, numeric_types) else 0)
    return cleaned.to_numpy(dtype=float)


# Load the feature data for the flooded (label 1) and non-flooded (label 0) images.
# NOTE(review): these paths are relative to the working directory; nothing in
# this cell changes into the mounted Drive folder — confirm where the CSVs live.
train_flooded_features = _load_labelled('train_flooded_features.csv', 1)
train_non_flooded_features = _load_labelled('train_non_flooded_features.csv', 0)

test_flooded_features = _load_labelled('test_flooded_features.csv', 1)
test_non_flooded_features = _load_labelled('test_non_flooded_features.csv', 0)

# concatenate the flooded and non-flooded data
train_features = pd.concat([train_flooded_features, train_non_flooded_features])
test_features = pd.concat([test_flooded_features, test_non_flooded_features])

# split into features (X) and labels (y); both the train and the test split
# are converted to float arrays so every downstream cell sees the same types
X_train = _to_float_matrix(train_features.drop('label', axis=1))
y_train = train_features['label'].to_numpy(dtype=float)

X_test = _to_float_matrix(test_features.drop('label', axis=1))
y_test = test_features['label'].to_numpy(dtype=float)

# Fail here, with a clear message, rather than inside a model's predict().
assert X_train.shape[1] == X_test.shape[1], 'train and test CSVs have a different number of feature columns'


In [12]:
from sklearn.ensemble import RandomForestClassifier

# Fit a shallow random forest (100 trees, depth 2, fixed seed) on the training split.
clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0).fit(X_train, y_train)

# Score the held-out test split and print the evaluation metrics.
y_pred = clf.predict(X_test)
metrics(y_test=y_test, y_pred=y_pred)

[[0, 0, 0, 0, 0, 0.0741500854492187, 0.0086777016165861, 0.8190300694199806, 0.0710317827079999, 65483, 1720.4926035179244, 133.53758834346058, 144.53891969112937], [0, 0, 0, 0, 0, 0.0155105590820312, 0.0072171709983607, 0.7320573943580747, 0.0444697884110194, 65438, 743.9442635585339, 55.49880120543131, 132.00785563880632], [0, 0, 0, 0, 0, -0.1814041137695312, 0.01829861185205, 0.9610585265689576, 0.2255435939496857, 65522, 7382.385494185848, 1007.2243455041124, 158.3786480309684], [0, 0, 0, 0, 0, -0.09820556640625, 0.0072413211374643, 0.7140075881889127, 0.0459798655889511, 65502, 752.636651713959, 21.11398798112497, 102.2134900089128], [0, 0, 0, 0, 0, -0.3607101440429687, 0.0088065455587039, 0.7776188118775932, 0.0752538826514265, 65526, 1764.3799051457163, 101.541709682344, 120.50521481995804], [0, 0, 0, 0, 0, -0.06512451171875, 0.012567587187094, 0.9184750997564504, 0.144147291240595, 65505, 3401.001404958025, 241.27525519944697, 128.04614468161012], [0, 0, 0, 0, 0, -0.47930908203

In [None]:
# use SVM
from sklearn import svm

# Fit a linear-kernel support vector classifier on the training split.
# NOTE(review): the results summary at the top of the notebook reports an
# RBF-kernel SVM, while this cell fits a linear kernel — confirm which is intended.
clf = svm.SVC(kernel='linear').fit(X_train, y_train)

# Score the held-out test split and print the evaluation metrics.
y_pred = clf.predict(X_test)
metrics(y_test=y_test, y_pred=y_pred)

In [39]:
# use knn
from sklearn.neighbors import KNeighborsClassifier

# Fit a 20-nearest-neighbours classifier on the training split.
# NOTE(review): no feature scaling is visible before this cell; distance-based
# models are sensitive to feature magnitude — confirm this is intended.
clf = KNeighborsClassifier(n_neighbors=20).fit(X_train, y_train)

# Score the held-out test split and print the evaluation metrics.
y_pred = clf.predict(X_test)
metrics(y_test=y_test, y_pred=y_pred)

F1-score: 0.854054054054054
Accuracy: 0.8548387096774194
Omission: 0.15053763440860216
Commission: 0.13978494623655913


In [30]:
# use logistic regression
from sklearn.linear_model import LogisticRegression

# Build the logistic regression model with a fixed seed, then fit it on the training split.
clf = LogisticRegression(random_state=0)
clf.fit(X_train, y_train)

# Score the held-out test split and print the evaluation metrics.
y_pred = clf.predict(X_test)
metrics(y_test=y_test, y_pred=y_pred)

F1-score: 0.7515151515151516
Accuracy: 0.7795698924731183
Omission: 0.3333333333333333
Commission: 0.10752688172043011




In [25]:
# use adaboost 
from sklearn.ensemble import AdaBoostClassifier

# Fit an AdaBoost ensemble (10 estimators, fixed seed) on the training split.
clf = AdaBoostClassifier(n_estimators=10, random_state=0).fit(X_train, y_train)

# Score the held-out test split and print the evaluation metrics.
y_pred = clf.predict(X_test)
metrics(y_test=y_test, y_pred=y_pred)

F1-score: 0.8936170212765957
Accuracy: 0.8924731182795699
Omission: 0.0967741935483871
Commission: 0.11827956989247312
