In [None]:
import os

import gdown

# Shared Google Drive archive holding the `dataset/` folder of images.
DATASET_URL = "https://drive.google.com/file/d/1YUbTBFrk9QF0ivR5F640G3dhCMC3XQUZ/view?usp=sharing"
DATASET_ARCHIVE = "dataset.zip"

# The archive is large (about 1.7 GB), so only fetch it when it is not already
# on disk; this keeps "Restart & Run All" cheap.
# NOTE(review): delete a stale or partial dataset.zip manually to force a re-download.
if not os.path.exists(DATASET_ARCHIVE):
    gdown.download(DATASET_URL, DATASET_ARCHIVE, quiet=False, fuzzy=True)

# extractall returns the list of extracted paths; keep it in a variable and
# print a one-line summary instead of dumping thousands of file names as the
# cell output.
extracted_paths = gdown.extractall(DATASET_ARCHIVE)
print(f"Extracted {len(extracted_paths)} entries from {DATASET_ARCHIVE}")

Downloading...
From: https://drive.google.com/uc?id=1YUbTBFrk9QF0ivR5F640G3dhCMC3XQUZ
To: /content/dataset.zip
100%|██████████| 1.70G/1.70G [00:14<00:00, 116MB/s]


['dataset/',
 'dataset/flooded/',
 'dataset/flooded/0.jpg',
 'dataset/flooded/1.jpg',
 'dataset/flooded/10.jpg',
 'dataset/flooded/100.jpg',
 'dataset/flooded/101.jpg',
 'dataset/flooded/102.jpg',
 'dataset/flooded/103.jpg',
 'dataset/flooded/104.jpg',
 'dataset/flooded/105.jpg',
 'dataset/flooded/106.jpg',
 'dataset/flooded/107.jpg',
 'dataset/flooded/108.jpg',
 'dataset/flooded/109.jpg',
 'dataset/flooded/11.jpg',
 'dataset/flooded/110.jpg',
 'dataset/flooded/111.jpg',
 'dataset/flooded/112.jpg',
 'dataset/flooded/113.jpg',
 'dataset/flooded/114.jpg',
 'dataset/flooded/115.jpg',
 'dataset/flooded/116.jpg',
 'dataset/flooded/117.jpg',
 'dataset/flooded/118.jpg',
 'dataset/flooded/119.jpg',
 'dataset/flooded/12.jpg',
 'dataset/flooded/120.jpg',
 'dataset/flooded/121.jpg',
 'dataset/flooded/122.jpg',
 'dataset/flooded/123.jpg',
 'dataset/flooded/124.jpg',
 'dataset/flooded/125.jpg',
 'dataset/flooded/126.jpg',
 'dataset/flooded/127.jpg',
 'dataset/flooded/128.jpg',
 'dataset/flooded/129

In [None]:
import pandas as pd
import numpy as np
import glob
import os
import cv2
from skimage.feature import hog, graycomatrix, graycoprops
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC as SVMClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report

In [None]:
def list_labelled_images(class_dirs):
    """Index the ``*.jpg`` files of several class folders in one DataFrame.

    Args:
        class_dirs: Iterable of ``(directory, label)`` pairs. Folders are
            visited in the given order and the files of each folder are
            listed in sorted (lexicographic) path order.

    Returns:
        A DataFrame with the columns ``image_path`` and ``label`` and a
        default 0..n-1 index. It is empty (but still has both columns) when
        no file matches.
    """
    # Collect every record first and build the frame once; appending with
    # ``df.loc[len(df)] = ...`` re-allocates the frame on every row.
    records = [
        {'image_path': image_path, 'label': label}
        for directory, label in class_dirs
        for image_path in sorted(glob.glob(f'{directory}/*.jpg'))
    ]
    return pd.DataFrame(records, columns=['image_path', 'label'])


# Label 1 = flooded, label 0 = non-flooded (flooded images come first).
df = list_labelled_images([('dataset/flooded', 1), ('dataset/non-flooded', 0)])

In [None]:
def preprocess(image):
  """Resize an image to a fixed size and convolve it with a 3x3 edge-enhancement kernel.

  Args:
    image: Image array accepted by ``cv2.resize``.

  Returns:
    The resized image after filtering with ``cv2.filter2D``; ``ddepth=-1``
    keeps the same depth as the resized input.
  """
  # dsize argument of cv2.resize (OpenCV takes it as (width, height)).
  target_size = (1352, 1794)
  kernel = np.asarray([
      [0, 1, 0],
      [-1, 6.5, -1],
      [0, 1, 0],
  ])
  return cv2.filter2D(src=cv2.resize(image, target_size), ddepth=-1, kernel=kernel)

In [None]:
def extract_features(image_path):
  """Build the HOG + GLCM feature vector of one image file.

  The file is read with OpenCV, converted from BGR to RGB and passed through
  ``preprocess``. Two groups of features are then concatenated:
    * a HOG descriptor computed on the colour image, and
    * four GLCM properties (contrast, dissimilarity, energy, correlation)
      computed on the grayscale image at distance 1 for the angles
      0, pi/4, pi/2 and 3*pi/4.

  Args:
    image_path: Path of the image file to read.

  Returns:
    A NumPy array holding the HOG values followed by the GLCM contrast,
    dissimilarity, energy and correlation values, in that order.

  Raises:
    OSError: If OpenCV cannot read or decode ``image_path``.
  """
  bgr_image = cv2.imread(image_path)
  if bgr_image is None:
    # cv2.imread reports a missing or undecodable file by returning None;
    # fail here with the offending path instead of inside cvtColor.
    raise OSError(f'Could not read image: {image_path!r}')

  preprocessed_image = preprocess(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))

  hog_feature = hog(preprocessed_image, pixels_per_cell=(128, 128), transform_sqrt=True,
                    cells_per_block=(2, 2), channel_axis=-1)

  # The GLCM is computed on a single-channel image.
  gray_image = cv2.cvtColor(preprocessed_image, cv2.COLOR_RGB2GRAY)
  distances = [1]
  angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
  glcm = graycomatrix(gray_image, distances, angles, levels=256, symmetric=True, normed=True)

  # One value per (distance, angle) pair for each property, flattened.
  glcm_properties = ('contrast', 'dissimilarity', 'energy', 'correlation')
  glcm_features = [graycoprops(glcm, prop).ravel() for prop in glcm_properties]

  return np.concatenate([hog_feature, *glcm_features])

In [None]:
# Extract one feature vector per image, in DataFrame row order.
feature_rows = [extract_features(image_path) for image_path in df['image_path']]
if not feature_rows:
  raise ValueError('No images found: df is empty, so there is nothing to extract features from.')

# Stack the vectors into an (n_images, n_features) matrix. The feature width
# is taken from the data instead of a hard-coded 4212 + 16, so it stays
# correct if the resize or HOG/GLCM settings change; np.stack raises if the
# vectors do not all have the same length.
X = np.stack(feature_rows)

# Labels as float64, matching the dtype the original np.empty buffer produced.
y = df['label'].to_numpy(dtype=float)

In [None]:
# Report the feature-matrix shape and the per-image feature count, one per line.
print(
    f'The shape of the dataset is {X.shape}',
    f'The total number of features in each image is {X.shape[1]}',
    sep='\n',
)

The shape of the dataset is (922, 4228)
The total number of features in each image is 4228


In [None]:
# Hold out 20% of the samples for testing; stratify on the labels so both
# splits keep the same class balance, and fix the seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Random-forest hyper-parameter grid: 4 * 4 * 3 * 3 * 2 = 288 combinations,
# each evaluated with 5-fold cross-validation.
param_grid = {
    'n_estimators': [25, 30, 50, 100],
    'max_depth': [None, 3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

# Seed the forest so the search selects the same parameters on every run
# (an unseeded forest makes the reported "best" parameters change between
# runs), and use all cores since the fits are independent.
grid = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=5,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

# print the best parameters found
print("Best parameters: ", grid.best_params_)

# Forest refit on the whole training split with the best parameters.
rf_classifier = grid.best_estimator_

Best parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}


In [None]:
# One seed for every stochastic estimator so the reported metrics are reproducible.
RANDOM_STATE = 42

xgb_classifier = XGBClassifier(n_estimators=90, random_state=RANDOM_STATE)
# probability=True is required for soft voting; the seed makes the SVC's
# internal probability calibration repeatable.
svm_classifier = SVMClassifier(kernel='poly', degree=3, probability=True,
                               random_state=RANDOM_STATE)
# Reuse the parameters selected by the grid search instead of a hand-copied
# set, which had drifted from the search result (min_samples_split=5 vs 2).
rf_classifier = RandomForestClassifier(**grid.best_params_, random_state=RANDOM_STATE)

# Soft voting averages the predicted class probabilities of the three models.
model = VotingClassifier(
    estimators=[('xgb', xgb_classifier), ('rf', rf_classifier),
                ('svm', svm_classifier)],
    voting='soft'
)
model.fit(X_train, y_train)

# Evaluate on the held-out test split.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.86      0.78      0.82        93
         1.0       0.80      0.87      0.83        92

    accuracy                           0.83       185
   macro avg       0.83      0.83      0.83       185
weighted avg       0.83      0.83      0.83       185



In [None]:
import pickle

# Persist the fitted voting ensemble to model.pkl in the working directory.
# Only unpickle this file from a trusted source: pickle.load can execute
# arbitrary code.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)