In [1]:
import pandas as pd

In [2]:
# Load the flood-risk table and discard the four columns that are not used
# as model inputs anywhere in this notebook.
df = (
    pd.read_csv('flood_risk_dataset_india.csv')
    .drop(columns=['Latitude', 'Longitude', 'Population Density', 'Infrastructure'])
)

In [3]:
from sklearn.preprocessing import OneHotEncoder

In [4]:
# One-hot encode the two categorical columns. Dense output plus the pandas
# output mode gives a DataFrame that can be concatenated with df directly;
# categories unseen at fit time are encoded as all zeros instead of raising.
# NOTE(review): the encoder is fitted on the full frame, before the
# train/test split below - consider fitting on the training rows only.
ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
ohe.set_output(transform='pandas')
ohetransform = ohe.fit_transform(df.loc[:, ['Land Cover', 'Soil Type']])

In [5]:
# Feature matrix: the first six columns of df (picked by position) placed
# side by side with the one-hot encoded categorical columns.
# NOTE(review): the positional slice assumes those six columns are the
# numeric predictors - confirm against the CSV header.
X = pd.concat([df.iloc[:, :6], ohetransform], axis='columns')

# Target: the 0/1 flood label.
y = df.loc[:, 'Flood Occurred']

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random Forest

In [8]:
from sklearn.ensemble import RandomForestClassifier

In [9]:
# Random forest: 100 trees split on entropy, each limited to depth 14 and
# needing at least 10 samples in a node before it may be split. Seeded so
# the fitted forest is repeatable.
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
    max_depth=14,
    min_samples_split=10,
    criterion='entropy',
)

In [10]:
# Fit the random forest on the training split.
rf.fit(X=X_train, y=y_train)

In [11]:
# Hard class predictions of the random forest for the held-out rows.
y_pred = rf.predict(X=X_test)

In [12]:
# Mean accuracy of the random forest on the test split.
rf.score(X=X_test, y=y_test)

0.535

In [13]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [14]:
# Accuracy recomputed from the stored random-forest predictions.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.535

In [15]:
# Per-class precision / recall / F1 for the random forest.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.52      0.47      0.49       966
           1       0.55      0.60      0.57      1034

    accuracy                           0.54      2000
   macro avg       0.53      0.53      0.53      2000
weighted avg       0.53      0.54      0.53      2000



In [16]:
# Random-forest confusion matrix: rows are true classes, columns are
# predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

[[451 515]
 [415 619]]


# AdaBoost

In [17]:
from sklearn.ensemble import AdaBoostClassifier

In [110]:
# AdaBoost using the discrete SAMME algorithm with a learning rate of 1.
# No base estimator or n_estimators is passed, so the library defaults apply.
# random_state is pinned (matching the seed used for the split, the random
# forest and gradient boosting) so that re-running the notebook rebuilds the
# same ensemble; without it the base learners are seeded differently on
# every run.
# NOTE(review): newer scikit-learn releases deprecate the `algorithm`
# argument - confirm the installed version before removing it.
abc = AdaBoostClassifier(algorithm="SAMME",
                         learning_rate=1,
                         random_state=42)

In [111]:
# Fit the AdaBoost ensemble on the training split.
abc.fit(X=X_train, y=y_train)

In [112]:
# Custom decision rule: call a sample positive only when AdaBoost's estimated
# probability for class 1 reaches the cut-off below.
# NOTE(review): y_pred_custom is not consumed by the evaluation cells that
# follow in this section - they score abc.predict() instead.
threshold = 0.6
y_pred_prob = abc.predict_proba(X=X_test)[:, 1]  # second column = class 1
y_pred_custom = (y_pred_prob >= threshold).astype(int)


In [113]:
# Hard class predictions of AdaBoost for the held-out rows.
y_pred2 = abc.predict(X=X_test)

In [114]:
# Mean accuracy of AdaBoost on the test split.
abc.score(X=X_test, y=y_test)

0.521

In [115]:
# Accuracy recomputed from the stored AdaBoost predictions.
accuracy_score(y_true=y_test, y_pred=y_pred2)

0.521

In [116]:
# Per-class precision / recall / F1 for AdaBoost.
print(classification_report(y_true=y_test, y_pred=y_pred2))

              precision    recall  f1-score   support

           0       0.61      0.02      0.05       966
           1       0.52      0.99      0.68      1034

    accuracy                           0.52      2000
   macro avg       0.56      0.50      0.36      2000
weighted avg       0.56      0.52      0.37      2000



In [117]:
# AdaBoost confusion matrix: rows are true classes, columns are predicted
# classes.
print(confusion_matrix(y_true=y_test, y_pred=y_pred2))

[[  23  943]
 [  15 1019]]


# Gradient Boost

In [25]:
from sklearn.ensemble import GradientBoostingClassifier

In [26]:
# Gradient boosting: 500 boosting stages of depth-3 trees, seeded so the
# fitted model is repeatable.
gb = GradientBoostingClassifier(
    n_estimators=500,
    max_depth=3,
    random_state=42,
)

In [27]:
# Fit the gradient-boosting model on the training split.
gb.fit(X=X_train, y=y_train)

In [28]:
# Hard class predictions of gradient boosting for the held-out rows.
y_pred3 = gb.predict(X=X_test)

In [29]:
# Mean accuracy of gradient boosting on the test split.
gb.score(X=X_test, y=y_test)

0.521

In [30]:
# Accuracy recomputed from the stored gradient-boosting predictions.
accuracy_score(y_true=y_test, y_pred=y_pred3)

0.521

In [31]:
# Per-class precision / recall / F1 for gradient boosting.
print(classification_report(y_true=y_test, y_pred=y_pred3))

              precision    recall  f1-score   support

           0       0.50      0.50      0.50       966
           1       0.54      0.54      0.54      1034

    accuracy                           0.52      2000
   macro avg       0.52      0.52      0.52      2000
weighted avg       0.52      0.52      0.52      2000



In [32]:
# Gradient-boosting confusion matrix: rows are true classes, columns are
# predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=y_pred3))

[[479 487]
 [471 563]]


# SVM

In [33]:
from sklearn.svm import SVC

In [36]:
# Support vector classifier with an RBF kernel; no other hyperparameter is
# overridden.
# NOTE(review): X reaches this model without any scaling step in the cells
# above; RBF SVMs are sensitive to feature scale - consider standardising.
svm = SVC(
    kernel='rbf',
)

In [37]:
# Fit the SVM on the training split.
svm.fit(X=X_train, y=y_train)

In [38]:
# Hard class predictions of the SVM for the held-out rows.
y_pred4 = svm.predict(X=X_test)

In [39]:
# Mean accuracy of the SVM on the test split.
svm.score(X=X_test, y=y_test)

0.5055

In [40]:
# Accuracy recomputed from the stored SVM predictions.
accuracy_score(y_true=y_test, y_pred=y_pred4)

0.5055

In [41]:
# Per-class precision / recall / F1 for the SVM.
print(classification_report(y_true=y_test, y_pred=y_pred4))

              precision    recall  f1-score   support

           0       0.49      0.45      0.47       966
           1       0.52      0.56      0.54      1034

    accuracy                           0.51      2000
   macro avg       0.50      0.50      0.50      2000
weighted avg       0.50      0.51      0.50      2000



In [42]:
# SVM confusion matrix: rows are true classes, columns are predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=y_pred4))

[[436 530]
 [459 575]]


# KNN

In [43]:
from sklearn.neighbors import KNeighborsClassifier

In [44]:
# k-nearest-neighbours classifier. n_neighbors=5 is the library default,
# written out here so the value of k is visible to the reader.
# NOTE(review): distances are computed on unscaled features - consider
# standardising X before fitting.
knn = KNeighborsClassifier(n_neighbors=5)

In [45]:
# Fit the k-NN classifier on the training split.
knn.fit(X=X_train, y=y_train)

In [47]:
# Hard class predictions of k-NN for the held-out rows.
y_pred5 = knn.predict(X=X_test)

In [49]:
# Mean accuracy of k-NN on the test split.
knn.score(X=X_test, y=y_test)

0.514

In [50]:
# Accuracy recomputed from the stored k-NN predictions.
accuracy_score(y_true=y_test, y_pred=y_pred5)

0.514

In [51]:
# Per-class precision / recall / F1 for k-NN.
print(classification_report(y_true=y_test, y_pred=y_pred5))

              precision    recall  f1-score   support

           0       0.50      0.49      0.50       966
           1       0.53      0.53      0.53      1034

    accuracy                           0.51      2000
   macro avg       0.51      0.51      0.51      2000
weighted avg       0.51      0.51      0.51      2000



# MLP

In [52]:
from sklearn.neural_network import MLPClassifier

In [145]:
# Multi-layer perceptron with a single hidden layer of 10 units.
# random_state is pinned because weight initialisation and mini-batch
# shuffling are random; without a seed every run trains a different network
# and the scores reported below cannot be reproduced.
# NOTE(review): the inputs are not scaled anywhere above; neural networks
# usually train poorly on unscaled features - consider standardising X.
mlp = MLPClassifier(hidden_layer_sizes=(10,),
                    random_state=42)

In [146]:
# Fit the MLP on the training split.
mlp.fit(X=X_train, y=y_train)

In [147]:
# Hard class predictions of the MLP for the held-out rows.
y_pred6 = mlp.predict(X=X_test)

In [148]:
# Mean accuracy of the MLP on the test split.
mlp.score(X=X_test, y=y_test)

0.5065

In [149]:
# Accuracy recomputed from the stored MLP predictions.
accuracy_score(y_true=y_test, y_pred=y_pred6)

0.5065

In [150]:
# Per-class precision / recall / F1 for the MLP.
print(classification_report(y_true=y_test, y_pred=y_pred6))

              precision    recall  f1-score   support

           0       0.46      0.12      0.19       966
           1       0.51      0.87      0.65      1034

    accuracy                           0.51      2000
   macro avg       0.49      0.49      0.42      2000
weighted avg       0.49      0.51      0.43      2000



In [151]:
# MLP confusion matrix: rows are true classes, columns are predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=y_pred6))

[[116 850]
 [137 897]]


# Extra Trees

In [152]:
from sklearn.ensemble import ExtraTreesClassifier

In [166]:
# Extremely randomised trees: 500 entropy-split trees, seeded so the fitted
# ensemble is repeatable.
etc = ExtraTreesClassifier(
    n_estimators=500,
    criterion='entropy',
    random_state=21,
)

In [167]:
# Fit the extra-trees ensemble on the training split.
etc.fit(X=X_train, y=y_train)

In [168]:
# Hard class predictions of extra trees for the held-out rows.
y_pred7 = etc.predict(X=X_test)

In [169]:
# Mean accuracy of extra trees on the test split.
etc.score(X=X_test, y=y_test)

0.5165

In [170]:
# Accuracy recomputed from the stored extra-trees predictions.
accuracy_score(y_true=y_test, y_pred=y_pred7)

0.5165

In [171]:
# Per-class precision / recall / F1 for extra trees.
print(classification_report(y_true=y_test, y_pred=y_pred7))

              precision    recall  f1-score   support

           0       0.50      0.50      0.50       966
           1       0.53      0.54      0.53      1034

    accuracy                           0.52      2000
   macro avg       0.52      0.52      0.52      2000
weighted avg       0.52      0.52      0.52      2000

