In [None]:
import warnings
warnings.filterwarnings('ignore')
!pip install --upgrade scikit-learn xgboost lightgbm numpy pandas matplotlib seaborn scipy statsmodels




In [1]:
import pandas as pd
# Path of the CSV file holding the flood records.
dataset_path = '/content/flood_dataset.csv'

# Load the whole file into the DataFrame used by every later cell.
flood_data = pd.read_csv(dataset_path)

In [2]:
# Show the first five rows of the loaded frame as a quick sanity check.
flood_data.head()

Unnamed: 0,Latitude,Longitude,Rainfall,Temperature,Humidity,River_Discharge,Water_Level,Elevation,Land_Cover,Soil_Type,Population_Density,Infrastructure,Historical_Floods,Flood_Occurred
0,18.861663,78.835584,218.999493,34.144337,43.912963,4236.182888,7.415552,377.465434,Water Body,Clay,7276.742184,1,0,1
1,35.570715,77.654451,55.353599,28.778774,27.585422,2472.585219,8.811019,7330.608875,Forest,Peat,6897.736956,0,1,0
2,29.227824,73.108463,103.991908,43.934956,30.108738,977.328053,4.631799,2205.873488,Agricultural,Loam,4361.518494,1,1,1
3,25.361096,85.610733,198.984191,21.569354,34.45369,3683.208933,2.891787,2512.2778,Desert,Sandy,6163.069701,1,1,0
4,12.524541,81.822101,144.626803,32.635692,36.292267,2093.390678,3.188466,2001.818223,Agricultural,Loam,6167.964591,1,0,0


In [None]:
# Show the first five rows again (same call as the previous preview cell).
flood_data.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
flood_data.describe()

In [None]:
# Count the missing entries in each column and print the resulting Series.
missing_per_column = flood_data.isnull().sum()
print(missing_per_column)


In [None]:
# Replace missing values in every numeric column with that column's mean.
#
# The filled Series is assigned back to the frame instead of calling
# `flood_data[col].fillna(..., inplace=True)`: the in-place form operates on an
# intermediate object (chained assignment) and, under pandas copy-on-write,
# leaves `flood_data` unchanged.
#
# NOTE(review): the loop also covers the target column `Flood_Occurred` when it
# is numeric; a mean-filled label would no longer be 0/1 - confirm that the
# target has no missing values.
for col in flood_data.columns:
    if pd.api.types.is_numeric_dtype(flood_data[col]):
        mean_value = flood_data[col].mean()
        flood_data[col] = flood_data[col].fillna(mean_value)

# Re-check the per-column missing counts after imputation.
print(flood_data.isnull().sum())

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Columns stored with the generic `object` dtype are treated as categorical.
categorical_cols = flood_data.select_dtypes(include=['object']).columns

# One fitted encoder per column, kept so that codes can be mapped back to labels.
label_encoders = {}

# Replace each categorical column (cast to str first) by its integer codes.
for column_name in categorical_cols:
    encoder = LabelEncoder()
    as_text = flood_data[column_name].astype(str)
    flood_data[column_name] = encoder.fit_transform(as_text)
    label_encoders[column_name] = encoder

# Print, per column, which original label each integer code stands for.
# Codes are listed in the order in which they first appear in the column.
for column_name in categorical_cols:
    print(f"\nColumn '{column_name}':")
    encoder = label_encoders[column_name]
    for code in flood_data[column_name].unique():
        label = encoder.inverse_transform([code])[0]
        print(f"  Encoded {code} represents '{label}'")


In [None]:

import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise correlations between all columns of the (now fully numeric) frame.
correlation_matrix = flood_data.corr()

# Draw the annotated heatmap on an explicitly created 12x10 inch figure.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title('Correlation Matrix of Flood Dataset')
plt.show()

In [None]:
# Target: the flood indicator column.
y = flood_data['Flood_Occurred']

# Features: every remaining column.
X = flood_data.drop(columns=['Flood_Occurred'])

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LogisticRegression

# Cast the training labels to int before fitting (rebinds y_train for later cells).
y_train = y_train.astype(int)

# Fit the logistic-regression baseline; fit() returns the estimator itself.
lr_model = LogisticRegression(random_state=42, max_iter=2000).fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = lr_model.predict(X_test)
accuracy_lr = accuracy_score(y_test, y_pred)
print(f"Logistic Regression Accuracy: {accuracy_lr * 100:.2f}%")


In [None]:

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Confusion matrix for the current y_pred (the logistic-regression
# predictions when the cells are run in order).
cm = confusion_matrix(y_test, y_pred)

class_names = ['No Flood', 'Flood']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree capped at depth 40, seeded for repeatability.
dt_model = DecisionTreeClassifier(max_depth=40, random_state=42).fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = dt_model.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_pred)
print(f"Decision Tree Accuracy: {accuracy_dt * 100:.2f}%")


In [None]:

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


# Confusion matrix for the current y_pred (the decision-tree predictions
# when the cells are run in order).
cm = confusion_matrix(y_test, y_pred)

class_names = ['No Flood', 'Flood']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest: 100 trees, each limited to depth 20, seeded for repeatability.
rf_model = RandomForestClassifier(n_estimators=100, max_depth=20, random_state=42)
rf_model.fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = rf_model.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy_rf * 100:.2f}%")

In [None]:

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Confusion matrix for the current y_pred (the random-forest predictions
# when the cells are run in order).
cm = confusion_matrix(y_test, y_pred)

class_names = ['No Flood', 'Flood']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
from sklearn.svm import SVC

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# SVMs are not scale invariant, and the feature columns span very different
# magnitudes. Standardise inside a pipeline so the scaler is fitted on the
# training rows only and re-applied automatically at prediction time.
svm_model = make_pipeline(StandardScaler(), SVC(random_state=42))
svm_model.fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = svm_model.predict(X_test)

accuracy_svm = accuracy_score(y_test, y_pred)
print(f"SVM Accuracy: {accuracy_svm * 100:.2f}%")


In [None]:

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


# Confusion matrix for the current y_pred (the SVM predictions when the
# cells are run in order).
cm = confusion_matrix(y_test, y_pred)

class_names = ['No Flood', 'Flood']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
from sklearn.neighbors import KNeighborsClassifier

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# k-NN classifies by distance, so on raw features the large-magnitude columns
# would dominate the neighbourhood. Standardise inside a pipeline so the scaler
# is fitted on the training rows only. The pipeline still exposes
# predict / predict_proba, so it can be used as a voting-ensemble member.
knn_model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
knn_model.fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = knn_model.predict(X_test)

accuracy_knn = accuracy_score(y_test, y_pred)
print(f"KNN Accuracy: {accuracy_knn * 100:.2f}%")


In [None]:

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Confusion matrix for the current y_pred (the k-NN predictions when the
# cells are run in order).
cm = confusion_matrix(y_test, y_pred)

class_names = ['No Flood', 'Flood']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with library defaults, seeded for repeatability.
adb_model = AdaBoostClassifier(random_state=42)
adb_model.fit(X_train, y_train)

# Take the second column of predict_proba (probability of the second class in
# adb_model.classes_) and threshold it at 0.5 to obtain hard 0/1 predictions.
second_class_col = 1
probabilities = adb_model.predict_proba(X_test)[:, second_class_col]
y_pred = (probabilities > 0.5).astype(int)

accuracy_adb = accuracy_score(y_test, y_pred)
print(f"AdaBoost Accuracy: {accuracy_adb * 100:.2f}%")


In [None]:

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


# Confusion matrix for the current y_pred (the AdaBoost predictions when
# the cells are run in order).
cm = confusion_matrix(y_test, y_pred)

class_names = ['No Flood', 'Flood']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
import xgboost as xgb

from sklearn.metrics import accuracy_score

# Regularised XGBoost model (L2 = reg_lambda, L1 = reg_alpha), depth-limited and seeded.
# `use_label_encoder` is intentionally not passed: it is obsolete and current
# xgboost releases only report it as an unused parameter.
xgb_model = xgb.XGBClassifier(
    random_state=42,
    eval_metric='logloss',
    max_depth=5,
    reg_lambda=1.5,
    reg_alpha=1,
)
xgb_model.fit(X_train, y_train)

# Second column of predict_proba, thresholded at 0.5 to obtain hard 0/1 predictions.
probabilities = xgb_model.predict_proba(X_test)[:, 1]
y_pred = (probabilities > 0.5).astype(int)

accuracy_xgb = accuracy_score(y_test, y_pred)

print(f"XGBoost Accuracy: {accuracy_xgb * 100:.2f}%")



In [None]:

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


# Confusion matrix for the current y_pred (the XGBoost predictions when
# the cells are run in order).
cm = confusion_matrix(y_test, y_pred)

class_names = ['No Flood', 'Flood']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
import lightgbm as lgb
from sklearn.metrics import accuracy_score

# LightGBM: 100 boosting rounds, trees limited to depth 5, seeded for repeatability.
lgb_model = lgb.LGBMClassifier(n_estimators=100, max_depth=5, random_state=42)
lgb_model.fit(X_train, y_train)

# Take the second column of predict_proba (probability of the second class in
# lgb_model.classes_) and threshold it at 0.5 to obtain hard 0/1 predictions.
second_class_col = 1
probabilities = lgb_model.predict_proba(X_test)[:, second_class_col]
y_pred = (probabilities > 0.5).astype(int)

accuracy_lgb = accuracy_score(y_test, y_pred)
print(f"LightGBM Accuracy: {accuracy_lgb * 100:.2f}%")



In [None]:

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


# Confusion matrix for the current y_pred (the LightGBM predictions when
# the cells are run in order).
cm = confusion_matrix(y_test, y_pred)

class_names = ['No Flood', 'Flood']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Model labels and their test accuracies, listed in the same order.
models = ['Logistic Regression', 'Decision Tree', 'Random Forest', 'SVM', 'KNN', 'AdaBoost', 'XGBoost', 'LightGBM']
accuracy_scores = [
    accuracy_lr, accuracy_dt, accuracy_rf, accuracy_svm,
    accuracy_knn, accuracy_adb, accuracy_xgb, accuracy_lgb,
]

# Convert the 0-1 fractions to percentages for the y axis.
accuracy_scores_percent = [score * 100 for score in accuracy_scores]

# One colour per bar, in model order.
bar_colors = ['skyblue', 'salmon', 'lightgreen', 'lightcoral', 'lightsteelblue', 'khaki', 'plum', 'palegreen']

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(models, accuracy_scores_percent, color=bar_colors)
ax.set_xlabel("Models")
ax.set_ylabel("Accuracy (%)")
ax.set_title("Accuracy of Different Classification Models")
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()


In [None]:
from sklearn.ensemble import VotingClassifier

# (label, estimator) pairs combined by the ensemble. The labels are only
# identifiers inside the VotingClassifier.
ensemble_members = [
    ('Decision Tree', dt_model),
    ('Random Forest', rf_model),
    ('KNN', knn_model),
    ('XGBoost', xgb_model),
    ('LightGM', lgb_model),
]

# Soft voting averages the members' class probabilities using these weights:
# the two tree-based members count 0.3 each, the other three count 1 each.
member_weights = [0.3, 0.3, 1, 1, 1]

voting_model = VotingClassifier(
    estimators=ensemble_members,
    voting='soft',
    weights=member_weights,
)
voting_model.fit(X_train, y_train)

# Score the ensemble on the held-out rows.
y_pred_voting = voting_model.predict(X_test)
accuracy_voting = accuracy_score(y_test, y_pred_voting)
print(f"Voting Classifier Accuracy: {accuracy_voting * 100:.2f}%")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Confusion matrix of the voting ensemble's test predictions.
cm = confusion_matrix(y_test, y_pred_voting)

class_names = ['No Flood', 'Flood']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix for Voting Classifier')
plt.show()

In [None]:

# Manual single-sample prediction. The `#@param` markers expose each value as a
# form field in Colab.
Latitude = 19.04 #@param {type:"number"}
Longitude = 72.86 #@param {type:"number"}
Rainfall = 1000 #@param {type:"number"}
Temperature = 31 #@param {type:"number"}
Humidity = 30 #@param {type:"number"}
River_Discharge = 126.182888 #@param {type:"number"}
Water_Level = 4.415552 #@param {type:"number"}
Elevation = 15 #@param {type:"number"}
Land_Cover = 4 #@param {type:"number"} # assuming categorical data is pre-encoded
Soil_Type = 4 #@param {type:"number"} # assuming categorical data is pre-encoded
Population_Density = 15627 #@param {type:"number"}
Infrastructure = 0 #@param {type:"number"}
Historical_Floods = 0 #@param {type:"number"}

# Key every value by its column name. The ensemble was fitted on a DataFrame,
# so predicting from a named one-row frame lets scikit-learn check that the
# features line up, instead of silently trusting the position of each value in
# a bare list.
# NOTE(review): names and order are assumed to match the training columns
# (every dataset column except `Flood_Occurred`) - confirm against X.columns.
input_features = {
    'Latitude': Latitude,
    'Longitude': Longitude,
    'Rainfall': Rainfall,
    'Temperature': Temperature,
    'Humidity': Humidity,
    'River_Discharge': River_Discharge,
    'Water_Level': Water_Level,
    'Elevation': Elevation,
    'Land_Cover': Land_Cover,
    'Soil_Type': Soil_Type,
    'Population_Density': Population_Density,
    'Infrastructure': Infrastructure,
    'Historical_Floods': Historical_Floods,
}

# Plain list of the same values, kept under its original name.
input_data = list(input_features.values())

# One-row frame whose columns follow the insertion order of the dict above.
input_frame = pd.DataFrame([input_features])

final_prediction = voting_model.predict(input_frame)[0]

# Class probabilities for the single row; index 1 is read below as the flood class.
probabilities = voting_model.predict_proba(input_frame)[0]

print("Final Prediction:", final_prediction)

if final_prediction == 0:
    print("Flood is not predicted to occur.")
else:
    print("Flood is predicted to occur.")


print(f"Probability of Flood: {probabilities[1] * 100:.2f}%")


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

X = flood_data.drop(columns=['Flood_Occurred'])
y = flood_data['Flood_Occurred']

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the test rows leak into the training features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # statistics learned from training rows only
X_test = scaler.transform(X_test)        # test rows reuse the training statistics
X_scaled = scaler.transform(X)           # full scaled matrix, kept under its original name

# Small fully connected network: 64 -> 32 -> 1 with a sigmoid output for the
# binary flood label.
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation metrics are not an independent estimate.
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
loss, nn_accuracy = model.evaluate(X_test, y_test)
print(f"Neural Network Test Accuracy: {nn_accuracy}")


In [None]:
!pip install catboost

In [None]:
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Rebuild the target / feature matrix and the 80/20 split for this experiment.
y = flood_data['Flood_Occurred']  # Target
X = flood_data.drop(columns=['Flood_Occurred'])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# cat_features lists column positions in X that CatBoost treats as categorical.
# NOTE(review): positions 8 and 9 are presumably Land_Cover and Soil_Type -
# confirm against X.columns.
catboost_params = dict(
    iterations=500,
    learning_rate=0.1,
    depth=6,
    cat_features=[8, 9],
    verbose=100,
)
model = CatBoostClassifier(**catboost_params)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
cat_accuracy = accuracy_score(y_test, y_pred)
print(f"CatBoost Test Accuracy: {cat_accuracy}")


In [None]:
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X = flood_data.drop(columns=['Flood_Occurred'])
y = flood_data['Flood_Occurred']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train HistGradientBoostingClassifier.
# random_state is fixed, as for the other seeded models in this notebook, so
# that repeated runs give the same score: the estimator uses randomness for
# binning subsamples and for its internal validation split when early stopping
# is active.
model = HistGradientBoostingClassifier(max_iter=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
hist_accuracy = accuracy_score(y_test, y_pred)
print(f"HistGradientBoosting Test Accuracy: {hist_accuracy}")


In [None]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Rebuild the target / feature matrix and the 80/20 split for this experiment.
y = flood_data['Flood_Occurred']  # Target
X = flood_data.drop(columns=['Flood_Occurred'])  # Features

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Extra-trees ensemble of 100 trees, seeded; fit() returns the estimator itself.
model = ExtraTreesClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = model.predict(X_test)
extra_accuracy = accuracy_score(y_test, y_pred)
print(f"Extra Trees Test Accuracy: {extra_accuracy}")


In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X = flood_data.drop(columns=['Flood_Occurred'])  # Features
y = flood_data['Flood_Occurred']  # Target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Multi-layer perceptrons are sensitive to feature scale, and the raw columns
# span very different magnitudes. Standardise inside a pipeline so the scaler
# is fitted on the training rows only and re-applied at prediction time.
mlp = make_pipeline(
    StandardScaler(),
    MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=500, activation='relu', solver='adam', random_state=42),
)
mlp.fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = mlp.predict(X_test)

mlp_accuracy = accuracy_score(y_test, y_pred)
print(f"MLPClassifier Test Accuracy: {mlp_accuracy}")


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X = flood_data.drop(columns=['Flood_Occurred'])
y = flood_data['Flood_Occurred']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# k-NN is distance based, so unscaled large-magnitude columns would dominate
# the neighbourhood. The scaler lives inside the pipeline so that each
# cross-validation fold fits it on its own training part only.
knn = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])

# Grid keys use the `<step>__<parameter>` form to address the k-NN step.
param_grid = {
    'knn__n_neighbors': [3, 5, 7, 9],
    'knn__metric': ['euclidean', 'manhattan'],
}
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best pipeline (scaler + k-NN), refitted on the full training split.
best_knn = grid_search.best_estimator_

y_pred = best_knn.predict(X_test)

# Evaluate the model
knnt_accuracy = accuracy_score(y_test, y_pred)
print(f"KNN (with tuning) Test Accuracy: {knnt_accuracy}")


In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV

# Preprocessing
X = flood_data.drop(columns=['Flood_Occurred'])  # Features
y = flood_data['Flood_Occurred']  # Target

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# XGBoost with Randomized Search.
# The estimator is deliberately NOT named `xgb`: that name is the alias of the
# xgboost module imported earlier in the notebook, and rebinding it would break
# any later `xgb.XGBClassifier(...)` call made without re-importing.
# random_state makes the row/column subsampling searched below repeatable.
xgb_clf = XGBClassifier(random_state=42)

param_dist = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 5, 7],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0]
}

# Try 10 random parameter combinations, each scored with 5-fold cross-validation.
random_search = RandomizedSearchCV(xgb_clf, param_dist, n_iter=10, cv=5, verbose=1, random_state=42)
random_search.fit(X_train, y_train)

# Best model after random search
best_xgb = random_search.best_estimator_

# Make predictions
y_pred = best_xgb.predict(X_test)

# Evaluate the model
xgbt_accuracy = accuracy_score(y_test, y_pred)
print(f"XGBoost (with Randomized Search) Test Accuracy: {xgbt_accuracy}")


In [None]:

import matplotlib.pyplot as plt

# Test accuracy (0-1 fraction) of every model trained in this notebook,
# keyed by the label shown on the x axis.
accuracy_scores = dict([
    ('Logistic Regression', accuracy_lr),
    ('Decision Tree', accuracy_dt),
    ('Random Forest', accuracy_rf),
    ('SVM', accuracy_svm),
    ('KNN', accuracy_knn),
    ('AdaBoost', accuracy_adb),
    ('XGBoost', accuracy_xgb),
    ('LightGBM', accuracy_lgb),
    ('Neural Network', nn_accuracy),
    ('CatBoost', cat_accuracy),
    ('HistGradientBoosting', hist_accuracy),
    ('ExtraTrees', extra_accuracy),
    ('MLPClassifier', mlp_accuracy),
    ('KNN (Tuned)', knnt_accuracy),
    ('XGBoost (Tuned)', xgbt_accuracy),
])

# Parallel lists of labels and scores, in insertion order.
models = list(accuracy_scores)
accuracies = [accuracy_scores[name] for name in models]

# Convert the fractions to percentages for the y axis.
accuracy_percentages = [accuracy * 100 for accuracy in accuracies]

fig, ax = plt.subplots(figsize=(15, 6))
ax.bar(models, accuracy_percentages, color='skyblue')
ax.set_xlabel("Models")
ax.set_ylabel("Accuracy (%)")
ax.set_title("Comparison of Model Accuracies")
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:


from sklearn.ensemble import VotingClassifier

from sklearn.ensemble import ExtraTreesClassifier

# Soft-voting ensemble of four members.
# - `weights` must contain exactly one entry per estimator; the previous
#   six-element list did not match the four estimators and made the fit fail.
# - The ExtraTrees member is built explicitly instead of being read from the
#   generic `model` variable, which several earlier cells rebind to different
#   estimators. VotingClassifier refits clones of its members anyway, so an
#   unfitted instance with the same settings is equivalent.
voting_model = VotingClassifier(estimators=[
    ('Random Forest', rf_model),
    ('ExtraTrees', ExtraTreesClassifier(n_estimators=100, random_state=42)),
    ('KNN', best_knn),
    ('XGBoost', best_xgb)
], voting='soft', weights=[0.5, 1, 1, 1])

voting_model.fit(X_train, y_train)

# Score the ensemble on the held-out rows.
y_pred_voting = voting_model.predict(X_test)

accuracy_voting = accuracy_score(y_test, y_pred_voting)
print(f"Voting Classifier Accuracy: {accuracy_voting * 100:.2f}%")


In [None]:
# AdaBoost
# XGBoost (Tuned)
# KNN (Tuned)
# Random Forest
# ExtraTrees
# Voting classifier
# KNN

In [None]:
# Write the list of installed packages with their exact versions to requirements.txt.
!pip freeze > requirements.txt
