In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import MarkerCluster
from folium.plugins import HeatMap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [4]:
# Read the flood dataset from disk and preview its first rows.
DATA_PATH = 'realistic_flood_data_sri_lanka.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Flooded,Place,Water_Level_in_Rain_Gauge,Month,Distance_to_River,Soil_Saturation,River_Flow,Drainage_Capacity
0,False,Colombo,107.08,May,0.18,0.57,114.91,0.08
1,True,Kurunegala,99.27,October,0.93,0.84,328.24,0.22
2,True,Anuradhapura,202.67,March,2.1,0.9,490.88,0.78
3,True,Kurunegala,224.19,June,1.05,0.42,396.81,0.1
4,False,Batticaloa,96.82,December,0.17,0.02,429.53,0.79


In [5]:
# Count missing values per column (all zeros means nothing needs imputing).
df.isnull().sum()

Unnamed: 0,0
Flooded,0
Place,0
Water_Level_in_Rain_Gauge,0
Month,0
Distance_to_River,0
Soil_Saturation,0
River_Flow,0
Drainage_Capacity,0


In [6]:
# One-hot encode the two categorical columns. drop_first=True removes the
# first level of each, so that level is represented by all-zero dummies.
df_encoded = pd.get_dummies(df, columns=['Place', 'Month'], drop_first=True)

# Target as 0/1 integers; everything else is the feature matrix.
y = df_encoded['Flooded'].astype(int)
X = df_encoded.drop(columns='Flooded')

# Hold out 20% of the rows for evaluation, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# List the column names of the raw frame (before one-hot encoding).
print(df.columns)

Index(['Flooded', 'Place', 'Water_Level_in_Rain_Gauge', 'Month',
       'Distance_to_River', 'Soil_Saturation', 'River_Flow',
       'Drainage_Capacity'],
      dtype='object')


In [8]:
# List the column names after one-hot encoding; these (minus 'Flooded') are
# the features the model is trained on.
print(df_encoded.columns)

Index(['Flooded', 'Water_Level_in_Rain_Gauge', 'Distance_to_River',
       'Soil_Saturation', 'River_Flow', 'Drainage_Capacity',
       'Place_Batticaloa', 'Place_Colombo', 'Place_Galle', 'Place_Jaffna',
       'Place_Kalutara', 'Place_Kandy', 'Place_Kurunegala', 'Place_Matara',
       'Place_Negombo', 'Place_Rathnapura', 'Place_Trincomalee',
       'Month_August', 'Month_December', 'Month_February', 'Month_January',
       'Month_July', 'Month_June', 'Month_March', 'Month_May',
       'Month_November', 'Month_October', 'Month_September'],
      dtype='object')


In [9]:
# Random forest with 100 trees; the seed is fixed so the fit is repeatable.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

# Fit on the standardised training features.
model.fit(X_train_scaled, y_train)

In [10]:
# Predict on the standardised held-out split.
y_pred = model.predict(X_test_scaled)

# Overall accuracy, reported as a percentage.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Per-class precision, recall and F1.
report = classification_report(y_test, y_pred)
print(report)

Accuracy: 77.50%
              precision    recall  f1-score   support

           0       0.70      0.84      0.76        87
           1       0.85      0.73      0.78       113

    accuracy                           0.78       200
   macro avg       0.78      0.78      0.77       200
weighted avg       0.79      0.78      0.78       200



In [13]:
def predict_flood(place, water_level, distance_to_river, soil_saturation, river_flow, drainage_capacity, month):
    """Predict flooding for a single observation with the fitted model.

    Relies on three notebook-level objects created in earlier cells:
    ``X_train`` (for the training column layout), ``scaler`` (the fitted
    ``StandardScaler``) and ``model`` (the fitted classifier).

    The observation is one-hot encoded the same way as the training data,
    aligned to ``X_train.columns``, and standardised with ``scaler`` before
    prediction, because ``model`` was fitted on the scaled training matrix.

    Parameters
    ----------
    place : str
        Value for the ``Place`` feature. It is encoded by setting the
        ``Place_<place>`` dummy column when that column exists in the
        training data.
    water_level : float
        Value for ``Water_Level_in_Rain_Gauge``.
    distance_to_river : float
        Value for ``Distance_to_River``.
    soil_saturation : float
        Value for ``Soil_Saturation``.
    river_flow : float
        Value for ``River_Flow``.
    drainage_capacity : float
        Value for ``Drainage_Capacity``.
    month : str
        Value for the ``Month`` feature, encoded via ``Month_<month>``.

    Returns
    -------
    str
        ``"Flood predicted!"`` when the model outputs class 1, otherwise
        ``"No flood predicted."``.

    Notes
    -----
    A ``place`` or ``month`` with no matching dummy column is encoded as all
    zeros. That is the correct encoding for the level removed by
    ``drop_first=True``, but it also means a value never seen in training is
    silently treated as that baseline level.
    """
    # Build the dummy column names from the arguments instead of a hand-written
    # list, so every category present in the training data can be represented.
    new_data = {
        'Water_Level_in_Rain_Gauge': [water_level],
        'Distance_to_River': [distance_to_river],
        'Soil_Saturation': [soil_saturation],
        'River_Flow': [river_flow],
        'Drainage_Capacity': [drainage_capacity],
        f'Place_{place}': [1],
        f'Month_{month}': [1],
    }

    # Align to the training layout: columns the model never saw are dropped,
    # and every other dummy column is filled with 0, in training order.
    new_df = pd.DataFrame(new_data).reindex(columns=X_train.columns, fill_value=0)

    # Apply the same standardisation used for the training matrix; the model
    # was fitted on scaled values, so raw values would be on the wrong scale.
    new_scaled = scaler.transform(new_df)

    # Make a prediction
    prediction = model.predict(new_scaled)

    # Output the raw class label, then the human-readable verdict
    print(prediction[0])
    if prediction[0] == 1:
        return "Flood predicted!"
    else:
        return "No flood predicted."

In [21]:
# Example scenario, passed to the predictor as keyword arguments.
sample_inputs = {
    'place': 'Matale',
    'water_level': 100.0,
    'distance_to_river': 10.0,
    'soil_saturation': 10.0,
    'river_flow': 0.0,
    'drainage_capacity': 0.1,
    'month': 'May',
}
result = predict_flood(**sample_inputs)
print(result)

1
Flood predicted!


