In [1]:
# Library Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, confusion_matrix
import joblib
import json


In [2]:
data=pd.read_csv(r"C:\Users\Hp\Downloads\Flood Dataset\flood.csv")
data.head(5)

Unnamed: 0,MonsoonIntensity,TopographyDrainage,RiverManagement,Deforestation,Urbanization,ClimateChange,DamsQuality,Siltation,AgriculturalPractices,Encroachments,...,DrainageSystems,CoastalVulnerability,Landslides,Watersheds,DeterioratingInfrastructure,PopulationScore,WetlandLoss,InadequatePlanning,PoliticalFactors,FloodProbability
0,3,8,6,6,4,4,6,2,3,2,...,10,7,4,2,3,4,3,2,6,0.45
1,8,4,5,7,7,9,1,5,5,4,...,9,2,6,2,1,1,9,1,3,0.475
2,3,10,4,1,7,5,4,7,4,9,...,7,4,4,8,6,1,8,3,6,0.515
3,4,4,2,7,3,4,1,4,6,4,...,4,2,6,6,8,8,6,6,10,0.52
4,3,7,5,2,5,8,5,2,7,5,...,7,6,5,3,3,4,4,3,4,0.475


In [3]:
# Step 2: Dataset Preprocessing
# Check for missing values
if data.isnull().sum().any():
    # Optionally remove rows with missing values or fill them
    data = data.fillna(data.mean())  # Filling missing values with column mean

In [4]:
# Step 3: Define Features and Target
X = data.drop('FloodProbability', axis=1)  # Features (independent variables)
y = data['FloodProbability']  # Target (dependent variable)

In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
# Step 4: Declare and Configure Model
# Initialize Random Forest Regressor
rf_model = RandomForestRegressor(n_estimators=100, random_state=42, max_depth=10)

In [8]:
# Step 5: Train the Model
rf_model.fit(X_train, y_train)

In [9]:
# Step 6: Save Model and Feature Names
joblib.dump(rf_model, "random_forest_flood_model.pkl")  # Saving model
joblib.dump(X.columns.tolist(), "feature_names.pkl")  # Saving feature names

['feature_names.pkl']

In [10]:
# Step 7: Test the Model with Custom Input
# Assume Custom Input as JSON
custom_input = {
    'MonsoonIntensity': 7, 'TopographyDrainage': 8, 'RiverManagement': 8, 'Deforestation': 7,
    'Urbanization': 6, 'ClimateChange': 4, 'DamsQuality': 9, 'Siltation': 3, 'AgriculturalPractices': 9,
    'Encroachments': 9, 'IneffectiveDisasterPreparedness': 9, 'DrainageSystems': 8, 'CoastalVulnerability': 7,
    'Landslides': 7, 'Watersheds': 5, 'DeterioratingInfrastructure': 9, 'PopulationScore': 7,
    'WetlandLoss': 9, 'InadequatePlanning': 9, 'PoliticalFactors': 7
}

# Convert JSON input to a DataFrame for model input
custom_df = pd.DataFrame([custom_input])

# Load the model and predict
rf_loaded_model = joblib.load("random_forest_flood_model.pkl")
prediction = rf_loaded_model.predict(custom_df)

print(f"Flood Probability Prediction for Custom Input: {round(prediction[0], 2) * 100} %")

Flood Probability Prediction for Custom Input: 61.0 %


In [11]:
# Step 8: Test Model Accuracy
y_pred = rf_model.predict(X_test)

# Converting both continuous predictions and true values to binary for classification metrics
y_pred_binary = [1 if x >= 0.5 else 0 for x in y_pred]  # Thresholding predictions
y_test_binary = [1 if x >= 0.5 else 0 for x in y_test]  # Thresholding true values

# Metrics Calculation
f1 = f1_score(y_test_binary, y_pred_binary)
recall = recall_score(y_test_binary, y_pred_binary)
precision = precision_score(y_test_binary, y_pred_binary)
accuracy = accuracy_score(y_test_binary, y_pred_binary)

# Confusion Matrix to calculate specificity
tn, fp, fn, tp = confusion_matrix(y_test_binary, y_pred_binary).ravel()
specificity = tn / (tn + fp)

# Print out the scores
print(f"F1 Score: \t{round(f1, 2)}")
print(f"Recall:   \t{round(recall, 2)}")
print(f"Precision:\t{round(precision, 2)}")
print(f"Accuracy: \t{round(accuracy, 3)}")
print(f"Specificity: \t{round(specificity, 2)}")

F1 Score: 	0.85
Recall:   	0.82
Precision:	0.88
Accuracy: 	0.85
Specificity: 	0.88


In [12]:
from sklearn.metrics import mean_squared_error, r2_score

# Calculate regression metrics
mse = mean_squared_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)  # RMSE is the square root of MSE
r2 = r2_score(y_test, y_pred)

print(f"MSE: \t\t{round(mse, 2)}")
print(f"RMSE: \t\t{round(rmse, 2)}")
print(f"R² Score: \t{round(r2, 2)}")


MSE: 		0.0
RMSE: 		0.03
R² Score: 	0.57


In [13]:
# Step 9: Inference Code for Custom Data Prediction
def predict_flood_probability(custom_json: dict) -> float:
    """
    Load the model and predict flood probability for custom input data.
    
    Args:
        custom_json (dict): A dictionary containing the values of all the flood prediction factors.
        
    Returns:
        float: Predicted flood probability.
    """
    # Load the saved model and feature names
    model = joblib.load("random_forest_flood_model.pkl")
    feature_names = joblib.load("feature_names.pkl")
    
    # Convert JSON input to DataFrame for prediction
    custom_data = pd.DataFrame([custom_json], columns=feature_names)
    
    # Make prediction
    return model.predict(custom_data)[0]


# Example usage of the inference function:
example_input = {
    'MonsoonIntensity': 9, 'TopographyDrainage': 9, 'RiverManagement': 9, 'Deforestation': 9,
    'Urbanization': 10, 'ClimateChange': 9, 'DamsQuality': 9, 'Siltation': 9, 'AgriculturalPractices': 9,
    'Encroachments': 9, 'IneffectiveDisasterPreparedness': 9, 'DrainageSystems': 9, 'CoastalVulnerability': 10,
    'Landslides': 9, 'Watersheds': 9, 'DeterioratingInfrastructure': 9, 'PopulationScore': 9,
    'WetlandLoss': 9, 'InadequatePlanning': 9, 'PoliticalFactors': 10
}

# Predict flood probability for custom input
flood_probability = predict_flood_probability(example_input)
print(f"Predicted Flood Probability for Example Input: {flood_probability}")

Predicted Flood Probability for Example Input: 0.6454025878744707


In [14]:
import joblib

# Load the model
model = joblib.load("random_forest_flood_model.pkl")

# Check the metadata
metadata = model.__getstate__()
print(metadata['_sklearn_version'])

1.2.1
