In [2]:
# Standard library
import pickle

# Third-party: numerical / tabular tooling
import numpy as np
import pandas as pd

# Third-party: scikit-learn pieces used for splitting, scaling, modelling and evaluation
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

print("✅ Libraries imported.")

✅ Libraries imported.


In [5]:
# Load the historical dataset
df = pd.read_csv('../data/flood_risk_dataset_india.csv')

# --- Preprocessing ---
# 1. One-hot encode the categorical features.
#    The column names contain spaces ('Land Cover', 'Soil Type').
df_processed = pd.get_dummies(df, columns=['Land Cover', 'Soil Type'])

# 2. Define the target (y) and the features (X).
#    The target and the location columns are excluded from the feature matrix.
y = df_processed['Flood Occurred']
X_raw = df_processed.drop(columns=['Flood Occurred', 'Latitude', 'Longitude'])

# 3. Ensure all feature columns are numeric.
#    errors='coerce' turns anything unparseable into NaN; those NaNs (and any
#    values that were already missing) are then replaced with 0. Report how many
#    cells were affected so the zero-fill does not silently hide dirty data.
X_numeric = X_raw.apply(pd.to_numeric, errors='coerce')
n_filled = int(X_numeric.isna().sum().sum())
if n_filled:
    print(f"⚠️ {n_filled} missing or non-numeric feature values were replaced with 0.")
X = X_numeric.fillna(0)

print("✅ Data loaded and preprocessed.")
print("\n--- Processed Data Info ---")
X.info()

✅ Data loaded and preprocessed.

--- Processed Data Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 19 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Rainfall (mm)            10000 non-null  float64
 1   Temperature (°C)         10000 non-null  float64
 2   Humidity (%)             10000 non-null  float64
 3   River Discharge (m³/s)   10000 non-null  float64
 4   Water Level (m)          10000 non-null  float64
 5   Elevation (m)            10000 non-null  float64
 6   Population Density       10000 non-null  float64
 7   Infrastructure           10000 non-null  int64  
 8   Historical Floods        10000 non-null  int64  
 9   Land Cover_Agricultural  10000 non-null  bool   
 10  Land Cover_Desert        10000 non-null  bool   
 11  Land Cover_Forest        10000 non-null  bool   
 12  Land Cover_Urban         10000 non-null  bool   
 13  Land Cover_Water

In [6]:
# Hold out 20% of the rows as a test set. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# --- Feature Scaling ---
# The scaler is fitted on the training split only, then the same fitted scaler
# is applied to both splits, so the test rows never influence its statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("✅ Data split and scaled.")
print(f"Training data shape: {X_train_scaled.shape}")

✅ Data split and scaled.
Training data shape: (8000, 19)


In [7]:
# Random forest with 100 trees; the fixed seed makes the fitted forest repeatable.
rfc_model = RandomForestClassifier(n_estimators=100, random_state=42)

print("Training the model...")
rfc_model.fit(X_train_scaled, y_train)
print("✅ Model training complete.")

# --- Evaluation ---
# Score the held-out test split and print per-class precision / recall / F1.
# NOTE(review): the recorded output of this cell shows ~0.49 accuracy on a
# near-balanced test set (989 vs 1011 rows), i.e. no better than chance.
# Investigate the features/labels before relying on this model.
print("\nEvaluating model performance...")
y_pred = rfc_model.predict(X_test_scaled)
report_text = classification_report(y_test, y_pred)

print("\n--- Classification Report ---")
print(report_text)

Training the model...
✅ Model training complete.

Evaluating model performance...

--- Classification Report ---
              precision    recall  f1-score   support

           0       0.49      0.47      0.48       989
           1       0.50      0.52      0.51      1011

    accuracy                           0.49      2000
   macro avg       0.49      0.49      0.49      2000
weighted avg       0.49      0.49      0.49      2000



In [8]:
# --- Save the Artifacts ---
def _dump_pickle(obj, path):
    """Pickle `obj` to `path`, opened in binary write mode (an existing file is overwritten)."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


# 1. The trained model
_dump_pickle(rfc_model, 'model.pkl')
print("✅ Model saved to 'model.pkl'")

# 2. The fitted scaler
_dump_pickle(scaler, 'scaler.pkl')
print("✅ Scaler saved to 'scaler.pkl'")

# 3. The feature column names, in the column order of X
feature_names = X.columns.tolist()
_dump_pickle(feature_names, 'model_features.pkl')
print("✅ Feature list saved to 'model_features.pkl'")

✅ Model saved to 'model.pkl'
✅ Scaler saved to 'scaler.pkl'
✅ Feature list saved to 'model_features.pkl'
