In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Flood risk: read the balanced table and separate the target column
# ("Risk_Label") from the remaining feature columns.
flood_df = pd.read_csv("dataset/flood_data_balanced.csv")
y = flood_df["Risk_Label"]
X = flood_df.drop(columns="Risk_Label")

# Keep 20% of the rows aside for evaluation; the fixed seed makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# NOTE(review): the recorded run of this cell reports accuracy 1.0 for the
# random forest. Worth confirming that no feature column encodes Risk_Label
# and that the balancing step did not place copies of the same row on both
# sides of the split.

# --- Random forest: fit on the training rows, predict the held-out rows ---
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

print("\nRandom Forest (Flood):")
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_rf))
print(classification_report(y_true=y_test, y_pred=y_pred_rf))

# --- Logistic regression: same split, raised iteration cap for the solver ---
lr_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)

print("\nLogistic Regression (Flood):")
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_lr))
print(classification_report(y_true=y_test, y_pred=y_pred_lr))



Random Forest (Flood):
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1543
           1       1.00      1.00      1.00      1490

    accuracy                           1.00      3033
   macro avg       1.00      1.00      1.00      3033
weighted avg       1.00      1.00      1.00      3033


Logistic Regression (Flood):
Accuracy: 0.921529838443785
              precision    recall  f1-score   support

           0       0.95      0.90      0.92      1543
           1       0.90      0.95      0.92      1490

    accuracy                           0.92      3033
   macro avg       0.92      0.92      0.92      3033
weighted avg       0.92      0.92      0.92      3033



In [None]:
# Earthquake risk: read the balanced table and separate the target column
# ("Risk_Label") from the remaining feature columns.
quake_df = pd.read_csv("dataset/earthquake_data_balanced.csv")
y_q = quake_df["Risk_Label"]
X_q = quake_df.drop(columns="Risk_Label")

# Keep 20% of the rows aside for evaluation; the fixed seed makes the
# split repeatable from run to run.
Xq_train, Xq_test, yq_train, yq_test = train_test_split(
    X_q,
    y_q,
    test_size=0.2,
    random_state=42,
)

# NOTE(review): the recorded run of this cell reports accuracy 1.0 for the
# random forest. Worth confirming that no feature column encodes Risk_Label
# and that the balancing step did not place copies of the same row on both
# sides of the split.

# --- Random forest: fit on the training rows, predict the held-out rows ---
rf_model_q = RandomForestClassifier(random_state=42).fit(Xq_train, yq_train)
yq_pred_rf = rf_model_q.predict(Xq_test)

print("\nRandom Forest (Earthquake):")
print("Accuracy:", accuracy_score(y_true=yq_test, y_pred=yq_pred_rf))
print(classification_report(y_true=yq_test, y_pred=yq_pred_rf))

# --- Logistic regression: same split, raised iteration cap for the solver ---
lr_model_q = LogisticRegression(max_iter=1000).fit(Xq_train, yq_train)
yq_pred_lr = lr_model_q.predict(Xq_test)

print("\nLogistic Regression (Earthquake):")
print("Accuracy:", accuracy_score(y_true=yq_test, y_pred=yq_pred_lr))
print(classification_report(y_true=yq_test, y_pred=yq_pred_lr))



Random Forest (Earthquake):
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1564
           1       1.00      1.00      1.00      1530

    accuracy                           1.00      3094
   macro avg       1.00      1.00      1.00      3094
weighted avg       1.00      1.00      1.00      3094


Logistic Regression (Earthquake):
Accuracy: 0.9767291531997414
              precision    recall  f1-score   support

           0       1.00      0.96      0.98      1564
           1       0.96      1.00      0.98      1530

    accuracy                           0.98      3094
   macro avg       0.98      0.98      0.98      3094
weighted avg       0.98      0.98      0.98      3094



In [4]:
from pathlib import Path

import joblib

# Persist the four fitted classifiers (random forest and logistic regression
# for each hazard) so they can be reloaded later with joblib.load.
#
# joblib.dump opens the target path for writing and does not create missing
# parent folders, so make sure the output directory exists first. This is a
# no-op when the folder is already there.
Path('models').mkdir(parents=True, exist_ok=True)

# Earthquake Models
joblib.dump(rf_model_q, 'models/earthquake_rf_model.pkl')
joblib.dump(lr_model_q, 'models/earthquake_lr_model.pkl')

# Flood Models
joblib.dump(rf_model, 'models/flood_rf_model.pkl')
joblib.dump(lr_model, 'models/flood_lr_model.pkl')

print("✅ All models saved successfully!")


✅ All models saved successfully!
