In [None]:
import os

import numpy as np
import pandas as pd

# Simulate two synthetic tabular datasets (flood and earthquake), derive a
# binary Risk_Label for each from fixed threshold rules, and write both to CSV.

# Set random seed for reproducibility
np.random.seed(42)

# Number of rows
n_rows = 8000

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (no-op when it is already there).
os.makedirs('dataset', exist_ok=True)

# Simulate Flood Data
flood_data = pd.DataFrame({
    'Rainfall_mm': np.random.normal(120, 30, n_rows),
    'River_Level_m': np.random.normal(5, 1.2, n_rows),
    'Soil_Moisture': np.random.uniform(40, 90, n_rows),
    'Temperature_C': np.random.normal(28, 3, n_rows)
})
# A row is high-risk only when all three thresholds are exceeded at once.
flood_data['Risk_Label'] = ((flood_data['Rainfall_mm'] > 130) &
                            (flood_data['River_Level_m'] > 5.5) &
                            (flood_data['Soil_Moisture'] > 70)).astype(int)
flood_data.to_csv('dataset/flood_data.csv', index=False)

# Simulate Earthquake Data
earthquake_data = pd.DataFrame({
    'Magnitude': np.random.normal(5.5, 1, n_rows),
    # normal(10, 5) puts everything below -2 sigma under zero; a negative depth
    # is not physically meaningful, so clamp at 0. The clamp is applied after
    # the draw, so the random stream (and every other column) is unchanged,
    # and Risk_Label is unaffected because its rule is Depth_km < 15.
    'Depth_km': np.clip(np.random.normal(10, 5, n_rows), 0, None),
    'Distance_to_City_km': np.random.uniform(5, 100, n_rows),
    'Population_Density': np.random.uniform(500, 10000, n_rows)
})
# High-risk means strong, shallow and close to a city; Population_Density is
# generated as a feature but does not take part in the label rule.
earthquake_data['Risk_Label'] = ((earthquake_data['Magnitude'] > 6.5) &
                                 (earthquake_data['Depth_km'] < 15) &
                                 (earthquake_data['Distance_to_City_km'] < 30)).astype(int)
earthquake_data.to_csv('dataset/earthquake_data.csv', index=False)

# Report the actual row count rather than a hard-coded number.
print(f"✅ {n_rows}-row datasets saved as 'flood_data.csv' and 'earthquake_data.csv'")


✅ 8000-row datasets saved as 'flood_data.csv' and 'earthquake_data.csv'


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import joblib

In [None]:
import pandas as pd
from imblearn.over_sampling import SMOTE

# Read the unbalanced flood data and separate the predictors from the target
flood_data = pd.read_csv("dataset/flood_data.csv")
y_flood = flood_data["Risk_Label"]
X_flood = flood_data.drop(columns="Risk_Label")

# Oversample with SMOTE; the fixed random_state keeps the synthetic rows repeatable
# NOTE(review): resampling happens on the full dataset here, before any
# train/test split visible in this section. If the balanced file is later
# split for evaluation, synthetic rows could leak into the test set; confirm.
smote = SMOTE(random_state=42)
Xf_res, yf_res = smote.fit_resample(X_flood, y_flood)

# Reassemble the resampled predictors and target into a single frame
flood_balanced = pd.DataFrame(Xf_res, columns=X_flood.columns).assign(Risk_Label=yf_res)

# Persist the balanced data alongside the original CSV
flood_balanced.to_csv("dataset/flood_data_balanced.csv", index=False)
print("✅ Flood balanced dataset saved as 'flood_data_balanced.csv'")

✅ Flood balanced dataset saved as 'flood_data_balanced.csv'


In [None]:
# Read the unbalanced earthquake data and separate the predictors from the target
earthquake_data = pd.read_csv("dataset/earthquake_data.csv")
y_quake = earthquake_data["Risk_Label"]
X_quake = earthquake_data.drop(columns="Risk_Label")

# Oversample with the `smote` sampler created in the flood-balancing cell
# (that cell must have been run first for this name to exist)
Xq_res, yq_res = smote.fit_resample(X_quake, y_quake)

# Reassemble the resampled predictors and target into a single frame
quake_balanced = pd.DataFrame(Xq_res, columns=X_quake.columns).assign(Risk_Label=yq_res)

# Persist the balanced data alongside the original CSV
quake_balanced.to_csv("dataset/earthquake_data_balanced.csv", index=False)
print("✅ Earthquake balanced dataset saved as 'earthquake_data_balanced.csv'")


✅ Earthquake balanced dataset saved as 'earthquake_data_balanced.csv'
