In [1]:
import time
import pandas as pd
import numpy as np
import folium
from folium.plugins import HeatMap
import joblib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

In [2]:

# Start overall timer
start_time = time.time()
csv_path = 'datasets\earthquake_data\Eartquakes-1990-2023.csv'
data = pd.read_csv(csv_path)
# Preview the dataset
data.head()


Unnamed: 0,time,place,status,tsunami,significance,data_type,magnitudo,state,longitude,latitude,depth,date
0,631153353990,"12 km NNW of Meadow Lakes, Alaska",reviewed,0,96,earthquake,2.5,Alaska,-149.6692,61.7302,30.1,1990-01-01 00:22:33.990000+00:00
1,631153491210,"14 km S of Volcano, Hawaii",reviewed,0,31,earthquake,1.41,Hawaii,-155.212333,19.317667,6.585,1990-01-01 00:24:51.210000+00:00
2,631154083450,"7 km W of Cobb, California",reviewed,0,19,earthquake,1.11,California,-122.806167,38.821,3.22,1990-01-01 00:34:43.450000+00:00
3,631155512130,"11 km E of Mammoth Lakes, California",reviewed,0,15,earthquake,0.98,California,-118.846333,37.664333,-0.584,1990-01-01 00:58:32.130000+00:00
4,631155824490,"16km N of Fillmore, CA",reviewed,0,134,earthquake,2.95,California,-118.934,34.546,16.122,1990-01-01 01:03:44.490000+00:00


In [3]:
# Convert 'time' from milliseconds to a datetime object
data['datetime'] = pd.to_datetime(data['time'], unit='ms')

# Extract additional temporal features
data['year'] = data['datetime'].dt.year
data['month'] = data['datetime'].dt.month
data['day'] = data['datetime'].dt.day

# Preview the modified DataFrame
data[['datetime', 'year', 'month', 'day']].head()


Unnamed: 0,datetime,year,month,day
0,1990-01-01 00:22:33.990,1990,1,1
1,1990-01-01 00:24:51.210,1990,1,1
2,1990-01-01 00:34:43.450,1990,1,1
3,1990-01-01 00:58:32.130,1990,1,1
4,1990-01-01 01:03:44.490,1990,1,1


In [4]:
import time
import joblib
import numpy as np
from tqdm import tqdm
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Define features and target
features = ['year', 'month', 'day', 'latitude', 'longitude', 'depth', 'significance']
target = 'magnitudo'

# Drop rows with missing values for the chosen columns
data_ml = data.dropna(subset=features + [target])

# Train-Test Split
X = data_ml[features]
y = data_ml[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Number of estimators (trees)
n_estimators = 100
save_interval = 10  # Save the model every 10 trees

# Initialize Random Forest without training
rf_model = RandomForestRegressor(n_estimators=1, max_depth=10, random_state=42, warm_start=True)


print("Training started...")

# Start timer
start_time = time.time()

# Train each tree sequentially and update progress bar
with tqdm(total=n_estimators, desc="Training Progress", unit="tree") as pbar:
    for i in range(1, n_estimators + 1):
        rf_model.n_estimators = i  # Increase the number of trees incrementally
        rf_model.fit(X_train, y_train)  # Train the model with the new tree

        # Save progress every `save_interval` trees
        if i % save_interval == 0 or i == n_estimators:
            joblib.dump(rf_model, f'rf_model_checkpoint_{i}.pkl')

        # Calculate ETA based on elapsed time
        elapsed_time = time.time() - start_time
        avg_time_per_tree = elapsed_time / i
        remaining_time = avg_time_per_tree * (n_estimators - i)

        # Update progress bar
        pbar.set_postfix(ETA=f"{remaining_time:.2f}s")
        pbar.update(1)

# End timer
end_time = time.time()
training_time = end_time - start_time

print(f"\nRandom Forest Training Completed in {training_time:.2f} seconds")

# Save the final trained model
joblib.dump(rf_model, 'rf_model_final.pkl')
print("Final model saved as 'rf_model_final.pkl'")

# Load the latest checkpoint for verification
latest_checkpoint = f'rf_model_checkpoint_{n_estimators}.pkl'
loaded_rf_model = joblib.load(latest_checkpoint)
print(f"Loaded latest checkpoint from '{latest_checkpoint}'")


Training started...


Training Progress: 100%|██████████| 100/100 [36:29<00:00, 21.90s/tree, ETA=0.00s]  


Random Forest Training Completed in 2189.72 seconds
Final model saved as 'rf_model_final.pkl'
Loaded latest checkpoint from 'rf_model_checkpoint_100.pkl'





In [7]:
import time
import joblib
import numpy as np
import pandas as pd
import folium
from folium.plugins import HeatMap

# Load trained model
rf_model = joblib.load('rf_model_final.pkl')

# User input: Predict for how many months ahead
forecast_months = int(input("Enter number of months to predict ahead: "))
current_year, current_month = time.localtime().tm_year, time.localtime().tm_mon

# Generate future dates
future_dates = []
for i in range(1, forecast_months + 1):
    future_month = (current_month + i) % 12 or 12
    future_year = current_year + ((current_month + i - 1) // 12)
    future_dates.append((future_year, future_month))

# Generate random locations for prediction (latitude, longitude)
num_predictions = 50  # Number of places to predict
future_data = pd.DataFrame({
    'year': np.random.choice([y for y, m in future_dates], num_predictions),
    'month': np.random.choice([m for y, m in future_dates], num_predictions),
    'day': np.random.randint(1, 29, num_predictions),
    'latitude': np.random.uniform(-90, 90, num_predictions),
    'longitude': np.random.uniform(-180, 180, num_predictions),
    'depth': np.random.uniform(5, 700, num_predictions),  # Typical earthquake depths
    'significance': np.random.randint(10, 1000, num_predictions)
})

# Predict magnitudes using the trained model
features = ['year', 'month', 'day', 'latitude', 'longitude', 'depth', 'significance']
future_data['predicted_magnitude'] = rf_model.predict(future_data[features])

# Plot results on a global map
world_map = folium.Map(location=[0, 0], zoom_start=2)

for _, row in future_data.iterrows():
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=row['predicted_magnitude'] * 1.5,  # Scale based on magnitude
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.6,
        popup=f"Magnitude: {row['predicted_magnitude']:.2f}\nDepth: {row['depth']:.2f} km",
    ).add_to(world_map)

# Save and show map
world_map.save("earthquake_predictions.html")
print("Predicted earthquakes saved to 'earthquake_predictions.html'. Open it in a browser.")


Predicted earthquakes saved to 'earthquake_predictions.html'. Open it in a browser.


In [8]:
import time
import joblib
import numpy as np
import pandas as pd
import folium
from folium.plugins import HeatMap

# Load trained model
rf_model = joblib.load('rf_model_final.pkl')

# User input: Select a future time window
current_year, current_month = time.localtime().tm_year, time.localtime().tm_mon

start_month = int(input("Enter the start month (from today) for prediction window (e.g., 2 for two months ahead): "))
end_month = int(input("Enter the end month for prediction window (e.g., 6 for six months ahead): "))

# Generate future dates within the selected time window
future_dates = []
for i in range(start_month, end_month + 1):
    future_month = (current_month + i) % 12 or 12
    future_year = current_year + ((current_month + i - 1) // 12)
    future_dates.append((future_year, future_month))

# Generate random locations for prediction (latitude, longitude)
num_predictions = 50  # Number of locations to predict
future_data = pd.DataFrame({
    'year': np.random.choice([y for y, m in future_dates], num_predictions),
    'month': np.random.choice([m for y, m in future_dates], num_predictions),
    'day': np.random.randint(1, 29, num_predictions),
    'latitude': np.random.uniform(-90, 90, num_predictions),
    'longitude': np.random.uniform(-180, 180, num_predictions),
    'depth': np.random.uniform(5, 700, num_predictions),  # Typical earthquake depths
    'significance': np.random.randint(10, 1000, num_predictions)
})

# Predict magnitudes using the trained model
features = ['year', 'month', 'day', 'latitude', 'longitude', 'depth', 'significance']
future_data['predicted_magnitude'] = rf_model.predict(future_data[features])

# Plot results on a global map
world_map = folium.Map(location=[0, 0], zoom_start=2)

for _, row in future_data.iterrows():
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=row['predicted_magnitude'] * 1.5,  # Scale based on magnitude
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.6,
        popup=f"Magnitude: {row['predicted_magnitude']:.2f}\nDepth: {row['depth']:.2f} km",
    ).add_to(world_map)

# Save and show map
world_map.save("earthquake_predictions.html")
print(f"Predicted earthquakes for {start_month}-{end_month} months ahead saved to 'earthquake_predictions.html'. Open it in a browser.")


Predicted earthquakes for 130-141 months ahead saved to 'earthquake_predictions.html'. Open it in a browser.


In [10]:
import folium
import joblib
import numpy as np
import pandas as pd
from folium.plugins import HeatMap
from datetime import datetime, timedelta

# Load the trained Random Forest model
rf_model = joblib.load("rf_model_final.pkl")

# Get user input for the prediction window (in months from today)
start_month = int(input("Enter the start month (from today) for prediction window: "))
end_month = int(input("Enter the end month for prediction window: "))

# Generate future dates
today = datetime.today()
future_dates = [today + timedelta(days=30 * i) for i in range(start_month, end_month + 1)]

# Number of earthquakes to predict
num_predictions = 200  # Adjust based on preference

# Ensure all columns have the same length
np.random.seed(42)  # For reproducibility
future_data = pd.DataFrame({
    'year': np.random.choice([date.year for date in future_dates], num_predictions),  
    'month': np.random.choice([date.month for date in future_dates], num_predictions),  
    'day': np.random.randint(1, 29, num_predictions),  # Random days
    'latitude': np.random.uniform(-90, 90, num_predictions),  # Random latitudes
    'longitude': np.random.uniform(-180, 180, num_predictions),  # Random longitudes
    'depth': np.random.uniform(0, 700, num_predictions),  # Random depth (0 to 700 km)
    'significance': np.random.uniform(0, 1000, num_predictions),  # Random significance score
})

# Predict magnitudes using the trained model
future_data['magnitudo'] = rf_model.predict(future_data[['year', 'month', 'day', 'latitude', 'longitude', 'depth', 'significance']])

# Initialize the map centered around the world
m = folium.Map(location=[0, 0], zoom_start=2)

# Add earthquake locations with color-coded magnitude
for _, row in future_data.iterrows():
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=row['magnitudo'] * 2,  # Scale magnitude for better visibility
        color='red' if row['magnitudo'] > 6 else 'orange' if row['magnitudo'] > 4 else 'yellow',
        fill=True,
        fill_color='red' if row['magnitudo'] > 6 else 'orange' if row['magnitudo'] > 4 else 'yellow',
        fill_opacity=0.7,
        popup=f"Mag: {row['magnitudo']:.2f}, Depth: {row['depth']:.1f} km"
    ).add_to(m)

# Save and display the map
map_file = "predicted_earthquakes_map.html"
m.save(map_file)
print(f"Map saved as {map_file}. Open it in a browser to view.")


Map saved as predicted_earthquakes_map.html. Open it in a browser to view.
