In [None]:
!pip install xgboost pandas geopandas matplotlib seaborn

In [None]:
# Step 1
# Fetch the USGS weekly earthquake feed and reduce it to a tidy frame

import pandas as pd

# Location of the weekly summary feed (CSV)
source_url = 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv'

# Download the feed, keep only position and magnitude, discard rows with any
# missing value, then give the three columns capitalised, descriptive names
earthquakes = (
    pd.read_csv(source_url)
    .loc[:, ['latitude', 'longitude', 'mag']]
    .dropna()
    .rename(columns={
        'latitude': 'Latitude',
        'longitude': 'Longitude',
        'mag': 'Magnitude',
    })
)

# Show the first rows of the cleaned frame as the cell output
earthquakes.head()

In [None]:
# Step 2
# Convert to Geospatial format and visualize locations

import geopandas as gpd
import matplotlib.pyplot as plt

# Create geometric points from coordinates (x = longitude, y = latitude)
earthquake_points = gpd.points_from_xy(earthquakes['Longitude'], earthquakes['Latitude'])

# Wrap the DataFrame into a GeoDataFrame and tag it with a coordinate
# reference system. Without a CRS the frame cannot be reprojected with to_crs().
# NOTE(review): EPSG:4326 (WGS 84 lon/lat degrees) is assumed for the USGS
# feed -- confirm against the feed documentation.
geo_quakes = gpd.GeoDataFrame(earthquakes, geometry=earthquake_points, crs="EPSG:4326")

# Plot earthquake locations on a coordinate grid. The Axes returned by .plot()
# is used directly so the title and labels are attached to this figure rather
# than to whatever pyplot considers "current".
# NOTE(review): with a geographic CRS set, geopandas may pick a different
# default aspect ratio than for CRS-less data.
ax = geo_quakes.plot(marker='o', color='crimson', markersize=5, figsize=(10, 6))
ax.set_title('Global Earthquake Locations')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.show()

In [None]:
# Step 3
# Training a ML Model to predict magnitude

import subprocess
import warnings

# Silence specific system warnings from XGBoost.
# The filter is registered BEFORE importing xgboost: a filter only affects
# warnings raised after it is installed, and this warning appears to be raised
# while xgboost is being loaded (TODO confirm for the installed version).
warnings.filterwarnings(
    "ignore",
    message="Your system has an old version of glibc.*",
    category=FutureWarning,
    module="xgboost.core"
)

import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# One seed shared by the data split and the model so re-runs are comparable
RANDOM_SEED = 42

# Optional: Check glibc version (linux only)
try:
    # An argument list runs ldd directly; no shell is needed for a fixed command
    ldd_output = subprocess.check_output(["ldd", "--version"])
    version_info = ldd_output.decode().split('\n')[0]
    print(f"glibc version: {version_info}")
except Exception as err:
    # Deliberately best-effort: a missing or failing ldd must not stop the notebook
    print ("glibc version check failed:", err)

# Define input features and target value
features = earthquakes[['Latitude', 'Longitude']]
target = earthquakes['Magnitude']

# Split data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=RANDOM_SEED
)

# Initialize and train XGBoost regressor
regressor = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=RANDOM_SEED,
)
regressor.fit(X_train, y_train)

# Predict on the held-out rows and report the mean squared error
predictions = regressor.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f"Model Mean Square Error: {mse:.4f}")

In [None]:
# Step 4
# Visualize Predicted Magnitudes Geospatially

# Predict from the coordinate columns of the very frame that receives the
# result, so the predictions always line up with its current rows (a separately
# stored feature frame could be stale if the data cell was re-run).
# Column order matches the order used for training: Latitude, Longitude.
# Note: this scores every row, including rows the model was trained on, so the
# map shows fitted values rather than held-out performance.
earthquakes['Predicted_Magnitude'] = regressor.predict(earthquakes[['Latitude', 'Longitude']])

# Recreate GeoDataFrame with updated data.
# NOTE(review): EPSG:4326 (WGS 84 lon/lat degrees) is assumed for the USGS
# feed -- confirm. Without a CRS the frame cannot be reprojected with to_crs().
geo_quakes = gpd.GeoDataFrame(
    earthquakes,
    geometry=gpd.points_from_xy(earthquakes['Longitude'], earthquakes['Latitude']),
    crs="EPSG:4326",
)

# Plot predicted magnitudes using a color gradient; the returned Axes is used
# directly so the title and labels are attached to this figure explicitly
ax = geo_quakes.plot(
    column='Predicted_Magnitude',
    cmap='coolwarm',
    legend=True,
    markersize=5,
    figsize=(10, 6)
)
ax.set_title("Predicted Earthquake Magnitudes by Location")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()