<a href="https://colab.research.google.com/github/padmapriyavj/Geoshake_Prediction/blob/main/Geoshake.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install pandas numpy requests matplotlib networkx torch torch-geometric scikit-learn fireducks


Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting fireducks
  Downloading fireducks-1.2.6-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloa

In [2]:
!pip install torch torchvision torchaudio --quiet
!pip install torch-geometric -f https://data.pyg.org/whl/torch-2.0.0+cpu.html --quiet
!pip install pandas numpy requests matplotlib scikit-learn networkx --quiet


In [3]:
import requests
import json

url = "https://earthquake.usgs.gov/fdsnws/event/1/query.geojson?starttime=2024-01-01%2000:00:00&endtime=2025-03-30%2023:59:59&maxlatitude=71.117&minlatitude=9.926&maxlongitude=-27.422&minlongitude=-170.156&minmagnitude=2.5&orderby=time"

response = requests.get(url)
earthquake_data = response.json()

# Save to file
with open("earthquake_data.geojson", "w") as f:
    json.dump(earthquake_data, f)

print(f"Total earthquakes retrieved: {len(earthquake_data['features'])}")


Total earthquakes retrieved: 11883


In [4]:
import fireducks.pandas as pd
import datetime

# Extract the list of earthquake event dictionaries from the GeoJSON response
features = earthquake_data["features"]

# Prepare an empty list to store cleaned and structured data
data_rows = []

# Iterate over each earthquake feature in the GeoJSON data
for eq in features:
    # Get the [longitude, latitude, depth] of the earthquake
    coords = eq["geometry"]["coordinates"]

    # Get additional information like magnitude and timestamp from the properties
    props = eq["properties"]

    # Build a dictionary for each earthquake and add it to the data_rows list
    data_rows.append({
        "longitude": coords[0],  # Longitude value (X coordinate)
        "latitude": coords[1],   # Latitude value (Y coordinate)
        "depth": coords[2],      # Depth of the quake in kilometers
        "magnitude": props["mag"],  # Magnitude on the Richter scale
        # Convert UNIX time in milliseconds to a UTC datetime object
        "time": datetime.datetime.utcfromtimestamp(props["time"] / 1000.0)
    })

# Convert the list of dictionaries into a DataFrame for tabular analysis
df = pd.DataFrame(data_rows)

# Add a new column: Unix timestamp (in seconds), derived from the 'time' column
# pd.to_datetime ensures datetime compatibility; astype(int) gives nanoseconds → divide by 1e9 to get seconds
df["timestamp"] = pd.to_datetime(df["time"]).astype(int) / 10**9

# Show the first 5 rows of the DataFrame for a quick preview
df.head()


Unnamed: 0,longitude,latitude,depth,magnitude,time,timestamp
0,-147.5834,64.9454,16.2,3.2,2025-03-30 21:05:03.399,1743369000.0
1,-125.8267,42.1376,10.0,2.7,2025-03-30 17:54:41.621,1743357000.0
2,-113.9866,36.5048,8.096,2.5,2025-03-30 17:35:50.386,1743356000.0
3,-64.740167,18.115667,19.5,3.24,2025-03-30 14:45:27.660,1743346000.0
4,-155.818833,19.280167,8.23,3.31,2025-03-30 14:25:27.640,1743345000.0


In [5]:
from sklearn.neighbors import NearestNeighbors
import networkx as nx
import numpy as np

# Set the number of nearest neighbors to connect each earthquake with
K = 5

# Extract latitude and longitude columns from the DataFrame and convert to NumPy array
# This will be the input for the KNN model (2D spatial data)
coords = df[["latitude", "longitude"]].values

# Initialize the NearestNeighbors model with K+1 neighbors
# (+1 because the closest neighbor will be the point itself, which we will ignore later)
knn = NearestNeighbors(n_neighbors=K + 1)

# Fit the model to the coordinate data
# This builds an internal structure (like a KD-Tree) to allow fast nearest-neighbor search
knn.fit(coords)

# Find the indices of the nearest neighbors for each point
# return_distance=False because we only need the neighbor indices, not how far they are
neighbors = knn.kneighbors(coords, return_distance=False)

# Initialize an empty list to store graph edges
edges = []

# Loop over each point and its neighbor list
for i, nbrs in enumerate(neighbors):
    # Skip the first neighbor (which is the point itself), and add edges to its K nearest neighbors
    for j in nbrs[1:]:
        edges.append((i, j))  # Create a directed edge from point i to point j

# Create an empty undirected graph
G = nx.Graph()

# Add each earthquake as a node, with its full attributes (lat, lon, mag, etc.)
for idx, row in df.iterrows():
    G.add_node(idx, **row.to_dict())

# Add the edges between nearest neighbors to form spatial connections
G.add_edges_from(edges)

# Output the final size of the graph
print(f"Graph created with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")


Graph created with 11883 nodes and 38483 edges.


In [8]:
from torch_geometric.data import Data
import torch
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Normalize node features (lat, lon, depth, timestamp)
feature_scaler = StandardScaler()
scaled_features = feature_scaler.fit_transform(df[["latitude", "longitude", "depth", "timestamp"]])
x = torch.tensor(scaled_features, dtype=torch.float)
print(x.shape)

# Normalize magnitude using MinMaxScaler (0 to 1 range)
target_scaler = MinMaxScaler()
scaled_mags = target_scaler.fit_transform(df[["magnitude"]])
y = torch.tensor(scaled_mags.flatten(), dtype=torch.float)

# Edge index stays the same
edge_index = torch.tensor(list(G.edges)).t().contiguous()
print(edge_index)

# Create PyG data object
data = Data(x=x, edge_index=edge_index, y=y)
print(data)


torch.Size([11883, 4])
tensor([[    0,     0,     0,  ..., 11868, 11869, 11874],
        [11263,  3962,   359,  ..., 11869, 11870, 11880]])
Data(x=[11883, 4], edge_index=[2, 38483], y=[11883])
