# Importing Packages

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder


# Load Data

In [2]:
df = pd.read_csv("./datasets/earthquake data.csv")
df

Unnamed: 0,Date & Time,Latitude,Longitude,Depth,Magnitude,Lands,Country
0,11/21/2023 17:43,31.592,-104.549,3,2.7,WESTERN,TEXAS
1,11/21/2023 17:31,-24.200,-67.580,198,4.0,"SALTA,",ARGENTINA
2,11/21/2023 17:27,31.647,-104.017,5,2.9,WESTERN,TEXAS
3,11/21/2023 17:01,46.345,13.601,0,0.8,SLOVENIA,SLOVENIA
4,11/21/2023 16:56,-4.320,130.810,10,3.3,BANDA,SEA
...,...,...,...,...,...,...,...
53534,5/14/2023 9:50,18.080,-103.270,7,3.5,"OFFSHORE, MICHOACAN,",MEXICO
53535,5/14/2023 9:49,11.520,141.620,10,5.1,"STATE, OF, YAP,",MICRONESIA
53536,5/14/2023 9:48,19.400,-155.240,2,2.2,"ISLAND, OF, HAWAII,",HAWAII
53537,5/14/2023 9:28,15.890,-93.520,106,3.9,"CHIAPAS,",MEXICO


In [3]:
df['Date & Time'] = pd.to_datetime(df['Date & Time'])

# Extracting numerical datetime features


In [4]:
df['Year'] = df['Date & Time'].dt.year
df['Month'] = df['Date & Time'].dt.month
df['Day'] = df['Date & Time'].dt.day
df['Hour'] = df['Date & Time'].dt.hour
df['Minute'] = df['Date & Time'].dt.minute

In [5]:
df.head()

Unnamed: 0,Date & Time,Latitude,Longitude,Depth,Magnitude,Lands,Country,Year,Month,Day,Hour,Minute
0,2023-11-21 17:43:00,31.592,-104.549,3,2.7,WESTERN,TEXAS,2023,11,21,17,43
1,2023-11-21 17:31:00,-24.2,-67.58,198,4.0,"SALTA,",ARGENTINA,2023,11,21,17,31
2,2023-11-21 17:27:00,31.647,-104.017,5,2.9,WESTERN,TEXAS,2023,11,21,17,27
3,2023-11-21 17:01:00,46.345,13.601,0,0.8,SLOVENIA,SLOVENIA,2023,11,21,17,1
4,2023-11-21 16:56:00,-4.32,130.81,10,3.3,BANDA,SEA,2023,11,21,16,56


# One-hot encode categorical features

In [7]:
encoder = OneHotEncoder(sparse=False)
lands_encoded = encoder.fit_transform(df[['Lands']])
country_encoded = encoder.fit_transform(df[['Country']])



In [8]:
lands_encoded

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [9]:
country_encoded

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

# Combine features

In [10]:
X = np.hstack((df[['Latitude', 'Longitude', 'Magnitude', 'Year', 'Month', 'Day', 'Hour', 'Minute']].values, lands_encoded, country_encoded))
y = df['Depth'].values.reshape(-1, 1)

In [11]:
X.shape

(53539, 917)

# Normalize the features


In [12]:
scaler_x = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler_x.fit_transform(X)

# Normalize the target variable


In [14]:
scaler_y = MinMaxScaler(feature_range=(0, 1))
y_scaled = scaler_y.fit_transform(y)

# Chronological split (e.g., 80% for training and 20% for testing)

In [15]:
split_index = int(len(df) * 0.8)
X_train, X_test = X_scaled[:split_index], X_scaled[split_index:]
y_train, y_test = y_scaled[:split_index], y_scaled[split_index:]

# Reshape data for LSTM input [samples, time steps, features]


In [16]:
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Build the LSTM model

In [17]:
model = Sequential()
model.add(LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))

  super().__init__(**kwargs)


In [18]:
model.summary()

In [19]:
# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [20]:
# Define early stopping callback to prevent overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

In [21]:
# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stop], verbose=1)

Epoch 1/50
[1m1339/1339[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - loss: 0.0046 - mae: 0.0350 - val_loss: 0.0037 - val_mae: 0.0319
Epoch 2/50
[1m1339/1339[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - loss: 0.0032 - mae: 0.0295 - val_loss: 0.0036 - val_mae: 0.0300
Epoch 3/50
[1m1339/1339[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0030 - mae: 0.0281 - val_loss: 0.0034 - val_mae: 0.0300
Epoch 4/50
[1m1339/1339[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 0.0030 - mae: 0.0276 - val_loss: 0.0035 - val_mae: 0.0297
Epoch 5/50
[1m1339/1339[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0029 - mae: 0.0269 - val_loss: 0.0034 - val_mae: 0.0300
Epoch 6/50
[1m1339/1339[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0029 - mae: 0.0266 - val_loss: 0.0034 - val_mae: 0.0283
Epoch 7/50
[1m1339/1339[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s

In [22]:
model.save('earthquake_depth_prediction_model.h5')
print("Model saved to earthquake_depth_prediction_model.h5")



Model saved to earthquake_depth_prediction_model.h5


# Evaluate the model

In [23]:
mse = model.evaluate(X_test, y_test, verbose=0)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: [0.0033588882070034742, 0.028044190257787704]


In [24]:
# Make predictions
y_pred = model.predict(X_test)

# Inverse transform predictions to original scale
y_pred_inv = scaler_y.inverse_transform(y_pred)
y_test_inv = scaler_y.inverse_transform(y_test)

[1m335/335[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step


In [25]:
y_pred_inv

array([[110.936516],
       [ 19.90047 ],
       [  8.999727],
       ...,
       [ 28.114359],
       [109.63833 ],
       [135.63629 ]], dtype=float32)

In [None]:
# Example of predicting a new data point
new_data_point = np.array([[latitude_value, longitude_value, magnitude_value, year_value, month_value, day_value, hour_value, minute_value] + lands_encoded_value + country_encoded_value])  # Replace with actual values and encoded values
new_data_point_scaled = scaler_x.transform(new_data_point)
new_data_point_reshaped = new_data_point_scaled.reshape((1, 1, X_train.shape[2]))  # Adjust according to the number of features
predicted_depth = model.predict(new_data_point_reshaped)
predicted_depth_inv = scaler_y.inverse_transform(predicted_depth)

print(f'Predicted Depth: {predicted_depth_inv[0][0]}')