In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset
combined_df = pd.read_excel('combined_sismic_data_updated.xlsx')

# Feature selection: extracting time-related features
combined_df['YEAR'] = combined_df['FECHA'].apply(lambda x: int(x.split('/')[2]))
combined_df['MONTH'] = combined_df['FECHA'].apply(lambda x: int(x.split('/')[1]))
combined_df['DAY'] = combined_df['FECHA'].apply(lambda x: int(x.split('/')[0]))

# Ensure 'HORA' is a string and handle any NaN or invalid values
combined_df['HORA'] = combined_df['HORA'].astype(str).apply(lambda x: x if x != 'nan' else '00:00:00')

# Extracting hour, minute, and second
combined_df['HOUR'] = combined_df['HORA'].apply(lambda x: int(x.split(':')[0]))
combined_df['MINUTE'] = combined_df['HORA'].apply(lambda x: int(x.split(':')[1]))
combined_df['SECOND'] = combined_df['HORA'].apply(lambda x: int(x.split(':')[2]))

# Convert 'FECHA' and 'HORA' to a datetime object for calculating time differences
combined_df['DATETIME'] = pd.to_datetime(combined_df['FECHA'] + ' ' + combined_df['HORA'].astype(str))

# Sort the dataframe by datetime to ensure correct time differences
combined_df = combined_df.sort_values(by='DATETIME').reset_index(drop=True)

# Calculate the time difference in hours until the next event (instead of time since the last event)
combined_df['TIME_UNTIL_NEXT_EVENT'] = combined_df['DATETIME'].diff(-1).abs().dt.total_seconds() / 3600.0

# Shift the TIME_UNTIL_NEXT_EVENT to align with the event that just happened
combined_df['TIME_UNTIL_NEXT_EVENT'] = combined_df['TIME_UNTIL_NEXT_EVENT'].shift(-1)

# Fill missing values (last event) with a large value or remove it
combined_df = combined_df.dropna(subset=['TIME_UNTIL_NEXT_EVENT'])

# Selected features including the new feature
features = ['YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE', 'SECOND', 'LATITUD', 'LONGITUD', 'PROFUNDIDAD', 'MAGNITUD']

# Target variables: Latitude, Longitude, Time Until Next Event, Magnitude
X = combined_df[features]
y = combined_df[['LATITUD', 'LONGITUD', 'TIME_UNTIL_NEXT_EVENT', 'MAGNITUD']]

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

# Reshape the data for CNN-LSTM
n_steps = 3  # number of time steps
n_features = X_train.shape[1]  # This includes all features

# Scaling the data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_reshaped = np.array([X_train_scaled[i-n_steps:i] for i in range(n_steps, len(X_train_scaled))])
y_train_reshaped = y_train[n_steps:]

X_test_reshaped = np.array([X_test_scaled[i-n_steps:i] for i in range(n_steps, len(X_test_scaled))])
y_test_reshaped = y_test[n_steps:]

# Build the CNN-LSTM model
model = Sequential()

# CNN layers
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps, n_features)))
model.add(MaxPooling1D(pool_size=2))

# LSTM layer
model.add(LSTM(50, activation='relu'))

# Dense output layer now predicts 4 values (Latitude, Longitude, Time Until Next Event, Magnitude)
model.add(Dense(4))  # 4 outputs for Latitude, Longitude, Time Until Next Event, and Magnitude

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Summarize the model
model.summary()

# Train the model
history = model.fit(X_train_reshaped, y_train_reshaped, epochs=20, validation_data=(X_test_reshaped, y_test_reshaped))

  super().__init__(


Epoch 1/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17ms/step - loss: 849.6828 - val_loss: 399.0614
Epoch 2/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - loss: 463.2939 - val_loss: 396.7393
Epoch 3/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - loss: 703.0879 - val_loss: 395.6121
Epoch 4/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - loss: 461.0047 - val_loss: 396.5733
Epoch 5/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - loss: 749.0748 - val_loss: 394.8191
Epoch 6/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - loss: 393.5349 - val_loss: 395.2480
Epoch 7/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - loss: 478.3469 - val_loss: 394.4949
Epoch 8/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - loss: 487.8032 - val_loss: 3

In [12]:
from datetime import datetime
from sklearn.metrics import mean_squared_error
import numpy as np

# Example input data for prediction (replace with actual input data)
new_input = np.array([X_test_scaled[0:n_steps]])  # Taking the first few steps from the test data as an example

# Reshape the input data to match the model's expected input shape
new_input_reshaped = new_input.reshape((1, n_steps, n_features))

# Predict the next seismic event's latitude, longitude, time until next event, and magnitude
prediction = model.predict(new_input_reshaped)

# Extract the predictions
predicted_latitude = prediction[0][0]
predicted_longitude = prediction[0][1]
predicted_time_until_next_event = prediction[0][2]
predicted_magnitude = prediction[0][3]

# Get the last event's date and time
last_event_datetime = combined_df['DATETIME'].iloc[-1]

# Predict the date of the next event
predicted_event_datetime = last_event_datetime + pd.to_timedelta(predicted_time_until_next_event, unit='h')

# Display the predictions
print(f"Predicted Latitude: {predicted_latitude}")
print(f"Predicted Longitude: {predicted_longitude}")
print(f"Predicted Date and Time of the next event: {predicted_event_datetime}")
print(f"Predicted Magnitude: {predicted_magnitude}")

# Ensure y_test_reshaped and y_pred are NumPy arrays
y_test_reshaped = np.array(y_test_reshaped)
y_pred = model.predict(X_test_reshaped)

# Calculate Mean Squared Error (MSE) for latitude, longitude, time since last event, and magnitude
mse_latitude = mean_squared_error(y_test_reshaped[:, 0], y_pred[:, 0])
mse_longitude = mean_squared_error(y_test_reshaped[:, 1], y_pred[:, 1])
mse_time = mean_squared_error(y_test_reshaped[:, 2], y_pred[:, 2])
mse_magnitude = mean_squared_error(y_test_reshaped[:, 3], y_pred[:, 3])

# Calculate standard deviations as a measure of certainty
std_dev_latitude = np.sqrt(mse_latitude)
std_dev_longitude = np.sqrt(mse_longitude)
std_dev_time = np.sqrt(mse_time)
std_dev_magnitude = np.sqrt(mse_magnitude)

# Updated Certainty Calculation
# Calculate the range of each feature in the test set
lat_range = np.max(y_test_reshaped[:, 0]) - np.min(y_test_reshaped[:, 0])
lon_range = np.max(y_test_reshaped[:, 1]) - np.min(y_test_reshaped[:, 1])
time_range = np.max(y_test_reshaped[:, 2]) - np.min(y_test_reshaped[:, 2])
mag_range = np.max(y_test_reshaped[:, 3]) - np.min(y_test_reshaped[:, 3])

# Calculate probability as inverse of the error, normalized to a range [0, 1]
probability_latitude = max(0, 1 - std_dev_latitude / lat_range)
probability_longitude = max(0, 1 - std_dev_longitude / lon_range)
probability_time = max(0, 1 - std_dev_time / time_range)
probability_magnitude = max(0, 1 - std_dev_magnitude / mag_range)

# Ensure probabilities do not exceed 100%
probability_latitude = min(probability_latitude, 1)
probability_longitude = min(probability_longitude, 1)
probability_time = min(probability_time, 1)
probability_magnitude = min(probability_magnitude, 1)

# Display predictions with certainty and probability
print(f"Predicted Latitude: {predicted_latitude} ± {std_dev_latitude} (Certainty: {probability_latitude*100:.2f}%)")
print(f"Predicted Longitude: {predicted_longitude} ± {std_dev_longitude} (Certainty: {probability_longitude*100:.2f}%)")
print(f"Predicted Date and Time of the next event: {predicted_event_datetime} ± {std_dev_time} hours (Certainty: {probability_time*100:.2f}%)")
print(f"Predicted Magnitude: {predicted_magnitude} ± {std_dev_magnitude} (Certainty: {probability_magnitude*100:.2f}%)")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Predicted Latitude: -11.391161918640137
Predicted Longitude: -77.7036361694336
Predicted Date and Time of the next event: 2024-12-09 10:26:57.933060
Predicted Magnitude: 4.835737228393555
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Predicted Latitude: -11.391161918640137 ± 4.5097002605573815 (Certainty: 77.43%)
Predicted Longitude: -77.7036361694336 ± 3.3862917397217807 (Certainty: 77.55%)
Predicted Date and Time of the next event: 2024-12-09 10:26:57.933060 ± 39.29922264497465 hours (Certainty: 96.41%)
Predicted Magnitude: 4.835737228393555 ± 0.465532616802117 (Certainty: 90.69%)
