In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset
combined_df = pd.read_excel('combined_sismic_data_updated.xlsx')

# Feature selection: extracting time-related features
combined_df['YEAR'] = combined_df['FECHA'].apply(lambda x: int(x.split('/')[2]))
combined_df['MONTH'] = combined_df['FECHA'].apply(lambda x: int(x.split('/')[1]))
combined_df['DAY'] = combined_df['FECHA'].apply(lambda x: int(x.split('/')[0]))

# Ensure 'HORA' is a string and handle any NaN or invalid values
combined_df['HORA'] = combined_df['HORA'].astype(str).apply(lambda x: x if x != 'nan' else '00:00:00')

# Extracting hour, minute, and second
combined_df['HOUR'] = combined_df['HORA'].apply(lambda x: int(x.split(':')[0]))
combined_df['MINUTE'] = combined_df['HORA'].apply(lambda x: int(x.split(':')[1]))
combined_df['SECOND'] = combined_df['HORA'].apply(lambda x: int(x.split(':')[2]))

# Selected features
features = ['YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE', 'SECOND', 'LATITUD', 'LONGITUD', 'PROFUNDIDAD', 'MAGNITUD']

# Target variables: Latitude, Longitude, and TIME_SINCE_LAST_EVENT (which will be added next)
X = combined_df[features]
y = combined_df[['LATITUD', 'LONGITUD']]

In [16]:
import numpy as np

# Convert 'FECHA' and 'HORA' to a datetime object for calculating time differences
combined_df['DATETIME'] = pd.to_datetime(combined_df['FECHA'] + ' ' + combined_df['HORA'].astype(str))

# Sort the dataframe by datetime to ensure correct time differences
combined_df = combined_df.sort_values(by='DATETIME').reset_index(drop=True)

# Calculate the time difference in hours since the last event
combined_df['TIME_SINCE_LAST_EVENT'] = combined_df['DATETIME'].diff().dt.total_seconds() / 3600.0

# Fill missing values (first event) with 0
combined_df['TIME_SINCE_LAST_EVENT'] = combined_df['TIME_SINCE_LAST_EVENT'].fillna(0)

# Update the features list to include the new feature
features.append('TIME_SINCE_LAST_EVENT')

# Update X with the new feature
X = combined_df[features]

# Splitting the updated data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Display the first few rows to verify the feature
combined_df[['DATETIME', 'TIME_SINCE_LAST_EVENT']].head()

Unnamed: 0,DATETIME,TIME_SINCE_LAST_EVENT
0,1960-01-04 13:18:23,0.0
1,1960-01-13 15:40:34,218.369722
2,1960-01-15 09:30:24,41.830556
3,1960-01-17 02:57:58,41.459444
4,1960-01-23 03:37:32,144.659444


In [17]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

# Reshape the data for CNN-LSTM
n_steps = 3  # number of time steps
n_features = X_train.shape[1]

# Scaling the data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_reshaped = np.array([X_train_scaled[i-n_steps:i] for i in range(n_steps, len(X_train_scaled))])
y_train_reshaped = y_train[n_steps:]

X_test_reshaped = np.array([X_test_scaled[i-n_steps:i] for i in range(n_steps, len(X_test_scaled))])
y_test_reshaped = y_test[n_steps:]

# Build the CNN-LSTM model
model = Sequential()

# CNN layers
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps, n_features)))
model.add(MaxPooling1D(pool_size=2))

# LSTM layer
model.add(LSTM(50, activation='relu'))

# Dense output layer now predicts 2 values (Latitude and Longitude)
model.add(Dense(2))  # 2 outputs for Latitude and Longitude

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Summarize the model
model.summary()

# Train the model
history = model.fit(X_train_reshaped, y_train_reshaped, epochs=20, validation_data=(X_test_reshaped, y_test_reshaped))

  super().__init__(


Epoch 1/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 787.5737 - val_loss: 22.5287
Epoch 2/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - loss: 21.4147 - val_loss: 19.1361
Epoch 3/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - loss: 18.5396 - val_loss: 17.6154
Epoch 4/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - loss: 17.2883 - val_loss: 16.8468
Epoch 5/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - loss: 16.2404 - val_loss: 16.0417
Epoch 6/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - loss: 15.8384 - val_loss: 15.7205
Epoch 7/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - loss: 15.5549 - val_loss: 15.5032
Epoch 8/20
[1m895/895[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - loss: 15.4873 - val_loss: 16.3086
Epoch 9

In [23]:
from datetime import datetime

# Example input data for prediction (replace with actual input data)
new_input = np.array([X_test_scaled[0:n_steps]])  # Taking the first few steps from the test data as an example

# Reshape the input data to match the model's expected input shape
new_input_reshaped = new_input.reshape((1, n_steps, n_features))

# Predict the next seismic event's latitude and longitude
prediction = model.predict(new_input_reshaped)

# Get the current date
current_date = datetime.now()

# Filter for events with magnitude > 8 that occur after the current date
future_events = combined_df[(combined_df['MAGNITUD'] > 5) & (combined_df['DATETIME'] > current_date)]

# Check if any future events exist
if not future_events.empty:
    future_high_magnitude_event = future_events.iloc[0]
    print(f"Predicted Latitude: {prediction[0][0]}")
    print(f"Predicted Longitude: {prediction[0][1]}")
    print(f"Expected Date and Time for the next event with Magnitude > 5: {future_high_magnitude_event['DATETIME']}")
else:
    print("No future seismic events with a magnitude greater than 5 found in the dataset.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Predicted Latitude: -10.874484062194824
Predicted Longitude: -76.21995544433594
Expected Date and Time for the next event with Magnitude > 5: 2024-09-01 01:05:04


In [29]:
from datetime import datetime
from sklearn.metrics import mean_squared_error
import numpy as np

# Example input data for prediction (replace with actual input data)
new_input = np.array([X_test_scaled[0:n_steps]])  # Taking the first few steps from the test data as an example

# Reshape the input data to match the model's expected input shape
new_input_reshaped = new_input.reshape((1, n_steps, n_features))

# Predict the next seismic event's latitude and longitude
prediction = model.predict(new_input_reshaped)

# Get the current date
current_date = datetime.now()

# Filter for events with magnitude > 5 that occur after the current date
future_events = combined_df[(combined_df['MAGNITUD'] > 5) & (combined_df['DATETIME'] > current_date)]

# Check if any future events exist
if not future_events.empty:
    future_high_magnitude_event = future_events.iloc[0]
    predicted_magnitude = future_high_magnitude_event['MAGNITUD']
    print(f"Expected Date and Time for the next event with Magnitude > 5: {future_high_magnitude_event['DATETIME']}")
    print(f"Predicted Magnitude: {predicted_magnitude}")
else:
    future_high_magnitude_event = None
    print("No future seismic events with a magnitude greater than 5 found in the dataset.")

# Predict on the test set to calculate MSE
y_pred = model.predict(X_test_reshaped)

# Convert y_test_reshaped to a NumPy array if it isn't already
y_test_reshaped = np.array(y_test_reshaped)

# Calculate Mean Squared Error (MSE) for latitude and longitude
mse_latitude = mean_squared_error(y_test_reshaped[:, 0], y_pred[:, 0])
mse_longitude = mean_squared_error(y_test_reshaped[:, 1], y_pred[:, 1])

# Calculate standard deviations as a measure of certainty
std_dev_latitude = np.sqrt(mse_latitude)
std_dev_longitude = np.sqrt(mse_longitude)

# **Updated Certainty Calculation**
# Calculate the range of latitude and longitude in the test set
lat_range = np.max(y_test_reshaped[:, 0]) - np.min(y_test_reshaped[:, 0])
lon_range = np.max(y_test_reshaped[:, 1]) - np.min(y_test_reshaped[:, 1])

# Calculate probability as inverse of the error, normalized to a range [0, 1]
probability_latitude = max(0, 1 - std_dev_latitude / lat_range)
probability_longitude = max(0, 1 - std_dev_longitude / lon_range)

# Ensure probabilities do not exceed 100%
probability_latitude = min(probability_latitude, 1)
probability_longitude = min(probability_longitude, 1)

# Display predictions with certainty and probability
print(f"Predicted Latitude: {prediction[0][0]} ± {std_dev_latitude} (Certainty: {probability_latitude*100:.2f}%)")
print(f"Predicted Longitude: {prediction[0][1]} ± {std_dev_longitude} (Certainty: {probability_longitude*100:.2f}%)")

# Display expected event details only if a future event is found
if future_high_magnitude_event is not None:
    print(f"Expected Date and Time for the next event with Magnitude > 5: {future_high_magnitude_event['DATETIME']}")
    print(f"Predicted Magnitude: {predicted_magnitude}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Expected Date and Time for the next event with Magnitude > 5: 2024-09-01 01:05:04
Predicted Magnitude: 5.2
[1m224/224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Predicted Latitude: -10.874484062194824 ± 4.553580053640547 (Certainty: 77.87%)
Predicted Longitude: -76.21995544433594 ± 3.1660341966336807 (Certainty: 76.64%)
Expected Date and Time for the next event with Magnitude > 5: 2024-09-01 01:05:04
Predicted Magnitude: 5.2
