# Deep-STOC Model Implementation for Offshore Earthquake Prediction
This notebook showcases the final implementation of the Deep-STOC (Deep Learning for Spatio-Temporal Ocean Colour) model. The model was developed as part of my MSc Dissertation, which aims to predict the likelihood of offshore earthquakes using remote sensing data.

The Deep-STOC model is a Convolutional LSTM (ConvLSTM) network designed to capture the spatio-temporal characteristics of seismic activity in offshore regions; it is trained and tested on remote sensing data. The parameters used for this model were derived from a detailed hyperparameter tuning process, the code for which can be found in the Deep-STOC_tuning.ipynb notebook.

In this notebook, we:

1. **Load and preprocess** the seismic data.
2. **Normalize** the data using the MinMaxScaler from the sklearn library.
3. **Define the architecture** of the Deep-STOC model using parameters identified from hyperparameter tuning.
4. **Train** the model and make predictions on the test data.
5. **Evaluate** the performance of the model using various metrics, including accuracy, precision, recall, F1 score, and ROC AUC.
6. Plot the **ROC curve** to visualize the model's performance.
7. Analyze **feature importance** via permutation importance, providing an estimate of the contribution of each feature to the model's prediction.
8. Plot the **Partial Dependence Plots (PDPs)** for selected features, offering a way to visualize the effect of certain features on the output prediction.

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score, log_loss, roc_curve, auc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, Flatten, Dropout, Dense
from sklearn.utils import shuffle

# Mount Google Drive for data access
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Load the data from Google Drive.
# The paths are built from the absolute mount point so that loading does not
# depend on the notebook's current working directory.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects; only load .npy files that come from a trusted source.
X = np.load(f'{DRIVE_MOUNT_POINT}/MyDrive/X_deepcolour7.npy', allow_pickle=True)
y = np.load(f'{DRIVE_MOUNT_POINT}/MyDrive/y_deepcolour7.npy', allow_pickle=True)

# Preprocess data: every sample keeps its data array at index 0; stack those
# arrays into one array with the samples along the first axis.
values = np.stack([sample[0] for sample in X])

# Fail fast on unexpected input: the model definition reads shape[2], shape[3]
# and shape[4] of the training array, so the stacked array must be 5-D.
# No axis reordering is needed; the stacked layout is used as is.
if values.ndim != 5:
    raise ValueError(
        f"Expected a 5-D array after stacking, got shape {values.shape}"
    )
X = values
print(X.shape)

In [None]:
# Hold out 20% of the samples for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Min-max scale every channel (last axis) independently. Each scaler is fitted
# on the training split only and then re-used to transform the test split, so
# no test statistics leak into training.
# NOTE(review): results are written back in place, so X_train / X_test keep
# their original dtype -- presumably floating point; confirm.
n_channels = X_train.shape[-1]
scalers = []
for ch in range(n_channels):
    scaler = MinMaxScaler()

    train_plane = X_train[..., ch]
    test_plane = X_test[..., ch]

    # The scaler expects a 2-D column, so flatten, scale, and restore the shape.
    scaled_train = scaler.fit_transform(train_plane.reshape(-1, 1))
    scaled_test = scaler.transform(test_plane.reshape(-1, 1))

    X_train[..., ch] = scaled_train.reshape(train_plane.shape)
    X_test[..., ch] = scaled_test.reshape(test_plane.shape)

    scalers.append(scaler)


In [None]:
# Define the model architecture
# A stack of three ConvLSTM2D layers followed by a dropout-regularised sigmoid
# classifier; layer settings are the ones selected by hyperparameter tuning.

# Per-frame input dimensions (axes 2-4 of the training array). The leading
# None in input_shape leaves the sequence length unspecified.
frame_shape = X_train.shape[2:5]

model = Sequential([
    # Initial ConvLSTM2D layer
    ConvLSTM2D(
        filters=128,
        kernel_size=(4, 3),
        activation='tanh',
        return_sequences=True,
        input_shape=(None, *frame_shape),
    ),
    # Two additional ConvLSTM2D layers; only the last one collapses the
    # sequence (return_sequences=False).
    ConvLSTM2D(
        filters=32,
        kernel_size=(3, 4),
        activation='tanh',
        return_sequences=True,
    ),
    ConvLSTM2D(
        filters=128,
        kernel_size=(3, 4),
        activation='relu',
        return_sequences=False,
    ),
    # Flatten the feature maps and apply dropout before the classifier
    Flatten(),
    Dropout(rate=0.4),
    # Single sigmoid unit for the binary output
    Dense(1, activation='sigmoid'),
])

# Compile for binary classification
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print model summary
model.summary()

In [None]:
# Fit the model, holding back 20% of the training data for validation
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
)

# Predicted probabilities for the held-out test set
y_pred = model.predict(X_test)

# Hard labels: 1 where the probability is strictly above 0.5, otherwise 0
y_pred_binary = (y_pred > 0.5).astype(int)

# Metrics computed from the thresholded labels
confusion = confusion_matrix(y_test, y_pred_binary)
accuracy = accuracy_score(y_test, y_pred_binary)
precision = precision_score(y_test, y_pred_binary)
recall = recall_score(y_test, y_pred_binary)
f1 = f1_score(y_test, y_pred_binary)

# Metrics computed from the raw probabilities
roc_auc = roc_auc_score(y_test, y_pred)
logloss = log_loss(y_test, y_pred)

# Report
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Confusion Matrix: \n{confusion}")
print(f"ROC AUC: {roc_auc}")
print(f"Log Loss: {logloss}")

In [None]:
# ROC curve: true-positive rate against false-positive rate over all
# thresholds of the predicted probabilities
fpr, tpr, _ = roc_curve(y_test, y_pred)
roc_label = 'ROC curve (area = %0.2f)' % roc_auc

fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
ax.plot(fpr, tpr, color='darkorange', lw=2, label=roc_label)
# Diagonal reference line
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve')
ax.legend(loc="lower right")
plt.show()

## Feature importance analysis with Permutation Feature Importance (PFI) and Partial Dependence Plots (PDPs)

In [None]:
def calculate_permutation_feature_importance(model, X_test, y_test, n_repeats=1, random_state=None):
    """
    Calculate permutation feature importance of a model.

    For each feature (index along the last axis of ``X_test``) the values of
    that feature are shuffled across samples, the model is re-evaluated, and
    the importance is the drop in accuracy relative to the unshuffled data.

    Args:
        model: Object with a ``predict(X)`` method whose output is rounded to
            obtain class labels.
        X_test: Input array; samples on the first axis, features on the last.
        y_test: True labels for ``X_test``.
        n_repeats: Number of independent shuffles per feature; the accuracy
            drops are averaged. The default of 1 matches a single shuffle.
        random_state: ``None``, an int seed, or a ``numpy.random.RandomState``
            used for the shuffles. ``None`` uses NumPy's global random state.

    Returns:
        1-D array with one importance value per feature
        (baseline accuracy minus mean accuracy after shuffling).

    Raises:
        ValueError: If ``n_repeats`` is smaller than 1.
    """
    # Local import: scikit-learn is already a dependency of this notebook.
    from sklearn.utils import check_random_state

    if n_repeats < 1:
        raise ValueError("n_repeats must be at least 1")

    # One generator for all shuffles so that repeats differ from each other
    # while the whole run stays reproducible for a given seed.
    rng = check_random_state(random_state)

    # Get initial accuracy
    original_predictions = model.predict(X_test)
    original_accuracy = accuracy_score(y_test, np.round(original_predictions))

    # Initialize array to store importance
    num_features = X_test.shape[-1]
    importance = np.zeros(num_features)

    # A single working copy is reused for every feature; the shuffled channel
    # is restored afterwards, which avoids one full copy per feature.
    X_test_permuted = X_test.copy()

    # Permute each feature and calculate the drop in accuracy
    for i in range(num_features):
        accuracy_drops = np.empty(n_repeats)
        for repeat in range(n_repeats):
            X_test_permuted[..., i] = shuffle(X_test[..., i], random_state=rng)

            permuted_predictions = model.predict(X_test_permuted)
            permuted_accuracy = accuracy_score(y_test, np.round(permuted_predictions))

            accuracy_drops[repeat] = original_accuracy - permuted_accuracy

        importance[i] = accuracy_drops.mean()

        # Put the original values back before moving to the next feature.
        X_test_permuted[..., i] = X_test[..., i]

    return importance

def plot_feature_importance(importance, feature_names):
    """
    Draw a horizontal bar chart of permutation feature importance.

    Args:
        importance: Sequence with one importance value per feature.
        feature_names: Labels for the bars, in the same order as
            ``importance``.
    """
    bar_positions = np.arange(len(importance))

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.barh(bar_positions, importance, align='center')

    # One tick per bar, labelled with the feature name
    ax.set_yticks(bar_positions)
    ax.set_yticklabels(feature_names)

    ax.set_xlabel('Importance')
    ax.set_title('Permutation Feature Importance')
    plt.show()

def calculate_partial_dependence(model, X_val, feature_index, num_points=100):
    """
    Calculate partial dependence of a feature.

    The selected feature (index along the last axis of ``X_val``) is set to
    each value of an evenly spaced grid between its observed minimum and
    maximum, for every sample at once, and the model's mean prediction is
    recorded for each grid value.

    Args:
        model: Object with a ``predict(X)`` method returning an array.
        X_val: Input array with features on the last axis. It is not modified.
        feature_index: Index of the feature on the last axis.
        num_points: Number of grid values to evaluate.

    Returns:
        Tuple ``(grid, pdp)``: the grid values and the mean prediction at each
        grid value, both 1-D arrays of length ``num_points``.
    """
    feature_values = X_val[..., feature_index]
    grid = np.linspace(feature_values.min(), feature_values.max(), num_points)
    pdp = np.zeros(num_points)

    # One working copy is enough: the selected channel is overwritten in full
    # on every iteration and no other channel is touched, so copying the whole
    # array once per grid point would only waste time and memory.
    X_val_pdp = X_val.copy()

    for i, value in enumerate(grid):
        X_val_pdp[..., feature_index] = value

        predictions = model.predict(X_val_pdp)
        pdp[i] = predictions.mean()

    return grid, pdp

def plot_partial_dependence(grid, pdp, feature_names, line_styles):
    """
    Draw the partial dependence curves of several features on one figure.

    Args:
        grid: Sequence of grid-value arrays, one per feature.
        pdp: Sequence of mean-prediction arrays matching ``grid``.
        feature_names: Legend label for each curve, indexed like ``grid``.
        line_styles: Matplotlib line style for each curve, indexed like
            ``grid``.
    """
    fig, ax = plt.subplots(figsize=(10, 8))

    for curve_idx, (x_values, y_values) in enumerate(zip(grid, pdp)):
        ax.plot(
            x_values,
            y_values,
            label=feature_names[curve_idx],
            linestyle=line_styles[curve_idx],
        )

    ax.set_xlabel('Feature Value')
    ax.set_ylabel('Average Prediction')
    ax.set_title('Partial Dependence Plots')
    ax.legend(loc='lower right')
    ax.grid(True)
    plt.show()

# Names of the input channels, used as labels in the plots.
# NOTE(review): presumably listed in the same order as the last axis of
# X_test -- confirm that the count and order match the data.
channels = [
    'CHL',
    'SST',
    'BBP443',
    'RS555',
    'KD490',
    'elevation',
    'fault_distance',
    'fault_CONF',
]

# Accuracy drop per channel when that channel is shuffled across test samples
importance = calculate_permutation_feature_importance(model, X_test, y_test)

# Bar chart of the per-channel importance
plot_feature_importance(importance, channels)

In [None]:
# Partial dependence is computed for the first five channels only
pdp_channels = channels[:5]

# Collect one (grid, curve) pair per selected channel index
grid_results, pdp_results = [], []
for feature_idx in range(len(pdp_channels)):
    grid, pdp = calculate_partial_dependence(model, X_test, feature_idx)
    grid_results.append(grid)
    pdp_results.append(pdp)

# A distinct line style per curve so the curves stay distinguishable
line_styles = ['-', '--', '-.', ':', (0, (3, 5, 1, 5))]

plot_partial_dependence(grid_results, pdp_results, pdp_channels, line_styles)
