### Imports

In [1]:
import os
import glob
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt


ModuleNotFoundError: No module named 'tensorflow'

### Data Loading and Preprocessing

In [None]:
# Define the data directory. The SEISMIC_DATA_DIR environment variable, when
# set, overrides the default so the notebook is not tied to one machine's
# absolute path; when unset the original location is used.
data_dir = os.environ.get(
    'SEISMIC_DATA_DIR',
    '/Users/Documents/seismic_detection/data/lunar/test/data/S12_GradeB',
)

# Get all CSV files in the directory. glob returns matches in arbitrary order,
# so sort them: the files are concatenated into one series below and the
# result must be the same on every run.
csv_files = sorted(glob.glob(os.path.join(data_dir, '*.csv')))

# Fail early with an actionable message instead of letting pd.concat raise
# "No objects to concatenate" on an empty list.
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in {data_dir!r}")

# Load each CSV file (parsed as tab-separated).
data_list = [pd.read_csv(file, delimiter='\t') for file in csv_files]

# Concatenate all data into a single DataFrame.
# NOTE(review): the files are joined end to end, so windows built later from
# this series can span the boundary between two files - confirm that is
# acceptable for the analysis.
data = pd.concat(data_list, ignore_index=True)

# Extract the 'velocity(m/s)' column, reporting what was actually parsed if it
# is missing (a wrong delimiter is one possible cause - verify against the files).
if 'velocity(m/s)' not in data.columns:
    raise KeyError(
        f"Column 'velocity(m/s)' not found; parsed columns: {list(data.columns)}"
    )
velocities = data['velocity(m/s)'].values

# Normalize the data between 0 and 1. MinMaxScaler expects a 2-D array, hence
# the reshape to a single feature column.
scaler = MinMaxScaler()
velocities_scaled = scaler.fit_transform(velocities.reshape(-1, 1))

# Function to create sequences of a specified length
def create_sequences(data, seq_length, step=1):
    """Cut ``data`` into overlapping windows of ``seq_length`` samples.

    Windows are taken along the first axis. Window ``k`` covers
    ``data[k * step : k * step + seq_length]``, and every window that fits
    completely inside ``data`` is returned, including the one that ends on
    the last sample.

    Parameters
    ----------
    data : array-like
        Samples ordered along the first axis; any trailing axes are kept.
    seq_length : int
        Number of samples per window. Must be positive.
    step : int, optional
        Distance between the starts of consecutive windows. The default of 1
        gives maximally overlapping windows; larger values yield fewer windows
        and a proportionally smaller result.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, seq_length, *data.shape[1:])``. When
        ``data`` is shorter than ``seq_length`` the result has zero windows
        but keeps the remaining dimensions, so shape-based code downstream
        still works.

    Raises
    ------
    ValueError
        If ``seq_length`` or ``step`` is not positive.
    """
    if seq_length <= 0:
        raise ValueError(f"seq_length must be positive, got {seq_length}")
    if step <= 0:
        raise ValueError(f"step must be positive, got {step}")

    data = np.asarray(data)

    # The last valid start is len(data) - seq_length, so the exclusive bound is
    # one past it; clamp at zero for inputs shorter than one window.
    stop = max(len(data) - seq_length + 1, 0)
    starts = np.arange(0, stop, step)
    offsets = np.arange(seq_length)

    # Broadcasting builds an (n_windows, seq_length) index grid; indexing with
    # it gathers all windows at once into a new, writable array.
    return data[starts[:, None] + offsets[None, :]]

# Number of consecutive samples in each window fed to the model
sequence_length = 100

# Slice the scaled velocity series into fixed-length windows
sequences = create_sequences(velocities_scaled, seq_length=sequence_length)

# Arrange the windows as (samples, timesteps, features) with a single feature,
# which is the layout the LSTM layers take as input
X = sequences.reshape((len(sequences), sequences.shape[1], 1))


### Model Building

In [None]:
# Assemble the LSTM autoencoder one layer at a time
model = models.Sequential()

# Encoder: two stacked LSTMs; the second returns only its final output, so each
# window is reduced to a single 32-unit vector
model.add(layers.LSTM(64, activation='relu', input_shape=(sequence_length, 1), return_sequences=True))
model.add(layers.LSTM(32, activation='relu', return_sequences=False))

# Bridge: repeat the encoded vector once per timestep so the decoder receives
# a sequence of the original length
model.add(layers.RepeatVector(sequence_length))

# Decoder: mirror of the encoder, followed by a per-timestep Dense layer that
# emits one value for every step of the window
model.add(layers.LSTM(32, activation='relu', return_sequences=True))
model.add(layers.LSTM(64, activation='relu', return_sequences=True))
model.add(layers.TimeDistributed(layers.Dense(1)))

# Configure training: Adam optimizer with mean squared error as the loss
model.compile(loss='mse', optimizer='adam')

# Print the layer-by-layer summary of the network
model.summary()

### Model Training

In [2]:
# Fit the autoencoder: the windows serve as both input and target, and the
# fraction given by validation_split is held out for validation
history = model.fit(x=X, y=X, batch_size=64, epochs=10, validation_split=0.1)

NameError: name 'model' is not defined

### Anomaly Detection

In [None]:
# Reconstruct every window with the trained autoencoder
X_pred = model.predict(X)

# Per-window reconstruction error: squared difference averaged over the
# timestep and feature axes
reconstruction_errors = np.square(X - X_pred).mean(axis=(1, 2))

# Anomaly threshold: three standard deviations above the mean error
threshold = reconstruction_errors.mean() + 3 * reconstruction_errors.std()

# Boolean mask of the windows whose error exceeds the threshold
anomalies = reconstruction_errors > threshold

# Positions of the flagged windows
anomaly_indices = np.flatnonzero(anomalies)

# Report how many windows were flagged
print(f"Detected {len(anomaly_indices)} anomalies out of {len(reconstruction_errors)} sequences.")

# Draw the error curve with the threshold as a dashed red line
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(reconstruction_errors, label='Reconstruction Error')
ax.hlines(threshold, xmin=0, xmax=len(reconstruction_errors), colors='r', linestyles='--', label='Threshold')
ax.legend()
ax.set_title('Reconstruction Error Over Time')
ax.set_xlabel('Sequence Index')
ax.set_ylabel('Reconstruction Error')
plt.show()
