# In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# --- Model input configuration ---
# Raw CSV columns the classifier is built on.
features = ['SOG', 'COG', 'Heading']
# Number of consecutive samples per sequence (e.g., 60 samples = 1 hour when sampled every minute).
sequence_length = 60

# --- Data locations: one folder of CSV files per navigational status ---
folder_fishing = "path/to/engaged_in_fishing"
folder_underway = "path/to/underway_using_engine"

# Function to load and preprocess data
def load_and_preprocess_data(folder, label, sequence_length, features):
    """Build fixed-length, labelled feature sequences from every CSV in *folder*.

    Each ``.csv`` file must contain the columns ``SOG``, ``COG`` and
    ``Heading``. Per file:

    * ``SOG`` is min-max scaled to [0, 1] using that file's own min/max.
    * ``COG`` and ``Heading`` (interpreted as degrees) are each replaced by a
      sine/cosine pair so that 0 and 360 map to the same point.
    * Every run of ``sequence_length`` consecutive rows (stride 1) becomes one
      sequence.

    Args:
        folder: Directory to scan (non-recursive) for ``.csv`` files.
        label: Class label assigned to every sequence from this folder.
        sequence_length: Number of consecutive rows per sequence.
        features: Currently ignored; the output columns are always
            ``SOG, COG_x, COG_y, Heading_x, Heading_y``. Kept so existing
            callers keep working.

    Returns:
        ``(data, labels)`` where ``data`` has shape
        ``(n_sequences, sequence_length, 5)`` and ``labels`` has shape
        ``(n_sequences,)``. When no file yields a sequence, ``data`` is an
        empty array that still has three dimensions, so it can be
        concatenated with the output of another call.
    """
    feature_columns = ['SOG', 'COG_x', 'COG_y', 'Heading_x', 'Heading_y']
    data = []

    # os.listdir returns entries in arbitrary order; sort so the resulting
    # dataset (and any seeded split of it) is reproducible across machines.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith('.csv'):
            continue

        df = pd.read_csv(os.path.join(folder, filename))

        # A file with fewer rows than one window contributes nothing; skip it
        # before scaling, since the scaler raises on a zero-row frame.
        if len(df) < sequence_length:
            continue

        # Normalize SOG.
        # NOTE(review): the scaler is fit per file, so scaled SOG values are
        # relative to each file's own range and not comparable across files.
        scaler = MinMaxScaler()
        df['SOG'] = scaler.fit_transform(df[['SOG']])

        # Encode COG and Heading cyclically
        df['COG_x'] = np.sin(np.radians(df['COG']))
        df['COG_y'] = np.cos(np.radians(df['COG']))
        df['Heading_x'] = np.sin(np.radians(df['Heading']))
        df['Heading_y'] = np.cos(np.radians(df['Heading']))

        # Convert once to NumPy; slicing the array is far cheaper than a
        # DataFrame .iloc slice per window.
        values = df[feature_columns].values
        n_windows = len(values) - sequence_length + 1
        data.extend(values[i:i + sequence_length] for i in range(n_windows))

    if data:
        sequences = np.array(data)
    else:
        # Keep the (n, sequence_length, n_features) rank even when empty.
        sequences = np.empty((0, sequence_length, len(feature_columns)))

    return sequences, np.full(len(data), label)

# Load data from both folders (label 0 = underway using engine, 1 = engaged in fishing)
data_underway, labels_underway = load_and_preprocess_data(folder_underway, 0, sequence_length, features)
data_fishing, labels_fishing = load_and_preprocess_data(folder_fishing, 1, sequence_length, features)

# Fail fast with a clear message if a class produced no sequences; otherwise the
# failure surfaces later as an obscure shape error, or a one-class model is trained.
for class_name, class_folder, class_data in (
    ("underway", folder_underway, data_underway),
    ("fishing", folder_fishing, data_fishing),
):
    if len(class_data) == 0:
        raise ValueError(
            f"No sequences of length {sequence_length} could be built for class "
            f"'{class_name}' from folder '{class_folder}'"
        )

# Combine the two classes into one dataset
X = np.concatenate([data_underway, data_fishing], axis=0)
y = np.concatenate([labels_underway, labels_fishing], axis=0)

# Train-test split (stratified so both splits keep the class ratio)
# NOTE(review): sequences are built with stride 1, so consecutive windows from
# one file overlap heavily. A random split puts near-duplicate sequences in both
# train and test, so the reported test accuracy is likely optimistic. Consider
# splitting by source file instead.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Build the LSTM model: two stacked LSTM layers with dropout, then a small dense head
model = Sequential([
    LSTM(64, input_shape=(sequence_length, X.shape[2]), return_sequences=True),
    Dropout(0.3),
    LSTM(32),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; stop once validation loss has not improved for 5 epochs and
# roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_split=0.2,  # held out from the end of X_train (already shuffled by the split above)
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate the model on the held-out test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy:.2f}")

# Save the model
model.save("navigational_status_classifier.h5")