In [1]:
import os
import pandas as pd
import numpy as np
from scipy.ndimage import median_filter, gaussian_filter
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib.pyplot as plt

In [None]:
# Load one recording from a CSV export into a numeric DataFrame
def load_data(file_path):
    """Read a comma-separated recording and return it as a numeric DataFrame.

    The first four lines of the file are discarded. Every remaining line is
    stripped and split on commas; only lines that produce exactly as many
    fields as there are expected columns are kept, all others are dropped
    silently. Each column is then converted with ``pd.to_numeric`` and any
    value that cannot be parsed becomes NaN.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A DataFrame with the columns 'Time (s)', 'X (m/s2)', 'Y (m/s2)',
        'Z (m/s2)', 'R (m/s2)', 'Theta (deg)' and 'Phi (deg)'.
    """
    column_names = ['Time (s)', 'X (m/s2)', 'Y (m/s2)', 'Z (m/s2)', 'R (m/s2)', 'Theta (deg)', 'Phi (deg)']
    expected_width = len(column_names)

    rows = []
    with open(file_path, 'r') as handle:
        # Everything before the fifth line is skipped.
        for raw_line in handle.readlines()[4:]:
            fields = raw_line.strip().split(',')
            if len(fields) == expected_width:
                rows.append(fields)

    frame = pd.DataFrame(rows, columns=column_names)
    # Fields are still strings here; unparseable entries are coerced to NaN.
    return frame.apply(pd.to_numeric, errors='coerce')


In [None]:
# Median filter followed by Gaussian smoothing
def median_then_gaussian_filter(data, kernel_size=5, sigma=1):
    """Run a median filter over ``data`` and then Gaussian-smooth the result.

    Args:
        data: Array-like signal, passed straight to ``scipy.ndimage.median_filter``.
        kernel_size: Forwarded as the ``size`` argument of the median filter.
        sigma: Forwarded as the ``sigma`` argument of the Gaussian filter.

    Returns:
        The output of ``gaussian_filter`` applied to the median-filtered data.
    """
    return gaussian_filter(
        median_filter(data, size=kernel_size),
        sigma=sigma,
    )

In [None]:
def preprocess_data(df):
    """Filter every signal column of ``df`` and return the filtered frame.

    Every column after the first is passed through
    ``median_then_gaussian_filter`` and stored under the name
    ``'<original name>_filtered'``. The 'Time (s)' column is copied over
    unfiltered and ends up as the last column of the result.

    Args:
        df: DataFrame whose first column is skipped by the filtering loop and
            which contains a 'Time (s)' column.

    Returns:
        A new DataFrame with a default integer index, one ``*_filtered``
        column per signal column, and the original 'Time (s)' values.
    """
    filtered_data = {
        f'{column}_filtered': median_then_gaussian_filter(df[column].values)
        for column in df.columns[1:]  # Skip 'Time (s)'
    }
    df_filtered = pd.DataFrame(filtered_data)
    # Copy the time stamps by position. Assigning the Series itself would align
    # on index labels, and because df_filtered always has a fresh 0..n-1 index,
    # an input frame with any other index would get NaN time stamps.
    df_filtered['Time (s)'] = df['Time (s)'].values
    return df_filtered

In [None]:
# Build sliding-window samples for the LSTM
def create_sequences(data, time_steps=50):
    """Cut ``data`` into overlapping windows and their next-step targets.

    Window ``i`` is ``data[i:i + time_steps]`` and its target is the single
    element ``data[i + time_steps]``. One pair is produced for every ``i`` in
    ``range(len(data) - time_steps)``, so no pairs are produced when ``data``
    has ``time_steps`` elements or fewer.

    Args:
        data: Sliceable sequence (for example a NumPy array of rows).
        time_steps: Number of consecutive elements in each window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays holding the windows and the targets.
    """
    n_windows = len(data) - time_steps
    windows = [data[start:start + time_steps] for start in range(n_windows)]
    targets = [data[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [None]:
# Chronological train / validation / test split
def split_data(X, y, train_size=0.7, val_size=0.15, test_size=0.15):
    """Split ``X`` and ``y`` into contiguous train, validation and test parts.

    The order of the samples is kept: the first ``int(len(X) * train_size)``
    items form the training part, the next ``int(len(X) * val_size)`` items
    the validation part, and everything that remains the test part. The same
    boundaries, derived from ``len(X)``, are applied to ``y``.

    Args:
        X: Sliceable collection of inputs.
        y: Sliceable collection of targets, sliced with the same boundaries.
        train_size: Fraction of ``len(X)`` used for training.
        val_size: Fraction of ``len(X)`` used for validation.
        test_size: Accepted for symmetry but not read; the test part is always
            whatever is left after the training and validation parts.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    """
    total = len(X)
    train_end = int(total * train_size)
    val_end = train_end + int(total * val_size)

    train_part = slice(None, train_end)
    val_part = slice(train_end, val_end)
    test_part = slice(val_end, None)

    return (
        X[train_part], X[val_part], X[test_part],
        y[train_part], y[val_part], y[test_part],
    )


In [None]:
# Build and compile the stacked-LSTM network
def create_lstm_model(input_shape):
    """Return a compiled Keras ``Sequential`` model made of two LSTM layers and a Dense head.

    The first LSTM layer (50 units, ReLU) returns the full sequence so that it
    can feed the second LSTM layer (50 units, ReLU). The final Dense layer has
    ``input_shape[-1]`` units. The model is compiled with the Adam optimizer
    and mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, passed to the first LSTM layer;
            its last entry also sets the width of the output layer.

    Returns:
        The compiled, untrained model.
    """
    n_outputs = input_shape[-1]
    network = Sequential([
        LSTM(50, activation='relu', return_sequences=True, input_shape=input_shape),
        LSTM(50, activation='relu'),
        Dense(n_outputs),
    ])
    network.compile(optimizer='adam', loss='mse')
    return network


In [None]:
# Helper: run row-wise transforms over a batch of windows
def _apply_rowwise(windows, *transforms):
    """Apply 2-D transforms to a batch of windows while keeping its shape.

    ``windows`` is flattened to ``(-1, windows.shape[-1])``, passed through each
    callable in ``transforms`` in order, and reshaped back to its original shape.
    """
    flat = windows.reshape(-1, windows.shape[-1])
    for transform in transforms:
        flat = transform(flat)
    return flat.reshape(windows.shape)


# Function to train and evaluate the model
def train_and_evaluate_model(file_path, time_steps=50, epochs=50, batch_size=32):
    """Train a next-step LSTM on one recording and report its test error.

    Pipeline: load the CSV, filter every signal column, zero-fill non-finite
    values, cut the filtered signals into windows of ``time_steps`` rows with
    the following row as target, split chronologically into train / validation
    / test, standardise the inputs with statistics fitted on the training
    part, train the model, print MSE and MAE on the test part, and plot the
    first 100 true and predicted values of the first feature.

    Inputs are standardised; targets are left in their original units.

    Args:
        file_path: Path of the CSV recording, passed to ``load_data``.
        time_steps: Window length, in rows, of each input sequence.
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Batch size passed to ``model.fit``.

    Returns:
        The trained Keras model.

    Raises:
        ValueError: If the recording is too short for every split to contain
            at least one sequence.
    """
    # Load and preprocess the data
    df = load_data(file_path)
    df_filtered = preprocess_data(df)

    # Create sequences
    columns_filtered = [col for col in df_filtered.columns if col != 'Time (s)']
    # Zero-fill NaNs once, before windowing, so inputs AND targets are cleaned
    # identically. Cleaning only the inputs leaves NaNs in the targets, which
    # turns the training loss into NaN and makes the sklearn metrics raise.
    data_arr = np.nan_to_num(df_filtered[columns_filtered].values, nan=0.0)
    X, y = create_sequences(data_arr, time_steps)

    # Split the data
    X_train, X_val, X_test, y_train, y_val, y_test = split_data(X, y)
    if min(len(X_train), len(X_val), len(X_test)) == 0:
        raise ValueError(
            f'Not enough data in {file_path!r}: {len(data_arr)} rows give {len(X)} '
            f'sequences with time_steps={time_steps}, but the train, validation '
            f'and test splits each need at least one sequence.'
        )

    # Impute and scale the inputs; both transformers are fitted on the training
    # part only and then reused for validation and test.
    # NOTE(review): NaNs are already zero-filled above, so the mean imputer
    # receives no missing values. It is kept to leave the input pipeline
    # unchanged; confirm whether mean imputation was the intended behaviour.
    imputer = SimpleImputer(strategy='mean')
    scaler = StandardScaler()
    X_train = _apply_rowwise(X_train, imputer.fit_transform, scaler.fit_transform)
    X_val = _apply_rowwise(X_val, imputer.transform, scaler.transform)
    X_test = _apply_rowwise(X_test, imputer.transform, scaler.transform)

    # Create and train the model
    model = create_lstm_model((X_train.shape[1], X_train.shape[-1]))
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val), verbose=1)

    # Evaluate the model
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    print(f'Mean Squared Error: {mse}')
    print(f'Mean Absolute Error: {mae}')

    # Plot some test results for visual inspection
    plt.figure(figsize=(10, 6))
    plt.plot(y_test[:100, 0], label='True')
    plt.plot(predictions[:100, 0], label='Predicted')
    plt.title('True vs Predicted values - first feature')
    plt.legend()
    plt.show()

    return model

In [None]:
# Provide the CSV file path here.
# The path is relative, so it is resolved against the kernel's current working
# directory when the file is opened.
file_path = 'Final_Dataset/Abhay/RK.csv'  # Update with your CSV file path
# Runs the whole pipeline (load, filter, window, train, evaluate, plot) and keeps
# the trained model in the notebook-level variable `model`.
model = train_and_evaluate_model(file_path)