## LSTM Sliding Window Technique to predict rotorcraft phase

#### Installation
1) Create conda environment with python 3.9
2) Install following packages
    conda install -c conda-forge pandas
    conda install -c conda-forge numpy
    conda install -c conda-forge scikit-learn
    conda install -c conda-forge jupyter
    conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
3) Install GPU drivers for better speed

#### Notes
The code was run on a GPU equipped machine
With GPU capability, training takes only a few minutes. With CPU, this could take hours

#### About LSTM
Long Short-Term Memory (LSTM) is a type of recurrent neural network (RNN) designed for handling sequential data.
The rotorcraft data is sequential in nature and LSTM can identify patterns in this type of data well.

In [1]:
import utils

#### Load Training Data
Loads only these columns - 'Vert. Speed', 'Groundspeed', 'Altitude(AGL)', 'Date', 'System UTC Time'

In [2]:
train_csv_directory = 'data/train_data'
selected_df = utils.load_data_from_dir(train_csv_directory)
selected_df.head(10)

reading file data/train_data\SimData_2022.07.28_10.07.30.csv
reading file data/train_data\SimData_2022.07.28_10.11.13.csv
reading file data/train_data\SimData_2022.07.28_10.59.59.csv
reading file data/train_data\SimData_2022.07.28_11.47.45.csv
reading file data/train_data\SimData_2022.07.28_12.16.14.csv
reading file data/train_data\SimData_2022.07.28_13.45.28.csv
reading file data/train_data\SimData_2022.07.28_14.02.12.csv
reading file data/train_data\SimData_2022.07.28_14.19.29.csv
reading file data/train_data\SimData_2022.07.28_14.44.18.csv
reading file data/train_data\SimData_2022.07.28_15.13.02.csv
reading file data/train_data\SimData_2022.07.28_15.57.11.csv
reading file data/train_data\SimData_2022.07.28_16.45.29.csv
reading file data/train_data\SimData_2022.07.28_16.51.43.csv
reading file data/train_data\SimData_2022.07.28_16.59.08.csv
reading file data/train_data\SimData_2022.10.20_12.43.14.csv
reading file data/train_data\SimData_2022.10.20_13.25.49.csv
reading file data/train_

Unnamed: 0,Vert. Speed,Groundspeed,Altitude(AGL),Date,System UTC Time
1,-0.001,0.041,0.06,07/28/2022,14:07:52.445
2,0.004,0.038,0.06,07/28/2022,14:07:52.467
3,0.008,0.032,0.06,07/28/2022,14:07:52.485
4,0.011,0.019,0.06,07/28/2022,14:07:52.523
5,0.013,0.01,0.06,07/28/2022,14:07:52.546
6,0.015,0.004,0.06,07/28/2022,14:07:52.569
7,0.016,0.008,0.06,07/28/2022,14:07:52.593
8,0.018,0.012,0.06,07/28/2022,14:07:52.616
9,0.019,0.014,0.06,07/28/2022,14:07:52.639
10,0.021,0.013,0.06,07/28/2022,14:07:52.663


In [3]:
print('column data types')
selected_df.dtypes

column data types


Vert. Speed        object
Groundspeed        object
Altitude(AGL)      object
Date               object
System UTC Time    object
dtype: object

#### Formatting involves
a) Combine Date and Time column and change type to datetime
b) Convert columns to float - 'Vert. Speed', 'Groundspeed', 'Altitude(AGL)'
c) Sort by datetime so LSTM sequences are in order of phase occurrence


In [4]:
formatted_df = utils.format_data(selected_df)
formatted_df.head(10)

Unnamed: 0,Vert. Speed,Groundspeed,Altitude(AGL),DateTime
0,-0.001,0.041,0.06,2022-07-28 14:07:52.445
1,0.004,0.038,0.06,2022-07-28 14:07:52.467
2,0.008,0.032,0.06,2022-07-28 14:07:52.485
3,0.011,0.019,0.06,2022-07-28 14:07:52.523
4,0.013,0.01,0.06,2022-07-28 14:07:52.546
5,0.015,0.004,0.06,2022-07-28 14:07:52.569
6,0.016,0.008,0.06,2022-07-28 14:07:52.593
7,0.018,0.012,0.06,2022-07-28 14:07:52.616
8,0.019,0.014,0.06,2022-07-28 14:07:52.639
9,0.021,0.013,0.06,2022-07-28 14:07:52.663


In [5]:
print('column type after formatting')
formatted_df.dtypes

column type after formatting


Vert. Speed             float64
Groundspeed             float64
Altitude(AGL)           float64
DateTime         datetime64[ns]
dtype: object

#### Define physics model to tag phase of each row

In [6]:
#Physics model
def is_close_to_zero(value, tolerance=1e-6):
    return abs(value) < tolerance

# Create a function to identify phases
def identify_phase(Vert_Speed, Groundspeed, Altitude_AGL):
    if is_close_to_zero(Vert_Speed) and Groundspeed < 0.6 and 0 <= Altitude_AGL < 2:
        return "Standing"
    elif is_close_to_zero(Vert_Speed) and 0.6 <= Groundspeed < 20 and 0 <= Altitude_AGL < 2:
        return "Surface Taxi"
    elif is_close_to_zero(Vert_Speed) and 0.6 <= Groundspeed < 30 and 2 <= Altitude_AGL < 25:
        return "Hover Taxi"
    elif is_close_to_zero(Vert_Speed) and 0.6 <= Groundspeed < 50 and 25 <= Altitude_AGL < 100:
        return "Air Taxi"
    elif -90 <= Vert_Speed <= 90 and Groundspeed <= 0.6 and Altitude_AGL < 2:
        return "Hover In Ground Effect"
    elif -90 <= Vert_Speed <= 90 and Groundspeed <= 0.6 and Altitude_AGL >= 100:
        return "Hover"
    elif Vert_Speed > 90 and Groundspeed <= 0.6 and Altitude_AGL < 100:
        return "Hover Lift"
    elif Vert_Speed < -90 and Groundspeed <= 0.6 and Altitude_AGL < 100:
        return "Hover Descent"
    elif Vert_Speed > 90 and Altitude_AGL >= 100:
        return "Climb"
    elif -90 <= Vert_Speed <= 90 and Groundspeed > 0.6 and Altitude_AGL >= 100:
        return "Cruise"
    elif Vert_Speed < -90 and Altitude_AGL >= 100:
        return "Descent"
    else:
        return "LandingOrTakeOff"


#### Add phase column using physics definition

In [7]:
# Apply the function to identify phases for each row
formatted_df['Phase'] = formatted_df.apply(
    lambda row: identify_phase(row['Vert. Speed'], row['Groundspeed'], row['Altitude(AGL)']), axis=1)

formatted_df.head(10)

Unnamed: 0,Vert. Speed,Groundspeed,Altitude(AGL),DateTime,Phase
0,-0.001,0.041,0.06,2022-07-28 14:07:52.445,Hover In Ground Effect
1,0.004,0.038,0.06,2022-07-28 14:07:52.467,Hover In Ground Effect
2,0.008,0.032,0.06,2022-07-28 14:07:52.485,Hover In Ground Effect
3,0.011,0.019,0.06,2022-07-28 14:07:52.523,Hover In Ground Effect
4,0.013,0.01,0.06,2022-07-28 14:07:52.546,Hover In Ground Effect
5,0.015,0.004,0.06,2022-07-28 14:07:52.569,Hover In Ground Effect
6,0.016,0.008,0.06,2022-07-28 14:07:52.593,Hover In Ground Effect
7,0.018,0.012,0.06,2022-07-28 14:07:52.616,Hover In Ground Effect
8,0.019,0.014,0.06,2022-07-28 14:07:52.639,Hover In Ground Effect
9,0.021,0.013,0.06,2022-07-28 14:07:52.663,Hover In Ground Effect


In [8]:
df = formatted_df
print('number of training rows', len(df))
df.head(10)

number of training rows 3080267


Unnamed: 0,Vert. Speed,Groundspeed,Altitude(AGL),DateTime,Phase
0,-0.001,0.041,0.06,2022-07-28 14:07:52.445,Hover In Ground Effect
1,0.004,0.038,0.06,2022-07-28 14:07:52.467,Hover In Ground Effect
2,0.008,0.032,0.06,2022-07-28 14:07:52.485,Hover In Ground Effect
3,0.011,0.019,0.06,2022-07-28 14:07:52.523,Hover In Ground Effect
4,0.013,0.01,0.06,2022-07-28 14:07:52.546,Hover In Ground Effect
5,0.015,0.004,0.06,2022-07-28 14:07:52.569,Hover In Ground Effect
6,0.016,0.008,0.06,2022-07-28 14:07:52.593,Hover In Ground Effect
7,0.018,0.012,0.06,2022-07-28 14:07:52.616,Hover In Ground Effect
8,0.019,0.014,0.06,2022-07-28 14:07:52.639,Hover In Ground Effect
9,0.021,0.013,0.06,2022-07-28 14:07:52.663,Hover In Ground Effect


In [9]:
print(df['Phase'].value_counts())

Phase
Descent                   782030
Climb                     675334
Cruise                    652723
LandingOrTakeOff          470354
Hover In Ground Effect    359419
Standing                  124083
Hover                      14940
Hover Descent                995
Hover Lift                   383
Surface Taxi                   4
Air Taxi                       1
Hover Taxi                     1
Name: count, dtype: int64


#### Create sequences using sliding window and convert to torch tensor

In [10]:
import numpy as np
import torch
from sklearn.preprocessing import LabelEncoder

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

sequence_length = 5000
step_size = 1000

X, y = utils.create_sliding_window_sequences(df, sequence_length, step_size)
encoder = LabelEncoder()
y = encoder.fit_transform(np.array(y))

X_torch, y_torch = utils.torch_input_output(X, y, device)

In [11]:
print('phase labels in training data')
encoder.classes_

phase labels in training data


array(['Climb', 'Cruise', 'Descent', 'Hover', 'Hover Descent',
       'Hover In Ground Effect', 'Hover Lift', 'LandingOrTakeOff',
       'Standing'], dtype='<U22')

In [12]:
from sklearn.model_selection import train_test_split

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_torch, y_torch, test_size=0.2, random_state=42)

#### Define LSTM Model Architecture

In [13]:
import torch.nn as nn

# Define your LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # Take the output at the last time step
        return out

#### Setup model with parameters

In [14]:
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

input_size = X_train.shape[2]
hidden_size = 50
output_size = len(encoder.classes_)
learning_rate = 0.001

model = LSTMModel(input_size, hidden_size, output_size).to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Convert data to DataLoader
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)

#### Model Training

In [15]:
# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    total_loss = 0
    correct_predictions = 0
    total_samples = 0

    for inputs, labels in train_loader:
        labels = labels.long()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == labels).sum().item()
        total_samples += labels.size(0)

    epoch_loss = total_loss / len(train_loader)
    epoch_accuracy = correct_predictions / total_samples

    print(f'Epoch [{epoch + 1}/{num_epochs}] - Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2%}')

Epoch [1/100] - Loss: 1.7186, Accuracy: 52.32%
Epoch [2/100] - Loss: 0.9429, Accuracy: 84.96%
Epoch [3/100] - Loss: 0.5824, Accuracy: 86.99%
Epoch [4/100] - Loss: 0.4342, Accuracy: 88.50%
Epoch [5/100] - Loss: 0.3725, Accuracy: 89.55%
Epoch [6/100] - Loss: 0.3317, Accuracy: 90.45%
Epoch [7/100] - Loss: 0.3024, Accuracy: 90.73%
Epoch [8/100] - Loss: 0.2900, Accuracy: 90.93%
Epoch [9/100] - Loss: 0.2770, Accuracy: 90.73%
Epoch [10/100] - Loss: 0.2626, Accuracy: 91.46%
Epoch [11/100] - Loss: 0.2500, Accuracy: 91.46%
Epoch [12/100] - Loss: 0.2484, Accuracy: 91.38%
Epoch [13/100] - Loss: 0.2417, Accuracy: 91.46%
Epoch [14/100] - Loss: 0.2369, Accuracy: 91.59%
Epoch [15/100] - Loss: 0.2334, Accuracy: 91.63%
Epoch [16/100] - Loss: 0.2338, Accuracy: 91.59%
Epoch [17/100] - Loss: 0.2329, Accuracy: 91.79%
Epoch [18/100] - Loss: 0.2377, Accuracy: 91.75%
Epoch [19/100] - Loss: 0.2144, Accuracy: 92.20%
Epoch [20/100] - Loss: 0.2216, Accuracy: 91.50%
Epoch [21/100] - Loss: 0.2117, Accuracy: 91.91%
E

#### Evaluate Model

In [16]:
from sklearn.metrics import classification_report

# Evaluate the model
# Convert data to DataLoader for test set
test_data = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

model.eval()

predicted_labels, actual_labels = utils.model_predict(test_loader, model, encoder)
# Calculate and print the classification report
report = classification_report(actual_labels, predicted_labels)
print(report)


Test accuracy: 93.34%
                        precision    recall  f1-score   support

                 Climb       0.97      0.98      0.97       134
                Cruise       0.93      0.98      0.95       137
               Descent       0.99      1.00      0.99       149
                 Hover       0.00      0.00      0.00         4
Hover In Ground Effect       0.74      1.00      0.85        70
      LandingOrTakeOff       1.00      0.93      0.96        98
              Standing       0.00      0.00      0.00        24

              accuracy                           0.93       616
             macro avg       0.66      0.70      0.68       616
          weighted avg       0.90      0.93      0.91       616



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [17]:
torch.save(model.state_dict(), 'lstm.model')

#### Test with second data set

In [18]:
import utils

test_csv_directory = 'data/test_data'
test_df = utils.load_data_from_dir(test_csv_directory)

reading file data/test_data\SimData_2022.03.31_09.14.40.csv
reading file data/test_data\SimData_2022.03.31_09.38.42.csv
reading file data/test_data\SimData_2022.03.31_10.57.22.csv
reading file data/test_data\SimData_2022.03.31_12.57.00.csv
reading file data/test_data\SimData_2022.03.31_13.47.24.csv
reading file data/test_data\SimData_2022.03.31_15.21.35.csv
reading file data/test_data\SimData_2022.03.31_16.51.42.csv
reading file data/test_data\SimData_2023.01.12_09.00.50.csv
reading file data/test_data\SimData_2023.01.12_09.28.10.csv
reading file data/test_data\SimData_2023.01.12_10.41.08.csv
reading file data/test_data\SimData_2023.01.12_10.45.38.csv
reading file data/test_data\SimData_2023.01.12_12.36.17.csv
reading file data/test_data\SimData_2023.01.12_12.36.37.csv
reading file data/test_data\SimData_2023.01.12_12.37.48.csv
reading file data/test_data\SimData_2023.01.12_12.43.44.csv
reading file data/test_data\SimData_2023.01.12_12.48.49.csv
reading file data/test_data\SimData_2023

In [19]:
test_formatted_df = utils.format_data(test_df)
test_formatted_df['Phase'] = test_formatted_df.apply(lambda row: identify_phase(row['Vert. Speed'], row['Groundspeed'], row['Altitude(AGL)']), axis=1)
print('number of test data rows', len(test_formatted_df))
test_formatted_df.head(10)

number of test data rows 2838392


Unnamed: 0,Vert. Speed,Groundspeed,Altitude(AGL),DateTime,Phase
0,1774.112,90.055,1079.71,2022-03-31 13:14:40.712,Climb
1,1773.997,90.063,1080.13,2022-03-31 13:14:40.732,Climb
2,1773.84,90.075,1080.7,2022-03-31 13:14:40.750,Climb
3,1773.695,90.086,1081.24,2022-03-31 13:14:40.768,Climb
4,1773.561,90.096,1081.76,2022-03-31 13:14:40.785,Climb
5,1773.433,90.106,1082.26,2022-03-31 13:14:40.801,Climb
6,1773.311,90.116,1082.74,2022-03-31 13:14:40.819,Climb
7,1773.079,90.135,1083.71,2022-03-31 13:14:40.849,Climb
8,1772.976,90.144,1084.15,2022-03-31 13:14:40.868,Climb
9,1772.75,90.165,1085.17,2022-03-31 13:14:40.899,Climb


#### Create sequences from test data

In [20]:
X_test2, y_test2 = utils.create_sliding_window_sequences(test_formatted_df, sequence_length, step_size)

In [21]:
import numpy as np
# use same encoder. created a new encoder by accident and that caused issues vey difficult to debug
# cuda library will complain with GPU on with no direct association of underlying label mismatch
y_test2 = encoder.transform(np.array(y_test2))
X_test2_torch, y_test2_torch = utils.torch_input_output(X_test2, y_test2, device)

In [22]:
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import classification_report

test_data2 = TensorDataset(X_test2_torch, y_test2_torch)
test_loader2 = DataLoader(test_data2, batch_size=32, shuffle=False)

model.eval()

predicted_labels2, actual_labels2 = utils.model_predict(test_loader2, model, encoder)
# Calculate and print the classification report
report2 = classification_report(actual_labels2, predicted_labels2)
print(report2)

Test accuracy: 89.73%
                        precision    recall  f1-score   support

                 Climb       0.97      0.98      0.98       640
                Cruise       0.95      0.94      0.95       560
               Descent       0.98      0.99      0.98       763
                 Hover       0.00      0.00      0.00        13
Hover In Ground Effect       0.60      0.99      0.74       325
      LandingOrTakeOff       0.98      0.98      0.98       315
              Standing       0.00      0.00      0.00       218

              accuracy                           0.90      2834
             macro avg       0.64      0.70      0.66      2834
          weighted avg       0.85      0.90      0.87      2834



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
