## Import Statements

In [1]:
import os
import pickle
import torch
import numpy as np
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import optuna

## Load Datasets

In [3]:
# define the directory where the pickle files are stored
folder_path = '../data/batched_data_pickle_files/'

# define the filenames for the pickle files
file_names = ['processed_train_data.pkl', 'processed_train_targets.pkl', 'processed_test_data.pkl', 'true_rul.pkl']

# loop through each file and load its contents as arrays
for file_name in file_names:
    file_path = os.path.join(folder_path, file_name)
    
    # read the pickle file
    with open(file_path, 'rb') as file:
        data = pickle.load(file)
    
    # ensure data is a numpy array, if not convert it
    if isinstance(data, np.ndarray):
        globals()[file_name.replace('.pkl', '')] = data
    else:
        globals()[file_name.replace('.pkl', '')] = np.array(data)

# print the shapes of the loaded data
print("processed train data shape:", processed_train_data.shape)
print("processed train target shape:", processed_train_targets.shape)
print("processed test data shape:", processed_test_data.shape)
print("true rul shape:", true_rul.shape)

processed train data shape: (17731, 30, 14)
processed train target shape: (17731,)
processed test data shape: (100, 30, 14)
true rul shape: (100,)


## Model Building

In [15]:
# first, we assign the processed data to X and y
# X contains the features (input data) and y contains the target variable (processed_train_targets)
X = processed_train_data
y = processed_train_targets

# convert the numpy arrays to PyTorch tensors, since PyTorch models expect inputs as tensors
X_tensor = torch.FloatTensor(X)
y_tensor = torch.FloatTensor(y)

# split the data into training and validation sets
# here, we use sklearn's train_test_split to create a training set and a validation set
# 80% of the data goes into the training set, and 20% goes into the validation set
X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

train_dataset = TensorDataset(X_train, y_train)  # create training dataset
val_dataset = TensorDataset(X_val, y_val)  # create validation dataset

# define the RNN model class
class RNNModel(nn.Module):
    """
    This class defines the structure of the RNN model for predicting values
    from sequential input data. It uses a simple RNN layer followed by a 
    fully connected layer for the output.
    """
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(RNNModel, self).__init__()
        
        # store hyperparameters for later use
        self.hidden_size = hidden_size  # size of the hidden state in RNN
        self.num_layers = num_layers  # number of layers in the RNN
        
        # define the RNN layer; batch_first=True ensures input/output tensors
        # have the shape (batch_size, seq_length, features)
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        
        # define a fully connected (linear) layer to map the final RNN output to the desired output size
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        """
        Forward pass through the model. The input tensor `x` is passed through the RNN
        and then through the fully connected layer to produce the final output.
        
        Args:
            x: input tensor of shape (batch_size, seq_length, input_size)
        
        Returns:
            output: tensor of shape (batch_size, output_size) from the fully connected layer
        """
        # initialize the hidden state to zeros, one for each layer in the RNN
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # pass the input through the RNN; out is the output from each time step
        out, _ = self.rnn(x, h0)
        
        # we are interested in the output of the last time step, hence we select it using [-1]
        out = self.fc(out[:, -1, :])
        return out

# define the objective function for hyperparameter optimization using Optuna
# this function defines the objective for Optuna's hyperparameter optimization.
# it sets up the model with hyperparameters suggested by Optuna, trains the model, and returns the validation loss (RMSE)
# as the objective to minimize.

def objective(trial):
    # suggest hyperparameters using optuna
    hidden_size = trial.suggest_int('hidden_size', 32, 128, step=32)  # number of hidden units in the rnn
    num_layers = trial.suggest_int('num_layers', 1, 3)  # number of rnn layers
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)  # learning rate
    batch_size = trial.suggest_int('batch_size', 32, 128, step=32)  # batch size for training
    
    # create the model with the suggested hyperparameters
    model = RNNModel(input_size=X.shape[2], hidden_size=hidden_size, num_layers=num_layers, output_size=1)
    
    # define the loss function (rmse) and optimizer
    criterion = rmse_loss  # rmse loss function
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    # create the dataloader objects for the training and validation datasets
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    # training loop: run for a set number of epochs
    num_epochs = 20  
    for epoch in range(num_epochs):
        model.train()  # set the model to training mode
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()  # zero the gradients before the backward pass
            outputs = model(batch_X)  # get model predictions
            loss = criterion(outputs.squeeze(), batch_y)  # compute the loss
            loss.backward()  # backpropagate the gradients
            optimizer.step()  # update the model parameters
    
    # validation loop: evaluate model on the validation set
    model.eval()  # set the model to evaluation mode
    with torch.no_grad():  # no need to compute gradients during evaluation
        val_losses = []
        for batch_X, batch_y in val_loader:
            outputs = model(batch_X)  # get model predictions
            val_loss = criterion(outputs.squeeze(), batch_y)  # compute validation loss
            val_losses.append(val_loss.item())  # store the loss for each batch
        avg_val_loss = np.mean(val_losses)  # compute the average validation loss
    
    # return the average validation loss as the optimization objective
    return avg_val_loss

# create an optuna study to optimize the objective function
study = optuna.create_study(direction='minimize')  # we want to minimize the validation loss
study.optimize(objective, n_trials=100)  # run the optimization for 100 trials


# print the best hyperparameters found by Optuna and the corresponding validation loss
print("Best Hyperparameters:", study.best_params)
print("Best Validation RMSE:", study.best_value)

# extract the best hyperparameters from the study's best trial
best_trial = study.best_trial
best_hidden_size = best_trial.params['hidden_size']
best_num_layers = best_trial.params['num_layers']
best_learning_rate = best_trial.params['learning_rate']
best_batch_size = best_trial.params['batch_size']

# instantiate the final model with the best hyperparameters found
best_model = RNNModel(input_size=X.shape[2], hidden_size=best_hidden_size, num_layers=best_num_layers, output_size=1)

# define optimizer and loss function for the final model
optimizer = optim.Adam(best_model.parameters(), lr=best_learning_rate)
criterion = rmse_loss

# create dataloaders for the final training and validation datasets with the best batch size
train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)

# move the model to gpu if avaialble for faster training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model.to(device)

# final training loop with the best hyperparameters
num_epochs = 20 
for epoch in range(num_epochs):
    best_model.train()  # set model to training mode
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()  # reset gradients
        batch_X = batch_X.to(device)  # move data to the correct device (CPU/GPU)
        batch_y = batch_y.to(device)  # move target labels to the correct device
        outputs = best_model(batch_X)  # model prediction
        loss = criterion(outputs.squeeze(), batch_y)  # compute loss
        loss.backward()  # backpropagation
        optimizer.step()  # optimizer step (update weights)

    # validation loop
    best_model.eval()  # set model to evaluation mode
    with torch.no_grad():  # don't compute gradients during evaluation
        val_losses = []
        for batch_X, batch_y in val_loader:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            outputs = best_model(batch_X)  # model prediction
            val_loss = criterion(outputs.squeeze(), batch_y)  # compute loss
            val_losses.append(val_loss.item())  # store loss
        avg_val_loss = np.mean(val_losses)  # compute average validation loss

    # print validation loss after each epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}')

# save the best model to a file for later use
torch.save(best_model.state_dict(), 'best_rnn_model_optuna.pth')
print("Best RNN model saved to 'best_rnn_model_optuna.pth'")

# evaluate the final model on test data (optional)
best_model.eval()  # set model to evaluation mode
test_tensor = torch.FloatTensor(processed_test_data).to(device)  # convert test data to tensor
with torch.no_grad():  # no need to compute gradients
    test_predictions = best_model(test_tensor).numpy().squeeze()  # model predictions

# calculate the RMSE on the test data to evaluate model performance
test_rmse = np.sqrt(np.mean((test_predictions - true_rul)**2))
print(f'Test RMSE: {test_rmse:.4f}')


[I 2024-11-21 07:14:31,254] A new study created in memory with name: no-name-bfeaf0e5-a313-4091-b218-438f36625d08
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)  # Learning rate
[I 2024-11-21 07:15:39,638] Trial 0 finished with value: 18.26929014223116 and parameters: {'hidden_size': 128, 'num_layers': 1, 'learning_rate': 0.0034954850746481783, 'batch_size': 32}. Best is trial 0 with value: 18.26929014223116.
[I 2024-11-21 07:19:11,973] Trial 1 finished with value: 81.99670818873814 and parameters: {'hidden_size': 128, 'num_layers': 3, 'learning_rate': 1.2913086955327018e-05, 'batch_size': 128}. Best is trial 0 with value: 18.26929014223116.
[I 2024-11-21 07:20:07,505] Trial 2 finished with value: 81.99715151105609 and parameters: {'hidden_size': 32, 'num_layers': 3, 'learning_rate': 8.4153976329118e-05, 'batch_size': 128}. Best is trial 0 with value: 18.26929014223116.
[I 2024-11-21 07:29:03,564] Trial 3 finished with value: 50.597114426749094 and parameters: 

Best Hyperparameters: {'hidden_size': 32, 'num_layers': 2, 'learning_rate': 0.0011843203255513526, 'batch_size': 32}
Best Validation RMSE: 12.834748358339876
Epoch [1/20], Validation Loss: 72.3708
Epoch [2/20], Validation Loss: 65.1407
Epoch [3/20], Validation Loss: 56.8781
Epoch [4/20], Validation Loss: 48.4448
Epoch [5/20], Validation Loss: 42.7630
Epoch [6/20], Validation Loss: 41.8939
Epoch [7/20], Validation Loss: 41.7637
Epoch [8/20], Validation Loss: 41.7608
Epoch [9/20], Validation Loss: 40.0594
Epoch [10/20], Validation Loss: 20.2189
Epoch [11/20], Validation Loss: 18.4146
Epoch [12/20], Validation Loss: 18.4378
Epoch [13/20], Validation Loss: 18.0692
Epoch [14/20], Validation Loss: 18.1642
Epoch [15/20], Validation Loss: 17.8939
Epoch [16/20], Validation Loss: 17.6741
Epoch [17/20], Validation Loss: 17.7719
Epoch [18/20], Validation Loss: 17.1611
Epoch [19/20], Validation Loss: 16.8547
Epoch [20/20], Validation Loss: 16.9993
Best RNN model saved to 'best_rnn_model_optuna.pth'

In [22]:
# Extract the best hyperparameters from the best trial
best_trial = study.best_trial
best_hidden_size = best_trial.params['hidden_size']
best_num_layers = best_trial.params['num_layers']
best_learning_rate = best_trial.params['learning_rate']
best_batch_size = best_trial.params['batch_size']

# Instantiate the model with the best hyperparameters
best_model = RNNModel(input_size=X.shape[2], hidden_size=best_hidden_size, num_layers=best_num_layers, output_size=1)

# Define optimizer and loss function
optimizer = optim.Adam(best_model.parameters(), lr=best_learning_rate)
criterion = rmse_loss

# Create DataLoaders with the best batch size
train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)

# Final model training with the best hyperparameters
num_epochs = 30  
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_model.to(device)

for epoch in range(num_epochs):
    best_model.train()
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)
        outputs = best_model(batch_X)
        loss = criterion(outputs.squeeze(), batch_y)
        loss.backward()
        optimizer.step()

    # Validation loop
    best_model.eval()
    with torch.no_grad():
        val_losses = []
        for batch_X, batch_y in val_loader:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            outputs = best_model(batch_X)
            val_loss = criterion(outputs.squeeze(), batch_y)
            val_losses.append(val_loss.item())
        avg_val_loss = np.mean(val_losses)

    print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}')

# Save the best model to a file
torch.save(best_model.state_dict(), 'best_rnn_model_optuna.pth')
print("Best RNN model saved to 'best_rnn_model_optuna.pth'")

# Evaluate on test data (optional)
best_model.eval()
test_tensor = torch.FloatTensor(processed_test_data).to(device)
with torch.no_grad():
    test_predictions = best_model(test_tensor).numpy().squeeze()

# Calculate RMSE on test data
test_rmse = np.sqrt(np.mean((test_predictions - true_rul)**2))
print(f'Test RMSE: {test_rmse:.4f}')

Epoch [1/30], Validation Loss: 72.8571
Epoch [2/30], Validation Loss: 60.1122
Epoch [3/30], Validation Loss: 50.5848
Epoch [4/30], Validation Loss: 44.9487
Epoch [5/30], Validation Loss: 42.5667
Epoch [6/30], Validation Loss: 41.8887
Epoch [7/30], Validation Loss: 41.7864
Epoch [8/30], Validation Loss: 22.7499
Epoch [9/30], Validation Loss: 21.5080
Epoch [10/30], Validation Loss: 18.3313
Epoch [11/30], Validation Loss: 17.5140
Epoch [12/30], Validation Loss: 17.0022
Epoch [13/30], Validation Loss: 16.6556
Epoch [14/30], Validation Loss: 16.9519
Epoch [15/30], Validation Loss: 17.5586
Epoch [16/30], Validation Loss: 16.4147
Epoch [17/30], Validation Loss: 16.8203
Epoch [18/30], Validation Loss: 16.9057
Epoch [19/30], Validation Loss: 16.8761
Epoch [20/30], Validation Loss: 16.3438
Epoch [21/30], Validation Loss: 16.4679
Epoch [22/30], Validation Loss: 16.4167
Epoch [23/30], Validation Loss: 16.4471
Epoch [24/30], Validation Loss: 19.6358
Epoch [25/30], Validation Loss: 16.3047
Epoch [26

## Evaluate Model

In [6]:
# test data (processed test data and true RUL values)
X_test = processed_test_data  # processed test data
y_test = true_rul  # true remaining useful life values

# convert the test data and labels to pytorch tensors
X_test_tensor = torch.FloatTensor(X_test)  # convert test data to float tensor
y_test_tensor = torch.FloatTensor(y_test)  # convert true RUL values to float tensor

# create a DataLoader for the test data (used for batch processing)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)  # combine features and labels into a dataset
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)  # DataLoader for test data (no shuffling)

# define the best hyperparameters found from optuna
best_hidden_size = 32  # best hidden layer size 
best_num_layers = 2  # best number of RNN layers
best_learning_rate = 0.0011843203255513526  # best learning rate 
best_batch_size = 32  # best batch size

# instantiate the model with the best hyperparameters
best_model = RNNModel(input_size=X_test.shape[2], hidden_size=best_hidden_size, 
                      num_layers=best_num_layers, output_size=1)

# load the best model weights saved after training with optuna
best_model.load_state_dict(torch.load('best_rnn_model_optuna.pth', map_location='cpu'))  # use 'cuda' for GPU

# define a function to get predictions from the model
def get_predictions(model, test_loader):
    """
    This function performs inference on the test set and returns the model's predictions.
    
    Args:
        model: the trained RNN model
        test_loader: DataLoader containing the test data

    Returns:
        all_predictions: a numpy array of predictions for the test set
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # choose device (GPU if available)
    model.to(device)  # move the model to the selected device (GPU/CPU)
    model.eval()  # set the model to evaluation mode (disables dropout layers, etc.)
    
    all_predictions = []  # this will hold the predictions from all batches
    
    with torch.no_grad():  # turn off gradient computation since we are just doing inference
        for batch_X, _ in test_loader:  # loop through batches in the test_loader
            batch_X = batch_X.to(device)  # move input batch to the same device as the model
            predictions = model(batch_X)  # forward pass to get predictions
            all_predictions.append(predictions.cpu().numpy())  # store predictions (move them to CPU and convert to numpy)

    # concatenate all predictions into a single array (flatten list of arrays)
    all_predictions = np.concatenate(all_predictions, axis=0)
    
    return all_predictions  # return the final array of predictions

# get the model's predictions on the test set
predictions = get_predictions(best_model, test_loader)

# print or inspect the predictions
print(predictions[:10])  # print the first 10 predictions


[[114.43959 ]
 [120.48697 ]
 [ 61.45211 ]
 [ 88.70154 ]
 [103.07733 ]
 [115.62491 ]
 [103.4091  ]
 [ 98.803474]
 [115.1178  ]
 [ 93.94371 ]]


  best_model.load_state_dict(torch.load('best_rnn_model_optuna.pth', map_location='cpu'))  # use 'cuda' for GPU


In [7]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

def evaluate_rul_metrics(true, predicted):
    
    true = np.array(true)
    predicted = np.array(predicted)
    
    mae = float(mean_absolute_error(true, predicted))
    mse = float(mean_squared_error(true, predicted))
    rmse = float(np.sqrt(mse))
    mape = float(np.mean(np.abs((true - predicted) / true)) * 100)
    
    return {
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,
        "MAPE": mape
    }
    
metrics = evaluate_rul_metrics(true_rul, predictions)
metrics

{'MAE': 13.405092282295227,
 'MSE': 326.84914252787433,
 'RMSE': 18.07896962019336,
 'MAPE': 147.84653424608157}