# Solar Internship Data Science

## Team: SIIL-TEAM-0002
## Problem Statement: DS-003
## Title: Predictive Maintenance for Manufacturing Equipment
### **Members:**
1. Shashank Jangde
2. Vidit Singh
3. Sushant Pandey
4. Yash Mishra

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# importing necessary librabries
import numpy as np
import pandas as pd
import copy
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score
from keras import models, layers
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

2024-11-12 21:39:41.774807: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-12 21:39:41.783120: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731427781.792733   10227 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731427781.795521   10227 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-12 21:39:41.805388: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

### Defining some reused functions

In [5]:
def process_data():
    """
    this function will return 3 list of pandas dataframe which would contain training, testing and RUL dataset names
    """
    dir = "./CMAPSSData/"
    train_data = ['train_FD001.txt', 'train_FD002.txt', 'train_FD003.txt', 'train_FD004.txt']
    test_data = ['test_FD001.txt', 'test_FD002.txt', 'test_FD003.txt', 'test_FD004.txt']
    rul_data = ['RUL_FD001.txt', 'RUL_FD002.txt', 'RUL_FD003.txt', 'RUL_FD004.txt']
    columns = ["Unit Number", "Cycles(Time)"] + [f"operation_setting{i}" for i in range(1, 4)] + [f"sensor{i}" for i in range(1, 22)]
    
    trainDatasets = []
    testDatasets = []
    expectedRulDatasets = []


    for i in range(4):
        # Import Files
        tempTrain = pd.read_csv( dir + train_data[i], sep = " ", header = None)
        tempTest = pd.read_csv( dir + test_data[i], sep = " ", header = None)
        tempRul = pd.read_csv( dir + rul_data[i], sep = " ", header = None)

        # Cleaning Files, we remove 26, 27 and 1 as they have null values
        tempTrain.drop(inplace = True, columns = [26, 27])
        tempTest.drop(inplace = True, columns = [26, 27])
        tempRul.drop(inplace = True, columns = [1])

        # Adding Columns Names
        tempTrain.columns = tempTest.columns = columns
        tempRul.columns = ["Expected RUL"]

        # Appending to Lists
        trainDatasets.append(tempTrain)
        testDatasets.append(tempTest)
        expectedRulDatasets.append(tempRul)
    
    return trainDatasets, testDatasets, expectedRulDatasets

In [6]:
def checkBasicStructure(trainDatasets, testDatasets, expectedRulDatasets):
    print("\nDatasets Example\n")
    for i in range(4):
        print("For FD00" + str(i+1) )
        print("\nTrain Dataset")
        display(trainDatasets[i].head())
    
        print("\nTest Dataset")
        display(testDatasets[i].head())
    
        print("\nExpected RUL")
        display(expectedRulDatasets[i].head())
        print("\n")

In [7]:
def checkForNA(trainDatasets, testDatasets, expectedRulDatasets):
    print("\nDatasets Example\n")
    for i in range(4):
        print("For FD00" + str(i+1))
        print("\nTrain Dataset")
        display(trainDatasets[i].isna().sum())
    
        print("\nTest Dataset")
        display(testDatasets[i].isna().sum())
    
        print("\nExpected RUL")
        display(expectedRulDatasets[i].isna().sum())
        print("\n")

In [8]:
def findRul(Datasets):

    rulDatasets = []
    
    for i in range(4):
        # Finding Maximum RUL
        temp = copy.deepcopy(Datasets[i].iloc[:, 0:2])
        tempRul = temp.groupby(Datasets[i].columns[0])[Datasets[i].columns[1]].max()

        # Create Max RUL Dataframe
        tempRul = pd.DataFrame(tempRul)
        tempRul.columns = ["Max"]
        tempRul = pd.merge(temp, tempRul, left_on = Datasets[i].columns[0], right_on = Datasets[i].columns[0])
        tempRul["RUL"] = tempRul["Max"] - tempRul[Datasets[i].columns[1]];
        tempRul.drop(inplace = True, columns = ["Max", Datasets[i].columns[1]])
        
        # Appending to List
        rulDatasets.append(tempRul)

    return rulDatasets

In [9]:
def checkRulBasicStructure(rulDatasets):
    print("\nRUL Datasets Example\n")
    for i in range(4):
        print("For FD00" + str(i+1) )
        print("\nExpected RUL")
        display(rulDatasets[i].head())
        print("\n")

In [10]:
def processTrainingData(dataset, rulDataset = None, window = 1, shift = 1):
    partitions = int(np.floor((len(dataset) - window) / shift)) + 1
    features = dataset.shape[1]

    processedData = np.repeat(np.nan, repeats = partitions * window * features).reshape(partitions, window, features)

    if rulDataset is None:
        for i in range(partitions):
            processedData[i, :, :] = dataset[(0 + shift * i): (0 + shift * i + window), :]
            
        return processedData

    processedRul = np.repeat(np.nan, repeats = partitions)
    for i in range(partitions):
        processedData[i, :, :] = dataset[(0 + shift * i): (0 + shift * i + window), :]
        processedRul[i] = rulDataset[(shift * i + (window - 1))]

    return processedData, processedRul

In [11]:
def processTestingData(dataset, window, shift, testWindow = 1):
    maxPartitions = int(np.floor((len(dataset) - window) / shift)) + 1

    if maxPartitions < testWindow:
        newLen = (maxPartitions - 1) * shift + window

        processedData = processTrainingData(dataset[-newLen:, :] , window = window, shift = shift)

        return processedData, maxPartitions

    newLen = (testWindow - 1) * shift + window

    processedData = processTrainingData(dataset[-newLen:, :] , window = window, shift = shift)

    return processedData, testWindow

In [12]:
def createModel(shape, name = "Combined"):
    model = Sequential(name = name)
    model.add(LSTM(128, return_sequences = True, input_shape = (shape[1], shape[2])))
    model.add(LSTM(64 ,return_sequences = True))
    model.add(LSTM(32))
    model.add(Dense(1))
    model.compile(loss = 'mean_squared_error', optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001))

    return model

## Loading and Checking Dataset

In [14]:
trainDatasets, testDatasets, expectedRulDatasets = process_data()

### Check Basic Structure

In [16]:
checkBasicStructure(trainDatasets, testDatasets, expectedRulDatasets)


Datasets Example

For FD001

Train Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044



Test Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,...,521.72,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,...,522.16,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,...,521.97,2388.03,8130.1,8.4441,0.03,393,2388,100.0,39.08,23.4166
3,1,4,0.0042,0.0,100.0,518.67,642.44,1584.12,1406.42,14.62,...,521.38,2388.05,8132.9,8.3917,0.03,391,2388,100.0,39.0,23.3737
4,1,5,0.0014,0.0,100.0,518.67,642.51,1587.19,1401.92,14.62,...,522.15,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.413



Expected RUL


Unnamed: 0,Expected RUL
0,112
1,98
2,69
3,82
4,91




For FD002

Train Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,183.06,2387.72,8048.56,9.3461,0.02,334,2223,100.0,14.73,8.8071
1,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,130.42,2387.66,8072.3,9.3774,0.02,330,2212,100.0,10.41,6.2665
2,1,3,24.9988,0.6218,60.0,462.54,537.31,1256.76,1047.45,7.05,...,164.22,2028.03,7864.87,10.8941,0.02,309,1915,84.93,14.08,8.6723
3,1,4,42.0077,0.8416,100.0,445.0,549.51,1354.03,1126.38,3.91,...,130.72,2387.61,8068.66,9.3528,0.02,329,2212,100.0,10.59,6.4701
4,1,5,25.0005,0.6203,60.0,462.54,537.07,1257.71,1047.93,7.05,...,164.31,2028.0,7861.23,10.8963,0.02,309,1915,84.93,14.13,8.5286



Test Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,9.9987,0.2502,100.0,489.05,605.03,1497.17,1304.99,10.52,...,371.69,2388.18,8114.1,8.6476,0.03,369,2319,100.0,28.42,17.1551
1,1,2,20.0026,0.7,100.0,491.19,607.82,1481.2,1246.11,9.35,...,315.32,2388.12,8053.06,9.2405,0.02,364,2324,100.0,24.29,14.8039
2,1,3,35.0045,0.84,100.0,449.44,556.0,1359.08,1128.36,5.48,...,183.04,2387.75,8053.04,9.3472,0.02,333,2223,100.0,14.98,8.9125
3,1,4,42.0066,0.841,100.0,445.0,550.17,1349.69,1127.89,3.91,...,130.4,2387.72,8066.9,9.3961,0.02,332,2212,100.0,10.35,6.4181
4,1,5,24.9985,0.6213,60.0,462.54,536.72,1253.18,1050.69,7.05,...,164.56,2028.05,7865.66,10.8682,0.02,305,1915,84.93,14.31,8.574



Expected RUL


Unnamed: 0,Expected RUL
0,18
1,79
2,106
3,110
4,15




For FD003

Train Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,522.31,2388.01,8145.32,8.4246,0.03,391,2388,100.0,39.11,23.3537
1,1,2,0.0008,-0.0003,100.0,518.67,642.5,1584.69,1396.89,14.62,...,522.42,2388.03,8152.85,8.4403,0.03,392,2388,100.0,38.99,23.4491
2,1,3,-0.0014,-0.0002,100.0,518.67,642.18,1582.35,1405.61,14.62,...,522.03,2388.0,8150.17,8.3901,0.03,391,2388,100.0,38.85,23.3669
3,1,4,-0.002,0.0001,100.0,518.67,642.92,1585.61,1392.27,14.62,...,522.49,2388.08,8146.56,8.3878,0.03,392,2388,100.0,38.96,23.2951
4,1,5,0.0016,0.0,100.0,518.67,641.68,1588.63,1397.65,14.62,...,522.58,2388.03,8147.8,8.3869,0.03,392,2388,100.0,39.14,23.4583



Test Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,-0.0017,-0.0004,100.0,518.67,641.94,1581.93,1396.93,14.62,...,521.89,2387.94,8133.48,8.376,0.03,391,2388,100.0,39.07,23.4468
1,1,2,0.0006,-0.0002,100.0,518.67,642.02,1584.86,1398.9,14.62,...,521.85,2388.01,8137.44,8.4062,0.03,391,2388,100.0,39.04,23.4807
2,1,3,0.0014,-0.0003,100.0,518.67,641.68,1581.78,1391.92,14.62,...,522.1,2387.94,8138.25,8.3553,0.03,391,2388,100.0,39.1,23.4244
3,1,4,0.0027,0.0001,100.0,518.67,642.2,1584.53,1395.34,14.62,...,522.45,2387.96,8137.07,8.3709,0.03,392,2388,100.0,38.97,23.4782
4,1,5,-0.0001,0.0001,100.0,518.67,642.46,1589.03,1395.86,14.62,...,521.91,2387.97,8134.2,8.4146,0.03,391,2388,100.0,39.09,23.395



Expected RUL


Unnamed: 0,Expected RUL
0,44
1,51
2,27
3,120
4,101




For FD004

Train Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,129.78,2387.99,8074.83,9.3335,0.02,330,2212,100.0,10.62,6.367
1,1,2,20.002,0.7002,100.0,491.19,606.07,1477.61,1237.5,9.35,...,312.59,2387.73,8046.13,9.1913,0.02,361,2324,100.0,24.37,14.6552
2,1,3,42.0038,0.8409,100.0,445.0,548.95,1343.12,1117.05,3.91,...,129.62,2387.97,8066.62,9.4007,0.02,329,2212,100.0,10.48,6.4213
3,1,4,42.0,0.84,100.0,445.0,548.7,1341.24,1118.03,3.91,...,129.8,2388.02,8076.05,9.3369,0.02,328,2212,100.0,10.54,6.4176
4,1,5,25.0063,0.6207,60.0,462.54,536.1,1255.23,1033.59,7.05,...,164.11,2028.08,7865.8,10.8366,0.02,305,1915,84.93,14.03,8.6754



Test Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,20.0072,0.7,100.0,491.19,606.67,1481.04,1227.81,9.35,...,313.03,2387.78,8048.98,9.2229,0.02,362,2324,100.0,24.31,14.7007
1,1,2,24.9984,0.62,60.0,462.54,536.22,1256.17,1031.48,7.05,...,163.61,2028.09,7863.46,10.8632,0.02,306,1915,84.93,14.36,8.5748
2,1,3,42.0,0.842,100.0,445.0,549.23,1340.13,1105.88,3.91,...,129.98,2387.95,8071.13,9.396,0.02,328,2212,100.0,10.39,6.4365
3,1,4,42.0035,0.8402,100.0,445.0,549.19,1339.7,1107.26,3.91,...,129.48,2387.9,8078.89,9.3594,0.02,328,2212,100.0,10.56,6.2367
4,1,5,35.0079,0.84,100.0,449.44,555.1,1353.04,1117.8,5.48,...,181.82,2387.87,8057.83,9.303,0.02,333,2223,100.0,14.85,8.9326



Expected RUL


Unnamed: 0,Expected RUL
0,22
1,39
2,107
3,75
4,149






### Check For NAs

In [18]:
checkForNA(trainDatasets, testDatasets, expectedRulDatasets)


Datasets Example

For FD001

Train Dataset


Unit Number           0
Cycles(Time)          0
operation_setting1    0
operation_setting2    0
operation_setting3    0
sensor1               0
sensor2               0
sensor3               0
sensor4               0
sensor5               0
sensor6               0
sensor7               0
sensor8               0
sensor9               0
sensor10              0
sensor11              0
sensor12              0
sensor13              0
sensor14              0
sensor15              0
sensor16              0
sensor17              0
sensor18              0
sensor19              0
sensor20              0
sensor21              0
dtype: int64


Test Dataset


Unit Number           0
Cycles(Time)          0
operation_setting1    0
operation_setting2    0
operation_setting3    0
sensor1               0
sensor2               0
sensor3               0
sensor4               0
sensor5               0
sensor6               0
sensor7               0
sensor8               0
sensor9               0
sensor10              0
sensor11              0
sensor12              0
sensor13              0
sensor14              0
sensor15              0
sensor16              0
sensor17              0
sensor18              0
sensor19              0
sensor20              0
sensor21              0
dtype: int64


Expected RUL


Expected RUL    0
dtype: int64



For FD002

Train Dataset


Unit Number           0
Cycles(Time)          0
operation_setting1    0
operation_setting2    0
operation_setting3    0
sensor1               0
sensor2               0
sensor3               0
sensor4               0
sensor5               0
sensor6               0
sensor7               0
sensor8               0
sensor9               0
sensor10              0
sensor11              0
sensor12              0
sensor13              0
sensor14              0
sensor15              0
sensor16              0
sensor17              0
sensor18              0
sensor19              0
sensor20              0
sensor21              0
dtype: int64


Test Dataset


Unit Number           0
Cycles(Time)          0
operation_setting1    0
operation_setting2    0
operation_setting3    0
sensor1               0
sensor2               0
sensor3               0
sensor4               0
sensor5               0
sensor6               0
sensor7               0
sensor8               0
sensor9               0
sensor10              0
sensor11              0
sensor12              0
sensor13              0
sensor14              0
sensor15              0
sensor16              0
sensor17              0
sensor18              0
sensor19              0
sensor20              0
sensor21              0
dtype: int64


Expected RUL


Expected RUL    0
dtype: int64



For FD003

Train Dataset


Unit Number           0
Cycles(Time)          0
operation_setting1    0
operation_setting2    0
operation_setting3    0
sensor1               0
sensor2               0
sensor3               0
sensor4               0
sensor5               0
sensor6               0
sensor7               0
sensor8               0
sensor9               0
sensor10              0
sensor11              0
sensor12              0
sensor13              0
sensor14              0
sensor15              0
sensor16              0
sensor17              0
sensor18              0
sensor19              0
sensor20              0
sensor21              0
dtype: int64


Test Dataset


Unit Number           0
Cycles(Time)          0
operation_setting1    0
operation_setting2    0
operation_setting3    0
sensor1               0
sensor2               0
sensor3               0
sensor4               0
sensor5               0
sensor6               0
sensor7               0
sensor8               0
sensor9               0
sensor10              0
sensor11              0
sensor12              0
sensor13              0
sensor14              0
sensor15              0
sensor16              0
sensor17              0
sensor18              0
sensor19              0
sensor20              0
sensor21              0
dtype: int64


Expected RUL


Expected RUL    0
dtype: int64



For FD004

Train Dataset


Unit Number           0
Cycles(Time)          0
operation_setting1    0
operation_setting2    0
operation_setting3    0
sensor1               0
sensor2               0
sensor3               0
sensor4               0
sensor5               0
sensor6               0
sensor7               0
sensor8               0
sensor9               0
sensor10              0
sensor11              0
sensor12              0
sensor13              0
sensor14              0
sensor15              0
sensor16              0
sensor17              0
sensor18              0
sensor19              0
sensor20              0
sensor21              0
dtype: int64


Test Dataset


Unit Number           0
Cycles(Time)          0
operation_setting1    0
operation_setting2    0
operation_setting3    0
sensor1               0
sensor2               0
sensor3               0
sensor4               0
sensor5               0
sensor6               0
sensor7               0
sensor8               0
sensor9               0
sensor10              0
sensor11              0
sensor12              0
sensor13              0
sensor14              0
sensor15              0
sensor16              0
sensor17              0
sensor18              0
sensor19              0
sensor20              0
sensor21              0
dtype: int64


Expected RUL


Expected RUL    0
dtype: int64





## Preprocessing Datasets

In [20]:
trainDatasetsCopy = copy.deepcopy(trainDatasets)
testDatasetsCopy = copy.deepcopy(testDatasets)
expectedRulDatasetsCopy = copy.deepcopy(expectedRulDatasets)

### Converting to Standardize Distribution

In [22]:
scaler = []

for i in range(4):
    sc = StandardScaler()
    scaler.append(sc)

In [23]:
for i in range(4):
    trainDatasetsCopy[i].iloc[:, 2:] = scaler[i].fit_transform(trainDatasetsCopy[i].iloc[:, 2:])
    testDatasetsCopy[i].iloc[:, 2: ] = scaler[i].transform(testDatasetsCopy[i].iloc[:, 2:])

for i in range(4):
    pickle.dump(scaler[i], open("pcascaler/" + f"scaler{i+1}.pkl", 'wb'))

  1.8010591 ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  trainDatasetsCopy[i].iloc[:, 2:] = scaler[i].fit_transform(trainDatasetsCopy[i].iloc[:, 2:])
  1.8010591 ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  testDatasetsCopy[i].iloc[:, 2: ] = scaler[i].transform(testDatasetsCopy[i].iloc[:, 2:])
 -0.37145692]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  trainDatasetsCopy[i].iloc[:, 2:] = scaler[i].fit_transform(trainDatasetsCopy[i].iloc[:, 2:])
 -0.03995385]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  trainDatasetsCopy[i].iloc[:, 2:] = scaler[i].fit_transform(trainDatasetsCopy[i].iloc[:, 2:])
 -0.62367048]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  testDatasetsCopy[i].iloc[:, 2: ] = scaler[i].transform(testDatasetsCopy[i].iloc[:, 2:])
 -0.11564541]' has dtype 

In [24]:
checkBasicStructure(trainDatasetsCopy, testDatasetsCopy, expectedRulDatasetsCopy)


Datasets Example

For FD001

Train Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,-0.31598,-1.372953,0.0,0.0,-1.721725,-0.134255,-0.925936,-1.776357e-15,...,0.334262,-1.05889,-0.269071,-0.603816,-1.387779e-17,-0.78171,0,0.0,1.348493,1.194427
1,1,2,0.872722,-1.03172,0.0,0.0,-1.06178,0.211528,-0.643726,-1.776357e-15,...,1.174899,-0.363646,-0.642845,-0.275852,-1.387779e-17,-0.78171,0,0.0,1.016528,1.236922
2,1,3,-1.961874,1.015677,0.0,0.0,-0.661813,-0.413166,-0.525953,-1.776357e-15,...,1.364721,-0.919841,-0.551629,-0.649144,-1.387779e-17,-2.073094,0,0.0,0.739891,0.503423
3,1,4,0.32409,-0.008022,0.0,0.0,-0.661813,-1.261314,-0.784831,-1.776357e-15,...,1.961302,-0.224597,-0.520176,-1.971665,-1.387779e-17,-0.78171,0,0.0,0.352598,0.777792
4,1,5,-0.864611,-0.690488,0.0,0.0,-0.621816,-1.251528,-0.301518,-1.776357e-15,...,1.052871,-0.780793,-0.521748,-0.339845,-1.387779e-17,-0.136018,0,0.0,0.463253,1.059552



Test Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,1.055599,1.015677,0.0,0.0,0.678077,-0.85355,-1.19148,-1.776357e-15,...,0.415614,-0.919841,-0.954235,-0.985107,-1.387779e-17,-0.78171,0,0.0,0.241943,0.774097
1,1,2,-1.230366,-1.03172,0.0,0.0,-1.941707,-0.338137,-1.501467,-1.776357e-15,...,1.012195,-0.502695,-0.216648,-1.649034,-1.387779e-17,-0.136018,0,0.0,1.127183,0.941305
2,1,3,0.141213,0.333211,0.0,0.0,-0.441831,-0.584426,-0.843717,-1.776357e-15,...,0.754581,-0.919841,-0.715712,0.052112,-1.387779e-17,-0.136018,0,0.0,1.459148,1.172256
3,1,4,1.924266,-0.008022,0.0,0.0,-0.481827,-1.044384,-0.279297,-1.776357e-15,...,-0.045381,-0.641744,-0.568929,-1.345067,-1.387779e-17,-1.427402,0,0.0,1.016528,0.775945
4,1,5,0.644125,-0.008022,0.0,0.0,-0.341839,-0.54365,-0.779276,-1.776357e-15,...,0.998637,-0.919841,-0.745069,-1.041101,-1.387779e-17,-2.073094,0,0.0,0.9612,1.138999



Expected RUL


Unnamed: 0,Expected RUL
0,112
1,98
2,69
3,82
4,91




For FD002

Train Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,0.745895,0.864298,0.418187,-0.889378,-0.653071,-0.579176,-0.572622,-0.706177,...,-0.603008,0.415116,-0.212615,0.021948,-0.705933,-0.515579,-0.039954,0.418187,-0.613958,-0.619148
1,1,2,1.220553,0.866878,0.418187,-1.057627,-0.798422,-0.630051,-0.668741,-1.140622,...,-0.985404,0.414648,0.067215,0.063719,-0.705933,-0.659701,-0.115645,0.418187,-1.051681,-1.04819
2,1,3,0.067836,0.160457,-2.391275,-0.392968,-1.136055,-1.540521,-1.326301,-0.271732,...,-0.739869,-2.39349,-2.377822,2.087798,-0.705933,-1.416342,-2.159318,-2.391275,-0.679819,-0.641912
3,1,4,1.221198,0.869459,0.418187,-1.057627,-0.808881,-0.622406,-0.663705,-1.140622,...,-0.983225,0.414257,0.024309,0.03089,-0.705933,-0.695731,-0.115645,0.418187,-1.033443,-1.013808
4,1,5,0.067951,0.155618,-2.391275,-0.392968,-1.142491,-1.531554,-1.322271,-0.271732,...,-0.739215,-2.393724,-2.420728,2.090734,-0.705933,-1.416342,-2.159318,-2.391275,-0.674753,-0.66618



Test Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,-0.94931,-1.038202,0.418187,0.6116,0.680028,0.728668,0.835679,0.688475,...,0.76727,0.418708,0.559923,-0.91022,1.416565,0.745489,0.620627,0.418187,0.773181,0.790616
1,1,2,-0.270953,0.412704,0.418187,0.692693,0.754849,0.57793,0.341397,0.364716,...,0.357777,0.41824,-0.159573,-0.118978,-0.705933,0.565336,0.655032,0.418187,0.354709,0.393558
2,1,3,0.746315,0.864298,0.418187,-0.889378,-0.634835,-0.57474,-0.647083,-0.706177,...,-0.603153,0.415351,-0.159808,0.023416,-0.705933,-0.551609,-0.039954,0.418187,-0.588627,-0.601349
3,1,4,1.221123,0.867523,0.418187,-1.057627,-0.791181,-0.66337,-0.651028,-1.140622,...,-0.98555,0.415116,0.003564,0.088675,-0.705933,-0.58764,-0.115645,0.418187,-1.057761,-1.022589
4,1,5,0.067816,0.158844,-2.391275,-0.392968,-1.151877,-1.574312,-1.299102,-0.271732,...,-0.737399,-2.393334,-2.36851,2.053234,-0.705933,-1.560464,-2.159318,-2.391275,-0.656514,-0.658513



Expected RUL


Unnamed: 0,Expected RUL
0,18
1,79
2,106
3,110
4,15




For FD003

Train Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,-0.217122,1.343135,0.0,0.0,-0.187102,-0.712038,-0.780848,-3.552714e-15,...,-0.227593,-0.389857,0.067687,0.469741,-1.734723e-17,-0.889364,0,0.0,0.488019,-0.263512
1,1,2,0.375538,-1.037519,0.0,0.0,0.080574,-0.497656,-0.775732,-3.552714e-15,...,-0.193802,-0.263369,0.523946,0.729201,-1.734723e-17,-0.321641,0,0.0,0.005819,0.375767
2,1,3,-0.627426,-0.697425,0.0,0.0,-0.531256,-0.841254,0.116524,-3.552714e-15,...,-0.313608,-0.453101,0.361559,-0.100409,-1.734723e-17,-0.889364,0,0.0,-0.556747,-0.175058
3,1,4,-0.900961,0.322855,0.0,0.0,0.883602,-0.362566,-1.248464,-3.552714e-15,...,-0.172298,0.052852,0.142821,-0.138419,-1.734723e-17,-0.321641,0,0.0,-0.11473,-0.656192
4,1,5,0.740252,-0.017239,0.0,0.0,-1.487242,0.080881,-0.697966,-3.552714e-15,...,-0.14465,-0.263369,0.217955,-0.153292,-1.734723e-17,-0.321641,0,0.0,0.608569,0.437416



Test Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,-0.764193,-1.377612,0.0,0.0,-0.990129,-0.902925,-0.771639,-3.552714e-15,...,-0.356616,-0.832566,-0.649725,-0.333427,-1.734723e-17,-0.889364,0,0.0,0.327286,0.360354
1,1,2,0.284359,-0.697425,0.0,0.0,-0.837172,-0.472693,-0.570063,-3.552714e-15,...,-0.368904,-0.389857,-0.40978,0.165661,-1.734723e-17,-0.889364,0,0.0,0.206736,0.587519
2,1,3,0.649073,-1.037519,0.0,0.0,-1.487242,-0.924951,-1.284277,-3.552714e-15,...,-0.292104,-0.832566,-0.3607,-0.675517,-1.734723e-17,-0.889364,0,0.0,0.447836,0.210251
3,1,4,1.241733,0.322855,0.0,0.0,-0.493017,-0.52115,-0.934332,-3.552714e-15,...,-0.184586,-0.706078,-0.432199,-0.41771,-1.734723e-17,-0.321641,0,0.0,-0.074547,0.570767
4,1,5,-0.034765,0.322855,0.0,0.0,0.004095,0.139616,-0.881124,-3.552714e-15,...,-0.350472,-0.642833,-0.606098,0.30448,-1.734723e-17,-0.889364,0,0.0,0.407652,0.013241



Expected RUL


Unnamed: 0,Expected RUL
0,44
1,51
2,27
3,120
4,101




For FD004

Train Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,1.218156,0.864668,0.418783,-1.05469,-0.796416,-0.701412,-0.745729,-1.137677,...,-0.989007,0.417814,0.081921,0.063831,-0.694278,-0.638665,-0.114203,0.418783,-1.030999,-1.031756
1,1,2,-0.270478,0.414718,0.418783,0.692508,0.713666,0.562449,0.298212,0.363906,...,0.331131,0.415786,-0.253086,-0.125677,-0.694278,0.47612,0.655708,0.418783,0.352814,0.358264
2,1,3,1.218082,0.867565,0.418783,-1.05469,-0.815965,-0.704332,-0.711202,-1.137677,...,-0.990162,0.417658,-0.013912,0.153387,-0.694278,-0.674626,-0.114203,0.418783,-1.045089,-1.022649
3,1,4,1.217824,0.864668,0.418783,-1.05469,-0.82266,-0.72204,-0.70299,-1.137677,...,-0.988862,0.418048,0.096162,0.068362,-0.694278,-0.710586,-0.114203,0.418783,-1.039051,-1.023269
4,1,5,0.068094,0.158844,-2.387873,-0.391216,-1.160079,-1.532181,-1.410627,-0.270955,...,-0.741097,-2.389666,-2.358027,2.066982,-0.694278,-1.537685,-2.155843,-2.387873,-0.687814,-0.644612



Test Dataset


Unnamed: 0,Unit Number,Cycles(Time),operation_setting1,operation_setting2,operation_setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
0,1,1,-0.270126,0.414074,0.418783,0.692508,0.729733,0.594757,0.217006,0.363906,...,0.334308,0.416176,-0.219818,-0.083564,-0.694278,0.512081,0.655708,0.418783,0.346775,0.365895
1,1,2,0.06756,0.156591,-2.387873,-0.391216,-1.156865,-1.523327,-1.42831,-0.270955,...,-0.744708,-2.389588,-2.385341,2.102432,-0.694278,-1.501724,-2.155843,-2.387873,-0.654602,-0.661484
2,1,3,1.217824,0.871105,0.418783,-1.05469,-0.808467,-0.732495,-0.804811,-1.137677,...,-0.987563,0.417502,0.038732,0.147123,-0.694278,-0.710586,-0.114203,0.418783,-1.054147,-1.0201
3,1,4,1.218061,0.865312,0.418783,-1.05469,-0.809538,-0.736545,-0.793246,-1.137677,...,-0.991173,0.417112,0.129313,0.098347,-0.694278,-0.710586,-0.114203,0.418783,-1.037038,-1.053608
4,1,5,0.744765,0.864668,0.418783,-0.886741,-0.651273,-0.610894,-0.704917,-0.704316,...,-0.613207,0.416878,-0.116515,0.023184,-0.694278,-0.530782,-0.038587,0.418783,-0.605288,-0.601477



Expected RUL


Unnamed: 0,Expected RUL
0,22
1,39
2,107
3,75
4,149






### Doing Principal Component Analysis

In [26]:
pca = PCA(n_components = 10)

In [44]:
newColumns = ['PCA1', 'PCA2', 'PCA3', 'PCA4', 'PCA5', 'PCA6', 'PCA7', 'PCA8', 'PCA9', 'PCA10']

for i in range(4):
    # Finding Principal Components
    temp1 = pca.fit_transform(trainDatasetsCopy[i].iloc[:, 2:])
    
    temp2 = pca.transform(testDatasetsCopy[i].iloc[:, 2:])

    # Converting to Dataframes
    temp1 = pd.DataFrame(temp1, columns = newColumns)
    temp2 = pd.DataFrame(temp2, columns = newColumns)

    # Dropping Excess Data
    trainDatasetsCopy[i].drop(inplace = True, columns = trainDatasetsCopy[i].columns[2:])
    testDatasetsCopy[i].drop(inplace = True, columns = testDatasetsCopy[i].columns[2:])

    # Merging New Data
    trainDatasetsCopy[i] = pd.merge(trainDatasetsCopy[i], temp1, left_index=True, right_index=True)
    testDatasetsCopy[i] = pd.merge(testDatasetsCopy[i], temp2, left_index=True, right_index=True)
pickle.dump(pca, open("pcascaler/" + f"pca.pkl", 'wb'))

In [None]:
checkBasicStructure(trainDatasetsCopy, testDatasetsCopy, expectedRulDatasetsCopy)

## Finding Training Results

In [None]:
trainRulDatasets = findRul(trainDatasets)

In [None]:
checkRulBasicStructure(trainRulDatasets)

## Data Processing

### Constant Variables

In [None]:
defaultWindow = 10
defaultShift = 1
defaultTestWindow = 10

processedTrainDatasets = []
processedTestDatasets = []
processedTrainRulDatasets = []
rulDatasets = []
testWindowList = []

predictedRulDatasets = []
rmseList = []
r2_scoreList = []

### Normalizing Data

In [None]:
scaler_model = []

for i in range(4):
    sc = StandardScaler()
    scaler_model.append(sc)

pickle.dump(scaler[i], open("pcascaler/" + f"complete_scaler.pkl", 'wb'))

In [None]:
for i in range(4):
    trainDatasetsCopy[i].iloc[:, 1:] = scaler_model[i].fit_transform(trainDatasetsCopy[i].iloc[:, 1:])
    testDatasetsCopy[i].iloc[:, 1:] = scaler_model[i].transform(testDatasetsCopy[i].iloc[:, 1:])

In [None]:
checkBasicStructure(trainDatasetsCopy, testDatasetsCopy, expectedRulDatasetsCopy)

In [None]:
trainDatasetsUnique = [len(x['Unit Number'].unique()) for x in trainDatasetsCopy]
testDatasetsUnique = [len(x['Unit Number'].unique()) for x in testDatasetsCopy]

In [None]:
display(trainDatasetsUnique)
display(testDatasetsUnique)

### Heap Map

In [None]:
for i in range(4):
    print("For FD00" + str(i+1) + "\n")
    sns.heatmap(trainDatasetsCopy[i].corr(), annot=True, cmap="Reds", linewidths=0.2)
    fig = plt.gcf()
    fig.set_size_inches(12, 12)
    plt.show()
    print("\n")

### Processing Training Data

In [None]:
for i in range(4):
    temp1 = []
    temp2 = []
    for j in range(1, trainDatasetsUnique[i]+1):
        tempTrainDataset = trainDatasetsCopy[i][trainDatasetsCopy[i]['Unit Number'] == j].drop(columns = ['Unit Number']).values

        window_size = min(len(tempTrainDataset), defaultWindow)

        tempTrainRulDataset = trainRulDatasets[i][trainRulDatasets[i]['Unit Number'] == j].drop(columns = ['Unit Number']).values

        tempTrainDataset, tempTrainRulDataset = processTrainingData(tempTrainDataset, tempTrainRulDataset, window = window_size, shift = defaultShift)

        temp1.append(tempTrainDataset)
        temp2.append(tempTrainRulDataset)
        
    processedTrainDatasets.append(np.concatenate(temp1))
    processedTrainRulDatasets.append(np.concatenate(temp2))

### Processing Test Data

In [None]:
for i in range(4):
    temp1 = []
    temp2 = []
    for j in range(1, testDatasetsUnique[i]+1):
        tempTestDataset = testDatasetsCopy[i][testDatasetsCopy[i]['Unit Number'] == j].drop(columns = ['Unit Number']).values

        window_size = min(len(tempTestDataset), defaultWindow)

        tempTestDataset, testWindow = processTestingData(tempTestDataset, window = window_size, shift = defaultShift, testWindow = defaultTestWindow)

        temp1.append(tempTestDataset)
        temp2.append(testWindow)
        
    processedTestDatasets.append(np.concatenate(temp1))
    testWindowList.append(temp2)
    rulDatasets.append(expectedRulDatasetsCopy[i]["Expected RUL"].values)

### Showing Processed Data Shapes

In [None]:
for i in range(4):
    print("For FD00" + str(i+1) + "\n")
    print("Train Dataset Shape: ", processedTrainDatasets[i].shape)
    print("Train RUL Dataset Shape: ", processedTrainRulDatasets[i].shape)
    print("Test Dataset Shape: ", processedTestDatasets[i].shape)
    print("Expected RUL Shape: ", rulDatasets[i].shape)
    print("\n")

## LSTM Individual Model Training

In [None]:
trainedModel = []
histories = []

In [None]:
for i in range(4):
    print("For FD00" + str(i+1) + "\n")
    name = "FD00" + str(i+1)
    model = createModel(processedTrainDatasets[i].shape, name = name)
    display(model.summary())
    history = model.fit(processedTrainDatasets[i], processedTrainRulDatasets[i], epochs = 100, validation_split = 0.2, batch_size = 128, verbose = 1)
    print("\n")
    trainedModel.append(model)
    histories.append(history)

## Individual Model Testing

### Predicting Values

In [None]:
for i in range(4):
    predictedRul = trainedModel[i].predict(processedTestDatasets[i]).reshape(-1)
    predictedRul = np.split(predictedRul, np.cumsum(testWindowList[i])[:-1])
    predictedRul = [np.average(ruls, weights = np.repeat(1/testWindow, testWindow)) for ruls, testWindow in zip(predictedRul, testWindowList[i])]

    predictedRulDatasets.append(predictedRul)

### Finding Root Mean Square Error

In [None]:
for i in range(4):
    rmse = np.sqrt(mse(rulDatasets[i], predictedRulDatasets[i]))
    print("For FD00" + str(i+1))
    print("Root Mean Square Error:", rmse)
    print("\n")
    rmseList.append(rmse)

### Finding R2 Score

In [None]:
for i in range(4):
    r2_sc = r2_score(rulDatasets[i], predictedRulDatasets[i])
    print("For FD00" + str(i+1))
    print("R2 Score:", r2_sc)
    print("\n")
    r2_scoreList.append(r2_sc)

#### Expected RUL vs Predicted RUL

In [None]:
for i in range(4):
    print("For FD00" + str(i+1) + "\n")
    plt.figure(figsize = (15, 6))
    plt.plot(rulDatasets[i], label = "Expected RUL", color = "Red")
    plt.plot(predictedRulDatasets[i], label = "Predicted RUL", color = "Green")
    plt.legend(loc = 'upper left')
    plt.show()
    print("\n")

In [None]:
for i in range(4):
    print("For FD00" + str(i+1) + "\n")
    plt.figure(figsize = (15, 6))
    plt.plot(histories[i].history['loss'])
    plt.plot(histories[i].history['val_loss'])
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc = 'upper left')
    plt.show()
    print("\n")

## LSTM Complete Model Training

In [None]:
completeTrainDatasets = np.concatenate(processedTrainDatasets)
completeTrainRulDatasets = np.concatenate(processedTrainRulDatasets)
completeTestDatasets = np.concatenate(processedTestDatasets)
completeRulDatasets = np.concatenate(rulDatasets)
completeTestWindowList = np.concatenate(testWindowList)

### Shapes

In [None]:
print("Train Dataset Shape: ", completeTrainDatasets.shape)
print("Train RUL Dataset Shape: ", completeTrainRulDatasets.shape)
print("Test Dataset Shape: ", completeTestDatasets.shape)
print("Expected RUL Shape: ", completeRulDatasets.shape)

In [None]:
completeModel = createModel(completeTrainDatasets.shape)
completeHistory = completeModel.fit(completeTrainDatasets, completeTrainRulDatasets, epochs = 100, validation_split = 0.2, batch_size = 128, verbose = 1)

### Predicting Values

In [None]:
completePredictedRul = completeModel.predict(completeTestDatasets).reshape(-1)
completePredictedRul = np.split(completePredictedRul, np.cumsum(completeTestWindowList)[:-1])
completePredictedRul = [np.average(ruls, weights = np.repeat(1/testWindow, testWindow)) for ruls, testWindow in zip(completePredictedRul, completeTestWindowList)]

### Finding Root Mean Square Error

In [None]:
rmse = np.sqrt(mse(completeRulDatasets, completePredictedRul))
print("Root Mean Square Error:", rmse)

### Finding R2 Score

In [None]:
r2_sc = r2_score(completeRulDatasets, completePredictedRul)
print("R2 Score:", r2_sc)

### Visualization

#### Expected RUL vs Predicted RUL

In [None]:
plt.figure(figsize = (15, 6))
plt.plot(completeRulDatasets, label = "Expected RUL", color = "Red")
plt.plot(completePredictedRul, label = "Predicted RUL", color = "Green")
plt.legend(loc = 'upper left')
plt.show()

#### Training History

In [None]:
plt.figure(figsize = (15, 6))
plt.plot(completeHistory.history['loss'])
plt.plot(completeHistory.history['val_loss'])
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc = 'upper left')
plt.show()

## Saving Models

In [None]:
modelDumpFolder = 'models/'

modelDumpFiles = ['FD001.pkl', 'FD002.pkl', 'FD003.pkl', 'FD004.pkl', 'complete.pkl']

In [None]:
for i in range(4):
    pickle.dump(trainedModel[i], open(modelDumpFolder + modelDumpFiles[i], 'wb'))

In [None]:
pickle.dump(completeModel, open(modelDumpFolder + modelDumpFiles[i], 'wb'))