In [1]:
#python library imports
import io
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# aws imports
import boto3
from botocore.exceptions import ClientError


#logging.basicConfig(level=logging.DEBUG)
#logging.basicConfig(filename='pdm.log', filemode='w', format='%(name)s - %(levelname)s - %(message)s')

In [2]:
# Create a custom logger
logger = logging.getLogger("helper")

# Create handlers
c_handler = logging.StreamHandler()
f_handler = logging.FileHandler('pdm.log')

c_handler.setLevel(logging.WARNING)
f_handler.setLevel(logging.ERROR)

# Create formatters and add it to handlers
c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
c_handler.setFormatter(c_format)
f_handler.setFormatter(f_format)

# Add handlers to the logger
logger.addHandler(c_handler)
logger.addHandler(f_handler)


In [3]:
boto3.setup_default_session(profile_name='HCL_USER_1')
input_bucket = "capacityplanning"
train_key = "predictive-maintenance/train_pdm.txt"
test_key = "predictive-maintenance/test_pdm.txt"
truth_key = "predictive-maintenance/truth_pdm.txt"
output_bucket = "capacityplanning-predictions-bucket"
s3_output_path = "pdm-predictions/"


In [4]:

def preprocess_data(columns):
    # normalize sensor readings
    """
    Add regression labels to the train and test data.
    
    Regression label: RUL for each cycle# for an engine/device ID subtracted from the Last cycle#(Max value) of the same engine
                      in the train data.
    """
    client = boto3.client('s3')
    try:
        obj = client.get_object(Bucket=input_bucket, Key=train_key)
        train_df = pd.read_csv(io.BytesIO(obj['Body'].read()), delimiter=" ", header=None)
    
    except ClientError as e:
        logging.debug(e)
        return None
        
    eps=0.000001 # to avoid floating point issues during normalization
    

    train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)
    
    train_df.columns = columns

    df_max_cycle = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index()

    df_max_cycle.columns = ['id','max']

    train_df = train_df.merge(df_max_cycle, on=['id'], how='left')

    train_df['RUL'] = train_df['max'] - train_df['cycle']

    train_df.drop('max', axis=1, inplace=True)

#    columns= train_df.columns

#     train_df[columns[2:]] = (train_df[columns[2:]] - train_df[columns[2:]].min() + eps) / (
#                     train_df[columns[2:]].max() - train_df[columns[2:]].min() + eps)


    # Load time series
    try:
        objc = client.get_object(Bucket=input_bucket, Key=test_key)
        test_df = pd.read_csv(io.BytesIO(objc['Body'].read()), delimiter=" ", header=None)
        
        objt = client.get_object(Bucket=input_bucket, Key=truth_key)
        truth_df = pd.read_csv(io.BytesIO(objt['Body'].read()), delimiter=" ", header=None)
        
    except ClientError as e:
        
        logging.debug(e)
        return None
    
    #test_df = pd.read_csv(r"C:\Users\shivapriya.katta\Documents\Notepad_files\test_pdm.txt", delimiter=" ", header=None)

    test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)
    
    test_df.columns = columns

    # Load the RUL values
    
    truth_df.drop(truth_df.columns[1], axis=1, inplace=True)
    
    truth_df.index += 1

    # Merge truth_df and test_df and compute RUL per timestamp for test data
    
    test_df = test_df.merge(truth_df, left_on=test_df.columns[0], right_index=True, how='left')
    
    test_df.columns = columns + ['RUL_end']
    
    rul = pd.DataFrame(test_df.groupby('id')['cycle'].max()).reset_index()
    
    rul.columns = ['id', 'max']
    
    test_df = test_df.merge(rul, on=['id'], how='left')  # We get the maximum number of cycles for each device
    
    test_df['RUL'] = test_df['max'] + test_df['RUL_end'] - test_df['cycle']  # The RUL is the number of cycles per series + RUL - how many cycles have already ran
    
    test_df.drop(['max', 'RUL_end'], axis=1, inplace=True)

#     # Normalize
#     test_df[columns[2:]] = (test_df[columns[2:]] - test_df[columns[2:]].min() + eps) / (
#                 test_df[columns[2:]].max() - test_df[columns[2:]].min() + eps)
    

    return train_df, test_df


In [5]:
col_names = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 'sensor1', 'sensor2', 'sensor3','sensor4', 'sensor5', 'sensor6',
             'sensor7', 'sensor8', 'sensor9', 'sensor10', 'sensor11', 'sensor12', 'sensor13', 'sensor14','sensor15', 'sensor16',
             'sensor17', 'sensor18', 'sensor19', 'sensor20', 'sensor21']

train_df, test_df = preprocess_data(col_names)

In [6]:
train_df.head()

Unnamed: 0,id,cycle,setting1,setting2,setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,RUL
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419,191
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236,190
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,189
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,188
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044,187


In [7]:
test_df.head()

Unnamed: 0,id,cycle,setting1,setting2,setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,RUL
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,...,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735,142
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,...,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916,141
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,...,2388.03,8130.1,8.4441,0.03,393,2388,100.0,39.08,23.4166,140
3,1,4,0.0042,0.0,100.0,518.67,642.44,1584.12,1406.42,14.62,...,2388.05,8132.9,8.3917,0.03,391,2388,100.0,39.0,23.3737,139
4,1,5,0.0014,0.0,100.0,518.67,642.51,1587.19,1401.92,14.62,...,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.413,138


In [8]:
print("The length of train_df = {}".format(len(train_df)))
print("The length of test_df = {}".format(len(test_df)))

The length of train_df = 20631
The length of test_df = 13096


In [9]:
# Lets choose XGBoost Regressor to model the regression problem

In [10]:
import xgboost as xgb
from xgboost.sklearn import XGBRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import explained_variance_score
from sklearn.model_selection import GridSearchCV   #Perforing grid search
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

In [11]:
class FeatureSelector(BaseEstimator, TransformerMixin):
    
    def __init__(self, features):
        self.features = features
        
    def fit(self, X,y=None):
        return self
    
    def transform(self,X):
        return X[features].values

In [12]:
features = ['setting1', 'setting2', 'setting3', 'sensor1', 'sensor2', 'sensor3','sensor4', 'sensor5', 'sensor6',
             'sensor7', 'sensor8', 'sensor9', 'sensor10', 'sensor11', 'sensor12', 'sensor13', 'sensor14','sensor15', 'sensor16',
             'sensor17', 'sensor18', 'sensor19', 'sensor20', 'sensor21']

X_train = train_df.drop('RUL', axis=1)
y_train = train_df["RUL"]

X_test = test_df.drop('RUL', axis=1)
y_test = test_df["RUL"]
pipeline = Pipeline(steps=[
    ('selector', FeatureSelector(features)),
    ('scaler_x', MinMaxScaler())
])

In [13]:
train_prepared = pipeline.fit_transform(X_train)

In [14]:
scaler_y = MinMaxScaler()
ytrain_prepared = y_train.values.reshape(-1,1)
ytrain_prepared_sc = scaler_y.fit_transform(ytrain_prepared)

print("train_prepared shape = {}".format(train_prepared.shape))
print("ytrain_prepared shape = {}".format(ytrain_prepared.shape))
print("ytrain_prepared_sc shape = {}".format(ytrain_prepared_sc.shape))

train_prepared shape = (20631, 24)
ytrain_prepared shape = (20631, 1)
ytrain_prepared_sc shape = (20631, 1)


In [15]:
ytrain_prepared_sc

array([[0.52908587],
       [0.52631579],
       [0.52354571],
       ...,
       [0.00554017],
       [0.00277008],
       [0.        ]])

In [16]:
test_prepared = pipeline.transform(X_test)

In [17]:
ytest_prepared = y_test.values.reshape(-1,1)
ytest_prepared_sc = scaler_y.transform(ytest_prepared)
print("test_prepared shape = {}".format(test_prepared.shape))
print("ytest_prepared shape = {}".format(ytest_prepared.shape))
print("ytest_prepared_sc shape = {}".format(ytest_prepared_sc.shape))

test_prepared shape = (13096, 24)
ytest_prepared shape = (13096, 1)
ytest_prepared_sc shape = (13096, 1)


In [18]:
dtrain = xgb.DMatrix(train_prepared, label=ytrain_prepared_sc)
dtest = xgb.DMatrix(test_prepared, label=ytest_prepared_sc)

In [20]:
dtrain

<xgboost.core.DMatrix at 0x1bab6cae460>

In [21]:
gridsearch_params = [
    (max_depth, min_child_weight)
    for max_depth in range(9,12)
    for min_child_weight in range(5,8)
]

In [23]:
params = {
    # Parameters that we are going to tune.
    'max_depth':6,
    'min_child_weight': 1,
    'eta':.3,
    'subsample': 1,
    'colsample_bytree': 1,
    # Other parameters
    'objective':'reg:linear',
}

In [24]:
# Define initial best params and MAE
min_mae = float("Inf")
num_boost_round = 90
best_params = None
for max_depth, min_child_weight in gridsearch_params:
    print("CV with max_depth={}, min_child_weight={}".format(
                             max_depth,
                             min_child_weight))
    # Update our parameters
    params['max_depth'] = max_depth
    params['min_child_weight'] = min_child_weight
    # Run CV
    cv_results = xgb.cv(
        params,
        dtrain,
        num_boost_round=num_boost_round,
        seed=42,
        nfold=5,
        metrics={'mae'},
        early_stopping_rounds=10
    )
    # Update best MAE
    mean_mae = cv_results['test-mae-mean'].min()
    boost_rounds = cv_results['test-mae-mean'].argmin()
    print("\tMAE {} for {} rounds".format(mean_mae, boost_rounds))
    if mean_mae < min_mae:
        min_mae = mean_mae
        best_params = (max_depth,min_child_weight)
print("Best params: {}, {}, MAE: {}".format(best_params[0], best_params[1], min_mae))

CV with max_depth=9, min_child_weight=5
	MAE 0.0836914 for 14 rounds
CV with max_depth=9, min_child_weight=6
	MAE 0.083843 for 12 rounds
CV with max_depth=9, min_child_weight=7
	MAE 0.083527 for 13 rounds
CV with max_depth=10, min_child_weight=5
	MAE 0.08416019999999999 for 15 rounds
CV with max_depth=10, min_child_weight=6
	MAE 0.0842946 for 12 rounds
CV with max_depth=10, min_child_weight=7
	MAE 0.0841906 for 12 rounds
CV with max_depth=11, min_child_weight=5
	MAE 0.0852042 for 12 rounds
CV with max_depth=11, min_child_weight=6
	MAE 0.085087 for 15 rounds
CV with max_depth=11, min_child_weight=7


	MAE 0.0850332 for 13 rounds
Best params: 9, 7, MAE: 0.083527


In [35]:
# Define an other set of grid-search params
gridsearch_params_new = [
    (subsample, colsample)
    for subsample in [i/10. for i in range(7,11)]
    for colsample in [i/10. for i in range(7,11)]
]

In [36]:
updated_params = {
    # Parameters that we are going to tune.
    'max_depth':9,
    'min_child_weight': 7,
    'eta':.3,
    'subsample': 1,
    'colsample_bytree': 1,
    # Other parameters
    'objective':'reg:linear',
}

In [37]:
# Now tune for different values of subsample and colsample_by_tree

min_mae = float("Inf")
best_params = None
# We start by the largest values and go down to the smallest
for subsample, colsample in reversed(gridsearch_params_new):
    print("CV with subsample={}, colsample={}".format(
                             subsample,
                             colsample))
    # We update our parameters
    params['subsample'] = subsample
    params['colsample_bytree'] = colsample
    # Run CV
    cv_results = xgb.cv(
        updated_params,
        dtrain,
        num_boost_round=num_boost_round,
        seed=42,
        nfold=5,
        metrics={'mae'},
        early_stopping_rounds=10
    )
    # Update best score
    mean_mae = cv_results['test-mae-mean'].min()
    boost_rounds = cv_results['test-mae-mean'].argmin()
    print("\tMAE {} for {} rounds".format(mean_mae, boost_rounds))
    if mean_mae < min_mae:
        min_mae = mean_mae
        best_params = (subsample,colsample)
print("Best params: {}, {}, MAE: {}".format(best_params[0], best_params[1], min_mae))


CV with subsample=1.0, colsample=1.0
	MAE 0.083527 for 13 rounds
CV with subsample=1.0, colsample=0.9
	MAE 0.083527 for 13 rounds
CV with subsample=1.0, colsample=0.8
	MAE 0.083527 for 13 rounds
CV with subsample=1.0, colsample=0.7
	MAE 0.083527 for 13 rounds
CV with subsample=0.9, colsample=1.0
	MAE 0.083527 for 13 rounds
CV with subsample=0.9, colsample=0.9
	MAE 0.083527 for 13 rounds
CV with subsample=0.9, colsample=0.8
	MAE 0.083527 for 13 rounds
CV with subsample=0.9, colsample=0.7
	MAE 0.083527 for 13 rounds
CV with subsample=0.8, colsample=1.0


	MAE 0.083527 for 13 rounds
CV with subsample=0.8, colsample=0.9
	MAE 0.083527 for 13 rounds
CV with subsample=0.8, colsample=0.8
	MAE 0.083527 for 13 rounds
CV with subsample=0.8, colsample=0.7
	MAE 0.083527 for 13 rounds
CV with subsample=0.7, colsample=1.0
	MAE 0.083527 for 13 rounds
CV with subsample=0.7, colsample=0.9
	MAE 0.083527 for 13 rounds
CV with subsample=0.7, colsample=0.8
	MAE 0.083527 for 13 rounds
CV with subsample=0.7, colsample=0.7
	MAE 0.083527 for 13 rounds
Best params: 1.0, 1.0, MAE: 0.083527


In [50]:
# lets update the params dictionary with the new values f subsamples and the colsample_by_tree
updated_params_new = {
    # Parameters that we are going to tune.
    'max_depth':9,
    'min_child_weight': 7,
    'eta':.01,
    'subsample': 1.0,
    'colsample_bytree': 1.0,
    # Other parameters
    'objective':'reg:linear',
}

In [51]:
%time
# This can take some time…
min_mae = float("Inf")
best_params = None
for eta in [.3, .2, .1, .05, .01, .005]:
    print("CV with eta={}".format(eta))
    # We update our parameters
    updated_params_new['eta'] = eta
    # Run and time CV
    %time 
    cv_results = xgb.cv(
            updated_params_new,
            dtrain,
            num_boost_round=num_boost_round,
            seed=42,
            nfold=5,
            metrics=['mae'],
            early_stopping_rounds=10)
    # Update best score
    mean_mae = cv_results['test-mae-mean'].min()
    boost_rounds = cv_results['test-mae-mean'].argmin()
    print("\tMAE {} for {} rounds\n".format(mean_mae, boost_rounds))
    if mean_mae < min_mae:
        min_mae = mean_mae
        best_params = eta
print("Best params: {}, MAE: {}".format(best_params, min_mae))

Wall time: 0 ns
CV with eta=0.3
Wall time: 0 ns
	MAE 0.083527 for 13 rounds

CV with eta=0.2
Wall time: 0 ns
	MAE 0.08286439999999999 for 22 rounds

CV with eta=0.1
Wall time: 0 ns
	MAE 0.0817278 for 53 rounds

CV with eta=0.05
Wall time: 0 ns
	MAE 0.08471479999999999 for 55 rounds

CV with eta=0.01
Wall time: 0 ns
	MAE 0.1610134 for 55 rounds

CV with eta=0.005
Wall time: 0 ns
	MAE 0.1926782 for 55 rounds

Best params: 0.1, MAE: 0.0817278


In [52]:
# Now our final dict of params is given below:
# lets update the params dictionary with the new values f subsamples and the colsample_by_tree
updated_params_final = {
    # Parameters that we are going to tune.
    'max_depth':9,
    'min_child_weight': 7,
    'eta':0.01,
    'subsample': 1.0,
    'colsample_bytree': 1.0,
    # Other parameters
    'objective':'reg:linear',
}


In [53]:
# Updated_params_final is the final dict of params we got using 5-fold cross validation on the training set.
# Let’s train a model with it and see how well it does on our test set

In [54]:
xgb_model = xgb.train(
    updated_params_final,
    dtrain,
    num_boost_round=num_boost_round,
    evals=[(dtest, "Test")],
    early_stopping_rounds=10
)

[0]	Test-rmse:0.19507
Will train until Test-rmse hasn't improved in 10 rounds.
[1]	Test-rmse:0.19390
[2]	Test-rmse:0.19274
[3]	Test-rmse:0.19161
[4]	Test-rmse:0.19049
[5]	Test-rmse:0.18939
[6]	Test-rmse:0.18831
[7]	Test-rmse:0.18724
[8]	Test-rmse:0.18618
[9]	Test-rmse:0.18515
[10]	Test-rmse:0.18412
[11]	Test-rmse:0.18312
[12]	Test-rmse:0.18213
[13]	Test-rmse:0.18116
[14]	Test-rmse:0.18020
[15]	Test-rmse:0.17926
[16]	Test-rmse:0.17833
[17]	Test-rmse:0.17741
[18]	Test-rmse:0.17650
[19]	Test-rmse:0.17562
[20]	Test-rmse:0.17474
[21]	Test-rmse:0.17388
[22]	Test-rmse:0.17304
[23]	Test-rmse:0.17220
[24]	Test-rmse:0.17138
[25]	Test-rmse:0.17057
[26]	Test-rmse:0.16977
[27]	Test-rmse:0.16899
[28]	Test-rmse:0.16821
[29]	Test-rmse:0.16746
[30]	Test-rmse:0.16671
[31]	Test-rmse:0.16598
[32]	Test-rmse:0.16525
[33]	Test-rmse:0.16455
[34]	Test-rmse:0.16385
[35]	Test-rmse:0.16316
[36]	Test-rmse:0.16248
[37]	Test-rmse:0.16181
[38]	Test-rmse:0.16117
[39]	Test-rmse:0.16052
[40]	Test-rmse:0.15989
[41]	Test-

In [55]:
print("Best RMSE: {:.2f} in {} rounds".format(xgb_model.best_score, xgb_model.best_iteration+1))

Best RMSE: 0.15 in 56 rounds


In [56]:
# Now we know that we got best RMSE score as 0.13 and the num_boost_rounds or the number of trees constructed is 56.
# So we don't need early_stopping_rounds anymore.


In [57]:
num_boost_round = xgb_model.best_iteration + 1
best_xgb_model = xgb.train(
    updated_params_final,
    dtrain,
    num_boost_round=num_boost_round,
    evals=[(dtest, "Test")]
)

[0]	Test-rmse:0.19507
[1]	Test-rmse:0.19390
[2]	Test-rmse:0.19274
[3]	Test-rmse:0.19161
[4]	Test-rmse:0.19049
[5]	Test-rmse:0.18939
[6]	Test-rmse:0.18831
[7]	Test-rmse:0.18724
[8]	Test-rmse:0.18618
[9]	Test-rmse:0.18515
[10]	Test-rmse:0.18412
[11]	Test-rmse:0.18312
[12]	Test-rmse:0.18213
[13]	Test-rmse:0.18116
[14]	Test-rmse:0.18020
[15]	Test-rmse:0.17926
[16]	Test-rmse:0.17833
[17]	Test-rmse:0.17741
[18]	Test-rmse:0.17650
[19]	Test-rmse:0.17562
[20]	Test-rmse:0.17474
[21]	Test-rmse:0.17388
[22]	Test-rmse:0.17304
[23]	Test-rmse:0.17220
[24]	Test-rmse:0.17138
[25]	Test-rmse:0.17057
[26]	Test-rmse:0.16977
[27]	Test-rmse:0.16899
[28]	Test-rmse:0.16821
[29]	Test-rmse:0.16746
[30]	Test-rmse:0.16671
[31]	Test-rmse:0.16598
[32]	Test-rmse:0.16525
[33]	Test-rmse:0.16455
[34]	Test-rmse:0.16385
[35]	Test-rmse:0.16316
[36]	Test-rmse:0.16248
[37]	Test-rmse:0.16181
[38]	Test-rmse:0.16117
[39]	Test-rmse:0.16052
[40]	Test-rmse:0.15989
[41]	Test-rmse:0.15927
[42]	Test-rmse:0.15866
[43]	Test-rmse:0.1580

In [58]:
from sklearn.metrics import mean_squared_error
mean_squared_error(best_xgb_model.predict(dtest), ytest_prepared_sc)

0.022993928018939316

In [59]:
# so the test rmse when calculated gives sqrt(0.02299) is exactly equal to 0.15164

In [60]:
#lets save our model to reuse it when needed
best_xgb_model.save_model("pdm_xgb_model.model")

In [61]:
# Now load the model for the performing inference.
loaded_model = xgb.Booster()
loaded_model.load_model("pdm_xgb_model.model")



In [None]:
# Use the loaded model for getting the predictions on the test data

In [62]:
ypred = loaded_model.predict(dtest)

In [74]:
type(ypred)

numpy.ndarray

In [64]:
ypred.shape

(13096,)

In [65]:
# Lets use the model to predict the RUL for first four devices i.e the devces with IDs--(1,2,3,4)

In [66]:
type(X_test)

pandas.core.frame.DataFrame

In [99]:
# check lengths of the dataframes taking devices 1 to 4 into account.
def check_lengths(X_test):
    check_list = []
    for i in range(1,5):
        temp_df = X_test[X_test["id"]==i]
        check_list.append(len(temp_df))
    return check_list

In [100]:
lengths = check_lengths(X_test)

In [101]:
lengths

[31, 49, 126, 106]

In [77]:
temp_df = X_test[X_test["id"]==1]

In [78]:
temp_df.shape

(31, 26)

In [102]:
# Now compute the predictions for first four devices..i.e., the "id"=1, "id"=2, "id"=3, "id"=4.

In [95]:
def predict_RUL(X_test, scaler_y, loaded_model):
    predictions_list={}
    for i in range(1,5):
        temp_df = X_test[X_test["id"]==i]
        temp_df_transformed = pipeline.transform(temp_df)
        temp_dmatrix = xgb.DMatrix(temp_df_transformed)
        predictions = loaded_model.predict(temp_dmatrix)
        predictions_inverse_transformed = scaler_y.inverse_transform(predictions.reshape(-1,1))
        ypred = predictions_inverse_transformed.flatten()
        predictions_list["device"+str(i)] = ypred
        #device = pd.Series(predictions)
        #predictions_list.append(device+str(i))
    #final_df=pd.concat(predictions_list)
    return predictions_list
        

In [97]:
final_dict = predict_RUL(X_test, scaler_y, loaded_model)

In [98]:
final_dict

{'device1': array([181.83315, 170.938  , 174.86966, 171.61238, 180.09343, 186.4516 ,
        178.30916, 172.18861, 175.01997, 184.7781 , 178.9299 , 178.43533,
        171.8577 , 181.29874, 177.18326, 166.80095, 178.15952, 176.03862,
        177.10902, 168.99355, 180.21233, 169.42848, 179.27733, 177.1919 ,
        179.72202, 175.18242, 176.36717, 170.17094, 169.53859, 173.62996,
        176.63216], dtype=float32),
 'device2': array([167.37999, 165.81526, 164.18976, 176.64377, 167.05861, 167.04024,
        165.48117, 164.12125, 164.83557, 167.13557, 167.95265, 161.49588,
        166.58328, 167.04364, 169.22905, 173.75697, 164.76527, 165.97926,
        166.021  , 165.93076, 165.84743, 162.47063, 169.44354, 158.755  ,
        169.73666, 155.61922, 160.27193, 165.0947 , 173.57239, 162.76431,
        165.27808, 155.77913, 167.37364, 166.9828 , 166.82175, 164.07109,
        170.83836, 166.96704, 166.12852, 166.0121 , 167.17767, 166.58647,
        159.17606, 159.51465, 166.97522, 164.85507, 16