In [None]:
#| hide
%load_ext autoreload
%autoreload 2

In [None]:
#| default_exp utils

# Objective 1 : Predict RUL


Predictive Maintenance (PdM) is a great application of Survival Analysis since it consists in predicting when equipment failure will occur and therefore alerting the maintenance team to prevent that failure.

### ` Objectives`
> - To estimate Remaining Useful Time(RUL) of a machine/component

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import shap
from boruta import BorutaPy

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

from PredictiveMaintenance2 import Datasets,Visualize,FeatureEng,Model

  from .autonotebook import tqdm as notebook_tqdm


# Feature selection

### load dataset

In [None]:
# load pre processed dataset
machineData = pd.read_csv('Machine_Data_Preprocessed.csv')
machineData.head(2)

Unnamed: 0,date,device,failure,metric1,metric2,metric3,metric4,metric5,metric6,metric7,metric8,metric9,RUL,SurvivalTime
0,2015-01-01,0,0,141503600,0,0,1,19,494462,16,16,3,18,1
1,2015-01-01,1,0,55587136,0,0,0,7,199132,0,0,0,214,1


In [None]:
# sort values by device and date
machineData = machineData.sort_values(['device','date'],ascending= True).reset_index(drop=True)
machineData.head(2)

Unnamed: 0,date,device,failure,metric1,metric2,metric3,metric4,metric5,metric6,metric7,metric8,metric9,RUL,SurvivalTime
0,2015-01-01,0,0,141503600,0,0,1,19,494462,16,16,3,18,1
1,2015-01-02,0,0,161679800,0,0,1,19,495730,16,16,3,17,2


### split the data

In [None]:
x = machineData.drop(['date','failure','RUL','SurvivalTime'],axis=1)
y = machineData['RUL']

# split the data
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=42)


In [None]:
# convert train and test dataset to numpy array for applying boruta
x_train = x_train.to_numpy()
x_test = x_test.to_numpy()

In [None]:
# Instantiate the random forest regressor
rf = RandomForestRegressor(n_estimators=100, random_state=42)

### Boruta

In [None]:
# Instantiate the Boruta object
boruta = BorutaPy(estimator=rf, n_estimators='auto', verbose=2, alpha=0.05, max_iter=100, random_state=42)

# Fit the Boruta object to the training data
boruta.fit(x_train, y_train)

Iteration: 	1 / 100
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	2 / 100
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	3 / 100
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	4 / 100
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	5 / 100
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	6 / 100
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	7 / 100
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	8 / 100
Confirmed: 	8
Tentative: 	2
Rejected: 	0
Iteration: 	9 / 100
Confirmed: 	8
Tentative: 	2
Rejected: 	0
Iteration: 	10 / 100
Confirmed: 	8
Tentative: 	2
Rejected: 	0
Iteration: 	11 / 100
Confirmed: 	8
Tentative: 	2
Rejected: 	0
Iteration: 	12 / 100
Confirmed: 	8
Tentative: 	2
Rejected: 	0
Iteration: 	13 / 100
Confirmed: 	8
Tentative: 	2
Rejected: 	0
Iteration: 	14 / 100
Confirmed: 	8
Tentative: 	2
Rejected: 	0
Iteration: 	15 / 100
Confirmed: 	8
Tentative: 	2
Rejected: 	0
Iteration: 	16 / 100
Confirmed: 	8
Tentative: 	2
Rejected: 	0
Iteration:

In [None]:
# Get the selected features
selected_features = boruta.support_

# Train a regression model using the selected features
x_train_sel = x_train[:, selected_features]
x_test_sel = x_test[:, selected_features]

# Modeling

In [None]:
# Create a Random Forest Regressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)

# Fit the model to the training data
rf_model.fit(x_train_sel, y_train)

# Predictions


In [None]:
# Make predictions on the testing data
y_pred = rf_model.predict(x_test_sel)

# Validations

In [None]:
# Calculate the Mean Squared Error (MSE) and R-squared value of the predictions
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [None]:
# Print the results
print(f"Mean Squared Error: {mse}")
print(f"R-squared value: {r2}")

Mean Squared Error: 83.94598232058489
R-squared value: 0.9874394628423803


In [None]:
# Perform cross-validation on the model
scores = cross_val_score(rf_model,x,y, cv=5)
print('Cross-Validation Scores:', scores)
print('Mean Cross-Validation Score:', np.mean(scores))

Cross-Validation Scores: [ 0.20027512 -0.39237914 -0.07696919  0.08281032  0.07112319]
Mean Cross-Validation Score: -0.02302793973490691


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()