In [1]:
# standard libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Train1 and Test1 dataset

In [2]:
train1 = pd.read_csv('CleanedData/train1_clean.csv')
test1 = pd.read_csv('CleanedData/test1_clean.csv')

In [3]:
train1

Unnamed: 0,engine_number,time_cycles,sensor_measurement2,sensor_measurement3,sensor_measurement4,sensor_measurement7,sensor_measurement8,sensor_measurement9,sensor_measurement11,sensor_measurement12,sensor_measurement13,sensor_measurement15,sensor_measurement17,sensor_measurement20,sensor_measurement21,RUL
0,1,1,641.82,1589.70,1400.60,554.36,2388.06,9046.19,47.47,521.66,2388.02,8.4195,392,39.06,23.4190,191
1,1,2,642.15,1591.82,1403.14,553.75,2388.04,9044.07,47.49,522.28,2388.07,8.4318,392,39.00,23.4236,190
2,1,3,642.35,1587.99,1404.20,554.26,2388.08,9052.94,47.27,522.42,2388.03,8.4178,390,38.95,23.3442,189
3,1,4,642.35,1582.79,1401.87,554.45,2388.11,9049.48,47.13,522.86,2388.08,8.3682,392,38.88,23.3739,188
4,1,5,642.37,1582.85,1406.22,554.00,2388.06,9055.15,47.28,522.19,2388.04,8.4294,393,38.90,23.4044,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,196,643.49,1597.98,1428.63,551.43,2388.19,9065.52,48.07,519.49,2388.26,8.4956,397,38.49,22.9735,4
20627,100,197,643.54,1604.50,1433.58,550.86,2388.23,9065.11,48.04,519.68,2388.22,8.5139,395,38.30,23.1594,3
20628,100,198,643.42,1602.46,1428.18,550.94,2388.24,9065.90,48.09,520.01,2388.24,8.5646,398,38.44,22.9333,2
20629,100,199,643.23,1605.26,1426.53,550.68,2388.25,9073.72,48.39,519.67,2388.23,8.5389,395,38.29,23.0640,1


In [4]:
test1

Unnamed: 0,engine_number,time_cycles,sensor_measurement2,sensor_measurement3,sensor_measurement4,sensor_measurement7,sensor_measurement8,sensor_measurement9,sensor_measurement11,sensor_measurement12,sensor_measurement13,sensor_measurement15,sensor_measurement17,sensor_measurement20,sensor_measurement21,RUL
0,1,1,643.02,1585.29,1398.21,553.90,2388.04,9050.17,47.20,521.72,2388.03,8.4052,392,38.86,23.3735,142
1,1,2,641.71,1588.45,1395.42,554.85,2388.01,9054.42,47.50,522.16,2388.06,8.3803,393,39.02,23.3916,141
2,1,3,642.46,1586.94,1401.34,554.11,2388.05,9056.96,47.50,521.97,2388.03,8.4441,393,39.08,23.4166,140
3,1,4,642.44,1584.12,1406.42,554.07,2388.03,9045.29,47.28,521.38,2388.05,8.3917,391,39.00,23.3737,139
4,1,5,642.51,1587.19,1401.92,554.16,2388.01,9044.55,47.31,522.15,2388.03,8.4031,390,38.99,23.4130,138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13091,100,194,643.24,1599.45,1415.79,553.41,2388.02,9142.37,47.69,520.69,2388.00,8.4715,394,38.65,23.1974,24
13092,100,195,643.22,1595.69,1422.05,553.22,2388.05,9140.68,47.60,521.05,2388.09,8.4512,395,38.57,23.2771,23
13093,100,196,643.44,1593.15,1406.82,553.04,2388.11,9146.81,47.57,521.18,2388.04,8.4569,395,38.62,23.2051,22
13094,100,197,643.26,1594.99,1419.36,553.37,2388.07,9148.85,47.61,521.33,2388.08,8.4711,395,38.66,23.2699,21


In [5]:
# Divide into train and Test
# train_data
X_train = train1.drop('RUL',axis=1)
y_train = train1['RUL']

# test_data
X_test = test1.drop('RUL', axis =1)
y_test = test1['RUL']

# ML algo

In [6]:
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [7]:
models = {
    'Lasso':Lasso(),
    'LinearRegression': LinearRegression(),
    'SupportVectorMachine': SVR(),
     'KNeighborsRegressor' : KNeighborsRegressor(),
    'RandomForestRegressor': RandomForestRegressor(),
    'GradientBoostingRegressor': GradientBoostingRegressor(),
    'XGBRegressor': XGBRegressor()
}

In [8]:
# Scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)

In [9]:
results = []

# Store feature importances
feature_importance = {}

for model_name, model in models.items():
    # Train the model
    model.fit(X_train_scale, y_train)

    # Make Predictions
    y_pred = model.predict(X_test_scale)

    mae = mean_absolute_error(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2_square = r2_score(y_test, y_pred)

    # Append results to the list
    results.append({
        'Model': model_name,
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2_square,
    })

    # Store feature importances if the model supports it
    if hasattr(model, 'feature_importances_'):
        feature_importance[model_name] = model.feature_importances_

# Create a DataFrame from the results list
results_df = pd.DataFrame(results)

# Display the results DataFrame
print("Results:")
results_df

Results:


Unnamed: 0,Model,MAE,RMSE,R2
0,Lasso,33.775373,43.271669,0.461694
1,LinearRegression,33.922029,43.253501,0.462146
2,SupportVectorMachine,31.393672,41.888889,0.495548
3,KNeighborsRegressor,35.359056,45.976951,0.392282
4,RandomForestRegressor,40.01738,52.848084,0.197065
5,GradientBoostingRegressor,35.723366,47.193165,0.359705
6,XGBRegressor,45.498272,58.752384,0.007632


In [10]:
# SVM give better accuracy

#### Check the minimum cycle of engine

In [11]:
train1.groupby('engine_number')['RUL'].nunique().min()

128

#### Let's put a RUL threshold 128

In [12]:
train1['RUL'][train1['RUL'] > 128] = 128
test1['RUL'][test1['RUL'] > 128] = 128

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train1['RUL'][train1['RUL'] > 128] = 128
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1['RUL'][test1['RUL'] > 128] = 128


In [13]:
train1

Unnamed: 0,engine_number,time_cycles,sensor_measurement2,sensor_measurement3,sensor_measurement4,sensor_measurement7,sensor_measurement8,sensor_measurement9,sensor_measurement11,sensor_measurement12,sensor_measurement13,sensor_measurement15,sensor_measurement17,sensor_measurement20,sensor_measurement21,RUL
0,1,1,641.82,1589.70,1400.60,554.36,2388.06,9046.19,47.47,521.66,2388.02,8.4195,392,39.06,23.4190,128
1,1,2,642.15,1591.82,1403.14,553.75,2388.04,9044.07,47.49,522.28,2388.07,8.4318,392,39.00,23.4236,128
2,1,3,642.35,1587.99,1404.20,554.26,2388.08,9052.94,47.27,522.42,2388.03,8.4178,390,38.95,23.3442,128
3,1,4,642.35,1582.79,1401.87,554.45,2388.11,9049.48,47.13,522.86,2388.08,8.3682,392,38.88,23.3739,128
4,1,5,642.37,1582.85,1406.22,554.00,2388.06,9055.15,47.28,522.19,2388.04,8.4294,393,38.90,23.4044,128
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,196,643.49,1597.98,1428.63,551.43,2388.19,9065.52,48.07,519.49,2388.26,8.4956,397,38.49,22.9735,4
20627,100,197,643.54,1604.50,1433.58,550.86,2388.23,9065.11,48.04,519.68,2388.22,8.5139,395,38.30,23.1594,3
20628,100,198,643.42,1602.46,1428.18,550.94,2388.24,9065.90,48.09,520.01,2388.24,8.5646,398,38.44,22.9333,2
20629,100,199,643.23,1605.26,1426.53,550.68,2388.25,9073.72,48.39,519.67,2388.23,8.5389,395,38.29,23.0640,1


#### Repeate Same Process

In [14]:
# Divide into train and Test
# train_data
X_train = train1.drop('RUL',axis=1)
y_train = train1['RUL']

# test_data
X_test = test1.drop('RUL', axis =1)
y_test = test1['RUL']


# Scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)


results = []

for model_name, model in models.items():
    # Train the model
    model.fit(X_train_scale, y_train)

    # Make Predictions
    y_pred = model.predict(X_test_scale)

    mae = mean_absolute_error(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2_square = r2_score(y_test, y_pred)
    
    # Append results to the list
    results.append({
        'Model': model_name,
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2_square,
    })

# Create a DataFrame from the results list
results_df = pd.DataFrame(results)

# Display the results DataFrame
results_df

Unnamed: 0,Model,MAE,RMSE,R2
0,Lasso,15.653997,19.777548,0.523373
1,LinearRegression,15.687303,19.780395,0.523236
2,SupportVectorMachine,10.966649,16.522498,0.667352
3,KNeighborsRegressor,11.991265,18.151966,0.598504
4,RandomForestRegressor,12.169171,18.830212,0.56794
5,GradientBoostingRegressor,12.460871,17.909501,0.609158
6,XGBRegressor,15.752929,24.237538,0.284168


#### After putting threshold SVM increase accuracy around 17 percent

# PCA

In [15]:
# Scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)

from sklearn.decomposition import PCA  
pca = PCA(n_components=0.95) # You can change this value as needed
X_train_pca = pca.fit_transform(X_train_scale)
X_test_pca = pca.transform(X_test_scale)

results = []

for model_name, model in models.items():
    # Train the model
    model.fit(X_train_pca, y_train)

    # Make Predictions
    y_pred = model.predict(X_test_pca)

    mae = mean_absolute_error(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2_square = r2_score(y_test, y_pred)
    
    # Append results to the list
    results.append({
        'Model': model_name,
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2_square,
    })

# Create a DataFrame from the results list
results_df = pd.DataFrame(results)

# Display the results DataFrame
results_df

Unnamed: 0,Model,MAE,RMSE,R2
0,Lasso,15.983557,20.072494,0.509051
1,LinearRegression,15.702845,19.807,0.521952
2,SupportVectorMachine,10.820022,16.523358,0.667317
3,KNeighborsRegressor,11.879459,18.149454,0.598615
4,RandomForestRegressor,11.905654,17.557384,0.624376
5,GradientBoostingRegressor,11.966794,16.853995,0.65387
6,XGBRegressor,13.126884,18.457053,0.584894


# SupportVectorMachine Gives best R2_score

In [16]:
train1['RUL'].describe().T

count    20631.000000
mean        87.982551
std         42.741710
min          0.000000
25%         51.000000
50%        103.000000
75%        128.000000
max        128.000000
Name: RUL, dtype: float64

In [9]:
train1['RUL'][train1['RUL'] > 103] = 103
test1['RUL'][test1['RUL'] > 103] = 103

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train1['RUL'][train1['RUL'] > 103] = 103
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1['RUL'][test1['RUL'] > 103] = 103


In [10]:
X_train = train1.drop('RUL',axis=1)
y_train = train1['RUL']

# test_data
X_test = test1.drop('RUL', axis =1)
y_test = test1['RUL']

# Scaling
from sklearn.preprocessing import RobustScaler
scaler = RobustScaler()
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)

from sklearn.decomposition import PCA  
pca = PCA(n_components=0.95) # You can change this value as needed
X_train_pca = pca.fit_transform(X_train_scale)
X_test_pca = pca.transform(X_test_scale)

results = []

for model_name, model in models.items():
    # Train the model
    model.fit(X_train_pca, y_train)

    # Make Predictions
    y_pred = model.predict(X_test_pca)

    mae = mean_absolute_error(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2_square = r2_score(y_test, y_pred)
    
    # Append results to the list
    results.append({
        'Model': model_name,
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2_square,
    })

# Create a DataFrame from the results list
results_df = pd.DataFrame(results)

# Display the results DataFrame
results_df

Unnamed: 0,Model,MAE,RMSE,R2
0,Lasso,12.183683,15.168114,0.417514
1,LinearRegression,11.79556,14.78487,0.446577
2,SupportVectorMachine,5.558247,10.882363,0.700174
3,KNeighborsRegressor,6.525596,11.855887,0.644131
4,RandomForestRegressor,6.741745,11.632604,0.657409
5,GradientBoostingRegressor,6.98865,11.137308,0.685962
6,XGBRegressor,7.627333,12.14631,0.626482


In [18]:
# Assuming you have already applied PCA and stored the result in X_train_pca

# Get the original column names of the features in X_train
original_feature_names = X_train.columns.tolist()

# Create a DataFrame with PCA components and original column names
X_train_pca_df = pd.DataFrame(data=X_train_pca, columns=original_feature_names[:X_train_pca.shape[1]])

# Add the original feature names as column names
X_train_pca_df.columns = original_feature_names[:X_train_pca.shape[1]]
X_train_pca_df.columns

In [19]:
X_train_pca_df.columns

Index(['engine_number', 'time_cycles', 'sensor_measurement2',
       'sensor_measurement3', 'sensor_measurement4', 'sensor_measurement7',
       'sensor_measurement8', 'sensor_measurement9', 'sensor_measurement11',
       'sensor_measurement12', 'sensor_measurement13'],
      dtype='object')

### SVM give better accuracy now try hyperparamet only for svm

In [20]:
# Scaling
from sklearn.preprocessing import RobustScaler
scaler = RobustScaler()
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)

from sklearn.decomposition import PCA  
pca = PCA(n_components=0.95) # You can change this value as needed
X_train_pca = pca.fit_transform(X_train_scale)
X_test_pca = pca.transform(X_test_scale)



# Model
svr = SVR(C=3.0, epsilon=0.5, 
         kernel='rbf', 
         degree=3, 
         gamma='auto', 
         coef0=0.0, 
         shrinking=True, 
         tol=0.001, 
         cache_size=500, 
         verbose=False, 
         max_iter=-1)

svr.fit(X_train_pca, y_train)
y_pred = svr.predict(X_test_pca)

mae = mean_absolute_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)
r2_square = r2_score(y_test, y_pred)
    

print('MAE: ', mae)
print('RMSE: ', rmse)
print('R2: ', r2_square)

MAE:  5.501754536535247
RMSE:  10.823729878431688
R2:  0.7033965324458893


In [21]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVR

# Define the SVR hyperparameter search space
param_dist = {
    'C': [3, 5],
    'epsilon': [0.1, 0.2],
    'kernel': ['rbf'],
    'degree': [3 , 5],
    'gamma': ['auto'],
    'coef0': [0.0, 0.1],
    'shrinking': [True],
    'tol': [0.001, 0.01],
    'cache_size': [200, 500]
}

# Create the SVR model
svr = SVR()

# Create the Randomized SearchCV object
random_search = RandomizedSearchCV(svr, param_distributions=param_dist, n_iter=100, cv=5, n_jobs=-1, verbose=2, random_state=42)

# Perform the Randomized Search
random_search.fit(X_train_pca, y_train)

# Print the best hyperparameters
print("Best Hyperparameters from Randomized Search:")
print(random_search.best_params_)

# Get the best SVR model
best_svr = random_search.best_estimator_
best_svr



Fitting 5 folds for each of 64 candidates, totalling 320 fits
Best Hyperparameters from Randomized Search:
{'tol': 0.001, 'shrinking': True, 'kernel': 'rbf', 'gamma': 'auto', 'epsilon': 0.2, 'degree': 3, 'coef0': 0.0, 'cache_size': 200, 'C': 5}


In [22]:
# Scaling
from sklearn.preprocessing import RobustScaler
scaler = RobustScaler()
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)

from sklearn.decomposition import PCA  
pca = PCA(n_components=0.95) # You can change this value as needed
X_train_pca = pca.fit_transform(X_train_scale)
X_test_pca = pca.transform(X_test_scale)



# Model
svr = SVR(C=5.0, epsilon=0.2, 
         kernel='rbf', 
         degree=3, 
         gamma='auto', 
         coef0=0.0, 
         shrinking=True, 
         tol=0.001, 
         cache_size=500, 
         verbose=False, 
         max_iter=-1)

svr.fit(X_train_pca, y_train)
y_pred = svr.predict(X_test_pca)

mae = mean_absolute_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)
r2_square = r2_score(y_test, y_pred)
    

print('MAE: ', mae)
print('RMSE: ', rmse)
print('R2: ', r2_square)

MAE:  5.398695464478152
RMSE:  10.817670392772273
R2:  0.7037285365505757


****************************************************************************************************************************

***************************************************************************************************************************