Imports and helper functions

In [4]:
#Code source: https://github.com/ageron/handson-ml2/blob/master/02_end_to_end_machine_learning_project.ipynb

# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# Common imports
import numpy as np
import os

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "Report", "fig")
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

Regression MLP imports

In [3]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, max_error

Function for making models and generating plots, stolen from previous solution

In [18]:
def test_model(X_train, y_train, X_test, y_test, model):
 
    y_train_predicted = model.predict(X_train)
    y_test_predicted = model.predict(X_test)
    
    print('--------------------TRAIN SET-------------------------')
    print('r2_score = ','%.2f' % r2_score(y_train,y_train_predicted), '(1.0 means perfect fit)')
    print('max_error = ','%.2f' % max_error(y_train,y_train_predicted))
    print('root_mean_squared_error = ','%.2f' % mean_squared_error(y_train,y_train_predicted, squared=False))
    print('--------------------TEST SET-------------------------')
    print('r2_score = ','%.2f' % r2_score(y_test,y_test_predicted), '(1.0 means perfect fit)')
    print('max_error = ','%.2f' % max_error(y_test,y_test_predicted))
    print('root_mean_squared_error = ','%.2f' % mean_squared_error(y_test,y_test_predicted, squared=False))
    print(y_train.index[:100])
    
    return r2_score(y_train,y_train_predicted), r2_score(y_test,y_test_predicted)

Import the training set and the data set

In [1]:
import pandas as pd
useless_features = ['engine','RUL', 'cycle', 'setting 3', 'sensor 1', 'sensor 5', 'sensor 6', 'sensor 10','sensor 14', 'sensor 16', 'sensor 18', 'sensor 19']

fd_001_train  = pd.read_csv('train_FD001.csv')
y_train = fd_001_train['RUL']
X_train = fd_001_train.drop(columns=useless_features)

fd_001_test = pd.read_csv('test_FD001.csv')
y_test = fd_001_test['RUL']
X_test = fd_001_test.drop(columns=useless_features)
""" fan_prepared_test = num_pipeline.fit_transform(fd_001_test)
fan_labels_test = fd_001_test["RUL"].copy() """

' fan_prepared_test = num_pipeline.fit_transform(fd_001_test)\nfan_labels_test = fd_001_test["RUL"].copy() '

<h2>Multi-layer Perceptron</h2>

3 hidden layers, Adam optimizer

In [5]:
X_mlp_train_full = X_train.copy()
y_mlp_train_full = y_train.copy()
X_mlp_test = X_train.copy()
y_mlp_test = y_train.copy()

X_mlp_train, X_mlp_valid, y_mlp_train, y_mlp_valid = train_test_split(
    X_mlp_train_full, y_mlp_train_full, random_state=42)

mlp_reg = MLPRegressor(hidden_layer_sizes=[50, 50, 50], random_state=42)
pipeline = make_pipeline(StandardScaler(), mlp_reg)
pipeline.fit(X_mlp_train_full, y_mlp_train_full)
y_pred = pipeline.predict(X_mlp_test)
rmse = mean_squared_error(y_mlp_test, y_pred, squared=False)

In [6]:
rmse

40.629634992286704

3 hidden layers, lbfgs optimizer

In [7]:
X_mlp_train_full = X_train.copy()
y_mlp_train_full = y_train.copy()
X_mlp_test = X_train.copy()
y_mlp_test = y_train.copy()

X_mlp_train, X_mlp_valid, y_mlp_train, y_mlp_valid = train_test_split(
    X_mlp_train_full, y_mlp_train_full, random_state=42)

mlp_reg = MLPRegressor(hidden_layer_sizes=[50, 50, 50], random_state=42, solver='lbfgs')
pipeline = make_pipeline(StandardScaler(), mlp_reg)
pipeline.fit(X_mlp_train_full, y_mlp_train_full)
y_pred = pipeline.predict(X_mlp_test)
rmse = mean_squared_error(y_mlp_test, y_pred, squared=False)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [10]:
rmse

40.39162843769508

Adam optimizer with lower learning rate

In [13]:
X_mlp_train_full = X_train.copy()
y_mlp_train_full = y_train.copy()
X_mlp_test = X_train.copy()
y_mlp_test = y_train.copy()

mlp_reg = MLPRegressor(hidden_layer_sizes=[50, 50, 50], random_state=42, learning_rate_init=0.0005, max_iter=600)
pipeline = make_pipeline(StandardScaler(), mlp_reg)
pipeline.fit(X_mlp_train_full, y_mlp_train_full)
y_pred = pipeline.predict(X_mlp_test)
rmse = mean_squared_error(y_mlp_test, y_pred, squared=False)

In [19]:
test_model(X_mlp_train_full,y_mlp_train_full,X_mlp_test,y_mlp_test, pipeline)

--------------------TRAIN SET-------------------------
r2_score =  0.66 (1.0 means perfect fit)
max_error =  217.51
root_mean_squared_error =  40.05
--------------------TEST SET-------------------------
r2_score =  0.66 (1.0 means perfect fit)
max_error =  217.51
root_mean_squared_error =  40.05
RangeIndex(start=0, stop=100, step=1)


(0.6619856937750026, 0.6619856937750026)