Imports and helper functions

In [1]:
#Code source: https://github.com/ageron/handson-ml2/blob/master/02_end_to_end_machine_learning_project.ipynb

# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# Common imports
import numpy as np
import os

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "Report", "fig")
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

Regression MLP imports

In [2]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, max_error

RNN imports

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

Make it deterministic

In [4]:
tf.keras.utils.set_random_seed(42)

Import the training set and the data set

In [7]:
import pandas as pd
scaler = StandardScaler()

fd_001_train  = pd.read_csv('train_FD001.csv')
fd_001_test = pd.read_csv('test_FD001.csv')

useless_features = ['cycle', 'setting 3', 'sensor 1', 'sensor 5', 'sensor 6', 'sensor 10','sensor 14', 'sensor 16', 'sensor 18', 'sensor 19']
ids_and_targets = ['engine','RUL']
useful_features = fd_001_train.columns.drop(useless_features + ids_and_targets)

X_train_full = fd_001_train.drop(columns=useless_features)
X_train_scaled_full = X_train_full.copy()
X_train_scaled_full[useful_features] = scaler.fit_transform(X_train_scaled_full[useful_features])
X_train_scaled = X_train_scaled_full[useful_features]
y_train_full = fd_001_train['RUL']

X_test_full = fd_001_test.drop(columns=useless_features)
X_test_scaled_full = X_test_full.copy()
X_test_scaled_full[useful_features] = scaler.fit_transform(X_test_scaled_full[useful_features])
X_test_scaled = X_test_scaled_full[useful_features]
y_test = fd_001_test['RUL']

In [8]:
X_train_scaled_full

Unnamed: 0,engine,setting 1,setting 2,sensor 2,sensor 3,sensor 4,sensor 7,sensor 8,sensor 9,sensor 11,sensor 12,sensor 13,sensor 15,sensor 17,sensor 20,sensor 21,RUL
0,1,-0.315980,-1.372953,-1.721725,-0.134255,-0.925936,1.121141,-0.516338,-0.862813,-0.266467,0.334262,-1.058890,-0.603816,-0.781710,1.348493,1.194427,191.0
1,1,0.872722,-1.031720,-1.061780,0.211528,-0.643726,0.431930,-0.798093,-0.958818,-0.191583,1.174899,-0.363646,-0.275852,-0.781710,1.016528,1.236922,190.0
2,1,-1.961874,1.015677,-0.661813,-0.413166,-0.525953,1.008155,-0.234584,-0.557139,-1.015303,1.364721,-0.919841,-0.649144,-2.073094,0.739891,0.503423,189.0
3,1,0.324090,-0.008022,-0.661813,-1.261314,-0.784831,1.222827,0.188048,-0.713826,-1.539489,1.961302,-0.224597,-1.971665,-0.781710,0.352598,0.777792,188.0
4,1,-0.864611,-0.690488,-0.621816,-1.251528,-0.301518,0.714393,-0.516338,-0.457059,-0.977861,1.052871,-0.780793,-0.339845,-0.136018,0.463253,1.059552,187.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,-0.178822,-1.031720,1.618000,1.216258,2.188375,-2.189329,1.315066,0.012547,1.980044,-2.607969,2.278282,1.425294,2.446751,-1.805173,-2.921113,4.0
20627,100,-0.727453,-1.714186,1.717992,2.279706,2.738351,-2.833345,1.878576,-0.006020,1.867718,-2.350355,1.722087,1.913240,1.155367,-2.856395,-1.203764,3.0
20628,100,0.186933,-0.008022,1.478011,1.946971,2.138377,-2.742957,2.019453,0.029755,2.054927,-1.902919,2.000184,3.265092,3.092444,-2.081810,-3.292481,2.0
20629,100,-0.498857,1.015677,1.098043,2.403666,1.955051,-3.036719,2.160330,0.383884,3.178182,-2.363913,1.861136,2.579834,1.155367,-2.911722,-2.085072,1.0


In [9]:
fd_001_test['engine']

0          1
1          1
2          1
3          1
4          1
        ... 
13091    100
13092    100
13093    100
13094    100
13095    100
Name: engine, Length: 13096, dtype: int64

In [10]:
features_per_engine_test = []
counter = 0
for i in range(len(fd_001_test)-1):
    if fd_001_test['engine'][i] == fd_001_test['engine'][i+1]:
        counter += 1
    else:
        features_per_engine_test.append(counter)
        counter = 0
len(features_per_engine_test)

        

99

In [11]:
min(features_per_engine_test)

30

<h2>Multi-layer Perceptron</h2>

<h2>Recurrent Neural Network</h2>

Let's try with tensorflow and keras

In [12]:
""" features_train = tf.constant(X_train_full)
features_test = tf.constant(X_test)
norm_layer = layers.Normalization()
norm_layer.adapt(features_train)
X_train_scaled = norm_layer(features_train)
X_test_scaled = norm_layer(features_test) """

' features_train = tf.constant(X_train_full)\nfeatures_test = tf.constant(X_test)\nnorm_layer = layers.Normalization()\nnorm_layer.adapt(features_train)\nX_train_scaled = norm_layer(features_train)\nX_test_scaled = norm_layer(features_test) '

In [13]:
tensor_features_train = tf.constant(X_train_scaled)
tensor_features_test = tf.constant(X_test_scaled)

In [14]:
""" X_train_scaled = tf.expand_dims(X_train_scaled, axis=-1) """

' X_train_scaled = tf.expand_dims(X_train_scaled, axis=-1) '

In [15]:
rnn_model = keras.models.Sequential([
    layers.LSTM(15, input_shape=(15,1), return_sequences=True, activation='relu'),
    layers.LSTM(50, return_sequences=True, activation='relu'),
    layers.LSTM(50, activation='relu'),
    layers.Dense(50),
    layers.Dense(1)
])
rnn_model.compile(loss = tf.keras.losses.MeanSquaredError(),
                      optimizer = tf.keras.optimizers.Adam())
rnn_model.fit(tensor_features_train, y_train_full, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2925c42fb80>

In [16]:
rnn_predictions = rnn_model.predict(tensor_features_train)
rmse = mean_squared_error(y_train_full, rnn_predictions, squared=False)
r2 = r2_score(y_train_full, rnn_predictions)
print(f'train: rmse={rmse}, r2={r2}')

train: rmse=42.32846162795372, r2=0.6223519402285129


In [17]:
rnn_predictions = rnn_model.predict(tensor_features_test)
rmse = mean_squared_error(y_test, rnn_predictions, squared=False)
r2 = r2_score(y_test, rnn_predictions)
print(f'train: rmse={rmse}, r2={r2}')

train: rmse=68.57678533897497, r2=-0.3519984477253595


In [18]:
rnn_model2 = keras.models.Sequential([
    layers.LSTM(64, input_shape=(15,1), return_sequences=True, activation='relu'),
    layers.LSTM(128, return_sequences=True, activation='relu'),
    layers.LSTM(64, activation='relu'),
    layers.Dense(50),
    layers.Dense(1)
])
rnn_model2.compile(loss = tf.keras.losses.MeanSquaredError(),
                      optimizer = tf.keras.optimizers.Adam())
rnn_model2.fit(tensor_features_train, y_train_full, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2925cc02170>

In [19]:
rnn_predictions = rnn_model2.predict(tensor_features_train)
rmse = mean_squared_error(y_train_full, rnn_predictions, squared=False)
r2 = r2_score(y_train_full, rnn_predictions)
print(f'train: rmse={rmse}, r2={r2}')

train: rmse=41.77779009537171, r2=0.6321140388878295


In [20]:
rnn_predictions = rnn_model2.predict(tensor_features_test)
rmse = mean_squared_error(y_test, rnn_predictions, squared=False)
r2 = r2_score(y_test, rnn_predictions)
print(f'test: rmse={rmse}, r2={r2}')

test: rmse=64.75666639059966, r2=-0.20556578646452994


<h2>Time-shifted features</h2>

dataset time-shifter borrowed from previous assignment solution

In [102]:
def make_shifted_feature_dataset(df, m, X_cols, y_cols):
    temp_df_shifted = pd.DataFrame(df[X_cols], columns=y_cols)
    shifted_columns_names = [s + ' (t)' for s in X_cols]
    temp_df_shifted[shifted_columns_names] = pd.DataFrame(df[X_cols])
    for i in range(1,m+1):
        shifted_columns_names = [s + f' (t-{i})' for s in X_cols]
        temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
    temp_df_shifted["RUL"] = df["RUL"]
    return temp_df_shifted

In [103]:
m = 20
time_shifted_train = make_shifted_feature_dataset(
    X_train_scaled_full, m, useful_features, ['RUL'])
time_shifted_test = make_shifted_feature_dataset(
    X_test_scaled_full, m, useful_features, ['RUL'])

X_time_shifted_train = time_shifted_train.dropna().drop(columns="RUL")
y_time_shifted_train = time_shifted_train.dropna()["RUL"]

""" tensor_time_shifted = tf.constant(X_time_shifted_train)
tensor_time_shifted = tf.expand_dims(tensor_time_shifted, axis=-1) """


  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shi

In [120]:
mlp_model = keras.models.Sequential([
    layers.Dense(50),
    layers.Dense(50),
    layers.Dense(50),
    layers.Dense(1)
])
mlp_model.compile(loss = tf.keras.losses.MeanSquaredError(),
                      optimizer = tf.keras.optimizers.Adam())
mlp_model.fit(X_time_shifted_train, y_time_shifted_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x29278b5c280>

In [119]:
mlp_predictions = mlp_model.predict(X_time_shifted_train)
rmse = mean_squared_error(y_time_shifted_train, mlp_predictions, squared=False)
r2 = r2_score(y_time_shifted_train, mlp_predictions)
print(f'test: rmse={rmse}, r2={r2}')

test: rmse=38.04605176266772, r2=0.6470593991681878


In [116]:
rnn_model4 = keras.models.Sequential([
    layers.SimpleRNN(32, input_shape=(315,1), return_sequences=True, activation='relu'),
    layers.SimpleRNN(64, return_sequences=True, activation='relu'),
    layers.Dense(50),
    layers.Dense(1)
])
rnn_model4.compile(loss = tf.keras.losses.MeanSquaredError(),
                      optimizer = tf.keras.optimizers.Adam())
rnn_model4.fit(tensor_time_shifted, y_time_shifted_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x292039b3c40>

 Question: The atmospheric pressure is 14.5 psi. If the absolute pressure is 2,865.6 psfa, what is the gauge pressure?
 Answer: 19.9 psi abosolute => gauge pressure = 19.9-14.5 = 5.4 psi
 
 Q:What is the liquid density in a manometer, if the difference in the liquid levels in the manometer tubes is 1.35m, and the differential pressure between the tubes is 7.85 kPa?
 A: p_diff = p_atm +rho*g*L => rho = (p_diff-p_atm)/(g*L) => rho = (7.85kPa-101.3kPa)/((-9.81m/s^2)*1.35m) = 7 056.3 kg / m3???
 
 Q: Briefly discuss the different physical principles that are used in temperature measuring instruments and give examples of instruments that use each of these principles.
 A: 
 Mechanical, electric, semiconductive and noncontact principles can be used in temperature measurements.
 
Mechanical measuring devices are called thermomoeters and measure temperature due to variation of density. The Bourbon gauge can be used to measure the pressure which can be translated to a temperature by the means of equations such as the ideal gas law.
 
Electric measurements are possible due to the Seebeck effect, which states that a voltage will be generated due to difference of temperature in different junctions of a curcuit. But the measurement is also dependent on the temperature gradient of the wires.
 
Q: Explain the difference between systematic and random errors. What are the typical sources of these two types of error?
A: Random error is related to the precision of the measurement. The error can be described as a stochastic distribution and can't be fully removed. Filtering and/or redundant sensor fusion can be used to decrease random error. The source of this type of error is due to precision limitations of equipment.
Systematic error are due to other measureable disturbances that affect the system. It can be due to heat exchange within wires or change in environment, things affecting the seebeck effect. This error can be compensated by reducing heat conductivity in wires or introduce additional temperature measurements.
 
Q: The recalibration frequency of a pressure transducer with a range of 0-10 bar is set so that it is recalibrated once the measurement error has grown to +1% of the full-scale reading. How can its inaccuracy be expressed in the form of a ±x% error in the output reading?
A: The measurement and its error would be expressed as: middle_value ± 1%

Q:Orifice meters are used to measure the flow rate of a fluid. In an experiment, the flow coefficient K of an orifice is found by collecting and weighing water flowing through the orifice during a certain interval while the orifice is under a constant head. K is calculated from the following formula...

A:

