Imports and helper functions

In [1]:
#Code source: https://github.com/ageron/handson-ml2/blob/master/02_end_to_end_machine_learning_project.ipynb

# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# Common imports
import numpy as np
import os

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "Report", "fig")
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

Regression MLP imports

In [2]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, max_error

RNN imports

In [18]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import bidirectional

ImportError: cannot import name 'bidirectional' from 'tensorflow.keras' (c:\Program Files\Python310\lib\site-packages\keras\api\_v2\keras\__init__.py)

Make it deterministic

In [4]:
tf.keras.utils.set_random_seed(42)

Early stopping callback

In [35]:
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

Import the training set and the data set

In [5]:
import pandas as pd
scaler = StandardScaler()

fd_001_train  = pd.read_csv('train_FD001.csv')
fd_001_test = pd.read_csv('test_FD001.csv')

useless_features = ['setting 3', 'sensor 1', 'sensor 5', 'sensor 6', 'sensor 10','sensor 14', 'sensor 16', 'sensor 18', 'sensor 19']
ids_and_targets = ['engine','RUL']
useful_features = fd_001_train.columns.drop(useless_features + ids_and_targets)

X_train_full = fd_001_train.drop(columns=useless_features)
X_train_scaled_full = X_train_full.copy()
X_train_scaled_full[useful_features] = scaler.fit_transform(X_train_scaled_full[useful_features])
X_train_scaled = X_train_scaled_full[useful_features]
y_train_full = fd_001_train['RUL']

X_test_full = fd_001_test.drop(columns=useless_features)
X_test_scaled_full = X_test_full.copy()
X_test_scaled_full[useful_features] = scaler.fit_transform(X_test_scaled_full[useful_features])
X_test_scaled = X_test_scaled_full[useful_features]
y_test = fd_001_test['RUL']

In [6]:
X_train_scaled_full

Unnamed: 0,engine,cycle,setting 1,setting 2,sensor 2,sensor 3,sensor 4,sensor 7,sensor 8,sensor 9,sensor 11,sensor 12,sensor 13,sensor 15,sensor 17,sensor 20,sensor 21,RUL
0,1,-1.565170,-0.315980,-1.372953,-1.721725,-0.134255,-0.925936,1.121141,-0.516338,-0.862813,-0.266467,0.334262,-1.058890,-0.603816,-0.781710,1.348493,1.194427,191.0
1,1,-1.550652,0.872722,-1.031720,-1.061780,0.211528,-0.643726,0.431930,-0.798093,-0.958818,-0.191583,1.174899,-0.363646,-0.275852,-0.781710,1.016528,1.236922,190.0
2,1,-1.536134,-1.961874,1.015677,-0.661813,-0.413166,-0.525953,1.008155,-0.234584,-0.557139,-1.015303,1.364721,-0.919841,-0.649144,-2.073094,0.739891,0.503423,189.0
3,1,-1.521616,0.324090,-0.008022,-0.661813,-1.261314,-0.784831,1.222827,0.188048,-0.713826,-1.539489,1.961302,-0.224597,-1.971665,-0.781710,0.352598,0.777792,188.0
4,1,-1.507098,-0.864611,-0.690488,-0.621816,-1.251528,-0.301518,0.714393,-0.516338,-0.457059,-0.977861,1.052871,-0.780793,-0.339845,-0.136018,0.463253,1.059552,187.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,1.265868,-0.178822,-1.031720,1.618000,1.216258,2.188375,-2.189329,1.315066,0.012547,1.980044,-2.607969,2.278282,1.425294,2.446751,-1.805173,-2.921113,4.0
20627,100,1.280386,-0.727453,-1.714186,1.717992,2.279706,2.738351,-2.833345,1.878576,-0.006020,1.867718,-2.350355,1.722087,1.913240,1.155367,-2.856395,-1.203764,3.0
20628,100,1.294904,0.186933,-0.008022,1.478011,1.946971,2.138377,-2.742957,2.019453,0.029755,2.054927,-1.902919,2.000184,3.265092,3.092444,-2.081810,-3.292481,2.0
20629,100,1.309423,-0.498857,1.015677,1.098043,2.403666,1.955051,-3.036719,2.160330,0.383884,3.178182,-2.363913,1.861136,2.579834,1.155367,-2.911722,-2.085072,1.0


In [7]:
fd_001_test['engine']

0          1
1          1
2          1
3          1
4          1
        ... 
13091    100
13092    100
13093    100
13094    100
13095    100
Name: engine, Length: 13096, dtype: int64

In [8]:
features_per_engine_test = []
counter = 0
for i in range(len(fd_001_test)-1):
    if fd_001_test['engine'][i] == fd_001_test['engine'][i+1]:
        counter += 1
    else:
        features_per_engine_test.append(counter)
        counter = 0
len(features_per_engine_test)

        

99

In [9]:
min(features_per_engine_test)

30

<h2>Recurrent Neural Network</h2>

Let's try with tensorflow and keras

In [10]:
""" features_train = tf.constant(X_train_full)
features_test = tf.constant(X_test)
norm_layer = layers.Normalization()
norm_layer.adapt(features_train)
X_train_scaled = norm_layer(features_train)
X_test_scaled = norm_layer(features_test) """

' features_train = tf.constant(X_train_full)\nfeatures_test = tf.constant(X_test)\nnorm_layer = layers.Normalization()\nnorm_layer.adapt(features_train)\nX_train_scaled = norm_layer(features_train)\nX_test_scaled = norm_layer(features_test) '

In [11]:
tensor_features_train = tf.constant(X_train_scaled)
tensor_features_test = tf.constant(X_test_scaled)

In [12]:
""" X_train_scaled = tf.expand_dims(X_train_scaled, axis=-1) """

' X_train_scaled = tf.expand_dims(X_train_scaled, axis=-1) '

In [13]:
rnn_model = keras.models.Sequential([
    layers.LSTM(15, input_shape=(16,1), return_sequences=True, activation='relu'),
    layers.LSTM(50, return_sequences=True, activation='relu'),
    layers.LSTM(50, activation='relu'),
    layers.Dense(50),
    layers.Dense(1)
])
rnn_model.compile(loss = tf.keras.losses.MeanSquaredError(),
                      optimizer = tf.keras.optimizers.Adam())
rnn_model.fit(tensor_features_train, y_train_full, epochs=20, validation_data=(
    tensor_features_test, y_test),)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2b0caa6fb50>

In [14]:
rnn_predictions = rnn_model.predict(tensor_features_train)
rmse = mean_squared_error(y_train_full, rnn_predictions, squared=False)
r2 = r2_score(y_train_full, rnn_predictions)
print(f'train: rmse={rmse}, r2={r2}')

train: rmse=36.80459952386067, r2=0.7144866097821738


In [15]:
rnn_predictions = rnn_model.predict(tensor_features_test)
rmse = mean_squared_error(y_test, rnn_predictions, squared=False)
r2 = r2_score(y_test, rnn_predictions)
print(f'train: rmse={rmse}, r2={r2}')

train: rmse=57.54936188880059, r2=0.0478552608569941


In [36]:
rnn_model2 = keras.models.Sequential([
    layers.LSTM(128, input_shape=(16, 1),
                return_sequences=True, activation='relu'),
    layers.Bidirectional(layers.LSTM(
        64, return_sequences=True, activation='relu')),
    #layers.Dropout(.2),
    layers.Bidirectional(layers.LSTM(
        64, return_sequences=True, activation='relu')),
    #layers.Dropout(.2),
    layers.Dense(1)
])
rnn_model2.compile(loss=tf.keras.losses.MeanSquaredError(),
                   optimizer=tf.keras.optimizers.Adam(learning_rate=5e-3))
rnn_model2.fit(tensor_features_train, y_train_full, epochs=20, validation_data=(
    tensor_features_test, y_test), callbacks=[callback])


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20


<keras.callbacks.History at 0x2b0fe3d72e0>

In [24]:
rnn_predictions = rnn_model2.predict(tensor_features_train)
rmse = mean_squared_error(y_train_full, rnn_predictions, squared=False)
r2 = r2_score(y_train_full, rnn_predictions)
print(f'train: rmse={rmse}, r2={r2}')

train: rmse=22.903523387075293, r2=0.8894326544914424


In [25]:
rnn_predictions = rnn_model2.predict(tensor_features_test)
rmse = mean_squared_error(y_test, rnn_predictions, squared=False)
r2 = r2_score(y_test, rnn_predictions)
print(f'test: rmse={rmse}, r2={r2}')

test: rmse=61.0820650844475, r2=-0.07262859358126961


<h2>Time-shifted features</h2>

dataset time-shifter borrowed from previous assignment solution

In [None]:
def make_shifted_feature_dataset(df, m, X_cols, y_cols):
    temp_df_shifted = pd.DataFrame(df[X_cols], columns=y_cols)
    shifted_columns_names = [s + ' (t)' for s in X_cols]
    temp_df_shifted[shifted_columns_names] = pd.DataFrame(df[X_cols])
    for i in range(1,m+1):
        shifted_columns_names = [s + f' (t-{i})' for s in X_cols]
        temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
    temp_df_shifted["RUL"] = df["RUL"]
    return temp_df_shifted

In [None]:
m = 20
time_shifted_train = make_shifted_feature_dataset(
    X_train_scaled_full, m, useful_features, ['RUL'])
time_shifted_test = make_shifted_feature_dataset(
    X_test_scaled_full, m, useful_features, ['RUL'])

X_time_shifted_train = time_shifted_train.dropna().drop(columns="RUL")
y_time_shifted_train = time_shifted_train.dropna()["RUL"]

""" tensor_time_shifted = tf.constant(X_time_shifted_train)
tensor_time_shifted = tf.expand_dims(tensor_time_shifted, axis=-1) """


  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shifted_columns_names] = df.groupby('engine')[X_cols].shift(periods=i)
  temp_df_shifted[shi

' tensor_time_shifted = tf.constant(X_time_shifted_train)\ntensor_time_shifted = tf.expand_dims(tensor_time_shifted, axis=-1) '

<h2>Multi-layer Perceptron with timeshift</h2>

In [None]:
mlp_model = keras.models.Sequential([
    layers.Dense(50),
    layers.Dense(50),
    layers.Dense(50),
    layers.Dense(1)
])
mlp_model.compile(loss = tf.keras.losses.MeanSquaredError(),
                      optimizer = tf.keras.optimizers.Adam())
mlp_model.fit(X_time_shifted_train, y_time_shifted_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2b0ed611c90>

In [None]:
mlp_predictions = mlp_model.predict(X_time_shifted_train)
rmse = mean_squared_error(y_time_shifted_train, mlp_predictions, squared=False)
r2 = r2_score(y_time_shifted_train, mlp_predictions)
print(f'test: rmse={rmse}, r2={r2}')

test: rmse=36.47968625634497, r2=0.6755224748818442


In [None]:
rnn_model4 = keras.models.Sequential([
    layers.SimpleRNN(32, input_shape=(315,1), return_sequences=True, activation='relu'),
    layers.SimpleRNN(64, return_sequences=True, activation='relu'),
    layers.Dense(50),
    layers.Dense(1)
])
rnn_model4.compile(loss = tf.keras.losses.MeanSquaredError(),
                      optimizer = tf.keras.optimizers.Adam())
rnn_model4.fit(tensor_time_shifted, y_time_shifted_train, epochs=10)

NameError: name 'tensor_time_shifted' is not defined

 2.1 Q: The atmospheric pressure is 14.5 psi. If the absolute pressure is 2,865.6 psfa, what is the gauge pressure?
 Answer: 19.9 psi absolute => gauge pressure = 19.9-14.5 = 5.4 psi
 
 2.2 Q:What is the liquid density in a manometer, if the difference in the liquid levels in the manometer tubes is 1.35 m, and the differential pressure between the tubes is 7.85 kPa?
 A: p_diff = p_atm +rho*g*L => rho = (p_diff-p_atm)/(g*L) => rho = (7.85kPa-101.3kPa)/((-9.81m/s^2)*1.35m) = 7 056.3 kg / m3
 
2.3 Q: Briefly discuss the different physical principles that are used in temperature measuring instruments and give examples of instruments that use each of these principles.
A: Mechanical, electric, semiconductive and noncontact principles can be used in temperature measurements.
 
Mechanical measuring devices are called thermomoeters and measure temperature due to variation of density. The Bourbon gauge can be used to measure the pressure which can be translated to a temperature by the means of equations such as the ideal gas law.
 
Electric measurements are possible due to the Seebeck effect, which states that a voltage will be generated due to difference of temperature in different junctions of a circuit. But the measurement is also dependent on the temperature gradient of the wires.
 
2.4 Q: Explain the difference between systematic and random errors. What are the typical sources of these two types of error?
A: Random error is related to the precision of the measurement. The error can be described as a stochastic distribution and can't be fully removed. Filtering and/or redundant sensor fusion can be used to decrease random error. The source of this type of error is due to precision limitations of equipment.
Systematic error are due to other measurable disturbances that affect the system. It can be due to heat exchange within wires or change in environment, things affecting the Seebeck effect. This error can be compensated by reducing heat conductivity in wires or introduce additional temperature measurements.
 
2.5 Q: The recalibration frequency of a pressure transducer with a range of 0-10 bar is set so that it is recalibrated once the measurement error has grown to +1% of the full-scale reading. How can its inaccuracy be expressed in the form of a ±x% error in the output reading?
A: The measurement and its error would be expressed as: middle_value ± 1%

2.6 Q:Orifice meters are used to measure the flow rate of a fluid. In an experiment, the flow coefficient K of an orifice is found by collecting and weighing water flowing through the orifice during a certain interval while the orifice is under a constant head. K is calculated from the following formula...

A:

2.7 Q: Young's modulus...
A:

2.8 Q:Instruments used to measure the volume flow rate of fluids (volume flowing in unit time) can be divided into a number of different types. Explain what these different types are and discuss briefly how instruments in each class work, using sketches of instruments as appropriate.
A:
2.9 Q: What is a Coriolis meter? What is it used for and how does it work?
A: A Coriolis meter is used to measure mass flow and density in fluids, thus also volume flow is measured. It works by splitting a flow equally into two tubes. Three electromagnetic coils are installed along the tubes, the middle one uses a power-source to generate a pushing force between the tubes and induce oscillations between the tubes at the natural resonating frequency. A/C voltage is generated in the first and third coil, these A/C signals are in phase when no liquid flows through the tubes and phase-shifted when flow is present. The mass flow rate is directly proportional to the time delay between these signals and the frequency of the signals correlate to the liquid density. Mass flow divided by density equals the volumetric flow.

2.10 Q: Name four different kinds of differential pressure meters. Discuss briefly how each one works and explain the main advantages and disadvantages of each type.
A: