In [1]:
import sys
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import TransformedTargetRegressor
from lightgbm import LGBMRegressor
from ga_selects_features import GASelectsFeatures

In [2]:
# Build the dataset location as <first sys.path entry>/1_data/FD001.csv and
# read it into a DataFrame.
# NOTE(review): this relies on sys.path[0] pointing at the directory that
# contains "1_data" (presumably the notebook's folder) — confirm on a fresh kernel.
path = Path(sys.path[0]) / "1_data" / "FD001.csv"
df_data = pd.read_csv(path)

In [3]:
# Candidate input columns: 'time', three 'setting_*' columns and
# twenty-one 'sensor_*' columns, in that order.
input_features = (
    ['time']
    + [f'setting_{idx}' for idx in range(1, 4)]
    + [f'sensor_{idx}' for idx in range(1, 22)]
)
target = 'RUL'

X = df_data[input_features]
y = df_data[target]

# Two-stage split with a fixed seed: first hold out 30% of the rows as the test
# set, then carve 20% of the remaining rows off as the validation set.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.2, random_state=42
)

# Estimator used for feature selection: standardised inputs feeding a small
# LightGBM regressor, wrapped so that the target is standardised as well.
pipeline = Pipeline([
    ('std', StandardScaler()),
    ('regressor', LGBMRegressor(max_depth=4, n_estimators=12,
                                boosting_type="gbdt", verbose=0)),
])
model = TransformedTargetRegressor(regressor=pipeline,
                                   transformer=StandardScaler())


In [4]:
# Run the feature selector over the candidate columns, handing it the estimator
# together with the train and validation splits.
# NOTE(review): GASelectsFeatures is a project-local class; judging by the cell
# output it logs one row per generation (gen, nevals, Mean, Max, Min) —
# confirm which metric those columns report.
ga_selects_features = GASelectsFeatures(input_model=input_features)

selection_inputs = dict(
    model=model,
    X_train=X_train,
    X_val=X_val,
    y_train=y_train,
    y_val=y_val,
)
best_features = ga_selects_features.transform(**selection_inputs)
print(best_features)

gen	nevals	Mean         	Max          	Min          
0  	200   	[45.35700041]	[51.37499595]	[41.14260425]
1  	175   	[43.59095936]	[53.08899324]	[41.1746566] 
2  	183   	[43.42218589]	[55.63494737]	[41.16571089]
3  	182   	[43.0740593] 	[52.03889157]	[41.13065842]
4  	175   	[42.87290522]	[51.06619352]	[41.13065842]
5  	184   	[43.18411623]	[50.55917122]	[41.13804856]
6  	184   	[42.91904336]	[50.48567419]	[41.13453379]
7  	174   	[43.25979239]	[52.03562544]	[41.13860617]
8  	184   	[43.06492984]	[55.69526589]	[41.1373537] 
9  	181   	[43.07324471]	[50.72341992]	[41.14040344]
10 	182   	[43.07004413]	[50.48639806]	[41.13710442]
11 	175   	[42.92890218]	[50.31661002]	[41.13055006]
12 	178   	[43.06489007]	[51.44101142]	[41.12498119]
13 	174   	[42.88591286]	[51.28925855]	[41.12498119]
14 	181   	[43.1698169] 	[50.91552555]	[41.12498119]
15 	178   	[43.18524096]	[51.39671166]	[41.12498119]
16 	181   	[43.02360561]	[51.85590854]	[41.12498119]
17 	178   	[43.41529865]	[50.31704404]	[41.138

In [5]:
# Show the feature subset returned by the selector as the cell's output.
best_features

['time',
 'setting_1',
 'setting_2',
 'setting_3',
 'sensor_3',
 'sensor_5',
 'sensor_7',
 'sensor_9',
 'sensor_11',
 'sensor_12',
 'sensor_13',
 'sensor_15',
 'sensor_17',
 'sensor_18',
 'sensor_21']

In [6]:
# Baseline: a fresh copy of the same estimator, trained on every column of
# X_train and scored on the held-out test set.
pipeline = Pipeline([
    ('std', StandardScaler()),
    ('regressor', LGBMRegressor(max_depth=4, n_estimators=12,
                                boosting_type="gbdt", verbose=0)),
])
model = TransformedTargetRegressor(regressor=pipeline,
                                   transformer=StandardScaler())

# fit() returns the fitted estimator, so training and prediction can be chained.
y_pred = model.fit(X_train, y_train).predict(X_test)

# Test-set RMSE using all input features (displayed as the cell's output).
ga_selects_features.root_mean_squared_error(y_test, y_pred)

39.6568687151808

In [7]:
# Same estimator as before, rebuilt from scratch, but trained and evaluated
# only on the columns listed in best_features.
pipeline = Pipeline([
    ('std', StandardScaler()),
    ('regressor', LGBMRegressor(max_depth=4, n_estimators=12,
                                boosting_type="gbdt", verbose=0)),
])
model = TransformedTargetRegressor(regressor=pipeline,
                                   transformer=StandardScaler())

X_train_selected = X_train[best_features]
X_test_selected = X_test[best_features]

# fit() returns the fitted estimator, so training and prediction can be chained.
y_pred = model.fit(X_train_selected, y_train).predict(X_test_selected)

# Test-set RMSE using the selected subset (displayed as the cell's output).
ga_selects_features.root_mean_squared_error(y_test, y_pred)

39.697124798843554