## Turbofan degradation: generating prediction problems with Trane

In [1]:
# Load the data through the project-local `utils.load_data` helper.
# The helper returns a pair: `df` is used further down as a DataFrame
# (`df.shape`, `df.reset_index()`), and `metadata` is passed to Trane's
# ProblemGenerator.
# NOTE(review): presumably this is the turbofan degradation dataset named in
# the notebook title — confirm against utils.py.
from utils import load_data
df, metadata = load_data()

In [12]:
from trane import ProblemGenerator
# Candidate entity columns handed to Trane. The recorded sample of generated
# problems is phrased "For each <Cycle> ...".
# NOTE(review): presumably 'Engine' yields an analogous "For each <Engine>"
# family further down the (truncated) output — confirm.
entity_columns = ['Engine', 'Cycle']
# Build the generator from the table metadata and the entity columns.
problem_generator = ProblemGenerator(
    metadata=metadata,
    entity_columns=entity_columns,
)
# Enumerate the candidate prediction problems; the result is later measured
# with len() and iterated over.
problems = problem_generator.generate()

In [13]:
# Report how many problems Trane produced relative to the width of the table.
num_columns = len(df.columns)
print(f"generated {len(problems)} problems from {num_columns} columns")

generated 12996 problems from 29 columns


In [14]:
# Preview only the first few generated problems. Printing every problem
# (12,996 in the recorded run) floods the cell output and bloats the notebook.
from itertools import islice

PREVIEW_COUNT = 10  # increase to inspect more problems

# islice stops early without copying the list and tolerates short inputs.
for problem in islice(problems, PREVIEW_COUNT):
    print(problem)

# State how much of the full list was shown so the truncation is explicit.
print(f"... showed {min(PREVIEW_COUNT, len(problems))} of {len(problems)} problems")

For each <Cycle> predict if there exists a record
For each <Cycle> predict if there exists a record with <(Bleed Enthalpy)> greater than <None>
For each <Cycle> predict if there exists a record with <(Bleed Enthalpy)> less than <None>
For each <Cycle> predict if there exists a record with <(Burner Fuel-Air Ratio)> greater than <None>
For each <Cycle> predict if there exists a record with <(Burner Fuel-Air Ratio)> less than <None>
For each <Cycle> predict if there exists a record with <(Bypass Ratio) > greater than <None>
For each <Cycle> predict if there exists a record with <(Bypass Ratio) > less than <None>
For each <Cycle> predict if there exists a record with <(Bypass-Duct Pressure) (psia)> greater than <None>
For each <Cycle> predict if there exists a record with <(Bypass-Duct Pressure) (psia)> less than <None>
For each <Cycle> predict if there exists a record with <(Corrected Core Speed) (rpm)> greater than <None>
For each <Cycle> predict if there exists a record with <(Corrected

In [5]:
import featuretools as ft

# Build an EntitySet named 'observations' from the raw records.
# reset_index() turns the frame's index into a regular column; the 'index'
# column is then used as the unique row id.
# NOTE(review): this assumes the index is unnamed (so the new column is called
# 'index') and that `df` has a 'date' column — confirm against load_data().
es = ft.EntitySet('observations')

es.add_dataframe(
    dataframe=df.reset_index(),
    dataframe_name='records',
    index='index',
    time_index='date',
)

# Derive an 'engines' dataframe keyed by 'unit_number' ...
# NOTE(review): the Trane cell refers to these columns as 'Engine' / 'Cycle';
# verify that 'unit_number' and 'time_cycles' actually exist in `df`.
es.normalize_dataframe(
    base_dataframe_name='records',
    new_dataframe_name='engines',
    index='unit_number',
)

# ... and a 'cycles' dataframe keyed by 'time_cycles'.
es.normalize_dataframe(
    base_dataframe_name='records',
    new_dataframe_name='cycles',
    index='time_cycles',
)

# Run Deep Feature Synthesis with 'engines' as the target, using only the
# 'sum' aggregation and no transform primitives.
# NOTE(review): `ex` is not defined in any visible cell of this notebook; it
# must hold the cutoff times (presumably Trane label times) before this runs.
fm, fd = ft.dfs(
    entityset=es,
    target_dataframe_name='engines',
    cutoff_time=ex,
    agg_primitives=['sum'],
    trans_primitives=[],
    cutoff_time_in_index=True,
    include_cutoff_time=False,
    verbose=False,
)

# A bare `fm.head()` in the middle of a cell is evaluated and discarded: only
# the last expression of a cell is rendered. Display it explicitly instead.
display(fm.head())

# Last expression of the cell: renders the EntitySet diagram.
es.plot()

ModuleNotFoundError: No module named 'featuretools'

## Machine learning: model selection and hyperparameter tuning

In [None]:
# Split the feature matrix into features (`fm`) and target (`y`).
# NOTE(review): both statements mutate `fm` in place, so this cell is not
# re-runnable — on a second run the label column has already been removed.
# Drop the existing index in place and replace it with a default one.
fm.reset_index(drop=True, inplace=True)
# Remove the label column from `fm` via the `.ww` accessor (presumably
# Woodwork's) and encode it as integer category codes.
# NOTE(review): '_execute_operations_on_df' looks like a Trane-generated label
# column brought in through the cutoff times — confirm. Also note that these
# category codes are later used as a regression target scored with MSE.
y = fm.ww.pop('_execute_operations_on_df').astype('category').cat.codes

In [None]:
from sklearn.datasets import load_wine
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import lightgbm as lgb
import xgboost as xgb
import warnings

# Registry of candidate regressor classes, keyed by the short names that the
# tuning session uses to select a model. Values are classes, not instances.
models = dict(
    LGB=lgb.LGBMRegressor,
    XGB=xgb.XGBRegressor,
    DTC=DecisionTreeRegressor,
    SGDC=SGDRegressor,
    RF=RandomForestRegressor,
)

def scoring_function(model_name, hyperparameter_values):
    """Cross-validate one candidate configuration and return its mean MSE.

    Args:
        model_name: Key into the notebook-level ``models`` registry.
        hyperparameter_values: Dict of keyword arguments forwarded to the
            selected model's constructor.

    Returns:
        The mean of the per-fold mean squared errors, computed with a 5-split
        ``KFold`` over the notebook-level ``fm`` (features) and ``y`` (target).
    """
    # Instantiate the requested model with the proposed hyperparameters.
    candidate = models[model_name](**hyperparameter_values)
    # Wrap the plain MSE metric so cross_val_score can use it as a scorer.
    mse_scorer = make_scorer(mean_squared_error)
    fold_scores = cross_val_score(
        estimator=candidate,
        X=fm,
        y=y,
        scoring=mse_scorer,
        cv=KFold(n_splits=5),
        n_jobs=-1,
    )
    return fold_scores.mean()

from btb.tuning import Tunable
from btb.tuning import hyperparams as hp

# Hyperparameter search space for each candidate model. The keys match the
# `models` registry so the session can pair a proposal with its model class.
tunables = dict(
    # LightGBM regressor
    LGB=Tunable(dict(
        num_leaves=hp.IntHyperParam(min=2, max=100),
        max_depth=hp.IntHyperParam(min=3, max=200),
        learning_rate=hp.FloatHyperParam(min=0.01, max=1),
        n_estimators=hp.IntHyperParam(min=10, max=1000),
    )),
    # XGBoost regressor
    XGB=Tunable(dict(
        max_depth=hp.IntHyperParam(min=3, max=200),
        learning_rate=hp.FloatHyperParam(min=0.01, max=1),
        n_estimators=hp.IntHyperParam(min=10, max=1000),
    )),
    # Decision tree regressor
    DTC=Tunable(dict(
        max_depth=hp.IntHyperParam(min=3, max=200),
        min_samples_split=hp.FloatHyperParam(min=0.01, max=1),
    )),
    # SGD regressor (the only space with explicit defaults)
    SGDC=Tunable(dict(
        max_iter=hp.IntHyperParam(min=1, max=5000, default=1000),
        tol=hp.FloatHyperParam(min=1e-3, max=1, default=1e-3),
    )),
    # Random forest regressor
    RF=Tunable(dict(
        n_estimators=hp.IntHyperParam(min=10, max=1000),
        max_depth=hp.IntHyperParam(min=3, max=200),
        min_samples_split=hp.FloatHyperParam(min=0.01, max=1),
    )),
)

from btb import BTBSession

# Create a BTB session over all model search spaces, scoring each proposal with
# `scoring_function`. maximize=False because that function returns an error
# (mean MSE), so lower is better.
session = BTBSession(
    tunables=tunables,
    scorer=scoring_function,
    maximize=False,
    verbose=True
)

# Run the session with an argument of 30 (presumably the number of tuning
# iterations — confirm against the BTB docs) and keep the returned result.
best_proposal = session.run(30)

In [None]:
# Show the result returned by `session.run(...)` as the cell's output.
best_proposal