### **Эксперимент 6.**

Построить отдельные модели для NAI = 1, NAI = 0. Результат смотрим суммарно. Использовать конфигурацию эксперимента 5 (или 4, если лучше)

Сохранить и записать использованные константы.

In [8]:
import pandas as pd

y_cols = ['egtm']
meta_cols = ['reportts', 'acnum', 'pos']
features = ['egt', 'tat', 'n1a', 'n2a', 'wai', 'nai', 'prv', 'alt', 'mn', 'ff', 'nf']

In [9]:
fleet = ['VQ-BGU', 'VQ-BDU']

bgu = pd.read_csv('./takeoff-merged-VQ-BGU-30s.csv', parse_dates=['reportts']).dropna(subset=['egtm'])
bdu = pd.read_csv('./takeoff-merged-VQ-BDU-30s.csv', parse_dates=['reportts']).dropna(subset=['egtm'])

In [10]:
from utils import *

data = build_dataset(
  [bgu, bdu], 
  y_cols, 
  meta_cols, 
  features,
  n_back=13,
)

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def predict_ensemble(models: List, X: pd.DataFrame, y):
  pred = []
  for val in [True, False]:
    X_f, y_f = filter_split_by_field(X, y, 'nai', val)
    p = models[val].predict(X_f)
    pred.append(p)

  return pd.concat(pred)

def filter_split_by_field(X: pd.DataFrame, y: pd.DataFrame, field: str, value: bool):
  return X[X[field] == value], y[X[field] == value]

def train_ensemble_model(X, y, model = 'linreg'):
  assert len(X) == len(y)
  train_i = int(len(X) * 75 / 100)
  X_train, y_train = X[0:train_i], y[0:train_i]
  X_test, y_test = X[train_i:], y[train_i:]

  ensemble = dict()
  ensemble[True] = LinearRegression()
  ensemble[False] = LinearRegression()

  for val in [True, False]:
    X_train_f, y_train_f = filter_split_by_field(X_train, y_train, 'nai', val)
    ensemble[val].fit(X_train_f, y_train_f)
  
  model = LinearRegression()

  model.fit(X_train, y_train)

  predicted_train = model.predict(X_train)

  predicted_test = model.predict(X_test)
  mse = mean_squared_error(y_test, predicted_test, squared=False)
  mae = mean_absolute_error(y_train, predicted_train)
  r2 = r2_score(y_test, predicted_test)

  return mse, mae, r2, model, predicted_train, predicted_test, train_i, y_test

In [13]:
X = data.drop(columns=(meta_cols + y_cols))
y = data['egtm']

mse, mae, r2, model, predicted_train, predicted_test, train_i, y_test = train_ensemble_model(X, y)

print(f'Baseline: rmse = {mse:.3f} mae = {mae:.3f} r2 = {r2:.3f}')


Baseline: rmse = 1.770 mae = 1.305 r2 = 0.439
