Цель: определить параметры формулы, используемой в EngineWise для коррекции N1 (пересчёта N1 в N1C2).

In [1]:
import pandas as pd
from sklearn.metrics import mean_absolute_error

# Load the merged takeoff CSV; the 'reportts' column is parsed as datetime.
data = pd.read_csv(
    '../takeoff-merged-VQ-BDU-30s.csv',
    parse_dates=['reportts'],
)

Columns: N1 (raw, uncorrected), N1C2 (corrected)

In [2]:
# Keep only the two columns being compared: 'n1' and 'n1c2'.
X = data.loc[:, ['n1', 'n1c2']]

Baseline MAE between raw N1 and N1C2 (before any correction):

In [3]:
# Baseline: mean absolute difference between 'n1' and 'n1c2' with no correction applied.
mean_absolute_error(y_true=X['n1'], y_pred=X['n1c2'])

1.6533633776091081

#### Theta factor correction

In [4]:
# Temperature ratio: ('tat' + 273.15) / 288.15.
# NOTE(review): presumably 'tat' is in degrees Celsius, 273.15 converts it to
# kelvin and 288.15 K is the reference temperature - confirm against the data spec.
theta = data['tat'].add(273.15).div(288.15)

# First guess for the exponent applied to theta.
alpha = 0.5

# Candidate corrected N1: n1 / theta**alpha.
n1k = X['n1'].div(theta.pow(alpha))

# MAE of the candidate against the 'n1c2' column.
mean_absolute_error(n1k, X['n1c2'])

0.38439573866562365

Now let's try to adjust alpha more closely:

In [11]:
# Coarse sweep of the exponent over [0.50, 0.55] in steps of 0.005.
# The grid is driven by an integer index rather than by repeatedly adding the
# float step, so the end point 0.55 is always included and the printed values
# of `a` carry no accumulated rounding error.
for i in range(11):
  a = round(0.50 + i * 0.005, 3)
  n1k = X['n1'] / (theta ** a)
  # Computed outside the f-string: reusing the enclosing quote character
  # inside an f-string expression is a SyntaxError before Python 3.12.
  mae = mean_absolute_error(n1k, X['n1c2'])
  print(f'a={a}\tMAE={mae}')

a=0.5	MAE=0.38439573866562365
a=0.505	MAE=0.3812085578878123
a=0.51	MAE=0.37846716565907146
a=0.515	MAE=0.37628644749472545
a=0.52	MAE=0.374788074360602
a=0.525	MAE=0.37423580206110363
a=0.53	MAE=0.3746169997233491
a=0.535	MAE=0.37573502942520176
a=0.54	MAE=0.37755979046528776
a=0.545	MAE=0.3801716987762979
a=0.55	MAE=0.38340346397394437


In [17]:
# Fine grid search for the exponent that minimises the MAE between
# n1 / theta**a and n1c2, over [0.50, 0.55] with a step of 1e-5.
a_start, a_stop, a_step = 0.50, 0.55, 0.00001
n_steps = round((a_stop - a_start) / a_step)

# Start from +inf so the first candidate is always accepted, whatever the
# scale of the error.
min_err = float('inf')
min_a = a_start
# Derive each candidate from an integer index instead of accumulating the
# float step: thousands of additions of 1e-5 drift visibly and can skip the
# end point of the interval.
for i in range(n_steps + 1):
  a = round(a_start + i * a_step, 5)
  n1k = X['n1'] / (theta ** a)
  err = mean_absolute_error(n1k, X['n1c2'])
  if err < min_err:
    min_a = a
    min_err = err

min_a, min_err

(0.5255199999998839, 0.3742303309384519)

The MAE is minimized at a ≈ 0.5255.

In [18]:
# Exponent chosen from the grid search.
a = 0.5255
# Residual left after the theta correction: n1c2 - n1 / theta**a.
n1_theta_corrected = X['n1'].div(theta.pow(a))
y_err = X['n1c2'].sub(n1_theta_corrected)

In [33]:
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

def train_model(X, y):
  """Fit a linear regression on a train split and score it on a held-out split.

  The data is split 67% / 33% with a fixed random_state of 42, so repeated
  calls on the same inputs give the same split.

  Args:
    X: feature table, one row per sample.
    y: target values; must have the same length as X.

  Returns:
    A tuple (rmse, mae, model): root-mean-squared error and mean absolute
    error on the test split, and the fitted LinearRegression.

  Raises:
    AssertionError: if X and y differ in length.
  """
  assert len(X) == len(y)
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

  model = LinearRegression(n_jobs=-1)
  model.fit(X_train, y_train)

  pred = model.predict(X_test)
  # Take the square root explicitly rather than passing squared=False: that
  # keyword is deprecated in scikit-learn 1.4 and removed in 1.6. For a
  # single-target y the result is the same RMSE as before.
  rmse = mean_squared_error(y_test, pred) ** 0.5
  mae = mean_absolute_error(y_test, pred)

  return rmse, mae, model

In [34]:
# Columns excluded from the feature set. 'n1', 'n1c2' and 'tat' are excluded
# because y_err is computed directly from them.
# NOTE(review): the remaining names look like identifiers/metadata - confirm.
non_feature_cols = [
    'reportts', 'acnum', 'pos', 'dep', 'arr',
    'egtm', 'fltdes',
    'dmusw', 'exswpn', 'reason', 'file_path',
    'n1c2', 'n1', 'tat',
]

# Missing values are replaced by the sentinel -100.
# NOTE(review): a sentinel far from the real range can distort a linear fit;
# consider imputing or dropping instead.
X2 = data.drop(columns=non_feature_cols).fillna(-100)

# Drop every column whose name contains the substring 'stw'.
X2 = X2.loc[:, ~X2.columns.str.contains('stw', regex=False)]

# train_model returns an RMSE as its first value, although it is bound to
# the name `mse` here.
mse, mae, model = train_model(X2, y_err)

mse, mae

(0.06006107740897699, 0.03893766798807289)

In [42]:
# One-row table of fitted coefficients labelled by feature name, with the
# columns ordered from the most negative to the most positive coefficient.
coef_table = pd.DataFrame([model.coef_], columns=X2.columns)
coef_table.sort_values(by=[0], axis=1)

Unnamed: 0,focb,mne_peak,mn_rtd,p5,n1com_peak,n1_peak,rfan,n2c5,n1com,n1p,...,nf_peak,n1msa,n1c2_peak,n2a_peak,fvl,mne,mn,dph,mn_peak,vsva
0,-1.928045,-1.805964,-1.78675,-0.463654,-0.300552,-0.232537,-0.198727,-0.154807,-0.145669,-0.139861,...,0.180517,0.20852,0.224568,0.242002,0.375731,0.41638,0.633016,0.651856,1.393181,6.594877
