Hypothesis: egtm = egt redline - egt

Idea: egt redline may depend linearly on engine hours. I want to plot (egt - egtm) vs engine hours.

In [15]:
import pandas as pd
import matplotlib.pyplot as plt 

# Read the sample CSV, parse `reportts` as datetimes, and order rows by it.
dataset = (
    pd.read_csv('./small-sample-BGU.csv', parse_dates=['reportts'])
    .sort_values('reportts')
)

In [16]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

In [17]:
# Feature subset reused below for the reduced linear model, the augmented
# (squared / pairwise-product) features, and the PySR fit.
important_features = ['naiup', 't2', 'ehrs', 'ecyc', 'fdp', 'ps14']

In [18]:
# Target: kept as a single-column DataFrame holding `egtm`.
Y = dataset[['egtm']]

# Features: remove identifier / label / target columns, replace missing values
# with the sentinel -100, then discard every column whose name contains 'stw'.
X = (
    dataset
    .drop(columns=[
        'reportts', 'acnum', 'pos', 'dep', 'arr',
        'egtm', 'fltdes',
        'dmusw', 'exswpn', 'reason',
    ])
    .fillna(-100)
    .loc[:, lambda frame: ~frame.columns.str.contains('stw')]
)

In [19]:
def train_model(X, y):
  """Fit a standardized linear regression and score it on a held-out split.

  Rows whose target is missing are removed, the remainder is split 67/33 with
  a fixed random state, features are standardized using statistics from the
  training part only, and a LinearRegression is fitted.

  Args:
    X: Feature DataFrame sharing its index with ``y``.
    y: Target values, either a Series or a single-column DataFrame.

  Returns:
    Tuple ``(rmse, mae, model, preds)`` where ``rmse`` is the root mean squared
    error on the test split, ``mae`` the mean absolute error on the test
    split, ``model`` the fitted LinearRegression, and ``preds`` a DataFrame
    with the true test targets (``'y'``) next to the predictions (``'pred'``).

  Raises:
    ValueError: If ``y`` is a DataFrame with more than one column.
  """
  # Use the target that was actually passed in; the argument used to be
  # ignored in favour of the notebook-level global ``Y``.
  if isinstance(y, pd.DataFrame):
    if y.shape[1] != 1:
      raise ValueError('y must be a Series or a single-column DataFrame')
    y = y.iloc[:, 0]

  # Keep only the rows that have a target value.
  has_target = y.notna()
  x = X.loc[has_target]
  y = y[has_target]

  X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=40)

  # Fit the scaler on the training part only so no test statistics leak in.
  scaler = StandardScaler()
  scaler.fit(X_train)

  X_train = scaler.transform(X_train)
  X_test = scaler.transform(X_test)

  model = LinearRegression(n_jobs=-1)
  model.fit(X_train, y_train)

  predicted = model.predict(X_test)
  preds = pd.DataFrame({'y': y_test, 'pred': predicted})
  # Take the square root explicitly: the ``squared=False`` flag is deprecated
  # and no longer accepted by recent scikit-learn releases.
  rmse = mean_squared_error(y_test, predicted) ** 0.5
  mae = mean_absolute_error(y_test, predicted)

  return rmse, mae, model, preds


In [20]:
# Baseline: fit on every remaining feature column. Despite its name, `mse`
# receives the root mean squared error, which train_model returns first.
mse, mae, result_model, pred = train_model(X, Y)
mse

360207500700391.8

In [21]:
# Same fit restricted to the `important_features` columns; `mse` again holds
# the root mean squared error returned first by train_model.
mse, mae, result_model, pred = train_model(X[important_features], Y)
mse

4.3029192720739315

Let's do some feature generation

In [22]:
# Extend the selected features with their squares and pairwise products.
# NOTE(review): products are generated for both (f, k) and (k, f), so every
# cross term appears twice under two different column names.
X_aug = X[important_features].copy()
for f in important_features:
  X_aug[f'{f}_2'] = X_aug[f].pow(2)
  for k in important_features:
    if k == f:
      continue
    X_aug[f'{f}_m_{k}'] = X_aug[f].mul(X_aug[k])

In [23]:
# Fit on the augmented (squares + pairwise products) features; `mse` holds the
# root mean squared error returned first by train_model.
mse, mae, result_model, pred = train_model(X_aug, Y)
mse

3.6203369005000123

In [24]:
# Display the augmented feature frame (original columns, squares, products).
X_aug

Unnamed: 0,naiup,t2,ehrs,ecyc,fdp,ps14,naiup_2,naiup_m_t2,naiup_m_ehrs,naiup_m_ecyc,...,fdp_m_t2,fdp_m_ehrs,fdp_m_ecyc,fdp_m_ps14,ps14_2,ps14_m_naiup,ps14_m_t2,ps14_m_ehrs,ps14_m_ecyc,ps14_m_fdp
0,128.8,3.0,0,0,11.3,15.991,16589.44,386.40,0.0,0.0,...,33.90,0.0,0.0,180.6983,255.712081,2059.6408,47.9730,0.000,0.000,180.6983
522,127.6,3.1,0,0,10.4,15.895,16281.76,395.56,0.0,0.0,...,32.24,0.0,0.0,165.3080,252.651025,2028.2020,49.2745,0.000,0.000,165.3080
523,128.3,10.6,4,2,10.8,15.873,16460.89,1359.98,513.2,256.6,...,114.48,43.2,21.6,171.4284,251.952129,2036.5059,168.2538,63.492,31.746,171.4284
1,129.0,10.4,4,2,12.0,16.026,16641.00,1341.60,516.0,258.0,...,124.80,48.0,24.0,192.3120,256.832676,2067.3540,166.6704,64.104,32.052,192.3120
524,131.2,-8.6,6,3,10.6,16.380,17213.44,-1128.32,787.2,393.6,...,-91.16,63.6,31.8,173.6280,268.304400,2149.0560,-140.8680,98.280,49.140,173.6280
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
519,119.1,-13.5,957,205,12.5,16.848,14184.81,-1607.85,113978.7,24415.5,...,-168.75,11962.5,2562.5,210.6000,283.855104,2006.5968,-227.4480,16123.536,3453.840,210.6000
1042,109.8,27.8,964,206,12.6,16.001,12056.04,3052.44,105847.2,22618.8,...,350.28,12146.4,2595.6,201.6126,256.032001,1756.9098,444.8278,15424.964,3296.206,201.6126
520,112.7,27.8,964,206,13.1,16.053,12701.29,3133.06,108642.8,23216.2,...,364.18,12628.4,2698.6,210.2943,257.698809,1809.1731,446.2734,15475.092,3306.918,210.2943
521,118.3,-23.3,1355,279,12.8,16.822,13994.89,-2756.39,160296.5,33005.7,...,-298.24,17344.0,3571.2,215.3216,282.979684,1990.0426,-391.9526,22793.810,4693.338,215.3216


Equation discovery

In [25]:
!pip3 install -U pysr -q

In [26]:
from pysr import PySRRegressor

# Symbolic-regression search over expressions built from +, *, cos, exp and
# sin, scored with a squared-error loss written in Julia syntax.
# PySR needs a Julia installation reachable on PATH (see the saved error below).
model = PySRRegressor(
    niterations=140,  # increase for better results
    binary_operators=["+", "*"],
    unary_operators=["cos", "exp", "sin"],
    loss="loss(prediction, target) = (prediction - target)^2",
)

# Fit only on rows that have an egtm value, using the reduced feature set.
y = Y['egtm']
x = X.loc[y.notna(), important_features]
y = y.dropna()

model.fit(x, y)

Compiling Julia backend...




FileNotFoundError: Julia is not installed in your PATH. Please install Julia and add it to your PATH.

Current PATH: /home/skeptlk/.conda/envs/recsys/bin:/usr/bin:/home/skeptlk/.nvm/versions/node/v19.5.0/bin:/home/skeptlk/.meteor:/home/skeptlk/.local/bin:/opt/apache-spark/bin:/home/skeptlk/.meteor:/opt/google-cloud-cli/bin:/usr/condabin:/usr/local/bin:/usr/bin:/usr/local/sbin:/var/lib/flatpak/exports/bin:/usr/lib/jvm/default/bin:/usr/bin/site_perl:/usr/bin/vendor_perl:/usr/bin/core_perl:/opt/spark/bin:/opt/spark/sbin:/usr/bin:/home/skeptlk/.nvm/versions/node/v19.5.0/bin:/home/skeptlk/.meteor:/home/skeptlk/.local/bin:/opt/apache-spark/bin:/home/skeptlk/.meteor:/opt/google-cloud-cli/bin:/usr/condabin:/usr/local/bin:/usr/bin:/usr/local/sbin:/var/lib/flatpak/exports/bin:/usr/lib/jvm/default/bin:/usr/bin/site_perl:/usr/bin/vendor_perl:/usr/bin/core_perl:/opt/spark/bin:/opt/spark/sbin

In [None]:
# Inspect the feature frame handed to PySR.
# NOTE(review): the saved output lists columns outside `important_features`,
# so it appears to come from an earlier definition of `x` — re-run to confirm.
x

Unnamed: 0,ivs12,ibe,iaie,iai,zwbp,acct,alt,alt_peak,alt_rtd,aoc,...,vorv,votm,vsva,w14,pf,wai,nai,prv,hpv,xf
0,-100.0,-100.0,-100.0,-100.0,-100.0,-20.0,1418.0,1247.0,-352.0,3.6,...,-100.0,-100.0,0.0,1160.0,0.53,0.0,0.0,1.0,0.0,0.0
522,-100.0,-100.0,-100.0,-100.0,-100.0,-20.0,1418.0,1247.0,-352.0,3.5,...,-100.0,-100.0,0.0,1160.0,0.53,0.0,0.0,1.0,0.0,0.0
523,-100.0,-100.0,-100.0,-100.0,-100.0,-20.0,2005.0,1676.0,96.0,3.4,...,-100.0,-100.0,0.0,1179.0,0.53,0.0,0.0,1.0,0.0,0.0
1,-100.0,-100.0,-100.0,-100.0,-100.0,-20.0,2005.0,1581.0,96.0,3.5,...,-100.0,-100.0,0.0,1179.0,0.53,0.0,0.0,1.0,0.0,0.0
524,-100.0,-100.0,-100.0,-100.0,-100.0,-20.0,1739.0,1566.0,619.0,3.4,...,-100.0,-100.0,0.0,1303.0,0.51,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
519,-100.0,-100.0,-100.0,-100.0,-100.0,-20.0,1459.0,1254.0,296.0,3.6,...,-100.0,-100.0,0.0,1340.0,0.53,0.0,1.0,1.0,0.0,0.0
1042,-100.0,-100.0,-100.0,-100.0,-100.0,-20.0,1554.0,1214.0,-75.0,3.5,...,-100.0,-100.0,0.0,1140.0,0.03,0.0,0.0,0.0,0.0,0.0
520,-100.0,-100.0,-100.0,-100.0,-100.0,23.0,1554.0,1412.0,-75.0,3.4,...,-100.0,-100.0,0.0,1137.0,0.03,0.0,0.0,0.0,0.0,0.0
521,-100.0,-100.0,-100.0,-100.0,-100.0,-20.0,832.0,653.0,-303.0,3.7,...,-100.0,-100.0,0.0,1336.0,0.51,0.0,0.0,1.0,0.0,0.0
