In [15]:
from sklearn.model_selection import train_test_split, cross_val_predict
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression, Ridge
from catboost import CatBoostRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Load the merged takeoff CSV with `reportts` parsed as datetimes, then order
# the rows by that column (ascending, the `sort_values` default).
dataset = pd.read_csv(
    './takeoff-merged-VQ-BGU-30s-full.csv',
    parse_dates=['reportts'],
).sort_values('reportts')

In [16]:
# Regression targets: one column per quantity that can be predicted.
Y = dataset.loc[:, ['egtm', 'n1_delt', 'n2_delt']]

# Feature matrix: drop the three target columns and nine other listed columns,
# replace missing values with 0, then discard every remaining column whose
# name contains 'stw'.
X = (
    dataset
    .drop(columns=[
        'egtm', 'n1_delt', 'n2_delt',
        'acnum', 'pos', 'dep', 'arr',
        'fltdes', 'reportts',
        'dmusw', 'exswpn', 'reason',
    ])
    .fillna(0)
    .loc[:, lambda frame: ~frame.columns.str.contains('stw')]
)


In [19]:
def get_cv_rmse_for_parameter(X, alg='catboost', param='egtm', cv=None):
    """Return the cross-validated RMSE of one regressor for one target column.

    Out-of-fold predictions are produced with ``cross_val_predict`` and scored
    against the matching column of the notebook-level target frame ``Y``.

    Parameters
    ----------
    X : DataFrame or array-like
        Feature matrix; its rows must line up with the rows of ``Y``.
    alg : str, default 'catboost'
        Which regressor to evaluate: 'linreg', 'linreg_ridge', 'catboost',
        'lightgbm', 'xgboost' or 'rand_forest'.
    param : str, default 'egtm'
        Name of the column of ``Y`` to predict.
    cv : int, cross-validation generator or iterable, default None
        Forwarded unchanged to ``cross_val_predict``. ``None`` keeps
        scikit-learn's default splitter, i.e. the previous behaviour.

    Returns
    -------
    float
        Root mean squared error between ``Y[param]`` and the out-of-fold
        predictions.

    Raises
    ------
    ValueError
        If ``alg`` is not one of the supported names.
    """
    # Zero-argument factories so that only the requested estimator is built.
    model_factories = {
        'linreg': lambda: LinearRegression(n_jobs=-1),
        'linreg_ridge': lambda: Ridge(),
        'catboost': lambda: CatBoostRegressor(iterations=200, random_state=42, verbose=False),
        'lightgbm': lambda: LGBMRegressor(n_jobs=-1, random_state=42),
        # Seeded like the other tree ensembles so reruns are reproducible.
        'xgboost': lambda: XGBRegressor(random_state=42),
        'rand_forest': lambda: RandomForestRegressor(n_jobs=-1, n_estimators=100, random_state=42),
    }

    # Fail early with a clear message instead of hitting an unbound `model`
    # further down when the name is misspelled.
    if alg not in model_factories:
        raise ValueError(
            f"Unknown alg {alg!r}; expected one of {sorted(model_factories)}"
        )

    model = model_factories[alg]()
    y = Y[param]

    preds = cross_val_predict(model, X, y, cv=cv)

    # `mean_squared_error(..., squared=False)` is deprecated in scikit-learn 1.4
    # and removed in 1.6; taking the square root of the MSE works everywhere.
    rmse = float(np.sqrt(mean_squared_error(y, preds)))
    return rmse


In [20]:
model_names = ['linreg', 'linreg_ridge', 'catboost', 'lightgbm', 'xgboost', 'rand_forest']
params = Y.columns

# Feature subset that is compared against the full feature matrix `X`.
# NOTE(review): the name looks like a misspelling of "important"; it is left
# unchanged here in case other cells refer to it.
impotant = ['naiup', 'fmvc', 'baf', 'fdp', 'aoc', 'oip', 'alt', 'ttp', 'bbv']

# Target column of `Y` scored in this comparison. It is passed explicitly
# below so that changing it here actually changes what is evaluated.
param = 'egtm'

# One record per model: CV RMSE on all features vs. on the subset above.
# NOTE(review): the recorded output shows plain 'linreg' with rmse_all ~1.4e10
# on the full feature set while 'linreg_ridge' is ~6.0 - presumably an
# ill-conditioned design matrix; confirm before relying on that row.
metrics = []

for model in model_names:
    metrics.append({
        'model_name': model,
        'rmse_all': get_cv_rmse_for_parameter(X, alg=model, param=param),
        'rmse_important': get_cv_rmse_for_parameter(X[impotant], alg=model, param=param),
    })


pd.DataFrame(metrics)

Unnamed: 0,model_name,rmse_all,rmse_important
0,linreg,13943670000.0,5.073838
1,linreg_ridge,6.014727,4.995699
2,catboost,3.934068,4.496539
3,lightgbm,3.826391,4.459865
4,xgboost,3.651118,4.731197
5,rand_forest,3.597813,4.767549
