In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import seaborn as sns
import os
from typing import Union, Tuple

from sklearn.compose import ColumnTransformer
from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

In [3]:
# Load the source table. Judging by the file name it has been cleaned but still
# contains NaNs; the imputation cells further down are what fill them in.
df = pd.read_csv("data/EMEWS_cleaned_with_nan.csv")

In [4]:
# Preview the first rows to sanity-check the column names and where NaNs occur.
df.head()

Unnamed: 0,day,date,total_number_of_patients,total_number_of_emews,zone_a_mwr_patients,zone_a_mwr_cat_3,zone_a_mwr_cat_4,zone_a_mwr_sets_of_emews,zone_a_mwr_deescalations,zone_a_mwr_escalations,...,zone_a__cat_3,zone_a__sets_of_emews,zone_a__deescalations,zone_a__escalations,zone_b/c_patients,zone_b/c_cat_2,zone_b/c_cat_3,zone_b/c_sets_of_emews,zone_b/c_deescalations,zone_b/c_escalations
0,thursday,2024-01-04 00:00:00,100.0,302.0,64.0,53.0,11.0,192.0,10.0,6.0,...,9.0,50.0,9.0,1.0,15.0,,,60.0,1.0,5.0
1,friday,2024-01-05 00:00:00,112.0,220.0,59.0,59.0,0.0,90.0,10.0,0.0,...,12.0,60.0,10.0,0.0,27.0,12.0,15.0,70.0,0.0,5.0
2,saturday,2024-01-06 00:00:00,69.0,175.0,36.0,36.0,0.0,58.0,3.0,2.0,...,0.0,67.0,5.0,2.0,15.0,,,50.0,0.0,5.0
3,sunday,2024-01-07 00:00:00,74.0,206.0,30.0,27.0,3.0,41.0,2.0,11.0,...,6.0,61.0,7.0,3.0,16.0,,,50.0,0.0,5.0
4,monday,2024-01-08 00:00:00,87.0,267.0,41.0,38.0,3.0,116.0,10.0,1.0,...,5.0,73.0,8.0,1.0,23.0,10.0,13.0,78.0,0.0,4.0


In [5]:
# Both daily totals are held out as targets. Every other column -- including
# the non-numeric 'day' and 'date' columns -- stays in the feature frame.
y = df[['total_number_of_patients', 'total_number_of_emews']]
X = df.drop(columns=y.columns)

# 90/10 split; the fixed random_state keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [9]:
def apply_imputation(
        X_train: pd.DataFrame,
        X_test: pd.DataFrame,
        imputer: Union[KNNImputer, IterativeImputer, SimpleImputer]
        ) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Impute missing values in the numeric columns of a train/test split.

    The imputer (and the scaler, when one is used) is fitted on ``X_train``
    only and then applied to ``X_test``. Imputed numeric values are rounded to
    the nearest integer and cast to ``int``. Non-numeric columns are passed
    through unchanged.

    KNN and iterative imputers (including subclasses) receive standardised
    inputs; the scaling is reversed before rounding so the result is back in
    the original units.

    Args:
        X_train: Training features; the numeric columns are selected from
            this frame's dtypes.
        X_test: Test features; must contain the same numeric columns.
        imputer: Unfitted scikit-learn imputer. It is fitted in place by this
            call.

    Returns:
        ``(X_train_imputed, X_test_imputed)`` as new DataFrames; the inputs
        are not modified.

    Raises:
        ValueError: If the imputer returns a different number of columns than
            it was given, so the result cannot be written back column by
            column.
    """
    num_cols = X_train.select_dtypes(include='number').columns

    X_train_num = X_train[num_cols]
    X_test_num = X_test[num_cols]

    # isinstance rather than an exact type match, so subclasses of these
    # imputers are scaled as well.
    needs_scaling = isinstance(imputer, (KNNImputer, IterativeImputer))

    if needs_scaling:
        # NaNs are still present at this point; the scaler is fitted on the
        # training rows only.
        scaler = StandardScaler()
        X_train_num = scaler.fit_transform(X_train_num)
        X_test_num = scaler.transform(X_test_num)

    X_train_imputed = imputer.fit_transform(X_train_num)
    X_test_imputed = imputer.transform(X_test_num)

    # Fail early with a readable message instead of an opaque shape error from
    # inverse_transform / the DataFrame assignment further down.
    if X_train_imputed.shape[1] != len(num_cols):
        raise ValueError(
            f"Imputer returned {X_train_imputed.shape[1]} columns for "
            f"{len(num_cols)} numeric input columns; scikit-learn imputers "
            "drop features that are entirely NaN in the training data by default."
        )

    if needs_scaling:
        X_train_imputed = scaler.inverse_transform(X_train_imputed)
        X_test_imputed = scaler.inverse_transform(X_test_imputed)

    # NOTE(review): rounding to int assumes every numeric column is a count --
    # confirm this holds for all numeric features.
    X_train_imputed = np.round(X_train_imputed).astype(int)
    X_test_imputed = np.round(X_test_imputed).astype(int)

    # Overwrite only the numeric columns; the copies already carry the
    # non-numeric columns untouched.
    X_train_copy = X_train.copy()
    X_train_copy[num_cols] = X_train_imputed

    X_test_copy = X_test.copy()
    X_test_copy[num_cols] = X_test_imputed

    return X_train_copy, X_test_copy

In [10]:
# Registry of candidate imputers, keyed by the label of the dataset each one
# produces. Insertion order is the order in which they are evaluated later.
imputation_strategies = dict(
    mean=SimpleImputer(strategy='mean'),
    median=SimpleImputer(strategy='median'),
    mode=SimpleImputer(strategy='most_frequent'),
    mice=IterativeImputer(random_state=42),
    mice_hgb=IterativeImputer(estimator=HistGradientBoostingRegressor(random_state=42), random_state=42),
    mice_lr=IterativeImputer(estimator=LinearRegression(), random_state=42),
    mice_rf=IterativeImputer(estimator=RandomForestRegressor(random_state=42), random_state=42),
    mice_svr=IterativeImputer(estimator=SVR(), random_state=42),
)

# For k = 3, 4, 5 add a plain and a distance-weighted KNN imputer, plus the
# iterative imputer driven by the matching KNN regressor.
for i in range(3, 6):
    imputation_strategies.update({
        f'knn_{i}': KNNImputer(n_neighbors=i),
        f'knn_{i}_distance': KNNImputer(n_neighbors=i, weights='distance'),
        f'mice_knn_{i}': IterativeImputer(
            estimator=KNeighborsRegressor(n_neighbors=i), random_state=42),
        f'mice_knn_{i}_distance': IterativeImputer(
            estimator=KNeighborsRegressor(n_neighbors=i, weights='distance'), random_state=42),
    })

# Per-zone columns whose row-wise sum stands in for a missing total.
patient_count_cols = ['zone_a_mwr_patients', 'zone_a__patients', 'zone_b/c_patients']
emews_count_cols = ['zone_a_mwr_sets_of_emews', 'zone_a__sets_of_emews', 'zone_b/c_sets_of_emews']
total_components = {
    'total_number_of_patients': patient_count_cols,
    'total_number_of_emews': emews_count_cols,
}

datasets = {}

for name, imputer in imputation_strategies.items():
    X_train_imp, X_test_imp = apply_imputation(X_train, X_test, imputer)

    assembled = []
    for features, targets in ((X_train_imp, y_train), (X_test_imp, y_test)):
        # Re-attach the targets to the imputed features.
        frame = pd.concat([features, targets], axis=1)
        frame['date'] = pd.to_datetime(frame['date'])

        # Targets are not imputed; where a total is missing, fall back to the
        # sum of the (already imputed) per-zone counts.
        for total_col, part_cols in total_components.items():
            frame[total_col] = frame[total_col].fillna(frame[part_cols].sum(axis=1))

        assembled.append(frame)

    train_df, test_df = assembled
    datasets[name] = (train_df, test_df)

In [None]:
from pycaret.regression import setup, compare_models, pull

# Target modelled in this cell; every other column of the frame is offered to
# PyCaret as a feature.
target_col = 'total_number_of_patients'

results = []

for name, (train_df, test_df) in datasets.items():
    print(f"\n🔍 Processing dataset: {name}")

    # NOTE(review): both totals were concatenated into train_df/test_df, so
    # 'total_number_of_emews' is still present here as a feature -- confirm
    # that is intended and not target leakage.
    setup(
        data=train_df,
        target=target_col,
        session_id=42,
        verbose=False,
        test_data=test_df  # Use predefined test set
    )

    # The leaderboard is sorted by R2, so its first row is the best model for
    # this imputation strategy.
    best_model = compare_models(exclude=['lightgbm'], sort='R2')
    leaderboard = pull()
    top_model_results = leaderboard.iloc[0]

    results.append({
        'dataset': name,
        'model': top_model_results['Model'],
        'R2': top_model_results['R2'],
        'MAE': top_model_results['MAE'],
        'RMSE': top_model_results['RMSE']
    })

# One row per imputation strategy, best R2 first.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='R2', ascending=False)
print("\n📊 Final Results:")
print(results_df)



üîç Processing dataset: mean


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,3.6747,36.7457,5.6111,0.9369,0.26,0.117,0.017
en,Elastic Net,3.6482,36.7608,5.6157,0.9369,0.2599,0.1163,0.017
llar,Lasso Least Angle Regression,3.6749,36.7463,5.6112,0.9369,0.26,0.117,0.016
br,Bayesian Ridge,3.5909,37.3292,5.6662,0.9358,0.2623,0.1151,0.016
lr,Linear Regression,3.5864,37.8851,5.719,0.935,0.2633,0.1134,0.019
ridge,Ridge Regression,3.5819,37.8567,5.7153,0.935,0.263,0.1132,0.016
lar,Least Angle Regression,3.6133,38.0127,5.7151,0.9346,0.2647,0.1141,0.017
catboost,CatBoost Regressor,3.3589,44.6625,6.191,0.9239,0.2503,0.0752,0.595
huber,Huber Regressor,2.8549,45.7809,6.1727,0.9213,0.2855,0.0686,0.021
gbr,Gradient Boosting Regressor,3.8967,46.2853,6.4184,0.9199,0.27,0.098,0.033



üîç Processing dataset: median


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
en,Elastic Net,3.6604,37.4212,5.6429,0.9358,0.2649,0.1161,0.019
br,Bayesian Ridge,3.5942,37.6438,5.6376,0.9355,0.2649,0.1139,0.019
lasso,Lasso Regression,3.7173,37.6933,5.6825,0.9353,0.2659,0.1177,0.015
llar,Lasso Least Angle Regression,3.7171,37.6927,5.6824,0.9353,0.2659,0.1177,0.018
ridge,Ridge Regression,3.5993,38.351,5.7127,0.9343,0.2758,0.1142,0.017
lr,Linear Regression,3.5995,38.4017,5.7176,0.9342,0.2751,0.114,0.019
lar,Least Angle Regression,3.7429,43.2117,6.1357,0.9255,0.2778,0.116,0.018
catboost,CatBoost Regressor,3.4235,45.8061,6.2615,0.9224,0.2558,0.0767,0.533
huber,Huber Regressor,2.8247,48.3825,6.3592,0.9168,0.3022,0.0673,0.02
gbr,Gradient Boosting Regressor,3.8877,48.4347,6.5618,0.916,0.2567,0.0952,0.04



üîç Processing dataset: mode


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
catboost,CatBoost Regressor,3.2355,46.196,6.3218,0.9219,0.2606,0.074,0.545
gbr,Gradient Boosting Regressor,3.8094,47.8254,6.6444,0.9172,0.2707,0.0965,0.054
en,Elastic Net,5.042,54.9965,7.1937,0.9061,0.223,0.1406,0.019
lasso,Lasso Regression,5.1319,55.2135,7.2202,0.9058,0.2309,0.1433,0.02
llar,Lasso Least Angle Regression,5.1318,55.2182,7.2206,0.9058,0.2308,0.1433,0.016
br,Bayesian Ridge,4.8752,55.584,7.208,0.9047,0.2199,0.1352,0.018
et,Extra Trees Regressor,3.5342,56.1774,7.1265,0.9044,0.2712,0.0774,0.056
ridge,Ridge Regression,4.8581,58.2087,7.3596,0.8996,0.2333,0.1331,0.019
lr,Linear Regression,4.8611,58.2538,7.3627,0.8995,0.2339,0.1332,0.017
rf,Random Forest Regressor,3.9841,59.7122,7.4831,0.8977,0.276,0.0901,0.076



üîç Processing dataset: mice


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
br,Bayesian Ridge,3.5077,32.5315,5.2284,0.9433,0.2566,0.1095,0.016
en,Elastic Net,3.5541,32.6709,5.2499,0.9431,0.2575,0.1122,0.017
lr,Linear Regression,3.526,33.1843,5.3058,0.9423,0.2665,0.1098,0.018
ridge,Ridge Regression,3.524,33.1529,5.3023,0.9423,0.2661,0.1098,0.015
lasso,Lasso Regression,3.6135,33.1665,5.3069,0.9422,0.2602,0.1145,0.016
llar,Lasso Least Angle Regression,3.6134,33.1667,5.3069,0.9422,0.2601,0.1145,0.016
huber,Huber Regressor,2.9983,38.0755,5.6315,0.934,0.2541,0.0722,0.019
catboost,CatBoost Regressor,3.7311,49.6588,6.6312,0.9153,0.2569,0.0859,0.511
gbr,Gradient Boosting Regressor,4.1515,52.8014,6.8355,0.908,0.2663,0.102,0.032
et,Extra Trees Regressor,4.1023,56.8981,7.2436,0.9019,0.277,0.0915,0.051



üîç Processing dataset: mice_hgb


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
en,Elastic Net,4.2929,40.9555,6.168,0.9279,0.2686,0.1352,0.031
ridge,Ridge Regression,4.319,41.0424,6.1832,0.9277,0.2883,0.1356,0.016
br,Bayesian Ridge,4.2965,40.9248,6.1642,0.9277,0.2762,0.1356,0.018
lr,Linear Regression,4.3217,41.086,6.1866,0.9276,0.2887,0.1355,0.018
lasso,Lasso Regression,4.2959,41.1765,6.187,0.9276,0.2665,0.1354,0.015
llar,Lasso Least Angle Regression,4.2958,41.173,6.1867,0.9276,0.2665,0.1354,0.021
lar,Least Angle Regression,4.3447,41.3414,6.2114,0.9272,0.2897,0.1364,0.025
huber,Huber Regressor,4.2333,45.4214,6.5116,0.9205,0.2553,0.125,0.019
catboost,CatBoost Regressor,4.18,52.2571,6.7807,0.9097,0.2832,0.1075,0.78
gbr,Gradient Boosting Regressor,4.4277,54.0299,6.9087,0.9048,0.2812,0.114,0.034



üîç Processing dataset: mice_lr


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
ridge,Ridge Regression,3.5881,34.4502,5.4113,0.94,0.2615,0.1089,0.016
lr,Linear Regression,3.5878,34.4971,5.4145,0.9399,0.2616,0.1088,0.019
br,Bayesian Ridge,3.7314,34.9357,5.5031,0.9388,0.2616,0.1141,0.015
en,Elastic Net,3.8318,35.5136,5.5877,0.9378,0.2616,0.1186,0.016
lasso,Lasso Regression,3.9191,36.8366,5.7144,0.9354,0.266,0.1222,0.015
llar,Lasso Least Angle Regression,3.9191,36.8373,5.7144,0.9354,0.266,0.1222,0.016
lar,Least Angle Regression,3.7861,40.4198,5.8921,0.9311,0.259,0.112,0.018
huber,Huber Regressor,3.0643,43.4082,6.158,0.9245,0.2614,0.0691,0.019
catboost,CatBoost Regressor,3.4419,45.0845,6.2495,0.9228,0.2481,0.0806,0.514
gbr,Gradient Boosting Regressor,3.8667,44.4114,6.207,0.9222,0.2591,0.0967,0.03



üîç Processing dataset: mice_rf


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
en,Elastic Net,4.5472,45.7582,6.4535,0.9202,0.2941,0.1374,0.017
lasso,Lasso Regression,4.5647,45.8567,6.4608,0.9201,0.2941,0.1383,0.017
llar,Lasso Least Angle Regression,4.5647,45.8565,6.4608,0.9201,0.2941,0.1383,0.017
br,Bayesian Ridge,4.5934,46.1895,6.4929,0.9195,0.2936,0.137,0.015
ridge,Ridge Regression,4.7077,48.6532,6.6936,0.915,0.2967,0.1405,0.016
lr,Linear Regression,4.7061,48.747,6.6993,0.9148,0.2965,0.1403,0.018
lar,Least Angle Regression,4.7543,50.178,6.7964,0.9123,0.2988,0.1417,0.016
huber,Huber Regressor,4.2044,52.2273,6.8511,0.9092,0.3158,0.1142,0.019
catboost,CatBoost Regressor,4.1614,54.4223,6.9662,0.9054,0.2852,0.1026,0.48
gbr,Gradient Boosting Regressor,4.4469,56.2806,7.0801,0.9007,0.2818,0.1133,0.032



üîç Processing dataset: mice_svr


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
br,Bayesian Ridge,4.0665,39.8021,5.9961,0.9306,0.2234,0.1198,0.015
en,Elastic Net,4.0816,40.0363,6.0352,0.9302,0.2255,0.1201,0.016
lasso,Lasso Regression,4.104,40.0932,6.0472,0.93,0.2255,0.1207,0.02
llar,Lasso Least Angle Regression,4.1039,40.0918,6.0471,0.93,0.2255,0.1207,0.016
lr,Linear Regression,4.1508,40.7423,6.082,0.929,0.2449,0.1233,0.025
ridge,Ridge Regression,4.1459,40.762,6.0806,0.929,0.2426,0.1229,0.016
lar,Least Angle Regression,4.2749,43.9343,6.3607,0.9227,0.2336,0.128,0.017
huber,Huber Regressor,4.0149,44.967,6.4171,0.9218,0.2443,0.1128,0.018
catboost,CatBoost Regressor,4.3549,56.6934,7.1011,0.9022,0.2668,0.1076,0.522
gbr,Gradient Boosting Regressor,4.6964,59.3945,7.3418,0.8959,0.2853,0.1201,0.037



üîç Processing dataset: knn_3


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,4.4847,44.6597,6.4132,0.9226,0.2513,0.1417,0.019
llar,Lasso Least Angle Regression,4.4847,44.6592,6.4132,0.9226,0.2513,0.1417,0.017
en,Elastic Net,4.491,44.9097,6.4366,0.9221,0.2509,0.1425,0.017
br,Bayesian Ridge,4.5466,45.619,6.5037,0.9207,0.2536,0.1449,0.017
lr,Linear Regression,4.608,46.4513,6.5702,0.9193,0.2714,0.1476,0.026
ridge,Ridge Regression,4.6065,46.4116,6.5679,0.9193,0.2722,0.1476,0.018
lar,Least Angle Regression,4.6761,49.0633,6.7636,0.914,0.2627,0.1519,0.019
huber,Huber Regressor,4.4417,50.0021,6.7507,0.9135,0.2796,0.1345,0.023
gbr,Gradient Boosting Regressor,4.8022,59.4085,7.4034,0.8961,0.2967,0.1302,0.04
catboost,CatBoost Regressor,4.6861,61.3749,7.4232,0.8943,0.3037,0.1206,0.498



üîç Processing dataset: knn_3_distance


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,4.4902,44.7781,6.4247,0.9224,0.261,0.1399,0.015
llar,Lasso Least Angle Regression,4.4902,44.7784,6.4247,0.9224,0.261,0.1399,0.022
en,Elastic Net,4.4983,45.0169,6.4472,0.9219,0.2599,0.1408,0.018
br,Bayesian Ridge,4.5372,45.7519,6.516,0.9205,0.2599,0.1426,0.023
lr,Linear Regression,4.5985,46.4731,6.5745,0.9193,0.2787,0.1457,0.022
ridge,Ridge Regression,4.5993,46.5458,6.5798,0.9192,0.2813,0.1455,0.015
lar,Least Angle Regression,4.6491,47.963,6.6911,0.9163,0.2608,0.1485,0.027
huber,Huber Regressor,4.377,50.0171,6.7517,0.9134,0.2922,0.1318,0.026
catboost,CatBoost Regressor,4.7281,61.7777,7.4697,0.8933,0.2982,0.1219,0.686
gbr,Gradient Boosting Regressor,4.8879,61.1066,7.5064,0.8931,0.2974,0.1307,0.044



üîç Processing dataset: mice_knn_3


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,4.374,42.6583,6.2626,0.9259,0.2312,0.1362,0.018
llar,Lasso Least Angle Regression,4.3741,42.6584,6.2626,0.9259,0.2312,0.1362,0.015
en,Elastic Net,4.3877,42.8401,6.2745,0.9256,0.2296,0.1368,0.016
br,Bayesian Ridge,4.4542,44.0167,6.3644,0.9235,0.2283,0.1394,0.016
lr,Linear Regression,4.4759,44.4863,6.3955,0.923,0.2377,0.1401,0.017
ridge,Ridge Regression,4.4721,44.5076,6.3946,0.9229,0.2341,0.1398,0.018
lar,Least Angle Regression,4.506,44.9719,6.4243,0.9221,0.2364,0.1411,0.137
huber,Huber Regressor,4.2242,48.2723,6.6356,0.9165,0.2501,0.1288,0.019
catboost,CatBoost Regressor,4.3909,56.0099,7.0595,0.9038,0.2745,0.1121,0.497
gbr,Gradient Boosting Regressor,4.6177,55.6503,7.1103,0.9028,0.2853,0.1209,0.033



üîç Processing dataset: mice_knn_3_distance


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,4.2572,40.819,6.1028,0.929,0.2593,0.129,0.022
llar,Lasso Least Angle Regression,4.2572,40.8188,6.1028,0.929,0.2593,0.129,0.036
en,Elastic Net,4.2622,41.0959,6.1248,0.9285,0.2601,0.1295,0.022
br,Bayesian Ridge,4.2813,41.4527,6.151,0.9278,0.2619,0.1308,0.024
lr,Linear Regression,4.28,41.9137,6.1748,0.9271,0.269,0.131,0.022
ridge,Ridge Regression,4.2755,41.8994,6.1725,0.9271,0.2688,0.1308,0.028
lar,Least Angle Regression,4.3445,43.48,6.2863,0.9243,0.2906,0.1338,0.023
huber,Huber Regressor,4.0653,45.3107,6.3904,0.9211,0.2393,0.1239,0.029
knn,K Neighbors Regressor,5.8924,76.7392,8.5481,0.868,0.3301,0.1503,0.029
omp,Orthogonal Matching Pursuit,7.4851,104.6684,10.0864,0.821,0.3587,0.2306,0.039


Processing:   0%|          | 0/81 [00:00<?, ?it/s]

In [110]:
from pycaret.regression import setup, compare_models, pull

# Target modelled in this cell; every other column of the frame is offered to
# PyCaret as a feature.
target_col = 'total_number_of_emews'

results = []

for name, (train_df, test_df) in datasets.items():
    print(f"\n🔍 Processing dataset: {name}")

    # NOTE(review): both totals were concatenated into train_df/test_df, so
    # 'total_number_of_patients' is still present here as a feature -- confirm
    # that is intended and not target leakage.
    setup(
        data=train_df,
        target=target_col,
        session_id=42,
        verbose=False,
        test_data=test_df  # Use predefined test set
    )

    # The leaderboard is sorted by R2, so its first row is the best model for
    # this imputation strategy.
    best_model = compare_models(exclude=['lightgbm'], sort='R2')
    leaderboard = pull()
    top_model_results = leaderboard.iloc[0]

    results.append({
        'dataset': name,
        'model': top_model_results['Model'],
        'R2': top_model_results['R2'],
        'MAE': top_model_results['MAE'],
        'RMSE': top_model_results['RMSE']
    })

# One row per imputation strategy, best R2 first.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='R2', ascending=False)
print("\n📊 Final Results:")
print(results_df)



üîç Processing dataset: mean


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,8.6691,304.1862,15.9037,0.9491,0.3259,0.0705,0.023
llar,Lasso Least Angle Regression,8.6697,304.1956,15.9041,0.9491,0.3259,0.0705,0.018
en,Elastic Net,8.6992,304.5382,15.9383,0.949,0.3311,0.0706,0.022
br,Bayesian Ridge,8.7277,307.7215,16.0622,0.9484,0.3376,0.0715,0.02
ridge,Ridge Regression,8.8944,326.6865,16.622,0.945,0.3277,0.0721,0.02
lr,Linear Regression,8.9023,327.6604,16.6462,0.9449,0.3276,0.0722,0.02
huber,Huber Regressor,7.3708,343.3379,17.0881,0.9428,0.3363,0.0544,0.022
gbr,Gradient Boosting Regressor,11.7813,391.853,19.105,0.9342,0.2795,0.0896,0.032
lar,Least Angle Regression,9.3326,403.6277,18.2039,0.9313,0.3416,0.0741,0.023
et,Extra Trees Regressor,11.8334,480.5326,21.2279,0.9207,0.2048,0.0779,0.052



üîç Processing dataset: median


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,8.2569,284.2602,15.3108,0.9525,0.3318,0.0679,0.017
llar,Lasso Least Angle Regression,8.2576,284.2633,15.3113,0.9525,0.3318,0.0679,0.021
en,Elastic Net,8.2737,284.4475,15.3422,0.9524,0.3327,0.0678,0.023
br,Bayesian Ridge,8.2932,284.3211,15.3654,0.9524,0.3321,0.0684,0.018
ridge,Ridge Regression,8.5953,292.9245,15.7395,0.951,0.3364,0.073,0.017
lr,Linear Regression,8.6027,293.2936,15.7527,0.9509,0.337,0.0731,0.022
huber,Huber Regressor,7.1509,336.8244,16.8964,0.9438,0.3635,0.0539,0.02
gbr,Gradient Boosting Regressor,11.3545,366.5766,18.4312,0.9383,0.2684,0.0881,0.045
lar,Least Angle Regression,9.5789,406.3509,18.4382,0.9304,0.3474,0.0848,0.018
et,Extra Trees Regressor,11.3945,453.6491,20.6511,0.9241,0.1988,0.0761,0.048



üîç Processing dataset: mode


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
en,Elastic Net,9.6378,278.6509,15.8541,0.9532,0.355,0.0759,0.018
lasso,Lasso Regression,9.6433,287.8197,16.0881,0.9517,0.3558,0.0764,0.016
llar,Lasso Least Angle Regression,9.645,288.0669,16.0947,0.9517,0.3559,0.0764,0.024
br,Bayesian Ridge,9.6109,303.4417,16.4742,0.9489,0.3594,0.0781,0.015
gbr,Gradient Boosting Regressor,10.0324,311.7302,16.9382,0.9478,0.2428,0.0766,0.031
et,Extra Trees Regressor,9.426,325.2195,17.6113,0.9457,0.1735,0.0614,0.048
ridge,Ridge Regression,9.6766,326.9447,16.9424,0.9448,0.3632,0.0794,0.015
lar,Least Angle Regression,9.6779,327.5921,16.9559,0.9447,0.3634,0.0794,0.017
lr,Linear Regression,9.688,327.2822,16.9494,0.9447,0.3627,0.0796,0.023
huber,Huber Regressor,9.5046,371.226,18.2244,0.9361,0.3344,0.0598,0.021



üîç Processing dataset: mice


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,8.0355,257.9631,14.4622,0.9562,0.3307,0.0668,0.016
llar,Lasso Least Angle Regression,8.036,257.9769,14.4629,0.9562,0.3307,0.0668,0.016
en,Elastic Net,8.0469,259.0524,14.5128,0.956,0.3296,0.066,0.016
br,Bayesian Ridge,8.1069,260.8596,14.58,0.9556,0.3275,0.0663,0.016
ridge,Ridge Regression,8.4349,269.5178,14.9632,0.9543,0.328,0.0695,0.018
lr,Linear Regression,8.4447,269.8878,14.9787,0.9542,0.3281,0.0696,0.019
huber,Huber Regressor,7.844,275.9763,15.1994,0.9532,0.3183,0.0574,0.019
lar,Least Angle Regression,8.6168,277.643,15.2474,0.9529,0.3267,0.0711,0.017
gbr,Gradient Boosting Regressor,11.7647,413.5235,19.2561,0.9322,0.2756,0.0904,0.031
et,Extra Trees Regressor,12.0162,487.2071,21.3701,0.9195,0.2148,0.0831,0.049



üîç Processing dataset: mice_hgb


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,11.7778,352.1262,18.009,0.94,0.2811,0.106,0.015
llar,Lasso Least Angle Regression,11.7777,352.117,18.0087,0.94,0.2811,0.106,0.016
en,Elastic Net,11.832,354.3678,18.0808,0.9396,0.2914,0.1066,0.016
ridge,Ridge Regression,11.9879,357.3761,18.1783,0.9392,0.2974,0.1075,0.015
br,Bayesian Ridge,11.8475,356.2277,18.1299,0.9392,0.295,0.1071,0.017
lr,Linear Regression,11.9959,357.4939,18.1796,0.9391,0.2979,0.1075,0.021
lar,Least Angle Regression,11.9975,357.7217,18.1876,0.9391,0.2978,0.1076,0.016
huber,Huber Regressor,11.8226,387.6884,18.9839,0.9351,0.3283,0.0964,0.02
gbr,Gradient Boosting Regressor,13.3463,481.5035,21.5428,0.9181,0.2804,0.1028,0.034
et,Extra Trees Regressor,13.0694,542.2865,22.8645,0.909,0.21,0.0897,0.054



üîç Processing dataset: mice_lr


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
br,Bayesian Ridge,8.6472,286.5463,15.5656,0.9511,0.3245,0.0689,0.017
en,Elastic Net,8.6589,289.0901,15.7176,0.9508,0.3261,0.0688,0.016
lr,Linear Regression,8.8847,288.8,15.543,0.9507,0.3267,0.0715,0.017
ridge,Ridge Regression,8.8702,288.4227,15.5325,0.9507,0.3267,0.0713,0.016
lasso,Lasso Regression,8.7183,291.6526,15.8038,0.9504,0.3288,0.0705,0.017
llar,Lasso Least Angle Regression,8.716,291.5661,15.8005,0.9504,0.3288,0.0704,0.017
huber,Huber Regressor,7.9022,309.1579,16.4321,0.948,0.3211,0.0534,0.02
lar,Least Angle Regression,10.5915,408.1911,18.1951,0.9288,0.3478,0.0878,0.016
gbr,Gradient Boosting Regressor,12.5197,459.5502,20.8203,0.9208,0.2823,0.096,0.037
et,Extra Trees Regressor,13.0107,578.497,23.494,0.9023,0.2217,0.0875,0.05



üîç Processing dataset: mice_rf


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,11.3942,369.1904,18.1224,0.9381,0.3772,0.0924,0.017
llar,Lasso Least Angle Regression,11.3941,369.1874,18.1223,0.9381,0.3772,0.0924,0.017
en,Elastic Net,11.4763,372.3397,18.2161,0.9376,0.3772,0.0928,0.016
br,Bayesian Ridge,11.6253,375.5874,18.3238,0.9371,0.3766,0.0941,0.016
huber,Huber Regressor,11.1185,401.5287,18.9101,0.933,0.3938,0.0871,0.022
ridge,Ridge Regression,12.1896,401.74,19.0613,0.9328,0.3819,0.1008,0.016
lr,Linear Regression,12.1932,402.4841,19.0808,0.9327,0.3822,0.1008,0.029
lar,Least Angle Regression,12.4144,455.3278,20.1017,0.9235,0.3877,0.1055,0.017
gbr,Gradient Boosting Regressor,13.2387,509.02,21.8901,0.9125,0.2966,0.1018,0.034
et,Extra Trees Regressor,13.4824,627.2521,24.1738,0.8957,0.2305,0.0906,0.055



üîç Processing dataset: mice_svr


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,10.6197,316.1824,16.8736,0.946,0.2842,0.089,0.021
llar,Lasso Least Angle Regression,10.6188,316.1506,16.8726,0.946,0.2842,0.089,0.021
en,Elastic Net,10.6672,317.4854,16.911,0.9458,0.2852,0.0892,0.021
br,Bayesian Ridge,10.7212,319.7238,16.963,0.9454,0.2852,0.09,0.016
huber,Huber Regressor,10.5943,332.8697,17.4657,0.9437,0.3073,0.0867,0.019
ridge,Ridge Regression,11.0968,333.2272,17.4196,0.9431,0.2959,0.0946,0.015
lr,Linear Regression,11.1151,333.7846,17.4374,0.943,0.2962,0.0948,0.017
gbr,Gradient Boosting Regressor,12.9775,457.0066,20.9715,0.9217,0.2938,0.0991,0.033
et,Extra Trees Regressor,12.6259,501.8477,21.8362,0.9158,0.1957,0.088,0.049
rf,Random Forest Regressor,13.2007,560.0732,23.0172,0.905,0.2533,0.0902,0.137



üîç Processing dataset: knn_3


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,12.0867,379.326,18.5048,0.9363,0.3785,0.1082,0.017
llar,Lasso Least Angle Regression,12.0867,379.3211,18.5047,0.9363,0.3785,0.1082,0.016
en,Elastic Net,12.1483,380.9124,18.5527,0.9361,0.3758,0.1086,0.017
br,Bayesian Ridge,12.2559,383.4405,18.6396,0.9357,0.3745,0.1097,0.017
huber,Huber Regressor,11.7603,391.6161,18.8143,0.9347,0.4047,0.1031,0.02
lr,Linear Regression,12.4623,391.7578,18.8973,0.9344,0.3749,0.1132,0.018
ridge,Ridge Regression,12.4712,391.7189,18.8949,0.9344,0.3739,0.1133,0.017
lar,Least Angle Regression,12.8659,435.9988,19.9758,0.9266,0.3814,0.1194,0.016
gbr,Gradient Boosting Regressor,13.853,537.1737,22.4659,0.9066,0.3093,0.1066,0.033
et,Extra Trees Regressor,13.6778,605.6111,23.8073,0.899,0.2323,0.0964,0.05



üîç Processing dataset: knn_3_distance


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,11.9809,379.4132,18.5119,0.9363,0.394,0.1057,0.017
llar,Lasso Least Angle Regression,11.9808,379.4143,18.5119,0.9363,0.394,0.1057,0.021
en,Elastic Net,12.0237,380.8291,18.5538,0.936,0.3925,0.1059,0.016
br,Bayesian Ridge,12.1324,383.7203,18.6496,0.9356,0.3926,0.1071,0.016
lr,Linear Regression,12.405,392.2687,18.9196,0.9343,0.3942,0.1112,0.018
ridge,Ridge Regression,12.3966,391.9744,18.9107,0.9343,0.3944,0.111,0.016
huber,Huber Regressor,11.7566,395.7461,18.9524,0.9341,0.3946,0.1013,0.019
lar,Least Angle Regression,12.7072,415.7398,19.5663,0.9301,0.3992,0.1152,0.016
gbr,Gradient Boosting Regressor,13.7553,530.8977,22.2695,0.9087,0.2887,0.1044,0.034
et,Extra Trees Regressor,13.4973,588.2182,23.4786,0.9017,0.2296,0.0951,0.049



üîç Processing dataset: mice_knn_3


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,11.3093,343.1165,17.6228,0.9417,0.3439,0.104,0.016
llar,Lasso Least Angle Regression,11.3093,343.1091,17.6227,0.9417,0.3439,0.104,0.018
en,Elastic Net,11.377,345.4197,17.6921,0.9413,0.3441,0.1044,0.017
br,Bayesian Ridge,11.4924,350.9223,17.8547,0.9403,0.3461,0.1056,0.017
huber,Huber Regressor,10.7992,357.0196,18.059,0.9398,0.3619,0.098,0.02
ridge,Ridge Regression,11.6339,356.945,18.0348,0.9393,0.3512,0.1073,0.016
lr,Linear Regression,11.6618,357.8773,18.055,0.9391,0.3494,0.1076,0.018
lar,Least Angle Regression,11.8079,364.5067,18.2524,0.938,0.3508,0.1089,0.021
gbr,Gradient Boosting Regressor,13.4374,497.1182,21.7579,0.9144,0.3017,0.1036,0.034
et,Extra Trees Regressor,13.057,569.8462,23.1525,0.9046,0.2193,0.0926,0.05



üîç Processing dataset: mice_knn_3_distance


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,10.8612,334.9894,17.2654,0.943,0.2841,0.101,0.015
llar,Lasso Least Angle Regression,10.8611,334.9798,17.2653,0.943,0.2841,0.101,0.017
en,Elastic Net,10.8903,336.4724,17.3066,0.9428,0.2826,0.1011,0.017
br,Bayesian Ridge,10.9695,339.8213,17.398,0.9421,0.2851,0.1021,0.017
huber,Huber Regressor,10.7745,347.2669,17.6399,0.9413,0.3166,0.1002,0.019
ridge,Ridge Regression,11.2274,346.5124,17.5986,0.941,0.298,0.105,0.018
lr,Linear Regression,11.24,346.8178,17.6103,0.9409,0.2989,0.1051,0.018
lar,Least Angle Regression,11.4256,363.1294,18.0536,0.9381,0.302,0.1083,0.016
gbr,Gradient Boosting Regressor,12.6915,449.7936,20.4652,0.9228,0.2944,0.0995,0.033
et,Extra Trees Regressor,12.64,557.673,22.7651,0.9065,0.211,0.0906,0.051



üîç Processing dataset: knn_4


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,11.5535,352.7759,17.8417,0.941,0.3705,0.1061,0.017
llar,Lasso Least Angle Regression,11.5538,352.7778,17.8418,0.941,0.3705,0.1061,0.017
en,Elastic Net,11.6429,356.2063,17.9493,0.9404,0.3724,0.1066,0.017
br,Bayesian Ridge,11.7782,361.5843,18.1124,0.9395,0.3778,0.1082,0.017
ridge,Ridge Regression,11.9557,366.3435,18.2631,0.9388,0.3804,0.1102,0.016
lr,Linear Regression,11.9699,366.8202,18.2746,0.9387,0.3801,0.1103,0.019
lar,Least Angle Regression,12.0934,372.0039,18.4381,0.9378,0.3838,0.1124,0.017
huber,Huber Regressor,11.1296,374.4734,18.4679,0.9376,0.3792,0.0984,0.02
gbr,Gradient Boosting Regressor,13.2879,476.4515,21.1326,0.9181,0.3031,0.1016,0.034
et,Extra Trees Regressor,13.5767,579.0144,23.3483,0.9028,0.2173,0.0957,0.054



üîç Processing dataset: knn_4_distance


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,11.5509,355.0086,17.8988,0.9405,0.3905,0.1049,0.018
llar,Lasso Least Angle Regression,11.5513,355.0084,17.8989,0.9405,0.3905,0.1049,0.018
en,Elastic Net,11.624,357.0856,17.965,0.9402,0.389,0.1053,0.016
br,Bayesian Ridge,11.734,360.153,18.0731,0.9397,0.391,0.1068,0.016
lr,Linear Regression,11.9657,366.3912,18.2867,0.9388,0.3971,0.1095,0.018
ridge,Ridge Regression,11.96,366.2692,18.2818,0.9388,0.396,0.1095,0.015
huber,Huber Regressor,10.9845,372.0355,18.3727,0.9381,0.371,0.0964,0.025
lar,Least Angle Regression,12.1233,377.0118,18.5992,0.9369,0.3999,0.1119,0.017
gbr,Gradient Boosting Regressor,13.5822,511.9328,22.0051,0.9127,0.3055,0.1063,0.034
et,Extra Trees Regressor,13.3595,577.6645,23.4426,0.9031,0.225,0.0932,0.051



üîç Processing dataset: mice_knn_4


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,10.7207,327.8752,17.1908,0.9445,0.3149,0.0968,0.016
llar,Lasso Least Angle Regression,10.7209,327.8817,17.1909,0.9445,0.3149,0.0968,0.017
en,Elastic Net,10.7805,330.9028,17.2786,0.944,0.3146,0.097,0.016
br,Bayesian Ridge,10.9298,336.6383,17.4468,0.943,0.3167,0.0986,0.016
lr,Linear Regression,11.0964,344.5437,17.6777,0.9416,0.3113,0.1004,0.018
ridge,Ridge Regression,11.098,344.7869,17.6762,0.9416,0.3123,0.1005,0.016
lar,Least Angle Regression,11.0943,344.947,17.6755,0.9416,0.3127,0.1005,0.019
huber,Huber Regressor,10.6588,349.6496,17.821,0.9411,0.3473,0.0946,0.02
gbr,Gradient Boosting Regressor,12.9035,468.9393,21.2336,0.9194,0.3215,0.0978,0.033
et,Extra Trees Regressor,12.9775,537.8995,22.4812,0.9098,0.2053,0.091,0.052



üîç Processing dataset: mice_knn_4_distance


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,11.0857,340.2933,17.4178,0.9423,0.3425,0.0998,0.016
llar,Lasso Least Angle Regression,11.0858,340.3015,17.418,0.9423,0.3425,0.0998,0.018
en,Elastic Net,11.172,344.1204,17.5351,0.9417,0.345,0.1007,0.016
br,Bayesian Ridge,11.243,347.5994,17.6494,0.9411,0.3484,0.1016,0.017
ridge,Ridge Regression,11.3886,349.65,17.7609,0.9408,0.3474,0.1027,0.017
lr,Linear Regression,11.4133,350.2461,17.7778,0.9406,0.3437,0.1029,0.02
lar,Least Angle Regression,11.5635,354.5318,17.9178,0.9399,0.3492,0.104,0.016
huber,Huber Regressor,11.0544,359.673,18.0621,0.9395,0.3587,0.0956,0.02
gbr,Gradient Boosting Regressor,13.3827,488.6587,21.5698,0.9164,0.3058,0.1031,0.034
et,Extra Trees Regressor,13.1816,561.4386,22.8507,0.9064,0.2244,0.0931,0.059



üîç Processing dataset: knn_5


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,11.2473,334.9677,17.2263,0.944,0.3637,0.105,0.016
llar,Lasso Least Angle Regression,11.2476,334.9675,17.2263,0.944,0.3637,0.105,0.017
en,Elastic Net,11.3315,336.3798,17.2768,0.9438,0.3644,0.1055,0.017
br,Bayesian Ridge,11.4934,341.0375,17.4265,0.943,0.3684,0.1074,0.016
ridge,Ridge Regression,11.7361,347.7625,17.6418,0.9419,0.3685,0.1099,0.017
lr,Linear Regression,11.7483,348.0836,17.6526,0.9418,0.3686,0.11,0.018
lar,Least Angle Regression,11.819,353.1399,17.807,0.9409,0.3716,0.1114,0.016
huber,Huber Regressor,10.9594,363.7841,18.1047,0.9394,0.3775,0.0984,0.021
gbr,Gradient Boosting Regressor,13.4374,491.1986,21.3274,0.9158,0.3026,0.1056,0.033
et,Extra Trees Regressor,13.0616,552.0031,22.7795,0.908,0.2079,0.0904,0.051



🔍 Processing dataset: knn_5_distance


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,11.2882,339.1732,17.3629,0.9432,0.3807,0.1038,0.017
llar,Lasso Least Angle Regression,11.2885,339.179,17.3631,0.9432,0.3807,0.1038,0.017
en,Elastic Net,11.3758,341.1084,17.4328,0.9428,0.3809,0.1043,0.017
br,Bayesian Ridge,11.4996,344.7168,17.5577,0.9422,0.3838,0.106,0.017
ridge,Ridge Regression,11.7627,351.0546,17.7847,0.9412,0.3821,0.1084,0.015
lr,Linear Regression,11.7714,351.475,17.7979,0.9411,0.382,0.1085,0.023
huber,Huber Regressor,10.9871,364.7952,18.1094,0.9393,0.3867,0.0975,0.02
lar,Least Angle Regression,12.2769,388.1916,18.8157,0.9347,0.389,0.1148,0.085
gbr,Gradient Boosting Regressor,13.3784,501.79,21.6654,0.9141,0.2905,0.1037,0.033
et,Extra Trees Regressor,13.0282,541.187,22.5842,0.9093,0.2219,0.0901,0.053



🔍 Processing dataset: mice_knn_5


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,9.9828,295.3481,16.2521,0.9497,0.2449,0.0898,0.018
llar,Lasso Least Angle Regression,9.9827,295.353,16.2522,0.9497,0.2449,0.0898,0.018
en,Elastic Net,10.0229,297.5547,16.319,0.9493,0.2451,0.09,0.017
br,Bayesian Ridge,10.1531,301.6415,16.4536,0.9486,0.2577,0.0916,0.018
lr,Linear Regression,10.3983,306.089,16.6095,0.9478,0.2823,0.0944,0.018
ridge,Ridge Regression,10.3898,305.9409,16.6037,0.9478,0.282,0.0943,0.016
huber,Huber Regressor,9.7049,321.1965,17.1538,0.9458,0.2502,0.0864,0.022
lar,Least Angle Regression,10.6238,335.9603,17.3649,0.9426,0.302,0.0993,0.016
gbr,Gradient Boosting Regressor,13.0347,481.7,21.3054,0.9174,0.3216,0.099,0.034
et,Extra Trees Regressor,12.8015,542.6514,22.5793,0.9093,0.2199,0.0895,0.05



🔍 Processing dataset: mice_knn_5_distance


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lasso,Lasso Regression,10.8766,354.845,17.5486,0.9399,0.4071,0.0934,0.018
llar,Lasso Least Angle Regression,10.8765,354.8466,17.5486,0.9399,0.4071,0.0934,0.017
en,Elastic Net,10.908,355.3647,17.5744,0.9398,0.4068,0.0934,0.017
br,Bayesian Ridge,10.9528,355.9885,17.6133,0.9397,0.4034,0.0945,0.022
lar,Least Angle Regression,11.3249,367.126,17.9924,0.9379,0.4096,0.101,0.016
ridge,Ridge Regression,11.3567,369.1256,18.0591,0.9375,0.4044,0.1008,0.017
lr,Linear Regression,11.3858,370.1038,18.0933,0.9373,0.4051,0.101,0.02
huber,Huber Regressor,10.5167,373.6304,18.0769,0.9369,0.4015,0.0874,0.021
gbr,Gradient Boosting Regressor,13.5608,529.1639,22.2346,0.9095,0.3151,0.1015,0.034
et,Extra Trees Regressor,13.5471,608.3969,23.8086,0.8982,0.2229,0.0935,0.05



📊 Final Results:
                dataset             model      R2      MAE     RMSE
3                  mice  Lasso Regression  0.9562   8.0355  14.4622
2                  mode       Elastic Net  0.9532   9.6378  15.8541
1                median  Lasso Regression  0.9525   8.2569  15.3108
5               mice_lr    Bayesian Ridge  0.9511   8.6472  15.5656
18           mice_knn_5  Lasso Regression  0.9497   9.9828  16.2521
0                  mean  Lasso Regression  0.9491   8.6691  15.9037
7              mice_svr  Lasso Regression  0.9460  10.6197  16.8736
14           mice_knn_4  Lasso Regression  0.9445  10.7207  17.1908
16                knn_5  Lasso Regression  0.9440  11.2473  17.2263
17       knn_5_distance  Lasso Regression  0.9432  11.2882  17.3629
11  mice_knn_3_distance  Lasso Regression  0.9430  10.8612  17.2654
15  mice_knn_4_distance  Lasso Regression  0.9423  11.0857  17.4178
10           mice_knn_3  Lasso Regression  0.9417  11.3093  17.6228
12                knn_4  La

In [None]:
# Disabled export step, kept for reference only (nothing in this cell executes).
# When uncommented, the loop walks `datasets` (name -> (train_df, test_df)),
# stacks each train/test pair row-wise, orders the rows by the 'date' column,
# resets the index, and writes the result to data/imputed/<name>_df.csv
# without the index column.
# NOTE(review): `datasets` is defined outside this cell — confirm it is still
# in scope before re-enabling.
# NOTE(review): the loop rebinds `df`, which would overwrite the raw frame
# loaded with pd.read_csv at the top of the notebook; consider a different
# local name. `path` and the os.makedirs call do not depend on the loop
# variable and could be hoisted above the loop.
# for name, (train_df, test_df) in datasets.items():
#     path = f'data/imputed/'
#     os.makedirs(path, exist_ok=True)

#     df = pd.concat([train_df, test_df], axis=0).sort_values(by='date').reset_index(drop=True)

#     df.to_csv(path+f'{name}_df.csv', index=False)