In [None]:
import sys
sys.path.insert(1, '../')
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_log_error

from agingml import temporal_degradation_test as tdt

In [None]:
def plot_results(max_model_age=800, max_test_mape=0.1, degradation_threshold=0.15,
                 max_tolerated_degradations=5):
    """Print degradation statistics and save an aging chart for every model.

    Reads the notebook-level globals ``models``, ``dataset``, ``n_simulations``,
    ``n_prod`` and ``freq``; they must be defined before this function is called.

    For each entry of ``models`` the function:

    1. loads two parquet files from ``../results/aging/{dataset}/``: the one
       holding ``y`` / ``y_pred`` per ``partition`` and the one suffixed with
       ``freq`` that holds the ``error`` column,
    2. keeps only rows with ``model_age <= max_model_age`` in both,
    3. computes the MAPE of the ``'test'`` partition per ``simulation_id`` and
       keeps the simulations whose test MAPE is at most ``max_test_mape``,
    4. flags rows whose ``error`` exceeds ``degradation_threshold`` and prints
       how many simulations have more than ``max_tolerated_degradations``
       flagged rows,
    5. draws the chart with ``tdt.plot_aging_chart`` and saves it as SVG under
       ``../figures/aging/{dataset}/``.

    A model for which no simulation survives step 3 is reported and skipped
    instead of aborting the whole loop with a ``ZeroDivisionError``.

    Parameters
    ----------
    max_model_age : int or float, default 800
        Upper bound (inclusive) applied to the ``model_age`` column.
    max_test_mape : float, default 0.1
        Upper bound (inclusive) on a simulation's test MAPE for it to be kept.
    degradation_threshold : float, default 0.15
        A row counts as a degradation when its ``error`` is strictly greater.
    max_tolerated_degradations : int, default 5
        A simulation counts as degraded when it has strictly more flagged rows.

    Returns
    -------
    None
    """
    for model in models:
        # Shared fragment of every input/output file name for this run.
        run_tag = f'{dataset}_{model}_{n_simulations}_simulations_{n_prod}_prod'
        errors_df = pd.read_parquet(f'../results/aging/{dataset}/aging_{run_tag}.parquet')
        d_errors_df = pd.read_parquet(f'../results/aging/{dataset}/aging_{run_tag}_{freq}.parquet')

        errors_df = errors_df[errors_df['model_age'] <= max_model_age]
        d_errors_df = d_errors_df[d_errors_df['model_age'] <= max_model_age]

        test_errors_df = errors_df[errors_df['partition'] == 'test']
        # Select the two needed columns explicitly so that apply() never
        # operates on the grouping column (deprecated since pandas 2.2).
        test_mape = test_errors_df.groupby('simulation_id')[['y', 'y_pred']].apply(
            lambda group: mean_absolute_percentage_error(group.y, group.y_pred)
        )
        test_mape = pd.DataFrame(test_mape, columns=['test_mape']).reset_index()

        d_errors_df = pd.merge(d_errors_df, test_mape, how='left', on='simulation_id')
        d_errors_df = d_errors_df[d_errors_df['test_mape'] <= max_test_mape]

        valid_models = d_errors_df['simulation_id'].nunique(dropna=False)
        print(f'There are {valid_models} out of {n_simulations}')

        if d_errors_df.empty:
            # Nothing to summarise or plot; the percentage below would divide by zero.
            print(f'No {model} simulation passed the test MAPE filter; skipping summary and plot')
            continue

        # assign() returns a new frame, so no column is written into a filtered
        # slice (avoids SettingWithCopyWarning).
        d_errors_df = d_errors_df.assign(degradation=d_errors_df['error'] > degradation_threshold)

        print(d_errors_df['degradation'].value_counts(normalize=True))
        degradations_per_model = d_errors_df.groupby(['simulation_id'])[['degradation']].sum().reset_index()
        num_degradated_models = len(
            degradations_per_model[degradations_per_model['degradation'] > max_tolerated_degradations]
        )

        print(f"{num_degradated_models} out of {len(degradations_per_model)} models ({np.round(100 * num_degradated_models / len(degradations_per_model), 1)} %) degradaded")

        print(degradations_per_model.degradation.sort_values())
        fig = tdt.plot_aging_chart(d_errors_df, metric='MAPE', freq='days', plot_name=f"Model Aging Chart: NYC Taxi Trip Demand - {model}")
        fig.show()
        fig.savefig(f'../figures/aging/{dataset}/aging_plot_{run_tag}.svg', format='svg')

In [None]:
# Experiment configuration. plot_results() reads dataset, n_simulations,
# n_prod, freq and models from these notebook-level globals.
dataset = "taxi"
n_train = 8_800  # one year
n_test = 2_200
n_prod = 4_400
n_simulations = 1_500
metric = mean_absolute_percentage_error
freq = "D"
models = [
    "LGBMRegressor",
    "ElasticNet",
    "RandomForestRegressor",
    "MLPRegressor",
]

plot_results()