In [None]:
import glob

import matplotlib.pyplot as plt
import mlflow
import numpy as np
import pandas as pd
import seaborn as sns

from tg import get_data_path, get_root_path
from tg.datasets import DATASET_FACTORY_LOOKUP

# Plot colours: nine hex strings sampled from seaborn's "husl" palette.
# Alternative kept for reference: sns.color_palette("pastel")
color_palette = list(sns.color_palette("husl", n_colors=9).as_hex())

# Remove pandas' display limits so DataFrames render every column and row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [None]:
# Dataset identifier (as logged in `params.dataset_name`) -> display label
# used in tables and figure titles.
dict_dataset_names = {
    "AIR_PASSENGERS": "Passageiros aéreos",
    "HOMICIDES": "Homicídios dolosos no RJ",
    "NOISY_SINE30": "Seno ruidoso",
    "PERFECT_SINE30": "Seno perfeito",
    "RANDOM_WALK": "Caminhada aleatória",
}

# MLflow run column -> display column name for the report tables.
dict_column_names = {
    "params.model_name": "Modelo",
    "metrics.rmse": "RMSE",
    "metrics.mae": "MAE",
    "metrics.mape": "MAPE",
    "metrics.smape": "sMAPE",
    "params.dataset_name": "Conjunto de dados",
}


In [None]:
# Use the local file store at the path returned by get_root_path("mlruns")
# as the MLflow tracking backend.
mlflow.set_tracking_uri(f"file:///{get_root_path('mlruns')}")

# Experiment with ID '0'; later cells query its runs.
experiment = mlflow.get_experiment('0')

In [None]:
def _relocate_artifact_uri(uri: str) -> str:
    """Re-root a logged artifact URI onto this checkout's project directory.

    Everything before the first ``mlruns`` in ``uri`` is swapped for
    ``file:///<get_root_path("")>`` and backslashes are normalised to forward
    slashes, so runs logged under a different root path can still be located.

    Args:
        uri: The ``artifact_uri`` value of one MLflow run.

    Returns:
        The URI pointing into the local project tree, using ``/`` separators.
    """
    # Slice off the old prefix instead of using str.replace: replace() would
    # garble the string when the prefix is empty and would also rewrite any
    # later repetition of the prefix.
    old_prefix = uri.split('mlruns')[0]
    # Same "file:///" scheme as the tracking URI; the previous 'file///'
    # spelling was missing the colon.
    new_prefix = 'file:///{}'.format(get_root_path(""))
    return (new_prefix + uri[len(old_prefix):]).replace('\\', '/')


# All runs of the selected experiment, one row per run.
runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id])
runs['artifact_uri'] = runs['artifact_uri'].apply(_relocate_artifact_uri)
runs.head(3)

In [None]:
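# NOTE(review): reference only, nothing in this cell is executed. It looks like
# a copy of a model-name -> input-builder lookup defined elsewhere in the
# project; confirm against the source before relying on it.
    # 'NAIVE': _get_default_input,
    # 'ARIMA': _get_default_input,
    # 'SARIMA': _get_default_input,
    # 'RNN': _get_lagged_input,
    # 'SVR': _get_lagged_input,
    # 'ELM': _get_lagged_input,
    # 'STL': _get_default_input,
    # 'ES': _get_default_input,
    # 'LSTM': _get_lagged_input,
    # 'ARIMA_RNN': _get_default_input,
    # 'SARIMA_SVR': _get_default_input,
    # 'STL_ELM': _get_default_input,
    # 'ES_LSTM': _get_default_input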

In [None]:
# Display order for the report columns (used by the LaTeX export cell).
columns_order = ["Modelo", "Conjunto de dados", "RMSE", "MAE", "MAPE", "sMAPE"]

# Build the metrics table in one pass:
#   1. pick the model, dataset and error-metric columns from the runs table;
#   2. switch to display column names and display dataset labels;
#   3. keep, for each (dataset, model) pair, only the run with the lowest sMAPE;
#   4. order the result by dataset and then by sMAPE.
metrics_table = (
    runs[[
        "params.model_name", "metrics.rmse", "metrics.mape", "metrics.smape",
        "metrics.mae", "params.dataset_name"
    ]]
    .rename(columns=dict_column_names)
    .assign(**{
        "Conjunto de dados":
        lambda frame: frame["Conjunto de dados"].replace(dict_dataset_names)
    })
    .sort_values(["sMAPE"])
    .drop_duplicates(subset=["Conjunto de dados", "Modelo"], keep="first")
    .sort_values(["Conjunto de dados", "sMAPE"])
    .reset_index(drop=True)
)

metrics_table.round(4).head(3)

In [None]:
# Metrics of every model on the random-walk dataset, rounded to 4 decimals.
metrics_table.loc[metrics_table['Conjunto de dados'].eq(
    dict_dataset_names['RANDOM_WALK'])].round(4)


In [None]:
# _metrics_table = metrics_table[columns_order].round(1).set_index(
#     ["Conjunto de dados", "Modelo"])
# latex_metrics_table = _metrics_table.to_latex()
# print(latex_metrics_table)

In [None]:
def _load_run_predictions(artifact_uri: str) -> np.ndarray:
    """Read the predictions stored as a CSV artifact of one MLflow run.

    Args:
        artifact_uri: The run's artifact URI; everything up to and including
            the last ``///`` is dropped to obtain a filesystem directory.

    Returns:
        The values of the second column of the first CSV file (in sorted
        order) found directly inside the artifact directory.
        NOTE(review): presumably the first column is a saved index and the
        second holds the predictions; confirm against the code that writes it.

    Raises:
        FileNotFoundError: If the artifact directory contains no ``*.csv``.
    """
    artifact_dir = artifact_uri.split("///")[-1]
    # Escape the directory so glob metacharacters in the path are literal, and
    # sort so the chosen file does not depend on filesystem listing order.
    csv_paths = sorted(glob.glob(f"{glob.escape(artifact_dir)}/*.csv"))
    if not csv_paths:
        raise FileNotFoundError(
            f"No prediction CSV found in artifact directory {artifact_dir!r}")
    return pd.read_csv(csv_paths[0]).iloc[:, 1].values


# One prediction array per run, aligned with the rows of `runs`.
preds = [_load_run_predictions(uri) for uri in runs["artifact_uri"]]

runs["Predições"] = preds

In [None]:
# Display order for the report columns, now including the predictions.
columns_order = [
    "Modelo", "Conjunto de dados", "RMSE", "MAE", "MAPE", "sMAPE", "Predições"
]

# Same pipeline as the metrics table, carrying each run's prediction array:
# select columns, switch to display names and labels, keep the lowest-sMAPE
# run for each (dataset, model) pair, then order by dataset and sMAPE.
preds_table = (
    runs[[
        "params.model_name", "metrics.rmse", "metrics.mape", "metrics.smape",
        "metrics.mae", "params.dataset_name", "Predições"
    ]]
    .rename(columns=dict_column_names)
    .assign(**{
        "Conjunto de dados":
        lambda frame: frame["Conjunto de dados"].replace(dict_dataset_names)
    })
    .sort_values(["sMAPE"])
    .drop_duplicates(subset=["Conjunto de dados", "Modelo"], keep="first")
    .sort_values(["Conjunto de dados", "sMAPE"])
    .reset_index(drop=True)
)

preds_table.round(4)

In [None]:
# Dataset to plot. Other identifiers previously listed here:
#   "HOMICIDES", "NOISY_SINE30", "PERFECT_SINE30", "RANDOM_WALK"
dataset_name = "RANDOM_WALK"

# Look up the factory registered for this dataset and call it to get the series.
dataset_factory = DATASET_FACTORY_LOOKUP[dataset_name]
series = dataset_factory()

# Models to compare, one subplot each. Earlier selections kept for reference:
#   ["ARIMA_RNN", "ES", "RNN", "SARIMA"]
#   ['ARIMA', 'RNN', 'SVR', 'STL_ELM']
#   also considered: 'ELM', 'STL'
model_names = ["ARIMA_RNN", "SARIMA_SVR", "STL_ELM", "SARIMA"]

In [None]:
# One stacked panel per entry in `model_names`: the full series in grey with
# that model's predictions drawn over the last len(predictions) points.
n_models = len(model_names)
# 1.75 in of height per panel reproduces the previous 10x7 figure for four
# models; squeeze=False keeps `axs` an array even when there is one model.
fig, axs = plt.subplots(n_models,
                        1,
                        figsize=(10, 1.75 * n_models),
                        squeeze=False)
axs = axs[:, 0]

dataset_label = dict_dataset_names[dataset_name]

for i, (ax, model) in enumerate(zip(axs, model_names)):

    # `preds_table` holds a single (lowest-sMAPE) row per dataset/model pair.
    aux_data = preds_table.loc[(preds_table['Modelo'] == model) &
                               (preds_table['Conjunto de dados'] ==
                                dataset_label), :]
    if aux_data.empty:
        raise ValueError(
            "No run found for model {!r} on dataset {!r}".format(
                model, dataset_name))
    aux_pred = aux_data['Predições'].values[0]
    aux_model = model.replace('_', ' + ').upper()

    ax.set_title(aux_model, fontsize=10)
    ax.plot(range(len(series)), series, color='black', alpha=0.5)
    # RMSE is shown without a "%" sign: it is not a percentage. Only sMAPE
    # keeps the percent suffix.
    ax.plot(range(len(series) - len(aux_pred), len(series)),
            aux_pred,
            color=color_palette[i % len(color_palette)],
            label="RMSE: {:.2f}\nsMAPE: {:.1f}%".format(
                aux_data['RMSE'].values[0], aux_data['sMAPE'].values[0]))

    # Hide tick marks and tick labels on both axes; only the grid remains.
    ax.tick_params(axis='both',
                   which='major',
                   bottom=False,
                   top=False,
                   left=False,
                   right=False,
                   labelbottom=False,
                   labelleft=False)
    ax.legend(fontsize=9, shadow=True, loc='upper left')
    ax.grid(linewidth=0.15)

fig.suptitle("Desempenho dos modelos híbridos - {}".format(dataset_label),
             fontsize=15)
fig.tight_layout()
# Save the figure under reports/, named after the dataset identifier.
fig.savefig(get_root_path("reports/desempenho_hibridos_{}.png".format(
    dataset_name.lower())),
            dpi=300)
plt.show()