# Funções

In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, ExponentialSmoothing, Holt
from collections import defaultdict
import json
import warnings # retirar avisos
warnings.filterwarnings('ignore')

In [4]:
def load_data():
    """
    Função para ler e transformar os dados já presentes no diretório especificado
    """
    path = "../data/daily_load.csv"
    df_load = pd.read_csv(path, parse_dates = ["date"])
    df_load2 = df_load[df_load["id_reg"] == "S"]           # região sul
    df_load3 = df_load2[df_load2["date"] <= '2022-05-31']  # data de corte
    df_load4 = df_load3[["date", "load_mwmed"]].set_index("date")
    return df_load4

In [5]:
def train_test_split(data, n_test):
    """
    Função para partir or dados em treino e teste
    """
    if isinstance(data, pd.DataFrame):
        train, test = data.iloc[:-n_test, :], data.iloc[-n_test:, :]
    elif isinstance(data, np.ndarray):
        train, test = data[:-n_test, :], data[-n_test:, :]
    return train, test

In [6]:
def create_future(start, t, cal_vars = False):
    """ Função para criar DataFrame de datas (dias) seguintes a T, assim como as variáveis de calendário se cal_vars = True.
       start: T + 1
       t: períodos à frente """
    dates = pd.date_range(start, freq = 'd', periods = t)
    df = pd.DataFrame(dates, columns = ['t'])
    if cal_vars == True:
        df = create_features(df, 't')
    elif cal_vars == False:
        pass
    return df

In [7]:
def get_measures(forecast, test):
    """
    Função para obter medidas de acurária a partir dos dados de projeção e teste
    """
    forecast.reset_index(drop = True, inplace = True)
    test.reset_index(drop = True, inplace = True)
    if isinstance(forecast, pd.Series) and isinstance(test, pd.Series):
        errors = [(test.iloc[i] - forecast.iloc[i])**2 for i in range(len(test))]
    else:
        errors = [(test.iloc[i][0] - forecast.iloc[i])**2 for i in range(len(test))]
    mae = mean_absolute_error(test, forecast)
    mse = mean_squared_error(test, forecast)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(test, forecast)
    # smape
    a = np.reshape(test.values, (-1,))
    b = np.reshape(forecast.values, (-1,))
    smape = np.mean(100*2.0 * np.abs(a - b) / (np.abs(a) + np.abs(b))).item()
    # dicionário com as medidas de erro
    measures = { "erro": sum(errors),
                 "mae": mae,
                 "mse": mse,
                 "rmse": rmse,
                 "mape": mape,
                 "smape": smape
                }
    # arredondamento
    # for key, item in measures.items():
    #     measures[key] = round(measures[key], 2)
    return measures

In [13]:
df = load_data()
df.load_mwmed = df.load_mwmed.interpolate(method="linear")
df

Unnamed: 0_level_0,load_mwmed
date,Unnamed: 1_level_1
2000-01-01,4800.650000
2000-01-02,4899.800000
2000-01-03,6261.554167
2000-01-04,6733.741667
2000-01-05,6961.170833
...,...
2022-05-27,12520.803833
2022-05-28,10525.490875
2022-05-29,9074.211250
2022-05-30,11648.709583


In [15]:
folds = 15 #partições
horz = n_test = 15 #horizonte de predição
rows = df.shape[0]
out = defaultdict(dict)
df_base = pd.DataFrame()
for fold in range(folds,0,-1):
    print(f"forecasting fold {fold}...")
    #slide = rows-(fold*horz)#-1
    slide = rows-((fold-1)*horz)
    df_cv = df.iloc[:slide]
    n_test = 15
    train, test = train_test_split(df_cv, n_test)
    fit1 = ExponentialSmoothing(train ,seasonal_periods=7,trend='add', seasonal='add').fit() # seasonal='mul' é melhor
    y_hat = fit1.forecast(n_test)
    out[f"cv_{fold}"]["pred"] = y_hat
    out[f"cv_{fold}"]["test"] = test["load_mwmed"]
d = dict(out)

forecasting fold 15...
forecasting fold 14...
forecasting fold 13...
forecasting fold 12...
forecasting fold 11...
forecasting fold 10...
forecasting fold 9...
forecasting fold 8...
forecasting fold 7...
forecasting fold 6...
forecasting fold 5...
forecasting fold 4...
forecasting fold 3...
forecasting fold 2...
forecasting fold 1...


In [16]:
mapes = []
for x in d:
    meas = get_measures(pd.Series(d[x]["pred"]),pd.Series(d[x]["test"]))
    print(meas)
    mapes.append(meas["mape"])

{'erro': 21500974.103465207, 'mae': 1007.112957996408, 'mse': 1433398.273564347, 'rmse': 1197.2461207138435, 'mape': 0.08710045110726373, 'smape': 9.276606605523007}
{'erro': 65607156.63467409, 'mae': 2027.8119050569278, 'mse': 4373810.442311605, 'rmse': 2091.3656883270332, 'mape': 0.1623410931838122, 'smape': 17.75897324645499}
{'erro': 2113956.1269205348, 'mae': 321.1293585323166, 'mse': 140930.408461369, 'rmse': 375.4069904268819, 'mape': 0.025752828154137066, 'smape': 2.549377712186443}
{'erro': 5564412.852183415, 'mae': 513.6771898258795, 'mse': 370960.85681222763, 'rmse': 609.0655603563771, 'mape': 0.038400228548430726, 'smape': 3.879141634554821}
{'erro': 91140472.99535382, 'mae': 2097.52863290451, 'mse': 6076031.533023587, 'rmse': 2464.9607568932183, 'mape': 0.18849471134540816, 'smape': 16.58882437101228}
{'erro': 128336371.05590637, 'mae': 2772.351305732098, 'mse': 8555758.070393758, 'rmse': 2925.022746987407, 'mape': 0.21127570077913524, 'smape': 23.909686116630848}
{'erro':

In [17]:
np.mean(mapes)

0.09384745283243388

In [18]:
with open('validation/hw_cv.json', 'w') as f:
    json.dump(d, f)

FileNotFoundError: [Errno 2] No such file or directory: 'validation/hw_cv.json'