# Funções

In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from collections import defaultdict
import warnings # retirar avisos
warnings.filterwarnings('ignore')

In [2]:
def load_data(path = "../data/daily_load.csv", region = "S", cutoff = "2022-05-31"):
    """
    Read the daily load CSV and return the load series of one region.

    Parameters
    ----------
    path : str or path-like, default "../data/daily_load.csv"
        CSV file holding at least the columns "date", "id_reg" and "load_mwmed".
    region : str, default "S"
        Value of "id_reg" to keep (the default keeps the southern region).
    cutoff : str, timestamp or None, default "2022-05-31"
        Last date to keep (inclusive). Pass None to keep every date.

    Returns
    -------
    pd.DataFrame
        A single column "load_mwmed" indexed by "date".
    """
    df_load = pd.read_csv(path, parse_dates = ["date"])
    keep = df_load["id_reg"] == region
    if cutoff is not None:
        keep &= df_load["date"] <= cutoff
    return df_load.loc[keep, ["date", "load_mwmed"]].set_index("date")

In [3]:
def train_test_split(data, n_test):
    """
    Split the data into a training part and a test part made of the last rows.

    Parameters
    ----------
    data : pd.DataFrame, pd.Series or np.ndarray
        Observations ordered along the first axis.
    n_test : int
        Number of trailing observations that go to the test part.

    Returns
    -------
    tuple
        (train, test), each of the same type as `data`.

    Raises
    ------
    TypeError
        If `data` is not a DataFrame, Series or ndarray.
    ValueError
        If `n_test` is negative or larger than the number of observations.
    """
    if not isinstance(data, (pd.DataFrame, pd.Series, np.ndarray)):
        raise TypeError(
            "data must be a pandas DataFrame/Series or a numpy array, got "
            + type(data).__name__
        )
    n_obs = len(data)
    if not 0 <= n_test <= n_obs:
        raise ValueError(f"n_test must be between 0 and {n_obs}, got {n_test}")
    # An explicit split position avoids the -0 == 0 trap: with negative slicing,
    # n_test = 0 would leave the training part empty and put every row in test.
    split = n_obs - n_test
    if isinstance(data, (pd.DataFrame, pd.Series)):
        return data.iloc[:split], data.iloc[split:]
    # Slicing only the first axis works for arrays of any dimension.
    return data[:split], data[split:]

In [4]:
def create_future(start, t, cal_vars = False):
    """
    Build a DataFrame with the `t` consecutive daily dates that begin at `start`.

    Parameters
    ----------
    start : str or datetime-like
        First date of the range (T + 1, the first day to be forecast).
    t : int
        Number of daily periods to generate.
    cal_vars : bool, default False
        When truthy, the frame is passed through create_features(df, 't')
        and that result is returned.

    Returns
    -------
    pd.DataFrame
        One datetime column named 't', plus whatever create_features adds
        when `cal_vars` is truthy.
    """
    dates = pd.date_range(start, freq = 'D', periods = t)
    df = pd.DataFrame(dates, columns = ['t'])
    if cal_vars:
        # create_features is not defined in this part of the notebook;
        # it has to be in scope already when cal_vars is requested.
        df = create_features(df, 't')
    return df

In [5]:
def get_measures(forecast, test):
    """
    Compute accuracy measures for a forecast against the observed (test) values.

    Parameters
    ----------
    forecast : pd.Series, single-column pd.DataFrame or array-like
        Predicted values.
    test : pd.Series, single-column pd.DataFrame or array-like
        Observed values, in the same order and with the same length as `forecast`.
        Values are paired by position; index labels are ignored.

    Returns
    -------
    dict
        "erro"  : sum of squared errors,
        "mae"   : mean absolute error,
        "mse"   : mean squared error,
        "rmse"  : square root of "mse",
        "mape"  : result of sklearn's mean_absolute_percentage_error
                  (a fraction, i.e. NOT multiplied by 100),
        "smape" : symmetric MAPE, multiplied by 100.

    Raises
    ------
    ValueError
        If the inputs are empty or do not hold the same number of values.
    """
    # Flatten both inputs into 1-D float arrays. Unlike an in-place
    # reset_index, this leaves the caller's Series/DataFrames untouched and
    # makes Series, single-column frames and plain arrays behave the same way.
    y_pred = np.asarray(forecast, dtype = float).reshape(-1)
    y_true = np.asarray(test, dtype = float).reshape(-1)
    if y_true.size != y_pred.size:
        raise ValueError(
            f"forecast and test must have the same length, got {y_pred.size} and {y_true.size}"
        )
    if y_true.size == 0:
        raise ValueError("forecast and test must not be empty")

    residuals = y_true - y_pred
    sse = float(np.sum(residuals ** 2))
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(y_true, y_pred)

    # sMAPE: points where observed and predicted are both zero have a zero
    # denominator; they are perfect predictions, so they contribute 0
    # instead of turning the whole mean into NaN.
    denom = np.abs(y_true) + np.abs(y_pred)
    terms = np.divide(100*2.0 * np.abs(residuals), denom,
                      out = np.zeros_like(denom), where = denom != 0)
    smape = float(np.mean(terms))

    # dictionary with the error measures
    measures = { "erro": sse,
                 "mae": mae,
                 "mse": mse,
                 "rmse": rmse,
                 "mape": mape,
                 "smape": smape
                }
    return measures

# Prophet
Quick start: https://facebook.github.io/prophet/docs/quick_start.html <br>
Holidays and special events: https://facebook.github.io/prophet/docs/seasonality,_holiday_effects,_and_regressors.html#modeling-holidays-and-special-events

In [6]:
import fbprophet # conda install -c conda-forge fbprophet

In [7]:
fbprophet.__version__ # check which Prophet version is installed

'0.7.1'

In [8]:
# Prophet works on a two-column frame named 'ds' (date) and 'y' (value):
# move the date index back into a column and relabel both columns by position.
df = load_data().reset_index().set_axis(['ds', 'y'], axis = 1)
df

Unnamed: 0,ds,y
0,2000-01-01,4800.650000
1,2000-01-02,4899.800000
2,2000-01-03,6261.554167
3,2000-01-04,6733.741667
4,2000-01-05,6961.170833
...,...,...
8183,2022-05-27,12520.803833
8184,2022-05-28,10525.490875
8185,2022-05-29,9074.211250
8186,2022-05-30,11648.709583


In [None]:
h = 15       # forecast horizon, in days
k_fold = 11  # the loop below performs k_fold - 1 refits

# Rolling-origin evaluation: each pass trains on one more day than the
# previous one and forecasts the h days that follow the training window.
# The first pass (i = 1) leaves out the last h + k_fold - 2 rows of df and the
# last pass (i = k_fold - 1) leaves out exactly the last h rows.
# NOTE(review): lining the forecasts up with the held-out rows assumes df has
# one row per calendar day with no gaps — confirm.
cv_parts = []
for i in range(1, k_fold, 1):
    desloc= h + k_fold
    df_ = df.iloc[:(-desloc+i+1), :]
    print(df_.ds.min(), df_.ds.max())
    m = fbprophet.Prophet(daily_seasonality=True)
    model = m.fit(df_)  # fit() hands back the fitted Prophet object itself
    future = m.make_future_dataframe(periods = h, freq = 'D')
    prediction = m.predict(future)
    # keep only the h out-of-sample rows and tag them with the last training
    # date; assign() builds a new frame instead of writing into a slice
    pred_ = prediction[['ds','yhat']].iloc[-h:].assign(ult_dt_train = df_.ds.max())
    cv_parts.append(pred_)
# one concat at the end instead of growing the frame on every pass
df_cv = pd.concat(cv_parts, axis = 0) if cv_parts else pd.DataFrame()

In [146]:
# MAPE of the forecasts, computed separately for each last-training date.
# NOTE(review): df_cv_2 is not created in any cell visible here; judging by its
# columns it is presumably df_cv joined with the observed y — confirm that it is
# defined before this cell when running on a fresh kernel.
l = []  # one MAPE per distinct ult_dt_train, in order of first appearance
for dv_date in df_cv_2.ult_dt_train.unique():
    # rows forecast by the model whose training data ended on dv_date
    df_cv_2_ = df_cv_2[df_cv_2.ult_dt_train == dv_date]
    acuracia = get_measures(df_cv_2_.yhat, df_cv_2_.y)
    l.append(acuracia["mape"])
# the list of per-cutoff MAPEs followed by their arithmetic mean
print(l, sum(l)/len(l))

[0.07077105194984414, 0.05370674274273128, 0.04681387381762834, 0.04311435640466109, 0.0405002643223417, 0.037236122791958, 0.03387277733960352, 0.03183900457840286, 0.02762989562026731, 0.025034444834939874]


In [148]:
# all forecasts made for 2022-05-17, whatever the training cutoff (ult_dt_train)
df_cv_2.loc[df_cv_2["ds"] == "2022-05-17"]

Unnamed: 0,ds,yhat,ult_dt_train,y
9,2022-05-17,12229.962169,2022-05-07,11964.909375
23,2022-05-17,12224.268394,2022-05-08,11964.909375
37,2022-05-17,12219.548759,2022-05-09,11964.909375
51,2022-05-17,12215.067467,2022-05-10,11964.909375
65,2022-05-17,12214.314448,2022-05-11,11964.909375
79,2022-05-17,12211.059129,2022-05-12,11964.909375
93,2022-05-17,12206.755787,2022-05-13,11964.909375
107,2022-05-17,12204.055145,2022-05-14,11964.909375
121,2022-05-17,12200.769751,2022-05-15,11964.909375
135,2022-05-17,12200.141516,2022-05-16,11964.909375


In [13]:
folds = 2 # number of partitions (refits)
horz = 15 # forecast horizon, in days
rows = df.shape[0]
df_base = pd.DataFrame() # accumulates the horz-day forecasts of every fold
for fold in range(folds,0,-1):
    print(f"forecasting fold {fold}...")
    # The training window stops (fold - 1) * horz rows before the end of df.
    # NOTE(review): for fold == 1 that is the whole of df, so its forecast lies
    # beyond the last observed date and has no actual values to be scored
    # against; rows - fold*horz would keep every fold inside the data —
    # confirm which one is intended.
    slide = rows-((fold-1)*horz)
    df_cv = df.iloc[:slide]
    m = fbprophet.Prophet(daily_seasonality=True)
    model = m.fit(df_cv)
    future = m.make_future_dataframe(periods = horz, freq = 'D')
    prediction = m.predict(future)
    # keep only the horz out-of-sample rows and tag them with the last training
    # date; assign() builds a new frame instead of writing into a slice
    pred_ = prediction[['ds','yhat']].iloc[-horz:].assign(ult_dt_train = df_cv.ds.max())
    # store the result in the accumulator: writing it to df_cv (the training
    # slice) left df_base empty and dropped every fold's forecast
    df_base = pd.concat([df_base, pred_], axis = 0)