In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from fbprophet import Prophet
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.ar_model import AR
from statsmodels.tsa.statespace.sarimax import SARIMAX
from scipy.stats.mstats import mquantiles

# Turbine Data

In [None]:
# load data

# Read the raw turbine log; the timestamp column arrives as "Unnamed: 0".
df = pd.read_csv("data/Turbine_Data.csv", parse_dates=["Unnamed: 0"], low_memory=False)

# Move the timestamp into a properly named column (it ends up as the last column).
df['DateTime'] = df.pop('Unnamed: 0')
df['DateTime'] = pd.to_datetime(
    df['DateTime'],
    format='%Y-%m-%dT%H:%M:%SZ',
    errors='coerce',  # values that cannot be parsed become NaT instead of raising
)

# Expose the calendar components of the timestamp as separate columns.
for part in ('year', 'month', 'day', 'hour', 'minute'):
    df[part] = getattr(df['DateTime'].dt, part)

df.head()

In [None]:
# compute daily means
# Keep only the two measured series plus the timestamp, truncated to the calendar day.
df_daily = df[["AmbientTemperatue", "WindSpeed", "DateTime"]].copy()
df_daily["DateTime"] = df_daily["DateTime"].dt.date

# Average per day, then back-fill days with no valid reading from the next
# available day. The fill is done on the frame itself: the previous
# `df_means[col].fillna(method="backfill", inplace=True)` operated on a
# temporary column object (a no-op under pandas Copy-on-Write) and relied on
# the deprecated `method=` argument.
df_means = (
    df_daily
    .groupby("DateTime")
    .mean()
    .bfill()
    .reset_index()
)

In [None]:
# Daily-mean wind speed plotted against the row position in df_means.
daily_wind = df_means["WindSpeed"].values
plt.plot(daily_wind)
plt.title("Average daily wind speed", fontsize=16)
# plt.savefig("wind_data.pdf")

In [None]:
# Fit Facebook Prophet
import logging
logging.basicConfig(level=logging.ERROR)

# Probability levels at which every predictive distribution is summarised.
zero_one_grid = np.linspace(0, 1, 500)

prophet1_preds = []
prophet2_preds = []
prophet3_preds = []
prophet4_preds = []
dates = []

# Rename by column NAME. df_means is ordered [DateTime, AmbientTemperatue,
# WindSpeed], so the former positional assignment ["ds", "y", "temp"] made the
# temperature the forecast target and the wind speed the regressor.
df_prophet = df_means.rename(
    columns={"DateTime": "ds", "WindSpeed": "y", "AmbientTemperatue": "temp"})
# Back-fill on the frame itself; a chained `fillna(..., inplace=True)` on a
# single column is not guaranteed to write back into df_prophet.
df_prophet[["y", "temp"]] = df_prophet[["y", "temp"]].bfill()


def _next_day_quantiles(history, yearly_seasonality, temp_values=None):
    """Fit one Prophet model on `history` and summarise its one-step-ahead forecast.

    Parameters
    ----------
    history : DataFrame with columns "ds", "y" (and "temp") used for fitting.
    yearly_seasonality : bool forwarded to the Prophet constructor.
    temp_values : optional array with the "temp" regressor for every row of the
        future frame (history plus the forecast day). When given, "temp" is
        registered as an extra regressor; when None no regressor is used.

    Returns
    -------
    (model, future, quantiles) : the fitted model, the future frame passed to
        it, and the quantiles (on `zero_one_grid`) of the posterior predictive
        samples of "yhat" for the last row of the future frame.
    """
    model = Prophet(yearly_seasonality=yearly_seasonality)
    use_temp = temp_values is not None
    if use_temp:
        model.add_regressor("temp")
    model.fit(history)

    future = model.make_future_dataframe(periods=1)
    if use_temp:
        future["temp"] = temp_values

    ex = model.setup_dataframe(future.copy())
    samples = model.sample_posterior_predictive(ex)["yhat"][-1, :]
    return model, future, mquantiles(samples, zero_one_grid)


# Rolling window: fit on `offset` consecutive days, forecast the following day.
offset = 365
n_windows = len(df_prophet) - offset - 1
for i in range(n_windows):
    print("\r {0} / {1}".format(i + 1, n_windows), end=" ", flush=True)
    curr_df = df_prophet.iloc[i:(offset + i)]
    # Regressor values for the training window plus the day being forecast.
    temp_window = df_prophet["temp"].values[i:(offset + i + 1)]

    # no seasonality, no temperature regressor
    m1, future, quantiles = _next_day_quantiles(curr_df, False)
    dates.append(future["ds"].values[-1])
    prophet1_preds.append(quantiles)

    # yes seasonality, no temperature regressor
    m2, future, quantiles = _next_day_quantiles(curr_df, True)
    prophet2_preds.append(quantiles)

    # no seasonality, yes temperature regressor
    m3, future, quantiles = _next_day_quantiles(curr_df, False, temp_window)
    prophet3_preds.append(quantiles)

    # yes seasonality, yes temperature regressor
    # (m4 and future are left in scope for later inspection of the last window)
    m4, future, quantiles = _next_day_quantiles(curr_df, True, temp_window)
    prophet4_preds.append(quantiles)

In [None]:
import pickle

# Bundle the quantile grid, the forecast dates and the four sets of
# predictive quantiles, then persist them in a single pickle.
forecast_payload = {
    "grid": zero_one_grid,
    "dates": dates,
    "m1": prophet1_preds,
    "m2": prophet2_preds,
    "m3": prophet3_preds,
    "m4": prophet4_preds,
}
with open("data/wind_forecasts_new.pickle", "wb") as fp:
    pickle.dump(forecast_payload, fp)

In [None]:
# Point forecast of the last fitted model (seasonality + regressor) over its
# final future frame, drawn with Prophet's own plotting helper.
forecast = m4.predict(df=future)
fig = m4.plot(fcst=forecast)

In [None]:
# reconstruct distributions

# Drop any timezone so the timestamps compare against the naive numpy
# datetimes stored in `dates`.
df["DateTime"] = df.DateTime.dt.tz_localize(None)
grid_y = np.linspace(0, 1, 100)
skip = []

# Days with more than this many missing wind readings are excluded.
max_missing = 50

Y_quantiles = []
for i, date in enumerate(dates):
    start = date
    end = date + np.timedelta64(1, 'D')
    # All wind readings recorded in [start, end).
    day_wind = df.loc[(df.DateTime >= start) & (df.DateTime < end), "WindSpeed"]
    wind = day_wind.values
    wind = wind[~np.isnan(wind)]

    # Skip the day when too many readings are missing, and also when nothing
    # valid is left at all (no rows for that date, or only a few rows that are
    # all NaN): mquantiles of an empty sample is fully masked, not a usable
    # quantile function.
    if day_wind.isna().sum() > max_missing or wind.size == 0:
        Y_quantiles.append(None)
        skip.append(i)
    else:
        Y_quantiles.append(mquantiles(wind, grid_y))

In [None]:
# Persist the observed daily quantiles together with their grid and the
# indices of the days that were skipped.
observed_payload = {"grid": grid_y, "quantiles": Y_quantiles, "skip": skip}
with open("data/wind_true.pickle", "wb") as fp:
    pickle.dump(observed_payload, fp)

In [None]:
# Display entry 10 of Y_quantiles: the quantile vector for that forecast date,
# or None when the day was added to `skip`.
Y_quantiles[10]

# Fit the model

In [None]:
import pickle

# Observed daily wind-speed quantiles, their grid, and the skipped indices.
with open("data/wind_true.pickle", "rb") as fp:
    data = pickle.load(fp)
grid_y, Y_quantiles, skip = data["grid"], data["quantiles"], data["skip"]

# Prophet predictive quantiles for the four model variants.
# NOTE(review): the forecasting cell of this notebook writes
# "data/wind_forecasts_new.pickle", whereas this cell reads
# "data/wind_forecasts.pickle" — confirm this is the intended file and that
# its dates line up with the ones used to build "data/wind_true.pickle".
with open("data/wind_forecasts.pickle", "rb") as fp:
    data = pickle.load(fp)
zero_one_grid, dates = data["grid"], data["dates"]
prophet1_preds, prophet2_preds = data["m1"], data["m2"]
prophet3_preds, prophet4_preds = data["m3"], data["m4"]

In [None]:
from pwass.spline import MonotoneQuadraticSplineBasis
from pwass.distributions import Distribution
from pwass.regression.multi_distrib_on_distrib import MultiDistribOnDistribReg
from pwass.regression.distrib_on_distrib import DistribOnDistribReg

In [None]:
# Object arrays holding one Distribution per day (Y) and per day/model (X).
# np.empty with dtype=object leaves every slot as None until it is filled.
Y = np.empty(len(Y_quantiles), dtype=object)
X = np.empty((len(prophet1_preds), 4), dtype=object)
spbasis = MonotoneQuadraticSplineBasis(40, zero_one_grid)


def _as_distribution(grid, quantile_values):
    """Build a Distribution on `spbasis` from quantile values given on `grid`."""
    distrib = Distribution(wbasis=spbasis)
    distrib.init_from_quantile(grid, quantile_values)
    distrib.compute_spline_expansions()
    return distrib


skipped = set(skip)
model_preds = (prophet1_preds, prophet2_preds, prophet3_preds, prophet4_preds)

for i in range(len(Y)):
    # Skipped days keep their None placeholders in both X and Y.
    if i in skipped:
        continue

    Y[i] = _as_distribution(grid_y, Y_quantiles[i])
    for j, preds in enumerate(model_preds):
        X[i, j] = _as_distribution(zero_one_grid, preds[i])

# Force an integer dtype: np.array([]) defaults to float64, which numpy
# rejects as an index array when nothing was skipped.
skip = np.array(skip, dtype=int)

In [None]:
# Drop the skipped days. Skipped slots were never filled and are still None
# (object arrays start out as None), so filter on that instead of deleting by
# index: the result is the same on the first run, re-running the cell is a
# no-op rather than deleting further rows, and an empty skip list is harmless.
assert len(X) == len(Y), "forecasts and observations must cover the same days"
kept = np.array([y is not None for y in Y], dtype=bool)
Y = Y[kept]
X = X[kept]

In [None]:
# Chronological hold-out: the final n_test samples form the test set.
n_test = 100
trainX, testX = X[:-n_test, :], X[-n_test:, :]
trainY, testY = Y[:-n_test], Y[-n_test:]

In [None]:
# One regression on all four predictors jointly, plus one per single predictor.
regmulti = MultiDistribOnDistribReg()
regp1, regp2, regp3, regp4 = (DistribOnDistribReg() for _ in range(4))

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Hyper-parameter grid shared by every search below.
param_grid = dict(
    lambda_ridge=[0.5, 1.0, 2.0, 5.0, 10.0, 100.0],
    spline_basis=[spbasis],
    compute_spline=[False],
    fit_intercept=[True, False],
)

# Tune each single-predictor regression on its own column of trainX.
single_searches = []
for col, estimator in enumerate((regp1, regp2, regp3, regp4)):
    search = GridSearchCV(estimator, param_grid, cv=10, refit=True)
    search.fit(trainX[:, col], trainY)
    single_searches.append(search)
bestp1, bestp2, bestp3, bestp4 = single_searches

# Tune the joint regression on all four predictors.
best_multi = GridSearchCV(regmulti, param_grid, cv=10, refit=True)
best_multi.fit(trainX, trainY)

In [None]:
# Score the tuned regression on predictor column 0 (prophet1_preds) against the
# last 100 samples. NOTE(review): return_sd=True presumably adds a spread
# estimate of the score — confirm against the pwass implementation.
bestp1.best_estimator_.score(testX[:, 0], testY, return_sd=True)

In [None]:
# Score the tuned regression on predictor column 1 (prophet2_preds) against the
# last 100 samples.
bestp2.best_estimator_.score(testX[:, 1], testY, return_sd=True)

In [None]:
# Score the tuned regression on predictor column 2 (prophet3_preds) against the
# last 100 samples.
bestp3.best_estimator_.score(testX[:, 2], testY, return_sd=True)

In [None]:
# Score the tuned regression on predictor column 3 (prophet4_preds) against the
# last 100 samples.
bestp4.best_estimator_.score(testX[:, 3], testY, return_sd=True)

In [None]:
# Score the tuned joint regression (all four predictor columns) against the
# last 100 samples.
best_multi.best_estimator_.score(testX, testY, return_sd=True)

In [None]:
# Show the hyper-parameter combination selected by the grid search for the
# joint regression.
best_multi.best_params_

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(20, 5))

reg = best_multi.best_estimator_
# 1 when the selected model has an intercept row at the top of beta, else 0.
intercept = int(best_multi.best_params_["fit_intercept"])
nbasis = spbasis.nbasis

# Common colour scale for the four panels.
vmin = np.min(reg.beta[intercept:])
vmax = np.max(reg.beta[intercept:])

# Basis functions evaluated on the grid, one row per basis function.
basis_on_grid = np.asarray(spbasis.B)[:nbasis, :]

for i in range(4):
    # Rows of beta belonging to predictor i: exactly nbasis of them
    # (the former `+ 1` sliced one row into the next predictor's block).
    start = i * nbasis + intercept
    end = start + nbasis
    betamat = np.asarray(reg.beta)[start:end, :nbasis]
    # sum_{k,l} betamat[k, l] * outer(B[k], B[l]) written as a matrix product.
    beta_eval = basis_on_grid.T @ betamat @ basis_on_grid
    # Keep the image handle: the shared colorbar below needs a mappable.
    im = axes[i].imshow(beta_eval, vmin=vmin, vmax=vmax, cmap="RdBu_r", origin='lower')
#     axes[i].contourf(beta_eval, vmin=vmin, vmax=vmax, cmap="RdBu_r", 
#                      levels=np.linspace(-0.15, 0.10, 10))
    axes[i].contour(beta_eval, vmin=vmin, vmax=vmax, cmap="bwr",
                    levels=np.linspace(-0.15, 0.10, 10))

# Tick every 100 grid points, labelled on the [0, 1] probability scale.
tks = np.arange(0, beta_eval.shape[0] + 1, 100)
labs = ["{0:.1f}".format(x) for x in np.linspace(0, 1, len(tks))]

for i in range(4):
    axes[i].set_title("beta {0}".format(i + 1), fontsize=15)
    axes[i].set_xticks(tks)
    axes[i].set_xticklabels(labs)
    axes[i].set_yticks(tks)
    axes[i].set_yticklabels(labs)

# Reserve space on the right for one colorbar shared by all panels
# (all panels use the same vmin/vmax, so any image handle will do).
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.82, 0.2, 0.01, 0.6])
fig.colorbar(im, cax=cbar_ax)

# plt.savefig("beta.pdf", bbox_inches="tight")

In [None]:
# Predicted distributions for the held-out samples from the tuned joint model.
predY = best_multi.best_estimator_.predict(testX)

In [None]:
# Test sample to illustrate.
idx = 10

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))
ax_alpha, ax_pred = axes

# Left panel: first row of beta drawn as a spline.
# NOTE(review): reg.beta[0, :] is the intercept row only when the selected
# model was fitted with fit_intercept=True — confirm before labelling it alpha.
alpha_curve = spbasis.eval_spline(reg.beta[0, :])
ax_alpha.plot(spbasis.xgrid, alpha_curve, label="alpha")
ax_alpha.legend(loc="lower center", fontsize=14, bbox_to_anchor=(0.5, -0.3))

# Right panel: observed vs. predicted quantile function ...
observed, predicted = testY[idx], predY[idx]
ax_pred.plot(observed.quantile_grid, observed.quantile_eval,
             lw=3, color="steelblue", label="observed")
ax_pred.plot(predicted.quantile_grid, predicted.quantile_eval,
             lw=3, color="orange", label="predicted")

# ... together with the four Prophet forecasts fed to the regression.
# Each entry: (positional format args, colour, legend label).
forecast_styles = [
    ((), "forestgreen", "M1"),
    (("-.",), "seagreen", "M2"),
    ((".",), "limegreen", "M3"),
    (("--",), "green", "M4"),
]
for col, (fmt, colour, name) in enumerate(forecast_styles):
    forecast_distr = testX[idx][col]
    ax_pred.plot(forecast_distr.quantile_grid, forecast_distr.quantile_eval, *fmt,
                 color=colour, alpha=0.8, label=name)

ax_pred.legend(ncol=3, loc="lower center", fontsize=14, bbox_to_anchor=(0.5, -0.35))
ax_pred.set_ylim(-2, 11)
plt.savefig("alpha_and_wind_pred.pdf", bbox_inches="tight")
plt.show()