# Environment Initialization

In [19]:
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np

from statsmodels.tsa.arima.model import ARIMA

import os
# Make sure the output directory for the report figures exists.
# exist_ok=True makes the call a no-op when the directory is already there,
# so the cell can be re-run safely and there is no window between a separate
# existence check and the creation in which another process could create it.
os.makedirs("./report/figures/", exist_ok=True)

# Data Import/Pre-Processing/Visualization

In [20]:
# Select the interactive "widget" (ipympl) matplotlib backend for this notebook.
%matplotlib widget

# Column schema for the CSV: the timestamp column is read as text (it is
# parsed explicitly below), every other listed column as 64-bit float.
float_columns = [
  "solar_mw",
  "wind-direction",
  "wind-speed",
  "humidity",
  "average-wind-speed-(period)",
  "average-pressure-(period)",
  "temperature",
]
dtype = {"Datetime": "string"}
dtype.update({column: np.float64 for column in float_columns})

# Read the file, parse the day/month/year timestamps, then index the frame
# by time in chronological order.
solar_energy_df = (
  pd.read_csv("./solarenergy.csv", delimiter=",", dtype=dtype)
  .assign(Datetime=lambda frame: pd.to_datetime(frame["Datetime"],
                                                format="%d/%m/%Y %H:%M"))
  .set_index("Datetime")
  .sort_index()
)

def _save_timeseries_figure(frame, path, title="Solar Energy Dataset"):
  """Plot every column of ``frame`` as its own subplot and save the figure.

  Parameters
  ----------
  frame : pandas.DataFrame
    Data to plot; each column gets one subplot on a shared x axis.
  path : str
    Destination file handed to ``Figure.savefig``.
  title : str
    Figure-level title.

  Returns
  -------
  The collection of axes returned by ``DataFrame.plot``.
  """
  axes = frame.plot(figsize=(16, 10), subplots=True, sharex=True)
  figure = axes[0].get_figure()
  figure.suptitle(title)
  axes[-1].set_xlabel("Time")
  for axis in axes:
    # Fit the x limits tightly to the data (no padding at either end).
    axis.autoscale(enable=True, axis="x", tight=True)
  figure.savefig(path)
  # The figure is only needed on disk; close every open figure afterwards.
  plt.close("all")
  return axes


# Snapshot of the data as loaded, before any cleaning.
axes = _save_timeseries_figure(solar_energy_df,
                               "./report/figures/timeseries_raw.svg")

# Drop rows containing any missing value, put the series on a regular hourly
# grid, and fill the gaps this creates by linear interpolation.
# The rule is given as a Timedelta rather than the string "1H": the uppercase
# "H" alias is deprecated in recent pandas releases, while a Timedelta is
# accepted regardless of version.
solar_energy_df = (
  solar_energy_df
  .dropna()
  .resample(pd.Timedelta(hours=1))
  .interpolate("linear")
)

# Same figure for the cleaned, hourly data.
axes = _save_timeseries_figure(solar_energy_df,
                               "./report/figures/timeseries_clean.svg")

# Baseline Model

In [21]:
# Baseline: ARIMA with order (p, d, q) = (2, 1, 1) fitted to the solar_mw
# series alone (no exogenous regressors are passed).
arima_order = (2, 1, 1)
baseline_model = ARIMA(endog=solar_energy_df["solar_mw"], order=arima_order)
res = baseline_model.fit()

# Mean squared error reported by the fitted results object. The model was fit
# on the whole series, so this is not a held-out score.
print(res.mse)

65655.04431404267
