In [None]:
! pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip 

# Background
You work for an energy company in Australia. Your company builds solar panel arrays and then sells the energy they produce to industrial customers. The company wants to expand to the city of Melbourne in the state of Victoria. Prices and demand for electricity change every day. Customers pay for the energy received using a formula based on the local energy market's daily price. Your company's pricing committee wants your team to estimate energy prices for the next 12-18 months to use those prices as the basis for contract negotiations. In addition, the VP of strategy is researching investing in storage capacity (i.e., batteries) as a new source of revenue. The plan is to store some of the energy produced by the solar panels when pricing conditions are unfavorable and sell it by the next day on the open market if the prices are higher.


  date
    datetime, the date of the recording
   demand
    float, a total daily electricity demand in MWh
   RRP
    float, a recommended retail price in AUD$ / MWh
   demand_pos_RRP
    float, a total daily demand at positive RRP in MWh
  RRP_positive
    float, an averaged positive RRP, weighted by the corresponding intraday demand in AUD$ / MWh
   demand_neg_RRP
    float, a total daily demand at negative RRP in MWh
   RRP_negative
    float, an average negative RRP, weighted by the corresponding intraday demand in AUD$ / MWh
   frac_at_neg_RRP
    float, the fraction of the day when the demand was traded at a negative RRP
   min_temperature
    float, minimum temperature during the day in Celsius
   max_temperature
    float, maximum temperature during the day in Celsius

In [None]:
import pandas as pd
import pandas_profiling
# Read the raw dataset into `df`; the profiling report and the demand series
# used for modelling are both built from this frame.
df = pd.read_csv("/content/complete_dataset.csv")

In [None]:
from pandas_profiling import ProfileReport

In [None]:
import os
# Terminate the kernel process immediately with exit status 0 (the literal
# `00` is simply the integer 0); no cleanup handlers are run.
# NOTE(review): presumably this forces a runtime restart so the freshly
# installed pandas-profiling is picked up — confirm. Because the process dies,
# everything defined by earlier cells (including `df`) is lost and those cells
# must be re-run before the cells below will work.
os._exit(00)


In [None]:
# Build the profiling report for the raw frame, asking the HTML renderer to
# use the full page width.
profile = ProfileReport(
    df,
    html={"style": {"full_width": True}},
)

# Write the report to report.html, then embed the same report in the notebook.
profile.to_file(output_file="report.html")
profile.to_notebook_iframe()

# Your challenge
Create a report that covers the following:

How do energy prices change throughout the year? Are there any patterns by season or month of the year?
Build a forecast of daily energy prices the company can use as the basis of its financial planning.
Provide guidance on how much revenue the energy storage venture could generate per year using retail prices and a 70MWh storage system.


In [None]:
# Keep only the date and demand columns, renamed to the `ds` / `y` pair that
# the forecasting cells below pass to the model, and parse `ds` as datetimes.
demand_df = (
    df[['date', 'demand']]
    .rename(columns={'date': 'ds', 'demand': 'y'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)
demand_df.head()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the complete demand series against its dates for a first overall look.
fig, ax = plt.subplots(figsize=(10, 4))
ax.set_title("All Data")
ax.plot(demand_df['ds'].dt.to_pydatetime(), demand_df['y'])
plt.show()

The plot shows some yearly seasonality: energy demand increases every year until June, then decreases for the rest of the year.

In [None]:
# Zoom in on the first 100 rows of the series to expose short-term variation.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('first 100 days')
ax.plot(
    demand_df['ds'].dt.to_pydatetime()[:100],
    demand_df['y'].iloc[:100],
)
plt.show()

We can see that energy demand is not consistent from day to day.


In [None]:
train_size = 8  # NOTE(review): not referenced by any visible cell; kept in case another cell reads it

# Split the rows in their existing order: the first 80% form the training set
# and the remaining 20% is halved into validation and test sets.
split_at = int(len(demand_df) * 0.8)
df_train, df_holdout = demand_df[:split_at], demand_df[split_at:]
df_valid, df_test = df_holdout[:len(df_holdout)//2], df_holdout[len(df_holdout)//2:]


def train_valid_plot():
    """Plot the training, validation and test segments of the demand series.

    Reads the notebook-level ``df_train``, ``df_valid`` and ``df_test`` frames
    and draws each one's ``y`` column against its own ``ds`` dates on a single
    shared axis. Returns nothing; the figure is shown via ``plt.show()``.
    """
    fig = plt.figure(figsize=(10,6))
    ax = fig.add_subplot(111)
    ax.plot(df_train['ds'].dt.to_pydatetime(), df_train["y"], color='#1f76b4', label='Training Set')
    ax.plot(df_valid['ds'].dt.to_pydatetime(), df_valid["y"], color='#fc7d0b', label='Validation Set')
    # Test dates are paired with test values: pairing them with df_valid["y"]
    # would draw the wrong curve and fails outright when the two sets differ
    # in length (which happens whenever the hold-out has an odd row count).
    ax.plot(df_test['ds'].dt.to_pydatetime(), df_test["y"], color='#CDC7E5', label='Test Set')
    ax.legend()
    plt.show()

train_valid_plot()

# Defining a Model
We use 'D' to set the frequency of predictions as daily, and we use plot-all to visualize model performance live during training. The only other alteration we make is to specify Australian holidays.


In [None]:
# NeuralProphet
!pip install neuralprophet[live] --quiet



set_random_seed(0)

In [None]:
from neuralprophet import NeuralProphet
from neuralprophet import set_random_seed

In [None]:
# Baseline: a NeuralProphet model with default settings plus Australian
# holidays as additional events.
m = NeuralProphet()
m.add_country_holidays(country_name="Australia")

# Fit on the training split with the validation split supplied for scoring;
# "plot-all" draws the live metric plots while training runs.
metrics = m.fit(
    df=df_train,
    validation_df=df_valid,
    freq="D",
    progress="plot-all",
)

# Show the final row of the training metrics.
metrics.iloc[-1:]

We can see from the graph above that the model is being overfit to the data. The model is fitting as low as it can on the training data, but we want the model to fit well on unseen data (ie. validation set). 

Looking at the metric plots above, we can see that the optimal parameters are reached around 25–30 epochs and then the model starts to overfit. We can combat this by specifying the number of epochs. A complete list of tunable model parameters can be found in the NeuralProphet documentation.

In [None]:
# Same model as before, but training is capped at 30 epochs to limit the
# over-fitting seen in the previous run.
m = NeuralProphet(epochs=30)
m.add_country_holidays(country_name="Australia")

metrics2 = m.fit(
    df=df_train,
    validation_df=df_valid,
    freq="D",
    progress="plot-all",
)

# Show the final row of the training metrics.
metrics2.iloc[-1:]

# Evaluating a Model


In [None]:
# Ask the model for a frame that extends the training data by one period per
# validation row, with predictions over the historic rows requested as well.
future = m.make_future_dataframe(
    df=df_train,
    periods=len(df_valid),
    n_historic_predictions=True,
)

# Predict over that frame and plot the result.
forecast = m.predict(df=future)
fig_forecast = m.plot(forecast)

In [None]:
# Compare the last len(df_valid) forecast rows with the observed validation
# values; the title reports the RMSEs from the final row of `metrics2`.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(
    "Train RMSE: {:.2f} --- Validation RMSE: {:.2f}".format(
        metrics2.iloc[-1:].RMSE.values[0],
        metrics2.iloc[-1:].RMSE_val.values[0],
    )
)
ax.plot(df_valid['ds'].dt.to_pydatetime(), df_valid["y"], '.k', label='True Value')
ax.plot(
    forecast.iloc[-len(df_valid):]['ds'].dt.to_pydatetime(),
    forecast.iloc[-len(df_valid):]["yhat1"],
    label='Predicted Value',
)
ax.legend()
plt.show()


In the third plot, we are looking at the yearly seasonality. We can see that energy demand is at its lowest in April and October, and energy demand is at its highest in July. 

In [None]:
# Plot the fitted model's parameter panels; the accompanying text discusses
# the yearly-seasonality panel.
fig_param = m.plot_parameters()

# Adding AR-Net (AutoRegression)
One of the additions NeuralProphet makes over Prophet is AR-Net (Auto-Regressive Neural Network). This allows NeuralProphet to use observations from previous time steps when making a prediction. In our case (`n_lags=3`), this means that the model can use the energy demand of the previous three days to make its predictions.

In [None]:
# Enable auto-regression: with n_lags=3 the model is given the three previous
# observations, and n_forecasts=1 makes it predict a single step ahead.
m = NeuralProphet(
    n_forecasts=1,
    n_lags=3,
    epochs=30,
    changepoints_range=0.95,
)
m.add_country_holidays(country_name="Australia")

metrics3 = m.fit(df=df_train, validation_df=df_valid, freq="D")

# Show the final row of the training metrics.
metrics3.iloc[-1:]

In [None]:
import numpy as np


We can see from the metrics above that the validation RMSE decreased again. This is another significant gain in model performance we got by simply tuning two parameters.

If we use the same code that we did previously, only one prediction is made. It is unclear from the docs how to make "running" predictions when AR-Net is enabled, and therefore we can use the following code to make this possible. If anyone knows a built-in way to do this please let me know!


In [None]:
 valid_preds = [] #list to store predictions
lags = 3

for d in df_valid['ds'].values:
    # getting necessary df rows
    date_index = demand_df.index[demand_df['ds'] == d][0]
    future = demand_df.iloc[date_index-lags:date_index]
    
    # adding new row
    entry = pd.DataFrame({
        'ds': [d],
        'y' : [np.nan]
    })
    future = pd.concat([future, entry], ignore_index = True, axis = 0)
    
    # making prediction
    forecast = m.predict(df=future)
    valid_preds.append(forecast.loc[lags]['yhat1'])

We can then use the following code block to plot our predictions. We can see from the plot that the model is starting to pick up on outlying points.



In [None]:
# Pair every validation row with its walk-forward prediction in a new frame,
# leaving `df_valid` itself untouched.
df_valid_copy = df_valid.assign(yhat1=valid_preds)
df_valid_copy.head()  # not the last expression of the cell, so it is not rendered

# Overlay the predictions on the observed validation values; the title
# reports the RMSEs from the final row of `metrics3`.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(
    "Train RMSE: {:.2f} --- Validation RMSE: {:.2f}".format(
        metrics3.iloc[-1:].RMSE.values[0],
        metrics3.iloc[-1:].RMSE_val.values[0],
    )
)
ax.plot(df_valid_copy['ds'].dt.to_pydatetime(), df_valid_copy["y"], '.k', label='True Value')
ax.plot(df_valid_copy['ds'].dt.to_pydatetime(), df_valid_copy["yhat1"], label='Predicted Value')
ax.legend()
plt.show()

In [None]:
import itertools


In [None]:
results = []  # one summary record per hyper-parameter combination

# Candidate values for each tuned hyper-parameter.
param_grid = {
    'num_hidden_layers': [1, 2],
    'changepoints_range': [0.95, 0.975, 0.99, 0.995, 0.999],
}

# Cartesian product of the grid, expressed as keyword-argument dicts.
all_params = [
    dict(zip(param_grid.keys(), combo))
    for combo in itertools.product(*param_grid.values())
]

# Fit one model per combination and score it on the single held-out
# validation split (this is a hold-out evaluation, not k-fold cross-validation).
for params in all_params:
    m = NeuralProphet(
        **params,
        n_forecasts=1,
        newer_samples_weight=4,
        n_lags=3,
        learning_rate=0.02,
        epochs=50,
        batch_size=32,
    )
    m.add_country_holidays(country_name='Australia')
    metrics4 = m.fit(df=df_train, validation_df=df_valid, freq="D")

    # Record the best validation RMSE, the training RMSE on that same metrics
    # row, and the row label at which the best validation RMSE occurred.
    best_epoch = metrics4['RMSE_val'].idxmin()
    results.append({
        "RMSE_val": metrics4['RMSE_val'].min(),
        "RMSE_train": metrics4['RMSE'][best_epoch],
        "score_epoch_number": best_epoch,
        **params,
    })

In [None]:
# Tabulate the grid-search records and rank them from lowest to highest
# validation RMSE; the best combination is the first row.
results_df = pd.DataFrame(results).sort_values('RMSE_val')
results_df.head(10)

In [None]:
# Refit a single model with hand-picked hyper-parameter values, again with
# live metric plots during training.
m = NeuralProphet(
    newer_samples_weight=5,
    n_forecasts=1,
    n_lags=3,
    learning_rate=0.02,
    epochs=25,
    batch_size=32,
    num_hidden_layers=1,
    changepoints_range=0.995,
)
m.add_country_holidays(country_name="Australia")

metrics5 = m.fit(
    df=df_train,
    validation_df=df_valid,
    freq="D",
    progress="plot-all",
)

# Show the final row of the training metrics.
metrics5.iloc[-1:]


In [None]:
# Walk-forward one-step-ahead predictions over the validation set, this time
# with the model currently bound to `m`.
valid_preds = []  # one prediction per validation date, in order
lags = 3

for d in df_valid['ds'].values:
    # Locate this date in the full series, then take the `lags` rows before it.
    date_index = demand_df.index[demand_df['ds'] == d][0]
    history = demand_df.iloc[date_index - lags:date_index]

    # Placeholder row for the date to predict; its `y` is left missing.
    entry = pd.DataFrame({'ds': [d], 'y': [np.nan]})
    future = pd.concat([history, entry], axis=0, ignore_index=True)

    # The prediction for `d` sits in the final row (index == lags).
    forecast = m.predict(df=future)
    valid_preds.append(forecast.loc[lags]['yhat1'])

In [None]:
# Pair every validation row with its walk-forward prediction in a new frame,
# leaving `df_valid` itself untouched.
df_valid_copy = df_valid.assign(yhat1=valid_preds)
df_valid_copy.head()  # not the last expression of the cell, so it is not rendered

# Overlay the predictions on the observed validation values; the title
# reports the RMSEs from the final row of `metrics5`.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(
    "Train RMSE: {:.2f} --- Validation RMSE: {:.2f}".format(
        metrics5.iloc[-1:].RMSE.values[0],
        metrics5.iloc[-1:].RMSE_val.values[0],
    )
)
ax.plot(df_valid_copy['ds'].dt.to_pydatetime(), df_valid_copy["y"], '.k', label='True Value')
ax.plot(df_valid_copy['ds'].dt.to_pydatetime(), df_valid_copy["yhat1"], label='Predicted Value')
ax.legend()
plt.show()
