In [1]:
import os
import sys

# Make the sibling "train" scripts directory importable.
# Only the LAST "experiments" occurrence in the working directory is swapped for
# "train", so an ancestor folder whose name also contains "experiments" is left
# untouched. If the marker is absent, the working directory is used as-is.
current_dir = os.getcwd()
head, marker, tail = current_dir.rpartition("experiments")
scripts_dir = head + "train" + tail if marker else current_dir
# Guard the append so re-running this cell does not pile up duplicate entries.
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)

# Load the preprocessed data through the project's own modules, which are
# resolved via the sys.path entry added above.
from config import DATA_PATH
from preprocess import DataPreprocessor
preprocessor = DataPreprocessor(DATA_PATH)
df = preprocessor.preprocess()
df


Unnamed: 0,ds,ActivePower,AmbientTemperatue,BearingShaftTemperature,Blade1PitchAngle,Blade2PitchAngle,ControlBoxTemperature,GearboxBearingTemperature,GearboxOilTemperature,GeneratorRPM,GeneratorWinding1Temperature,GeneratorWinding2Temperature,HubTemperature,MainBoxTemperature,NacellePosition,ReactivePower,RotorRPM,TurbineStatus,WindSpeed
0,2018-01-01 00:00:00,-5.357727,23.148729,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,8.000000,-9.960830,0.000000,0.0,2.279088
1,2018-01-01 00:10:00,-5.822360,23.039754,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,300.428571,-9.628441,0.000000,0.0,2.339343
2,2018-01-01 00:20:00,-5.279409,22.948703,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,340.000000,-9.491235,0.000000,0.0,2.455610
3,2018-01-01 00:30:00,-4.648054,22.966851,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,345.000000,-9.856136,0.000000,0.0,2.026754
4,2018-01-01 00:40:00,-4.684632,22.936520,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,345.000000,-9.745593,0.000000,0.0,1.831420
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94745,2020-03-30 23:10:00,70.044465,27.523741,45.711129,1.515669,1.950088,0.0,59.821165,55.193793,1029.870744,59.060367,58.148777,39.008931,36.476562,178.000000,13.775785,9.234004,2.0,3.533445
94746,2020-03-30 23:20:00,40.833474,27.602882,45.598573,1.702809,2.136732,0.0,59.142038,54.798545,1030.160478,58.452003,57.550367,39.006759,36.328125,178.000000,8.088928,9.229370,2.0,3.261231
94747,2020-03-30 23:30:00,20.777790,27.560925,45.462045,1.706214,2.139664,0.0,58.439439,54.380456,1030.137822,58.034071,57.099335,39.003815,36.131944,178.000000,4.355978,9.236802,2.0,3.331839
94748,2020-03-30 23:40:00,62.091039,27.810472,45.343827,1.575352,2.009781,0.0,58.205413,54.079014,1030.178178,57.795387,56.847239,39.003815,36.007805,190.000000,12.018077,9.237374,2.0,3.284468


In [3]:
import numpy as np
import matplotlib.pyplot as plt
from darts import TimeSeries

# Build a multivariate darts TimeSeries on a regular 10-minute grid.
time_col = "ds"
# Pick the value columns by name instead of by position, so the cell keeps
# working even if the timestamp column is not the first one in the frame.
value_cols = [col for col in df.columns if col != time_col]

series = TimeSeries.from_dataframe(
    df,
    time_col=time_col,
    value_cols=value_cols,
    fill_missing_dates=True,
    # 10 minutes. "min" is used because the "T" alias is deprecated in recent pandas.
    freq="10min",
    # NOTE(review): gaps are filled with 0 for every column, temperatures
    # included - confirm that 0 is the intended placeholder.
    fillna_value=0,
)



In [4]:
# Chronological hold-out: everything before the 0.8 mark of the series goes to
# `train`, the remainder to `val`.
split_point = 0.8
train, val = series.split_before(split_point)


## Train Models

In [5]:
from darts.metrics import mape, mae, rmse

# Accumulator for per-model result dicts ("model", "mae", "rmse"); each
# training cell below appends one entry, and the summary cell tabulates them.
models: list = []

### TCNModel

TCNModel stands for Temporal Convolutional Network Model. It is a deep learning model that was introduced in a 2018 paper by Bai et al. (https://arxiv.org/abs/1803.01271). The model is designed for sequence-to-sequence tasks, such as time series forecasting.


In [6]:

# Create model
from darts.models import TCNModel
from darts.metrics import mae, rmse
import torch

# Train on the first GPU when CUDA is available; otherwise fall back to the CPU
# so the cell still runs on machines without a GPU.
if torch.cuda.is_available():
    trainer_kwargs = {"accelerator": "gpu", "devices": [0]}
else:
    trainer_kwargs = {"accelerator": "cpu"}

model = TCNModel(input_chunk_length=24,
                 output_chunk_length=1,
                 n_epochs=10,
                 num_layers=2,
                 num_filters=64,
                 dropout=0.05,
                 random_state=13,
                 optimizer_cls=torch.optim.Adam,
                 pl_trainer_kwargs=trainer_kwargs,
                 batch_size=1024,
                 )

# Fit model on training set
model.fit(train)

# Forecast as many steps as the validation set holds, starting right after train.
# NOTE(review): with output_chunk_length=1 this is presumably one long recursive
# roll-out rather than one-step-ahead forecasting - confirm that this is the
# evaluation protocol you want.
pred_val = model.predict(len(val), verbose=True)

# Evaluate the ActivePower forecast with MAE and RMSE
model_metrics = {
    "model": "TCN",
    "mae": mae(val["ActivePower"], pred_val["ActivePower"]),
    "rmse": rmse(val["ActivePower"], pred_val["ActivePower"]),
}

print(model_metrics)

# Record the metrics, replacing any earlier entry for this model so that
# re-running the cell does not add a duplicate row to the summary.
models[:] = [entry for entry in models if entry["model"] != model_metrics["model"]]
models.append(model_metrics)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type              | Params
----------------------------------------------------
0 | criterion     | MSELoss           | 0     
1 | train_metrics | MetricCollection  | 0     
2 | val_metrics   | MetricCollection  | 0     
3 | dropout       | MonteCarloDropout | 0     
4 | res_blocks    | ModuleList        | 35.1 K
----------------------------------------------------
35.1 K    Trainable params
0         Non-trainable params


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting: 0it [00:00, ?it/s]

{'model': 'TCN', 'mae': 347.7334376628143, 'rmse': 505.10024259982885}


### Extreme Gradient Boosting

XGBModel is a machine learning model that uses the XGBoost algorithm for multivariate time series forecasting. The XGBoost algorithm is a powerful and scalable gradient boosting framework that is widely used for machine learning tasks such as classification, regression, and ranking.

One paper that describes the use of XGBModel for multivariate time series forecasting is "A Time Series Forecasting Method Based on XGBoost for Wind Power Generation" by Y. Wu, et al. (2019). In this paper, the authors apply XGBModel to forecast wind power generation using a combination of weather data and historical energy production data. They compare the performance of XGBModel with other popular time series forecasting methods and show that XGBModel outperforms these methods in terms of accuracy and efficiency.

In [7]:
from darts.models import XGBModel

# Create model: the previous 24 time steps are used as lagged features
model = XGBModel(lags=24, random_state=13)

# Fit model on training set
model.fit(train)

# Forecast as many steps as the validation set holds, starting right after train.
# NOTE(review): this is presumably one long recursive roll-out rather than
# one-step-ahead forecasting - confirm that this is the evaluation protocol you want.
pred_val = model.predict(len(val), verbose=True)

# Evaluate the ActivePower forecast with MAE and RMSE
model_metrics = {
    "model": "XGBModel",
    "mae": mae(val["ActivePower"], pred_val["ActivePower"]),
    "rmse": rmse(val["ActivePower"], pred_val["ActivePower"]),
}

print(model_metrics)

# Record the metrics, replacing any earlier entry for this model so that
# re-running the cell does not add a duplicate row to the summary.
models[:] = [entry for entry in models if entry["model"] != model_metrics["model"]]
models.append(model_metrics)





{'model': 'XGBModel', 'mae': 668.6332798806518, 'rmse': 774.6159428496971}


### Recurrent Neural Networks

Recurrent Neural Networks (RNN) for multivariate Time Series Forecasting are a type of deep learning model specifically designed for predicting future values in a time series dataset. 

One widely cited survey on this topic is "Deep Learning for Time-Series Analysis" by J. C. B. Gamboa, published as an arXiv preprint in 2017 (https://arxiv.org/abs/1701.01887). The paper gives an overview of deep learning techniques for time-series problems, including recurrent architectures, and discusses how such models are applied to forecasting future values in time series datasets.

In [8]:
from darts.models import RNNModel
import torch  # imported here so the cell does not depend on the TCN cell having run

# Train on the first GPU when CUDA is available; otherwise fall back to the CPU
# so the cell still runs on machines without a GPU.
if torch.cuda.is_available():
    trainer_kwargs = {"accelerator": "gpu", "devices": [0]}
else:
    trainer_kwargs = {"accelerator": "cpu"}

# Create model (vanilla RNN cells)
model = RNNModel(input_chunk_length=24,
                 model="RNN",
                 output_chunk_length=1,
                 n_epochs=10,
                 n_rnn_layers=2,
                 dropout=0.05,
                 random_state=13,
                 optimizer_cls=torch.optim.Adam,
                 pl_trainer_kwargs=trainer_kwargs,
                 batch_size=1024*3,
                 )

# Fit model on training set
model.fit(train)

# Forecast as many steps as the validation set holds, starting right after train.
# NOTE(review): this is presumably one long recursive roll-out rather than
# one-step-ahead forecasting - confirm that this is the evaluation protocol you want.
pred_val = model.predict(len(val), verbose=True)

# Evaluate the ActivePower forecast with MAE and RMSE
model_metrics = {
    "model": "RNNModel",
    "mae": mae(val["ActivePower"], pred_val["ActivePower"]),
    "rmse": rmse(val["ActivePower"], pred_val["ActivePower"]),
}

print(model_metrics)

# Record the metrics, replacing any earlier entry for this model so that
# re-running the cell does not add a duplicate row to the summary.
models[:] = [entry for entry in models if entry["model"] != model_metrics["model"]]
models.append(model_metrics)



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | RNN              | 2.5 K 
4 | V             | Linear           | 520   
---------------------------------------------------
3.0 K     Trainable params
0         Non-trainable params
3.0 K   

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting: 0it [00:00, ?it/s]

{'model': 'RNNModel', 'mae': 403.15647126109354, 'rmse': 602.1093301671859}


### LSTM

In [9]:
from darts.models import RNNModel
import torch  # imported here so the cell does not depend on the TCN cell having run

# Train on the first GPU when CUDA is available; otherwise fall back to the CPU
# so the cell still runs on machines without a GPU.
if torch.cuda.is_available():
    trainer_kwargs = {"accelerator": "gpu", "devices": [0]}
else:
    trainer_kwargs = {"accelerator": "cpu"}

# Create model (LSTM cells)
model = RNNModel(input_chunk_length=24,
                 model="LSTM",
                 output_chunk_length=1,
                 n_epochs=10,
                 n_rnn_layers=2,
                 dropout=0.05,
                 random_state=13,
                 optimizer_cls=torch.optim.Adam,
                 pl_trainer_kwargs=trainer_kwargs,
                 batch_size=1024*3,
                 )

# Fit model on training set
model.fit(train)

# Forecast as many steps as the validation set holds, starting right after train.
# NOTE(review): this is presumably one long recursive roll-out rather than
# one-step-ahead forecasting - confirm that this is the evaluation protocol you want.
pred_val = model.predict(len(val), verbose=True)

# Evaluate the ActivePower forecast with MAE and RMSE
model_metrics = {
    "model": "LSTM",
    "mae": mae(val["ActivePower"], pred_val["ActivePower"]),
    "rmse": rmse(val["ActivePower"], pred_val["ActivePower"]),
}

print(model_metrics)

# Record the metrics, replacing any earlier entry for this model so that
# re-running the cell does not add a duplicate row to the summary.
models[:] = [entry for entry in models if entry["model"] != model_metrics["model"]]
models.append(model_metrics)



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 9.9 K 
4 | V             | Linear           | 520   
---------------------------------------------------
10.4 K    Trainable params
0         Non-trainable params
10.4 K  

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting: 0it [00:00, ?it/s]

{'model': 'LSTM', 'mae': 403.18950909677, 'rmse': 602.1550562127413}


In [10]:
from darts.models import RNNModel
import torch  # imported here so the cell does not depend on the TCN cell having run

# Train on the first GPU when CUDA is available; otherwise fall back to the CPU
# so the cell still runs on machines without a GPU.
if torch.cuda.is_available():
    trainer_kwargs = {"accelerator": "gpu", "devices": [0]}
else:
    trainer_kwargs = {"accelerator": "cpu"}

# Create model (GRU cells)
model = RNNModel(input_chunk_length=24,
                 model="GRU",
                 output_chunk_length=1,
                 n_epochs=10,
                 n_rnn_layers=2,
                 dropout=0.05,
                 random_state=13,
                 optimizer_cls=torch.optim.Adam,
                 pl_trainer_kwargs=trainer_kwargs,
                 batch_size=1024*3,
                 )

# Fit model on training set
model.fit(train)

# Forecast as many steps as the validation set holds, starting right after train.
# NOTE(review): this is presumably one long recursive roll-out rather than
# one-step-ahead forecasting - confirm that this is the evaluation protocol you want.
pred_val = model.predict(len(val), verbose=True)

# Evaluate the ActivePower forecast with MAE and RMSE
model_metrics = {
    "model": "GRU",
    "mae": mae(val["ActivePower"], pred_val["ActivePower"]),
    "rmse": rmse(val["ActivePower"], pred_val["ActivePower"]),
}

print(model_metrics)

# Record the metrics, replacing any earlier entry for this model so that
# re-running the cell does not add a duplicate row to the summary.
models[:] = [entry for entry in models if entry["model"] != model_metrics["model"]]
models.append(model_metrics)



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | GRU              | 7.4 K 
4 | V             | Linear           | 520   
---------------------------------------------------
7.9 K     Trainable params
0         Non-trainable params
7.9 K   

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting: 0it [00:00, ?it/s]

{'model': 'GRU', 'mae': 415.7205452484341, 'rmse': 615.3084142992636}


## Summary

In [11]:
# Summary table: one row per entry in `models`, with its MAE and RMSE on the
# validation set. pandas is imported here because no earlier cell in the
# notebook brings `pd` into scope.
import pandas as pd

pd.DataFrame(models)

Unnamed: 0,model,mae,rmse
0,TCN,347.733438,505.100243
1,XGBModel,668.63328,774.615943
2,RNNModel,403.156471,602.10933
3,LSTM,403.189509,602.155056
4,GRU,415.720545,615.308414
