In [1]:
import pandas as pd

# Location of the raw wind-turbine CSV export.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
df_path = "/home/jorge/data/awto_mle_challenge/data/wind_power_generation.csv"

# pandas labels the CSV's header-less column "Unnamed: 0"; expose it as "ds",
# the name the later cells use for the timestamp column.
df = pd.read_csv(df_path).rename(columns={"Unnamed: 0": "ds"})
df

Unnamed: 0,ds,ActivePower,AmbientTemperatue,BearingShaftTemperature,Blade1PitchAngle,Blade2PitchAngle,Blade3PitchAngle,ControlBoxTemperature,GearboxBearingTemperature,GearboxOilTemperature,...,GeneratorWinding2Temperature,HubTemperature,MainBoxTemperature,NacellePosition,ReactivePower,RotorRPM,TurbineStatus,WTG,WindDirection,WindSpeed
0,2017-12-31 00:00:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
1,2017-12-31 00:10:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
2,2017-12-31 00:20:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
3,2017-12-31 00:30:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
4,2017-12-31 00:40:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118219,2020-03-30 23:10:00+00:00,70.044465,27.523741,45.711129,1.515669,1.950088,1.950088,0.0,59.821165,55.193793,...,58.148777,39.008931,36.476562,178.0,13.775785,9.234004,2.0,G01,178.0,3.533445
118220,2020-03-30 23:20:00+00:00,40.833474,27.602882,45.598573,1.702809,2.136732,2.136732,0.0,59.142038,54.798545,...,57.550367,39.006759,36.328125,178.0,8.088928,9.229370,2.0,G01,178.0,3.261231
118221,2020-03-30 23:30:00+00:00,20.777790,27.560925,45.462045,1.706214,2.139664,2.139664,0.0,58.439439,54.380456,...,57.099335,39.003815,36.131944,178.0,4.355978,9.236802,2.0,G01,178.0,3.331839
118222,2020-03-30 23:40:00+00:00,62.091039,27.810472,45.343827,1.575352,2.009781,2.009781,0.0,58.205413,54.079014,...,56.847239,39.003815,36.007805,190.0,12.018077,9.237374,2.0,G01,190.0,3.284468


In [2]:
# Preprocess
#
# The cleaning steps are expressed as one chain that builds a new frame instead
# of mutating columns in place, so re-running this cell is safe.
df = (
    df
    # Parse "ds" as datetimes and strip the timezone (tz-naive timestamps).
    .assign(ds=lambda frame: pd.to_datetime(frame["ds"]).dt.tz_localize(None))
    # Keep only the rows where the target, ActivePower, is present.
    .dropna(subset=["ActivePower"])
    # Remove the WTG column. errors="ignore" makes a second run of this cell a
    # no-op instead of a KeyError once the column is already gone.
    .drop(columns=["WTG"], errors="ignore")
    # Renumber the rows 0..n-1 after the row removal above.
    .reset_index(drop=True)
)

In [3]:
import numpy as np
import matplotlib.pyplot as plt

from darts import TimeSeries

# Every column except the timestamp becomes a component of the series.
# Selecting by name (rather than slicing off position 0) stays correct even if
# "ds" is not the first column of the frame.
value_columns = [column for column in df.columns if column != "ds"]

series = TimeSeries.from_dataframe(
    df,
    time_col="ds",
    value_cols=value_columns,
    # Re-insert the timestamps that are absent from the frame so the series
    # has a regular 10-minute index.
    fill_missing_dates=True,
    # 10 minutes. "min" is the supported pandas alias; "T" is deprecated in
    # recent pandas releases.
    freq="10min",
    # NOTE(review): missing values (including the re-inserted rows) become 0
    # for every component, the target included - confirm this is intended.
    fillna_value=0,
)



In [4]:
# Chronological hold-out: everything before the 80% mark of the series is used
# for training, the remainder for validation.
TRAIN_FRACTION = 0.8
train, val = series.split_before(TRAIN_FRACTION)


## Train Models

In [5]:
from darts.metrics import mape, mae, rmse

# Accumulates one metrics dictionary per trained model (filled by later cells).
models = list()

### TCNModel

TCNModel is an implementation of the Temporal Convolutional Network (TCN), a deep learning architecture evaluated for sequence modeling in a 2018 paper by Bai et al. (https://arxiv.org/abs/1803.01271). The model is designed for sequence-to-sequence tasks, such as time series forecasting.


In [6]:

# Create model
from darts.models import TCNModel
from darts.metrics import mae

# 24 input steps (4 hours at the 10-minute frequency) -> 1 predicted step.
# random_state fixes the seed so that training is reproducible.
model = TCNModel(input_chunk_length=24, output_chunk_length=1, n_epochs=10, random_state=13)

# Fit model on training set (every component of `train` is a target)
model.fit(train)

# Forecast as many steps as the validation set contains
pred_val = model.predict(len(val), verbose=True)

# Evaluate the forecast on the ActivePower component with MAE, MAPE and RMSE
actual = val["ActivePower"]
forecast = pred_val["ActivePower"]

# darts' mape raises ValueError when the actual series contains zeros, which
# zero-filled gaps (fillna_value=0 when the series was built) can introduce.
# Record NaN in that case instead of aborting the whole comparison.
try:
    mape_score = mape(actual, forecast)
except ValueError:
    mape_score = float("nan")

model_metrics = {
    "model": "TCN",
    "mae": mae(actual, forecast),
    "mape": mape_score,
    "rmse": rmse(actual, forecast),
}

# Add model metrics to models list
models.append(model_metrics)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type              | Params
----------------------------------------------------
0 | criterion     | MSELoss           | 0     
1 | train_metrics | MetricCollection  | 0     
2 | val_metrics   | MetricCollection  | 0     
3 | dropout       | MonteCarloDropout | 0     
4 | res_blocks    | ModuleList        | 646   
----------------------------------------------------
646       Trainable params
0         Non-trainable params
646       Total params
0.003     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

### ExponentialSmoothing

Exponential Smoothing is a well-known time series forecasting method that uses an exponentially-weighted moving average of past observations to make predictions about future values. The method was first described in 1956 by Robert G. Brown ("Exponential Smoothing for Predicting Demand"), and has since become popular for time series forecasting due to its simplicity and effectiveness for short-term forecasts.


In [None]:
from darts.models import ExponentialSmoothing
from darts.metrics import mae

# Create model
# NOTE(review): with default arguments the seasonal period is derived from the
# series frequency; if that fails for 10-minute data, pass seasonal_periods
# explicitly (e.g. 144 for a daily cycle) - confirm.
model = ExponentialSmoothing()

# Fit model on training set.
# ExponentialSmoothing only supports univariate series, so it is trained on
# the ActivePower component alone rather than on the full multivariate `train`.
model.fit(train["ActivePower"])

# Forecast as many steps as the validation set contains
pred_val = model.predict(len(val), verbose=True)

# Evaluate the forecast on the ActivePower component with MAE, MAPE and RMSE
actual = val["ActivePower"]
forecast = pred_val["ActivePower"]

# darts' mape raises ValueError when the actual series contains zeros, which
# zero-filled gaps (fillna_value=0 when the series was built) can introduce.
# Record NaN in that case instead of aborting the whole comparison.
try:
    mape_score = mape(actual, forecast)
except ValueError:
    mape_score = float("nan")

model_metrics = {
    "model": "Exponential Smoothing",
    "mae": mae(actual, forecast),
    "mape": mape_score,
    "rmse": rmse(actual, forecast),
}

# Add model metrics to models list
models.append(model_metrics)


### AutoArima 

AutoARIMA is a forecasting method that automatically selects the best ARIMA model for a given univariate time series. It extends the classical ARIMA model by searching over candidate model orders instead of requiring them to be chosen by hand.

In [None]:
from darts.models import AutoARIMA

# Create model
model = AutoARIMA()

# Fit model on training set.
# AutoARIMA forecasts a single (univariate) target, so it is trained on the
# ActivePower component alone rather than on the full multivariate `train`.
model.fit(train["ActivePower"])

# Forecast as many steps as the validation set contains
pred_val = model.predict(len(val), verbose=True)

# Evaluate the forecast on the ActivePower component with MAE, MAPE and RMSE
actual = val["ActivePower"]
forecast = pred_val["ActivePower"]

# darts' mape raises ValueError when the actual series contains zeros, which
# zero-filled gaps (fillna_value=0 when the series was built) can introduce.
# Record NaN in that case instead of aborting the whole comparison.
try:
    mape_score = mape(actual, forecast)
except ValueError:
    mape_score = float("nan")

model_metrics = {
    "model": "AutoArima",
    "mae": mae(actual, forecast),
    "mape": mape_score,
    "rmse": rmse(actual, forecast),
}

# Add model metrics to models list
models.append(model_metrics)


In [None]:
from darts.models import XGBModel

# Create model
# darts regression models need at least one lag specification; without it the
# constructor raises. 24 lags (4 hours at the 10-minute frequency) mirrors the
# input window used for the TCN model.
model = XGBModel(lags=24)

# Fit model on training set (every component of `train` is a target)
model.fit(train)

# Forecast as many steps as the validation set contains
pred_val = model.predict(len(val), verbose=True)

# Evaluate the forecast on the ActivePower component with MAE, MAPE and RMSE
actual = val["ActivePower"]
forecast = pred_val["ActivePower"]

# darts' mape raises ValueError when the actual series contains zeros, which
# zero-filled gaps (fillna_value=0 when the series was built) can introduce.
# Record NaN in that case instead of aborting the whole comparison.
try:
    mape_score = mape(actual, forecast)
except ValueError:
    mape_score = float("nan")

model_metrics = {
    "model": "XGBModel",
    "mae": mae(actual, forecast),
    "mape": mape_score,
    "rmse": rmse(actual, forecast),
}

# Add model metrics to models list
models.append(model_metrics)



In [None]:
from darts.models import CatBoostModel

# Create model
# darts regression models need at least one lag specification; without it the
# constructor raises. 24 lags (4 hours at the 10-minute frequency) mirrors the
# input window used for the TCN model.
model = CatBoostModel(lags=24)

# Fit model on training set (every component of `train` is a target)
model.fit(train)

# Forecast as many steps as the validation set contains
pred_val = model.predict(len(val), verbose=True)

# Evaluate the forecast on the ActivePower component with MAE, MAPE and RMSE
actual = val["ActivePower"]
forecast = pred_val["ActivePower"]

# darts' mape raises ValueError when the actual series contains zeros, which
# zero-filled gaps (fillna_value=0 when the series was built) can introduce.
# Record NaN in that case instead of aborting the whole comparison.
try:
    mape_score = mape(actual, forecast)
except ValueError:
    mape_score = float("nan")

model_metrics = {
    "model": "CatBoostModel",
    "mae": mae(actual, forecast),
    "mape": mape_score,
    "rmse": rmse(actual, forecast),
}

# Add model metrics to models list
models.append(model_metrics)



In [None]:
from darts.models import Prophet

# Create model
model = Prophet()

# Fit model on training set.
# Prophet forecasts a single (univariate) target, so it is trained on the
# ActivePower component alone rather than on the full multivariate `train`.
model.fit(train["ActivePower"])

# Forecast as many steps as the validation set contains
pred_val = model.predict(len(val), verbose=True)

# Evaluate the forecast on the ActivePower component with MAE, MAPE and RMSE
actual = val["ActivePower"]
forecast = pred_val["ActivePower"]

# darts' mape raises ValueError when the actual series contains zeros, which
# zero-filled gaps (fillna_value=0 when the series was built) can introduce.
# Record NaN in that case instead of aborting the whole comparison.
try:
    mape_score = mape(actual, forecast)
except ValueError:
    mape_score = float("nan")

model_metrics = {
    "model": "Prophet",
    "mae": mae(actual, forecast),
    "mape": mape_score,
    "rmse": rmse(actual, forecast),
}

# Add model metrics to models list
models.append(model_metrics)



In [None]:
from darts.models import RNNModel

# Create model
# input_chunk_length is a required argument of RNNModel; calling the
# constructor without it raises a TypeError. The window, epoch count and seed
# mirror the TCN model so that the two neural models are trained comparably.
model = RNNModel(input_chunk_length=24, n_epochs=10, random_state=13)

# Fit model on training set (every component of `train` is a target)
model.fit(train)

# Forecast as many steps as the validation set contains
pred_val = model.predict(len(val), verbose=True)

# Evaluate the forecast on the ActivePower component with MAE, MAPE and RMSE
actual = val["ActivePower"]
forecast = pred_val["ActivePower"]

# darts' mape raises ValueError when the actual series contains zeros, which
# zero-filled gaps (fillna_value=0 when the series was built) can introduce.
# Record NaN in that case instead of aborting the whole comparison.
try:
    mape_score = mape(actual, forecast)
except ValueError:
    mape_score = float("nan")

model_metrics = {
    "model": "RNNModel",
    "mae": mae(actual, forecast),
    "mape": mape_score,
    "rmse": rmse(actual, forecast),
}

# Add model metrics to models list
models.append(model_metrics)



In [None]:
# Gather the per-model metric dictionaries into one comparison table:
# one row per model, one column per recorded key.
metrics_table = pd.DataFrame(models)
metrics_table