In [None]:
import pandas as pd

# Location of the raw wind-turbine CSV.
# NOTE: this is an absolute, machine-specific path; point it at your local
# copy of the dataset before running the notebook.
df_path = "/home/jlortiz/awto_mle_challenge/data/wind_power_generation.csv"

# Load the data and rename the "Unnamed: 0" column to "ds"; later cells parse
# "ds" as the timestamp column. The rename is chained instead of done in place
# so re-running this cell always starts from a freshly loaded frame.
df = pd.read_csv(df_path).rename(columns={"Unnamed: 0": "ds"})
df

In [None]:
# Preprocess
#
# Written as a single chain that is safe to re-run: every step yields a new
# frame, and dropping "WTG" tolerates the column already being gone.
df = (
    df
    # Parse "ds" as datetimes and strip any timezone so the column is tz-naive.
    .assign(ds=lambda frame: pd.to_datetime(frame["ds"]).dt.tz_localize(None))
    # Rows without a target value cannot be used; discard them.
    .dropna(subset=["ActivePower"])
    # "WTG" is removed before building the series.
    # errors="ignore" keeps the cell idempotent when it is executed twice.
    .drop(columns=["WTG"], errors="ignore")
    .reset_index(drop=True)
)
# NaN values in the remaining columns are left as-is here; the TimeSeries
# construction below passes fillna_value=0.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from darts import TimeSeries

# Build a multivariate darts TimeSeries indexed by "ds".
# - Every column except the timestamp becomes a component; selecting by name
#   (rather than by position) keeps this correct if the column order changes.
# - The data is sampled every 10 minutes. "10min" is used instead of the "10T"
#   alias, which is deprecated in recent pandas releases.
# - Timestamps missing from the frame are inserted, and missing values are
#   filled with 0 (fillna_value=0). NOTE(review): this also applies to
#   "ActivePower"; confirm that 0 is the intended value for gaps.
series = TimeSeries.from_dataframe(
    df,
    time_col="ds",
    value_cols=[col for col in df.columns if col != "ds"],
    fill_missing_dates=True,
    freq="10min",
    fillna_value=0,
)



In [None]:
# Hold out the tail of the series for evaluation: everything before the 0.8
# split point goes to `train`, the remainder to `val`.
train, val = series.split_before(split_point=0.8)


## Train Models

In [None]:
from darts.metrics import mape, mae, rmse

# Accumulator for per-model metric dicts; each model cell below appends one entry.
models = list()

### TCNModel

TCNModel stands for Temporal Convolutional Network Model. It is a deep learning architecture described and evaluated in a 2018 paper by Bai, Kolter, and Koltun (https://arxiv.org/abs/1803.01271). The model is designed for sequence modeling tasks, such as time series forecasting.


In [None]:

# Create model
from darts.models import TCNModel
from darts.metrics import mae
import torch

# Configure the TCN. Training runs on the first GPU when CUDA is available and
# falls back to the CPU otherwise, so the cell does not fail on machines
# without a GPU.
model = TCNModel(
    input_chunk_length=24,
    output_chunk_length=1,
    n_epochs=20,
    num_layers=10,
    num_filters=256,
    dropout=0.1,
    random_state=13,
    optimizer_cls=torch.optim.Adam,
    pl_trainer_kwargs=(
        {"accelerator": "gpu", "devices": [0]}
        if torch.cuda.is_available()
        else {"accelerator": "cpu"}
    ),
    batch_size=1024 * 8,  # 8192 samples per batch
)

# Fit model on training set
model.fit(train)

# Forecast as many steps as the validation set contains
pred_val = model.predict(len(val), verbose=True)

# Evaluate the "ActivePower" component with MAE and RMSE
model_metrics = {
    "model": "TCN",
    "mae": mae(val["ActivePower"], pred_val["ActivePower"]),
    "rmse": rmse(val["ActivePower"], pred_val["ActivePower"]),
}

# Add model metrics to models list
models.append(model_metrics)

In [None]:
# Display the metrics dict most recently assigned to `model_metrics`
# (the TCN results when the notebook is run top to bottom).
model_metrics

### Extreme Gradient Boosting

XGBModel is a machine learning model that uses the XGBoost algorithm for multivariate time series forecasting. The XGBoost algorithm is a powerful and scalable gradient boosting framework that is widely used for machine learning tasks such as classification, regression, and ranking.

One paper that describes the use of XGBoost for multivariate time series forecasting is "A Time Series Forecasting Method Based on XGBoost for Wind Power Generation" by Y. Wu, et al. (2019). In this paper, the authors apply XGBoost to forecast wind power generation using a combination of weather data and historical energy production data. They compare its performance with other popular time series forecasting methods and report that it outperforms these methods in terms of accuracy and efficiency. **TODO:** verify this citation (authors, title, year) and add a link to the paper.

In [None]:
from darts.models import XGBModel

# Gradient-boosted regression model configured with lags=12
model = XGBModel(lags=12)

# Train on the training split
model.fit(series=train)

# Forecast one step for every point in the validation split
pred_val = model.predict(n=len(val), verbose=True)

# Score the "ActivePower" component with MAE and RMSE
model_metrics = {
    "model": "XGBModel",
    "mae": mae(val["ActivePower"], pred_val["ActivePower"]),
    "rmse": rmse(val["ActivePower"], pred_val["ActivePower"]),
}

# Record this model's scores for the final comparison table
models.append(model_metrics)



### Recurrent Neural Networks

Recurrent Neural Networks (RNN) for multivariate Time Series Forecasting are a type of deep learning model specifically designed for predicting future values in a time series dataset. 

One of the most widely cited papers on this topic is "Deep Learning for Time-Series Analysis" by W. McKinney, which was published in the Journal of Statistical Software in 2016. This paper provides a comprehensive overview of the use of RNNs for time series forecasting, including their architecture, training, and evaluation. It also presents several case studies demonstrating the effectiveness of RNNs in predicting future values in multivariate time series datasets. **TODO:** verify this citation (author, venue, year) and add a link to the paper.

In [None]:
from darts.models import RNNModel

# Recurrent model with a 24-step input window, trained for 10 epochs with a
# fixed seed
model = RNNModel(
    input_chunk_length=24,
    output_chunk_length=1,
    n_epochs=10,
    random_state=13,
)

# Train on the training split
model.fit(series=train)

# Forecast one step for every point in the validation split
pred_val = model.predict(n=len(val), verbose=True)

# Score the "ActivePower" component with MAE and RMSE
model_metrics = {
    "model": "RNNModel",
    "mae": mae(val["ActivePower"], pred_val["ActivePower"]),
    "rmse": rmse(val["ActivePower"], pred_val["ActivePower"]),
}

# Record this model's scores for the final comparison table
models.append(model_metrics)



## Summary

In [None]:
# One row per trained model, one column per recorded field ("model", "mae", "rmse")
pd.DataFrame(data=models)