In [1]:
import pandas as pd

df_path = "/home/jorge/data/awto_mle_challenge/data/wind_power_generation.csv"
df_path = "/home/jlortiz/awto_mle_challenge/data/wind_power_generation.csv"
df = pd.read_csv(df_path)

# Rename "Unnamed: 0" to "ds"
df.rename(columns={"Unnamed: 0": "ds"}, inplace=True)
df

Unnamed: 0,ds,ActivePower,AmbientTemperatue,BearingShaftTemperature,Blade1PitchAngle,Blade2PitchAngle,Blade3PitchAngle,ControlBoxTemperature,GearboxBearingTemperature,GearboxOilTemperature,...,GeneratorWinding2Temperature,HubTemperature,MainBoxTemperature,NacellePosition,ReactivePower,RotorRPM,TurbineStatus,WTG,WindDirection,WindSpeed
0,2017-12-31 00:00:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
1,2017-12-31 00:10:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
2,2017-12-31 00:20:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
3,2017-12-31 00:30:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
4,2017-12-31 00:40:00+00:00,,,,,,,,,,...,,,,,,,,G01,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118219,2020-03-30 23:10:00+00:00,70.044465,27.523741,45.711129,1.515669,1.950088,1.950088,0.0,59.821165,55.193793,...,58.148777,39.008931,36.476562,178.0,13.775785,9.234004,2.0,G01,178.0,3.533445
118220,2020-03-30 23:20:00+00:00,40.833474,27.602882,45.598573,1.702809,2.136732,2.136732,0.0,59.142038,54.798545,...,57.550367,39.006759,36.328125,178.0,8.088928,9.229370,2.0,G01,178.0,3.261231
118221,2020-03-30 23:30:00+00:00,20.777790,27.560925,45.462045,1.706214,2.139664,2.139664,0.0,58.439439,54.380456,...,57.099335,39.003815,36.131944,178.0,4.355978,9.236802,2.0,G01,178.0,3.331839
118222,2020-03-30 23:40:00+00:00,62.091039,27.810472,45.343827,1.575352,2.009781,2.009781,0.0,58.205413,54.079014,...,56.847239,39.003815,36.007805,190.0,12.018077,9.237374,2.0,G01,190.0,3.284468


In [2]:
# Preprocess

# Convert ds to datetime
df["ds"] = pd.to_datetime(df["ds"]).dt.tz_localize(None)


# Remove all NaN in ActivePower column
df = df.dropna(subset=["ActivePower"])

# Convert NaN in another columns to 0
#df = df.fillna(0)

# Delete WTG column
df = df.drop(columns=["WTG"])

df.reset_index(drop=True, inplace=True)

In [3]:
import numpy as np
import matplotlib.pyplot as plt

from darts import TimeSeries

series = TimeSeries.from_dataframe(df, 
                                   time_col="ds", 
                                   value_cols=df.columns.tolist()[1:], 
                                   fill_missing_dates=True,
                                   freq = "10T", #10 minutes
                                   fillna_value = 0,
                                   )

# Split the series into train and eval
train, val = series.split_before(0.8)


In [4]:
from darts.metrics import mape, mae, rmse

models = []

In [5]:
import optuna
import pandas as pd
from darts.models import TCNModel
from darts.metrics import mae, rmse
from tqdm import tqdm
import torch

results = []
# Define the objective function to optimize
def objective(trial):
    # Define the hyperparameters to search over
    input_chunk_length = trial.suggest_int('input_chunk_length', 10, 100)
    num_layers = trial.suggest_int('num_layers', 1, 10)
    num_filters = trial.suggest_int('num_filters', 8, 256)
    n_epochs = trial.suggest_int('n_epochs', 5, 20) # 5 y 20
    dropout = trial.suggest_uniform('dropout', 0.0, 0.5)

    # Create the TCN model with the current hyperparameters
    model = TCNModel(input_chunk_length=input_chunk_length,
                     output_chunk_length=1,
                     num_layers=num_layers,
                     num_filters=num_filters,
                     n_epochs=n_epochs,
                     dropout=dropout,
                     random_state=13,
                     optimizer_cls = torch.optim.Adam,
                     optimizer_kwargs={"lr": 1e-3},
                     pl_trainer_kwargs={"accelerator": "gpu", "devices": [0]},
                     batch_size=1024*10)

    # Fit the model on the training set
    model.fit(train)

    # Make predictions on the validation set
    pred_val = model.predict(len(val), verbose=True)

    # Evaluate the model's performance using root mean squared error
    rmse_score = rmse(val["ActivePower"], pred_val["ActivePower"])
    
    results_d = {"input_chunk_length": input_chunk_length,
                    "num_layers": num_layers,
                    "num_filters": num_filters,
                    "n_epochs": n_epochs,
                    "dropout": dropout,
                    "rmse_score": rmse_score}
    results.append(results_d)
    pd.DataFrame(results).to_csv("hyperparameters_results.csv")
    return rmse_score

# Set up the optuna study
study = optuna.create_study(direction='minimize')

# Run the optimization
study.optimize(objective, n_trials=5, show_progress_bar=True)



[32m[I 2023-03-15 22:21:39,302][0m A new study created in memory with name: no-name-48602839-84ba-4ea7-8c8a-405ce5efd3ed[0m
  self._init_valid()


  0%|          | 0/5 [00:00<?, ?it/s]

  dropout = trial.suggest_uniform('dropout', 0.0, 0.5)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type              | Params
----------------------------------------------------
0 | criterion     | MSELoss           | 0     
1 | train_metrics | MetricCollection  | 0     
2 | val_metrics   | MetricCollection  | 0     
3 | dropout       | MonteCarloDropout | 0     
4 | res_blocks    | ModuleList        | 173 K 
----------------------------------------------------
173 K     Trainable params
0         Non-trainable params
173 K     Total params
1.389     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

In [None]:
# Create dataframe with the results of the optimization
results = study.trials_dataframe()

# Rename "value" to "rmse"
results.rename(columns={"value": "rmse"}, inplace=True)
results.to_csv("hyperparameters_results_final.csv")
results

Unnamed: 0,number,rmse,datetime_start,datetime_complete,duration,params_dropout,params_input_chunk_length,params_n_epochs,params_num_filters,params_num_layers,state
0,0,608.4175,2023-03-15 21:53:40.169897,2023-03-15 21:55:34.722394,0 days 00:01:54.552497,0.280719,38,1,87,2,COMPLETE
1,1,inf,2023-03-15 21:55:34.724483,2023-03-15 21:56:40.637841,0 days 00:01:05.913358,0.28645,14,2,88,1,COMPLETE
2,2,608.6522,2023-03-15 21:56:40.639706,2023-03-15 21:57:59.621599,0 days 00:01:18.981893,0.050705,47,2,24,2,COMPLETE
3,3,608.3976,2023-03-15 21:57:59.624811,2023-03-15 21:59:36.690343,0 days 00:01:37.065532,0.42215,18,2,82,2,COMPLETE
4,4,6.070036e+36,2023-03-15 21:59:36.699557,2023-03-15 22:00:49.626346,0 days 00:01:12.926789,0.238248,28,1,78,2,COMPLETE
