In [6]:
import warnings
from copy import deepcopy

import pandas as pd
import wandb
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, mae
from darts.models import TCNModel

from pyprocessta.model.utils import split_data
# Columns of `df` that are selected into the measurement series `X`.
# Entries that are commented out are currently excluded from `X`; they are
# kept here so they can be re-enabled without looking the names up again.
MEAS_COLUMNS = [
    "TI-19",
    #      "FI-16",
    #     "TI-33",
    #     "FI-2",
    #     "FI-151",
    #     "TI-8",
    #     "FI-241",
    #  "valve-position-12",  # dry-bed
    #     "FI-38",  # stripper
    #     "PI-28",  # stripper
    #     "TI-28",  # stripper
    #      "FI-20",
    #     "FI-30",
    "TI-3",
    "FI-19",
    #     "FI-211",
    "FI-11",
    #     "TI-30",
    #     "PI-30",
    "TI-1213",
    #     "TI-4",
    #    "FI-23",
    #    "FI-20",
    #   "FI-20/FI-23",
    #    "TI-22",
    #    "delta_t",
    "TI-35",
    #     "delta_t_2"
]

# Columns of `df` that are selected into the target series `Y`.
TARGETS_clean = ["2-Amino-2-methylpropanol C4H11NO", "Piperazine C4H10N2"]


# Load the cleaned data frame and derive two scaled darts series from it:
# `X` (measurement columns) and `Y` (target columns). Each series is scaled by
# its own Scaler instance so both fitted transformers remain available.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# from a trusted source.
df = pd.read_pickle("../paper/20210624_df_cleaned.pkl")

transformer = Scaler()
y_transformer = Scaler()

X = transformer.fit_transform(
    TimeSeries.from_dataframe(df, value_cols=MEAS_COLUMNS)
)
Y = y_transformer.fit_transform(
    TimeSeries.from_dataframe(df, value_cols=TARGETS_clean)
)

def get_data(num_outputs):
    """Split the module-level series ``X`` and ``Y`` into three parts.

    Args:
        num_outputs: Selector for the target columns. A value of ``1`` hands
            every entry of ``TARGETS_clean`` to ``split_data``; any other
            value hands over only the first entry.
            NOTE(review): this mapping looks inverted relative to the
            parameter name -- confirm the intent before relying on it.

    Returns:
        The ``(train, valid, test)`` tuple obtained from
        ``split_data(X, Y, targets, 0.5)``.
    """
    if num_outputs == 1:
        selected_targets = TARGETS_clean
    else:
        selected_targets = [TARGETS_clean[0]]

    train_part, valid_part, test_part = split_data(X, Y, selected_targets, 0.5)
    return train_part, valid_part, test_part


   

In [8]:
print("get data")
# Only the train and validation splits are used in this cell; the third
# element returned by get_data is discarded.
train, valid, _ = get_data(0)

print("initialize model")
# Fixed seed so that repeated runs of this cell start from the same random
# state (darts documents `random_state` as controlling the randomness of the
# weight initialisation). Without it every run trains a different model.
SEED = 42

# The trailing comments name the `run.config` entry that each hard-coded value
# stands in for.
model_cov = TCNModel(
    input_chunk_length=60,  # run.config.input_chunk_length,
    output_chunk_length=30,
    num_layers=8,  # run.config.num_layers,
    num_filters=16,  # run.config.num_filters,
    kernel_size=4,  # run.config.kernel_size,
    dropout=0.1,  # run.config.dropout,
    weight_norm=True,  # run.config.weight_norm,
    batch_size=32,  # run.config.batch_size,
    n_epochs=100,  # run.config.n_epochs,
    log_tensorboard=False,
    optimizer_kwargs={"lr": 1e-3},  # run.config.lr},
    random_state=SEED,
)

print("fit")

# train[1] is fitted as the target series, train[0] is supplied as past covariates.
model_cov.fit(series=train[1], past_covariates=train[0], verbose=False)

# Both back-tests use identical settings, so they are defined once. The
# already fitted model is reused (retrain=False) with a 30-step horizon and a
# stride of one step.
backtest_settings = dict(
    start=0.1,
    forecast_horizon=30,
    stride=1,
    retrain=False,
    verbose=False,
)

print("historical forecast train set")
backtest_train = model_cov.historical_forecasts(
    train[1], past_covariates=train[0], **backtest_settings
)

print("historical forecast valid")
backtest_valid = model_cov.historical_forecasts(
    valid[1], past_covariates=valid[0], **backtest_settings
)

print("getting scores")
print(backtest_valid)

# The recorded output of this cell shows forecasts that are entirely NaN and a
# NaN MAPE, accompanied only by low-level numpy warnings. Flag NaN forecasts
# explicitly so that a meaningless score is not mistaken for a result.
# NOTE(review): the root cause is not visible here -- check the input frame
# for NaNs and the training loss for divergence.
for split_name, forecast in (("train", backtest_train), ("valid", backtest_valid)):
    if pd.isna(forecast.values()).any():
        warnings.warn(
            f"{split_name} backtest contains NaN forecasts; "
            "the scores computed from it are not meaningful",
            RuntimeWarning,
        )

# Scores are computed for the first target only; "0" is the component name of
# the back-test series (see the printed `component` coordinate).
target_name = TARGETS_clean[0]

mape_valid = mape(valid[1][target_name], backtest_valid["0"])
mape_train = mape(train[1][target_name], backtest_train["0"])

mae_valid = mae(valid[1][target_name], backtest_valid["0"])
mae_train = mae(train[1][target_name], backtest_train["0"])

# Experiment tracking is currently disabled:
# wandb.log({"mape_valid": mape_valid})
# wandb.log({"mape_train": mape_train})

print(f"MAPE valid {mape_valid}")

# wandb.log({"mae_valid": mae_valid})
# wandb.log({"mae_train": mae_train})




[2022-01-11 23:05:01,256] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 2615 samples.
[2022-01-11 23:05:01,256] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 2615 samples.
[2022-01-11 23:05:01,259] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 64-bits; casting model to float64.
[2022-01-11 23:05:01,259] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 64-bits; casting model to float64.


get data
initialize model
fit
historical forecast train set
historical forecast valid
getting scores
<TimeSeries (DataArray) (time: 1188, component: 1, sample: 1)>
array([[[nan]],

       [[nan]],

       [[nan]],

       ...,

       [[nan]],

       [[nan]],

       [[nan]]])
Coordinates:
  * time       (time) datetime64[ns] 2010-01-04T23:36:00 ... 2010-01-06T15:10:00
  * component  (component) <U1 '0'
Dimensions without coordinates: sample
MAPE valid nan


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [3]:
# Inspect train[1], the element of the training split that the fit cell passes
# to the model as `series`.
# NOTE(review): this cell carries execution count In [3] although it follows
# cells In [6] and In [8]; re-run the notebook top to bottom to confirm it
# reflects the current `train`.
train[1]