In [1]:
%%capture
!pip install darts

In [2]:
!python --version

Python 3.10.12


In [85]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import darts
import pandas as pd

from urllib.request import urlopen
from zipfile import ZipFile
from io import BytesIO

from darts import TimeSeries


zf = ZipFile("/content/drive/MyDrive/PRSA2017_Data_20130301-20170228.zip")

df = pd.DataFrame()
for file in zf.infolist():
    if file.filename.endswith('.csv'):
        df = df._append(pd.read_csv(zf.open(file)))

df['timestamp'] = pd.to_datetime(df[["year", "month", "day", "hour"]])
df.drop(columns=['No'], inplace=True)
df.sort_values(by=['timestamp', 'station']).head(10)

df = df.replace(df['station'].unique(), list(range(len(df['station'].unique()))))
df = df.replace(df['wd'].unique(), list(range(len(df['wd'].unique()))))

ts_df = df.dropna(subset = ['PM2.5'])

ts_df = ts_df[['PM2.5', 'timestamp', 'station', 'wd', 'WSPM', 'TEMP', 'PRES', 'DEWP', 'RAIN', 'O3', 'CO', 'NO2', 'SO2']]
ts_df = ts_df.dropna(subset=ts_df.columns.values)


d = ts_df.timestamp - pd.to_datetime('2013-03-01 00:00:00')

delta_index = []
for delta in d:
    delta_index.append(int(delta.total_seconds()/3600))

ts_df['ts_ind'] = delta_index

target_series = TimeSeries.from_group_dataframe(ts_df[["PM2.5", 'ts_ind', 'station']], group_cols=['station'], time_col='ts_ind')
cov_series = TimeSeries.from_group_dataframe(ts_df.drop(["PM2.5",'timestamp'], axis=1), group_cols=['station'], time_col='ts_ind')

split_point = int(len(cov_series[0]) * 0.8)
split_point_2 = int(len(cov_series[0]) * 0.95)

past_target_series = []
future_target_series = []
test_target_series = []

for ts in target_series:
    past_target_series.append(ts.slice(0, split_point))
    future_target_series.append(ts.slice(split_point, split_point_2))
    test_target_series.append(ts.slice(split_point_2, len(ts)))

past_cov_series = []
future_cov_series = []
test_cov_series = []

for ts in cov_series:
    past_cov_series.append(ts.slice(0, split_point))
    future_cov_series.append(ts.slice(0, split_point_2))
    test_cov_series.append(ts.slice(split_point_2, len(ts)))

In [5]:
from darts.models import NBEATSModel

model_nbeats = NBEATSModel(
    input_chunk_length=30,
    output_chunk_length=7,
    generic_architecture=False,
    num_blocks=3,
    num_layers=4,
    layer_widths=512,
    n_epochs=100,
    nr_epochs_val_period=1,
    batch_size=800,
    model_name="nbeats_interpretable_run",
    # input_chunk_length=10,
    # output_chunk_length=10,
    # n_epochs=1,
    # generic_architecture=False
)

In [90]:
model_nbeats.save('/content/drive/MyDrive/nbeats_model/11_16')

In [10]:

from darts.utils.timeseries_generation import sine_timeseries

series = sine_timeseries(length=60)

model_nbeats.fit(series=past_target_series,
                 past_covariates=past_cov_series,
                 val_series=future_target_series,
                 val_past_covariates=cov_series,
                 epochs=20)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | stacks        | ModuleList       | 2.2 M 
---------------------------------------------------
2.0 M     Trainable params
284 K     Non-trainable params
2.2 M     Total params
8.963     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


NBEATSModel(generic_architecture=False, num_stacks=30, num_blocks=3, num_layers=4, layer_widths=512, expansion_coefficient_dim=5, trend_polynomial_degree=2, dropout=0.0, activation=ReLU, input_chunk_length=30, output_chunk_length=7, n_epochs=100, nr_epochs_val_period=1, batch_size=800, model_name=nbeats_interpretable_run)

In [11]:
type(model_nbeats)

darts.models.forecasting.nbeats.NBEATSModel

In [81]:
%%capture

from darts.metrics.metrics import rmse, mape

series_length = 1024
prediction_length = 24

nbeats_output = []
test_series_nbeats = []

for i in range(len(target_series)):
    test_series_nbeats.append(test_target_series[i].slice(split_point_2, split_point_2 + series_length))
    #nbeats_output.append(model_nbeats.predict(test_length, past_covariates=cov_series[i], series=series))

nbeats_output = model_nbeats.predict(prediction_length, past_covariates=cov_series, series=test_series_nbeats)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


In [82]:

# print(nbeats_output[0][0].start_time())
# print(test_target_series[0].slice(
#     nbeats_output[0][0].start_time(),
#     nbeats_output[0][0].start_time() + 48).start_time())

validation_set = []

for i in range(len(nbeats_output)):
  start_time = nbeats_output[i].start_time()
  eval = test_target_series[i].slice(start_time, start_time + prediction_length)
  print("start {} {} rmse {} mape {}".format(start_time, eval.start_time(), rmse(nbeats_output[i], eval), mape(nbeats_output[i], eval)))

start 34334 34334 rmse 11.414119750932596 mape 53.52796535534423
start 34334 34334 rmse 26.203230300574383 mape 66.56422309339163
start 34334 34334 rmse 4.894382313833835 mape 35.3304204529507
start 34334 34334 rmse 6.304919556112387 mape 45.826153886516714
start 34334 34334 rmse 23.039919641783133 mape 59.47695403722022
start 34334 34334 rmse 8.271116196352981 mape 38.5390805978989
start 34334 34334 rmse 4.196669512839361 mape 34.22746424600973
start 34334 34334 rmse 5.06147819460251 mape 31.44416500230639
start 34334 34334 rmse 10.183955522859442 mape 48.94125981517972
start 34334 34334 rmse 5.150927283500551 mape 30.461214251236424
start 34334 34334 rmse 8.177403650560139 mape 42.36755877126957
start 34334 34334 rmse 5.7618403574441315 mape 41.62933388250888


In [26]:
from darts.metrics.metrics import rmse, mape

for i in range(len(nbeats_output)):
  print("rmse {} mape {}".format(rmse(nbeats_output[i], test_target_series[i][:test_length]),
                               mape(nbeats_output[i], test_target_series[i][:test_length])))

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=

rmse nan mape nan
rmse nan mape nan
rmse nan mape nan
rmse nan mape nan
rmse nan mape nan
rmse nan mape nan
rmse nan mape nan
rmse nan mape nan
rmse nan mape nan
rmse nan mape nan
rmse nan mape nan
rmse nan mape nan


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
