In [8]:
import json
with open('conversations_time_series.json') as json_file:
    conversations = json.load(json_file)

In [9]:
steps = 1
eval_name = "max"

In [10]:
import numpy as np
from numpy import array
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
    # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence)-1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

In [11]:
from sklearn.metrics import precision_score, f1_score, accuracy_score, roc_auc_score, recall_score
from darts.metrics import ope, mae, mse, mape, mase
import json

def results_pool(actual_list, actual_ts, future_ts, train_ts):
    predicted = [x.values()[0][0] for x in future_ts]
    verdict_true = [x[-1]>x[-2] for x in actual_list]
    verdict_predicted = [a>x[-2] for a,x in zip(predicted, actual_list)]
    precision = precision_score(verdict_true, verdict_predicted)
    f1 = f1_score(verdict_true, verdict_predicted)
    accuracy = accuracy_score(verdict_true, verdict_predicted)
    auc = roc_auc_score(verdict_true, verdict_predicted)
    recall = recall_score(verdict_true, verdict_predicted)
    mse_err = mse(actual_ts, future_ts)
    ope_err = ope(actual_ts, future_ts)
    mae_err = mae(actual_ts, future_ts)
    mape_err = mape(actual_ts, future_ts)
    #mase_err = mase(actual_ts, future_ts, train_ts)
    results = {'mse':mean(mse_err), 
               'ope':mean(ope_err), 
               'mae':mean(mae_err), 
               'mape':mean(mape_err),
               #'mase':mean(mase_err),
               'precision':precision, 
               'f1':f1, 
               'accuracy':accuracy, 
               'auc':auc, 
               'recall':recall}
    with open(f'Results-SotA/NBeats-{steps}-{eval_name}.json', 'w') as file:
        json.dump(results, file)

In [12]:
series_max = [i['max'] for i in conversations if (0 not in i['max']) and (len(i['max'])>=steps+1)]
series_avg = [i['avg'] for i in conversations if (0 not in i['avg']) and (len(i['avg'])>=steps+1)]

In [13]:
series_max

[[0.524361002, 0.729676991, 0.663392004],
 [0.339620112, 0.5076238, 0.288231249, 0.264932309, 0.729676991],
 [0.65283725, 0.524361002, 0.501079732],
 [0.487818978, 0.335301527, 0.487818978, 0.633909497],
 [0.456066561,
  0.498563279,
  0.498563279,
  0.498563279,
  0.386842573,
  0.339789771],
 [0.42690141, 0.299644114, 0.661277692, 0.450064888],
 [0.89503408, 0.295882863, 0.58570458],
 [0.560034928, 0.56003493, 0.385918066],
 [0.441746421, 0.404194407, 0.305277383, 0.224986568, 0.64977563],
 [1.0, 0.373420563, 0.616701424],
 [0.578276436, 0.304590486, 0.58570458],
 [0.357578414, 0.197545353, 0.288330805, 0.197545353],
 [0.366866883,
  0.568847528,
  0.21433187,
  0.239906726,
  0.335174054,
  1.0,
  0.523281721,
  1.0],
 [0.298764252, 0.338455546, 0.243711543],
 [0.449235735, 0.765622433, 0.765622433, 0.366866883, 0.36197484],
 [0.222962625, 0.358154043, 0.202689511],
 [0.325509831, 0.35385061, 0.568847528, 0.326539489, 0.568847528, 0.446648092],
 [0.416775125, 1.0, 0.487818978],
 [0.

In [14]:
len(series_avg)

4188

In [15]:
len(series_max)

3821

In [16]:
from statistics import mean, median
print(min([x for sub in series_max for x in sub]), max([x for sub in series_max for x in sub]), mean([x for sub in series_max for x in sub]), median([x for sub in series_max for x in sub]))
print(min([x for sub in series_avg for x in sub]), max([x for sub in series_avg for x in sub]), mean([x for sub in series_avg for x in sub]), median([x for sub in series_avg for x in sub]))

0.130823268 1.0 0.5043920014759261 0.4617933355
2.200298244937319e-05 1.0 0.02509494671440432 0.010642815385154061


In [17]:
#array_list_max = [split_sequence(i, steps) for i in series_max]
#array_list_avg = [split_sequence(i, steps) for i in series_avg]

In [18]:
import random
def train_val_test_split(data, percent1, percent2):
    split_place1 = int(percent1*len(data))
    split_place2 = int((percent1+percent2)*len(data))
    random.shuffle(data)
    return data[:split_place1], data[split_place1:split_place2], data[split_place2:]

In [19]:
train_list_max, val_list_max, test_list_max = train_val_test_split(series_max, 0.7, 0.15)
train_list_avg, val_list_avg, test_list_avg = train_val_test_split(series_avg, 0.7, 0.15)

In [20]:
train_array_max = [array(x) for x in train_list_max]
train_array_avg = [array(x) for x in train_list_avg]
train_max = [x.reshape((x.shape[0], 1, 1)) for x in train_array_max]
train_avg = [x.reshape((x.shape[0], 1, 1)) for x in train_array_avg]

In [21]:
len(train_max)

2674

In [22]:
len(train_avg)

2931

In [23]:
val_array_max = [array(x) for x in val_list_max]
val_array_avg = [array(x) for x in val_list_avg]
val_max = [x.reshape((x.shape[0], 1, 1)) for x in val_array_max]
val_avg = [x.reshape((x.shape[0], 1, 1)) for x in val_array_avg]

In [24]:
test_array_max = [array(x) for x in test_list_max]
test_array_avg = [array(x) for x in test_list_avg]
test_max = [x.reshape((x.shape[0], 1, 1)) for x in test_array_max]
test_avg = [x.reshape((x.shape[0], 1, 1)) for x in test_array_avg]

In [25]:
from darts.models import NBEATSModel
from darts import TimeSeries

In [26]:
test_max_ts = [TimeSeries.from_values(x) for x in test_max]
test_avg_ts = [TimeSeries.from_values(x) for x in test_avg]

In [27]:
fit_series_avg = [TimeSeries.from_values(x) for x in train_avg]
val_series_avg = [TimeSeries.from_values(x) for x in val_avg]
predict_series_avg = [x[:-1] for x in test_avg_ts]
y_test_avg_ts = [x[-1] for x in test_avg_ts]

In [28]:
model_one_step_avg = NBEATSModel(input_chunk_length=steps, output_chunk_length=1)
model_one_step_avg.fit(fit_series_avg, val_series=val_series_avg, verbose = True)

[2022-03-29 02:14:14,937] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 46896 samples.
[2022-03-29 02:14:14,937] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 46896 samples.
[2022-03-29 02:14:15,016] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 64-bits; casting model to float64.
[2022-03-29 02:14:15,016] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 64-bits; casting model to float64.


  0%|          | 0/100 [00:00<?, ?it/s]

Training loss: 0.0027, validation loss: 0.0027, best val loss: 0.0027

In [29]:
future_avg = model_one_step_avg.predict(n=1, series = predict_series_avg)

In [30]:
results_pool(test_list_avg, y_test_avg_ts, future_avg, predict_series_avg)

In [65]:
eval_name = "max"

In [66]:
fit_series_max = [TimeSeries.from_values(x) for x in train_max]
val_series_max = [TimeSeries.from_values(x) for x in val_max]
predict_series_max = [x[:-1] for x in test_max_ts]
y_test_max_ts = [x[-1] for x in test_max_ts]

In [67]:
model_one_step_max = NBEATSModel(input_chunk_length=steps, output_chunk_length=1)
model_one_step_max.fit(fit_series_max, val_series=val_series_max, verbose = True)

[2022-01-30 11:55:24,199] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 36672 samples.
[2022-01-30 11:55:24,199] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 36672 samples.
[2022-01-30 11:55:24,278] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 64-bits; casting model to float64.
[2022-01-30 11:55:24,278] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 64-bits; casting model to float64.


  0%|          | 0/100 [00:00<?, ?it/s]

Training loss: 0.0479, validation loss: 0.0503, best val loss: 0.0496

In [68]:
future_max = model_one_step_max.predict(n=1, series = predict_series_max)

In [69]:
results_pool(test_list_max, y_test_max_ts, future_max, predict_series_max)