In [1]:
import json
# Load the conversation records used throughout the notebook. Later cells
# iterate over `conversations` and read the 'max' and 'avg' entry of each item.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform's default text encoding.
with open('conversations_context_time_series.json', encoding='utf-8') as json_file:
    conversations = json.load(json_file)

In [37]:
# Number of past values the model reads per sample (passed as
# input_chunk_length below). The series filter keeps only series with at
# least steps + 1 values, and the value is also part of the results filename.
steps = 4
# Label of the series family being evaluated; it is interpolated into the
# results filename written by results_pool. Reassigned to "max" further down.
eval_name = "avg"

In [38]:
import numpy as np
from numpy import array
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    """Cut a univariate sequence into (window, next value) training pairs.

    For every start index ``s`` such that ``s + n_steps`` is still a valid
    index, the window ``sequence[s:s + n_steps]`` becomes one input sample
    and ``sequence[s + n_steps]`` becomes its target.

    Parameters
    ----------
    sequence : indexable, sliceable sequence
        The values to split.
    n_steps : int
        Length of each input window.

    Returns
    -------
    tuple
        ``(array(windows), array(targets))``; both are empty arrays when the
        sequence is too short to yield a single pair.
    """
    total = len(sequence)
    # s + n_steps grows with s, so filtering on the bound selects exactly the
    # leading run of start positions whose target index is in range.
    starts = [s for s in range(total) if s + n_steps < total]
    windows = [sequence[s:s + n_steps] for s in starts]
    targets = [sequence[s + n_steps] for s in starts]
    return array(windows), array(targets)

In [39]:
from sklearn.metrics import precision_score, f1_score, accuracy_score, roc_auc_score, recall_score
from darts.metrics import ope, mae, mse, mape, mase
import json

def results_pool(actual_list, actual_ts, future_ts, train_ts):
    """Score one-step forecasts, save the metrics as JSON and return them.

    Two groups of metrics are computed:

    * Direction-of-change classification (precision, F1, accuracy, ROC AUC,
      recall). The true label of a series is whether its last value is
      greater than its second-to-last value; the predicted label is whether
      the forecast is greater than that same second-to-last value.
    * Regression errors from ``darts.metrics`` (MSE, OPE, MAE, MAPE), each
      reduced with ``statistics.mean``.

    The metrics are written to
    ``Results-SotA/NBeatsContext-{steps}-{eval_name}.json``. ``steps`` and
    ``eval_name`` are notebook-level globals read at call time, so they must
    hold the intended values when this function is called.

    Parameters
    ----------
    actual_list : sequence of sequences of numbers
        Ground-truth series; only the last two values of each are used.
    actual_ts : sequence of darts TimeSeries
        Ground truth at the forecast step, passed to the darts metrics.
    future_ts : sequence of darts TimeSeries
        Forecasts; element ``[0][0]`` of each ``values()`` array is taken as
        the predicted number.
    train_ts
        Unused. Only the commented-out MASE computation refers to it.

    Returns
    -------
    dict
        The metric name -> value mapping that was written to disk.
    """
    # Imported locally so the function works regardless of which other
    # notebook cells have been run (``mean`` is otherwise only imported by a
    # later statistics cell).
    import os
    from statistics import mean

    predicted = [x.values()[0][0] for x in future_ts]

    # Binary "did the value go up at the last step?" labels.
    verdict_true = [x[-1]>x[-2] for x in actual_list]
    verdict_predicted = [a>x[-2] for a,x in zip(predicted, actual_list)]

    precision = precision_score(verdict_true, verdict_predicted)
    f1 = f1_score(verdict_true, verdict_predicted)
    accuracy = accuracy_score(verdict_true, verdict_predicted)
    auc = roc_auc_score(verdict_true, verdict_predicted)
    recall = recall_score(verdict_true, verdict_predicted)

    # NOTE(review): only mse is called with intersect=False; the other darts
    # metrics use their default. Confirm whether that difference is intended.
    mse_err = mse(actual_ts, future_ts, intersect = False)
    ope_err = ope(actual_ts, future_ts)
    mae_err = mae(actual_ts, future_ts)
    mape_err = mape(actual_ts, future_ts)
    # MASE is currently disabled:
    #mase_err = mase(actual_ts, future_ts, train_ts)

    results = {'mse':mean(mse_err),
               'ope':mean(ope_err),
               'mae':mean(mae_err),
               'mape':mean(mape_err),
               #'mase':mean(mase_err),
               'precision':precision,
               'f1':f1,
               'accuracy':accuracy,
               'auc':auc,
               'recall':recall}

    # Create the output directory first so a missing folder does not discard
    # the metrics after the model has already been trained.
    os.makedirs('Results-SotA', exist_ok=True)
    with open(f'Results-SotA/NBeatsContext-{steps}-{eval_name}.json', 'w') as file:
        json.dump(results, file)
    return results

In [40]:
# Keep only the series that contain no zero value and are long enough to
# supply `steps` inputs plus one further value.
# NOTE(review): the zero filter is presumably there because MAPE is undefined
# for zero actuals - confirm.
series_max = [
    conv['max']
    for conv in conversations
    if 0 not in conv['max'] and len(conv['max']) >= steps + 1
]
series_avg = [
    conv['avg']
    for conv in conversations
    if 0 not in conv['avg'] and len(conv['avg']) >= steps + 1
]

In [41]:
# Number of 'max' series that passed the zero/length filter.
len(series_max)

1400

In [42]:
# Number of 'avg' series that passed the zero/length filter.
len(series_avg)

1443

In [30]:
from statistics import mean, median
# Print min, max, mean and median over every value of every series, first for
# the 'max' series and then for the 'avg' series (one line each).
for series_group in (series_max, series_avg):
    pooled = [value for series in series_group for value in series]
    print(min(pooled), max(pooled), mean(pooled), median(pooled))

0.130823268 1.0 0.5055667951066781 0.471417151
0.0002077292153846154 0.603971215 0.02042109630791462 0.009809837318181818


In [31]:
#array_list_max = [split_sequence(i, steps) for i in series_max]
#array_list_avg = [split_sequence(i, steps) for i in series_avg]

In [32]:
import random
def train_val_test_split(data, percent1, percent2, seed=None):
    """Randomly partition ``data`` into train, validation and test lists.

    The input is copied before shuffling, so the caller's sequence is left
    untouched and calling the function again starts from the same data.

    Parameters
    ----------
    data : iterable
        Items to split.
    percent1 : float
        Fraction of the items assigned to the first (training) part.
    percent2 : float
        Fraction of the items assigned to the second (validation) part; the
        remainder forms the third (test) part.
    seed : optional
        When given, the shuffle uses a dedicated ``random.Random(seed)`` so
        the split is reproducible. When ``None`` (the default) the module
        level ``random`` state is used, as before.

    Returns
    -------
    tuple of three lists
        ``(train, validation, test)``.
    """
    items = list(data)  # work on a copy; never reorder the caller's data
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(items)
    split_place1 = int(percent1 * len(items))
    split_place2 = int((percent1 + percent2) * len(items))
    return items[:split_place1], items[split_place1:split_place2], items[split_place2:]

In [33]:
# 60% train / 20% validation / remaining 20% test, split independently for the
# 'max' and the 'avg' series (each call shuffles on its own).
train_list_max, val_list_max, test_list_max = train_val_test_split(series_max, 0.6, 0.2)
train_list_avg, val_list_avg, test_list_avg = train_val_test_split(series_avg, 0.6, 0.2)

In [34]:
# Turn each training series into a NumPy array, then reshape it to
# (length, 1, 1) before it is handed to TimeSeries.from_values.
# NOTE(review): presumably the (time, component, sample) layout - confirm.
train_array_max = list(map(array, train_list_max))
train_max = [series.reshape((series.shape[0], 1, 1)) for series in train_array_max]

train_array_avg = list(map(array, train_list_avg))
train_avg = [series.reshape((series.shape[0], 1, 1)) for series in train_array_avg]

In [35]:
# Number of 'max' training series.
# NOTE(review): the recorded output (1532) is larger than the recorded
# len(series_max) (1400) even though this is only the 60% training share, so
# the saved outputs come from different kernel states. Re-run top to bottom.
len(train_max)

1532

In [36]:
# Number of 'avg' training series.
# NOTE(review): the recorded output (1579) is larger than the recorded
# len(series_avg) (1443) even though this is only the 60% training share, so
# the saved outputs come from different kernel states. Re-run top to bottom.
len(train_avg)

1579

In [11]:
# Same conversion as for the training data: each validation series becomes a
# NumPy array reshaped to (length, 1, 1).
val_array_max = list(map(array, val_list_max))
val_max = [series.reshape((series.shape[0], 1, 1)) for series in val_array_max]

val_array_avg = list(map(array, val_list_avg))
val_avg = [series.reshape((series.shape[0], 1, 1)) for series in val_array_avg]

In [12]:
# Same conversion as for the training data: each test series becomes a NumPy
# array reshaped to (length, 1, 1).
test_array_max = list(map(array, test_list_max))
test_max = [series.reshape((series.shape[0], 1, 1)) for series in test_array_max]

test_array_avg = list(map(array, test_list_avg))
test_avg = [series.reshape((series.shape[0], 1, 1)) for series in test_array_avg]

In [14]:
from darts.models import NBEATSModel
from darts import TimeSeries

In [15]:
# Wrap every reshaped test array in a darts TimeSeries.
test_max_ts = list(map(TimeSeries.from_values, test_max))
test_avg_ts = list(map(TimeSeries.from_values, test_avg))

In [17]:
# Training and validation inputs for the 'avg' model.
fit_series_avg = list(map(TimeSeries.from_values, train_avg))
val_series_avg = list(map(TimeSeries.from_values, val_avg))

# Each test series is split into the part shown to the model (everything but
# the final step) and the held-out final step used as ground truth.
predict_series_avg = [ts[:-1] for ts in test_avg_ts]
y_test_avg_ts = [ts[-1] for ts in test_avg_ts]

In [18]:
# One-step-ahead N-BEATS model for the 'avg' series: it reads `steps` values
# (input_chunk_length) and emits a single value (output_chunk_length=1).
# random_state fixes the model's own randomness so that re-running this cell
# on the same data gives comparable results; the value itself is arbitrary.
# The train/validation/test split is shuffled separately and is not covered
# by this seed.
model_one_step_avg = NBEATSModel(
    input_chunk_length=steps,
    output_chunk_length=1,
    random_state=42,
)
model_one_step_avg.fit(fit_series_avg, val_series=val_series_avg, verbose = True)

[2022-02-26 11:26:42,221] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 6055 samples.
[2022-02-26 11:26:42,221] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 6055 samples.
[2022-02-26 11:26:42,299] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 64-bits; casting model to float64.
[2022-02-26 11:26:42,299] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 64-bits; casting model to float64.


  0%|          | 0/100 [00:00<?, ?it/s]

Training loss: 0.0012, validation loss: 0.0012, best val loss: 0.0011

In [19]:
# Forecast one step (n=1) for each truncated 'avg' test series.
future_avg = model_one_step_avg.predict(n=1, series = predict_series_avg)

In [20]:
# Score the 'avg' forecasts and write the metrics file. The filename uses the
# current values of the globals `steps` and `eval_name`, so eval_name must
# still be "avg" here. The last argument fills the train_ts parameter, which
# results_pool does not currently use.
results_pool(test_list_avg, y_test_avg_ts, future_avg, predict_series_avg)

In [21]:
# Switch the results filename label to the 'max' series; results_pool reads
# this global when it builds the output path.
eval_name = "max"

In [22]:
# Training and validation inputs for the 'max' model.
fit_series_max = list(map(TimeSeries.from_values, train_max))
val_series_max = list(map(TimeSeries.from_values, val_max))

# Each test series is split into the part shown to the model (everything but
# the final step) and the held-out final step used as ground truth.
predict_series_max = [ts[:-1] for ts in test_max_ts]
y_test_max_ts = [ts[-1] for ts in test_max_ts]

In [23]:
# One-step-ahead N-BEATS model for the 'max' series: it reads `steps` values
# (input_chunk_length) and emits a single value (output_chunk_length=1).
# random_state fixes the model's own randomness so that re-running this cell
# on the same data gives comparable results; the value itself is arbitrary.
# The train/validation/test split is shuffled separately and is not covered
# by this seed.
model_one_step_max = NBEATSModel(
    input_chunk_length=steps,
    output_chunk_length=1,
    random_state=42,
)
model_one_step_max.fit(fit_series_max, val_series=val_series_max, verbose = True)

[2022-02-26 12:50:52,001] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 5880 samples.
[2022-02-26 12:50:52,001] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 5880 samples.
[2022-02-26 12:50:52,070] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 64-bits; casting model to float64.
[2022-02-26 12:50:52,070] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 64-bits; casting model to float64.


  0%|          | 0/100 [00:00<?, ?it/s]

Training loss: 0.0123, validation loss: 0.0624, best val loss: 0.0368

In [24]:
# Forecast one step (n=1) for each truncated 'max' test series.
future_max = model_one_step_max.predict(n=1, series = predict_series_max)

In [25]:
# Score the 'max' forecasts and write the metrics file. The filename uses the
# current values of the globals `steps` and `eval_name`, so eval_name must
# already have been set to "max". The last argument fills the train_ts
# parameter, which results_pool does not currently use.
results_pool(test_list_max, y_test_max_ts, future_max, predict_series_max)