In [1]:
import time
import numpy as np
np.random.seed(1)
import pandas as pd
import json
import matplotlib.pyplot as plt

In [2]:
import boto3
import s3fs
import sagemaker
from sagemaker import get_execution_role

In [3]:
s3_bucket = "deepar-bucket-2020"  # replace with an existing bucket if needed
s3_prefix = 'deepar-erp-notebook'    # prefix used for all data stored within the bucket


sagemaker_session = sagemaker.Session()
role = get_execution_role()


s3_data_path = "{}/{}/data".format(s3_bucket, s3_prefix)
s3_output_path = "{}/{}/output".format(s3_bucket, s3_prefix)

In [4]:
# region = sagemaker_session.boto_region_name

print(s3_data_path)
print(s3_output_path)
s3_bucket

deepar-bucket-2020/deepar-erp-notebook/data
deepar-bucket-2020/deepar-erp-notebook/output


'deepar-bucket-2020'

In [5]:
# configure the container image to be used for the region that we are running in.
image_name = sagemaker.amazon.amazon_estimator.get_image_uri(boto3.Session().region_name, "forecasting-deepar", "latest")

In [6]:
data = pd.read_csv("testing.csv", index_col=0, parse_dates=True, decimal=',')

In [7]:
train_data = data.loc["2004-01-02":"2016-12-30"]
test_data = data.loc["2016-12-30":"2018-04-18"]
print("Train size: ", train_data.shape)
print("Test size: ", test_data.shape)

Train size:  (3273, 56)
Test size:  (326, 56)


In [8]:
def series_to_obj(timestamps, target, dynamic_feat=None):
    obj = {"start": str(timestamps), "target": target, "dynamic_feat": dynamic_feat}
    return obj

def obj_to_byte(obj):
    return json.dumps(obj).encode('utf-8')

In [10]:
print(len(train_data.index))
train_data

3273


Unnamed: 0_level_0,ASPFWR5,US10YR,EPS,PER,OPEN,HIGH,LOW,CLOSE,BDIY,VIX,...,CAPE,CATY,INF,SIM,RELINF,DTOM,SENTIMENT1,SENTIMENT2,SENTIMENT3,HULBERTSENTIMENT
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-01-02,0.0102034784219897,4.3794,48.93,22.65440425,1111.92,1118.85,1105.08,1108.48,4761.0,18.22,...,29.9783657001614,38.3573111373531,0.0169356683638471,1.0,0,1.0,-0.040706,0.014863,-0.008476,34.26
2004-01-05,0.00674238151785269,4.3774,49.21,22.80471449,1108.48,1122.22,1108.48,1122.22,4757.0,17.49,...,30.341926876617,38.8308209571157,0.0169356683638471,1.0,0,0.0,-0.025638,0.015332,-0.005811,40.51
2004-01-06,-8.18517220091041E-05,4.271,49.2,22.83882114,1122.22,1124.46,1118.44,1123.67,4764.0,16.73,...,30.3731067764421,38.878977178357,0.0169356683638471,1.0,0,0.0,-0.028571,0.013071,-0.008448,43.63
2004-01-07,0.00486202040745925,4.2416,49.18,22.90219601,1123.67,1126.33,1116.45,1126.33,4804.0,15.5,...,30.43698779355,38.9686934512222,0.0169356683638471,1.0,0,0.0,-0.025994,0.013161,-0.009568,43.61
2004-01-08,0.00334812605055816,4.2553,49.18,23.01586011,1126.33,1131.92,1124.91,1131.92,4901.0,15.61,...,30.5799985953738,39.1599607108215,0.0169356683638471,1.0,0,0.0,-0.019927,0.012923,-0.0074,44.4
2004-01-09,0.0160642073199737,4.0803,49.18,22.81130541,1131.34,1131.36,1120.9,1121.86,5046.0,16.75,...,30.3002506931739,38.8099775547482,0.0169356683638471,1.0,0,0.0,-0.028698,0.01242,-0.008886,47.52
2004-01-12,0.0084414478363435,4.0861,49.17,22.92515762,1121.86,1127.85,1120.9,1127.23,5256.0,16.82,...,30.43730123757,38.9936924223803,0.0169356683638471,1.0,0,0.0,-0.035997,0.015245,-0.011063,45.98
2004-01-13,0.0221361868209451,4.011,49.17,22.80292862,1127.23,1129.07,1115.19,1121.22,5391.0,18.04,...,30.26708572335,38.7838344332659,0.0169356683638471,1.0,0,0.0,-0.0383,0.01684,-0.010056,42.85
2004-01-14,0.0112109791568255,3.9937,49.16,22.99674532,1121.22,1130.75,1121.22,1130.52,5459.0,16.75,...,30.5101541630208,39.103399906091,0.0169356683638471,1.0,0,0.0,-0.036777,0.01255,-0.007738,45.98
2004-01-15,0.00552119810945332,3.9688,49.16,23.02786819,1130.52,1137.11,1124.5,1132.05,5485.0,15.56,...,30.4612218666276,39.0488686077275,0.0185493589111312,1.0,0.00129095243782729,0.0,-0.004066,0.011863,-0.009667,36.6


In [11]:
# Extract target time series: ASPFWR5
train_target = train_data["ASPFWR5"].tolist()
for i in range(len(train_target)):
    train_target[i] = float(train_target[i])

test_target = test_data["ASPFWR5"].tolist()
for i in range(len(test_target)):
    test_target[i] = float(test_target[i])


In [157]:
print(len(train_target))
print(train_target)

3273
[0.0102034784219897, 0.00674238151785269, -8.18517220091041e-05, 0.00486202040745925, 0.00334812605055816, 0.0160642073199737, 0.0084414478363435, 0.0221361868209451, 0.0112109791568255, 0.00552119810945332, 0.0140819395748605, 0.00402356036532123, -0.0153137414617467, -0.011733047736492, -0.0085050167896209, -0.016702792041212, -0.00804844173761431, -0.0047670949188084, -0.00281667655403501, 0.00834128175221087, 0.00429400796475714, 0.00918769883848036, 0.0279595429055538, 0.0214119485718347, 0.00574571268902696, 0.0144755044687807, 0.00684843330386513, -0.00744264599431408, -0.00685943652112484, -0.00488089611062902, -0.0156208607860698, -0.00703405721216646, -0.0027000581039981, 0.00103826074307182, 0.0134288129840752, 0.00930504988375418, 0.00693481386001081, 0.00891529092777196, 0.0115755635391689, -0.010564641865373, -0.00870445939698048, -0.0274330434352129, -0.0430769481150433, -0.0333807184801631, -0.0334383749111, -0.0241373566957723, 0.00389283824061856, 0.0172112054929

In [158]:
# Extract features vector
train_dynamic_feat = []

train_features = train_data.columns.tolist()
train_features.remove("ASPFWR5")

for col in train_features:
    train_data[col] = train_data[col].fillna(method='ffill').fillna(method='bfill')
    train_dynamic_feat.append(train_data[col].tolist())
 
test_dynamic_feat = []
test_features = test_data.columns.tolist()
test_features.remove("ASPFWR5")

for col in test_features:
    test_data[col] = test_data[col].fillna(method='ffill').fillna(method='bfill')
    test_dynamic_feat.append(test_data[col].tolist())
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [159]:
print(len(train_dynamic_feat))
print(len(test_dynamic_feat))



for feat in train_dynamic_feat:
    print(len(feat))
   
for feat in test_dynamic_feat:
    print(len(feat))
   

55
55
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
3273
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326
326


In [160]:
# Write json file to S3FileSyste,
# S3: aws simple storage system

s3filesystem = s3fs.S3FileSystem()
with s3filesystem.open(s3_data_path + "/train/train.json", 'wb') as fp:
    obj = series_to_obj(train_data.index, train_target, train_dynamic_feat)
    byte1 = obj_to_byte(obj)
    fp.write(byte1)
    fp.write('\n'.encode('utf-8'))
    
with s3filesystem.open(s3_data_path + "/test/test.json", 'wb') as fp:
    obj2 = series_to_obj(test_data.index, test_target, test_dynamic_feat)
    byte2 = obj_to_byte(obj2)
    fp.write(byte2)
    fp.write('\n'.encode('utf-8'))


In [161]:
estimator = sagemaker.estimator.Estimator(
    sagemaker_session=sagemaker_session,
    image_name=image_name,
    role=role,
    train_instance_count=1,
    train_instance_type='ml.c4.xlarge',
    base_job_name= 'deepar-erp-notebook',
    output_path= "s3://" + s3_output_path
)

In [17]:
"""
Set some hyperparameters: 
    - E.g. frequency of the time series used, 
    - number of data points the model will look at in the past, 
    - number of predicted data points. 
Network params: number of layers
    - number of cells per layer
    - likelihood function 
Training options:
    - number of epochs
    - batch size
    - learning rate
"""

freq = '1D'
prediction_length = 48
context_length = 72

hyperparameters = {
    "time_freq": freq,
    "context_length": str(context_length),
    "prediction_length": str(prediction_length),
    "num_cells": "40",
    "num_layers": "3",
    "likelihood": "gaussian",
    "epochs": "20",
    "mini_batch_size": "32",
    "learning_rate": "0.001",
    "dropout_rate": "0.05",
    "early_stopping_patience": "10"
}

estimator.set_hyperparameters(**hyperparameters)

In [20]:
data_channels = {
    "train": "s3://{}/train".format(s3_data_path),
    "test": "s3://{}/test".format(s3_data_path)
}

estimator.fit(inputs=data_channels)

2020-06-14 17:42:07 Starting - Starting the training job...
2020-06-14 17:42:10 Starting - Launching requested ML instances.........
2020-06-14 17:43:45 Starting - Preparing the instances for training...
2020-06-14 17:44:22 Downloading - Downloading input data...
2020-06-14 17:45:06 Training - Training image download completed. Training in progress..[34mArguments: train[0m
[34m[06/14/2020 17:45:09 INFO 139639315720000] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'num_dynamic_feat': u'auto', u'dropout_rate': u'0.10', u'mini_batch_size': u'128', u'test_quantiles': u'[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]', u'_tuning_objective_metric': u'', u'_num_gpus': u'auto', u'num_eval_samples': u'100', u'learning_rate': u'0.001', u'num_cells': u'40', u'num_layers': u'2', u'embedding_dimension': u'10', u'_kvstore': u'auto', u'_num_kv_servers': u'auto', u'cardinality': u'auto', u'likelihood': u'student-t', u'early_stopp

[34m[06/14/2020 17:46:03 INFO 139639315720000] Epoch[2] Batch[0] avg_epoch_loss=-2.525891[0m
[34m[06/14/2020 17:46:03 INFO 139639315720000] #quality_metric: host=algo-1, epoch=2, batch=0 train loss <loss>=-2.5258910656[0m
[34m[06/14/2020 17:46:04 INFO 139639315720000] Epoch[2] Batch[5] avg_epoch_loss=-2.465728[0m
[34m[06/14/2020 17:46:04 INFO 139639315720000] #quality_metric: host=algo-1, epoch=2, batch=5 train loss <loss>=-2.46572768688[0m
[34m[06/14/2020 17:46:04 INFO 139639315720000] Epoch[2] Batch [5]#011Speed: 217.74 samples/sec#011loss=-2.465728[0m
[34m[06/14/2020 17:46:05 INFO 139639315720000] Epoch[2] Batch[10] avg_epoch_loss=-2.471116[0m
[34m[06/14/2020 17:46:05 INFO 139639315720000] #quality_metric: host=algo-1, epoch=2, batch=10 train loss <loss>=-2.4775826931[0m
[34m[06/14/2020 17:46:05 INFO 139639315720000] Epoch[2] Batch [10]#011Speed: 228.45 samples/sec#011loss=-2.477583[0m
[34m[06/14/2020 17:46:05 INFO 139639315720000] processed a total of 363 examples

[34m[06/14/2020 17:47:34 INFO 139639315720000] Epoch[7] Batch[0] avg_epoch_loss=-2.447399[0m
[34m[06/14/2020 17:47:34 INFO 139639315720000] #quality_metric: host=algo-1, epoch=7, batch=0 train loss <loss>=-2.4473991394[0m
[34m[06/14/2020 17:47:34 INFO 139639315720000] Epoch[7] Batch[5] avg_epoch_loss=-2.528058[0m
[34m[06/14/2020 17:47:34 INFO 139639315720000] #quality_metric: host=algo-1, epoch=7, batch=5 train loss <loss>=-2.52805825075[0m
[34m[06/14/2020 17:47:34 INFO 139639315720000] Epoch[7] Batch [5]#011Speed: 228.19 samples/sec#011loss=-2.528058[0m
[34m[06/14/2020 17:47:35 INFO 139639315720000] processed a total of 314 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 17758.262872695923, "sum": 17758.262872695923, "min": 17758.262872695923}}, "EndTime": 1592156855.526414, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1592156837.767618}
[0m
[34m[06/14/2020 17:47:35 INFO 139639315720000] #thr

[34m[06/14/2020 17:49:20 INFO 139639315720000] Epoch[13] Batch[0] avg_epoch_loss=-2.450909[0m
[34m[06/14/2020 17:49:20 INFO 139639315720000] #quality_metric: host=algo-1, epoch=13, batch=0 train loss <loss>=-2.45090889931[0m
[34m[06/14/2020 17:49:21 INFO 139639315720000] Epoch[13] Batch[5] avg_epoch_loss=-2.484381[0m
[34m[06/14/2020 17:49:21 INFO 139639315720000] #quality_metric: host=algo-1, epoch=13, batch=5 train loss <loss>=-2.4843814373[0m
[34m[06/14/2020 17:49:21 INFO 139639315720000] Epoch[13] Batch [5]#011Speed: 232.21 samples/sec#011loss=-2.484381[0m
[34m[06/14/2020 17:49:22 INFO 139639315720000] Epoch[13] Batch[10] avg_epoch_loss=-2.512551[0m
[34m[06/14/2020 17:49:22 INFO 139639315720000] #quality_metric: host=algo-1, epoch=13, batch=10 train loss <loss>=-2.54635467529[0m
[34m[06/14/2020 17:49:22 INFO 139639315720000] Epoch[13] Batch [10]#011Speed: 223.88 samples/sec#011loss=-2.546355[0m
[34m[06/14/2020 17:49:22 INFO 139639315720000] processed a total of 335 

[34m[06/14/2020 17:50:49 INFO 139639315720000] Epoch[18] Batch[0] avg_epoch_loss=-2.364248[0m
[34m[06/14/2020 17:50:49 INFO 139639315720000] #quality_metric: host=algo-1, epoch=18, batch=0 train loss <loss>=-2.36424779892[0m
[34m[06/14/2020 17:50:50 INFO 139639315720000] Epoch[18] Batch[5] avg_epoch_loss=-2.435019[0m
[34m[06/14/2020 17:50:50 INFO 139639315720000] #quality_metric: host=algo-1, epoch=18, batch=5 train loss <loss>=-2.435019056[0m
[34m[06/14/2020 17:50:50 INFO 139639315720000] Epoch[18] Batch [5]#011Speed: 221.37 samples/sec#011loss=-2.435019[0m
[34m[06/14/2020 17:50:50 INFO 139639315720000] processed a total of 293 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 17142.88902282715, "sum": 17142.88902282715, "min": 17142.88902282715}}, "EndTime": 1592157050.794841, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1592157033.651461}
[0m
[34m[06/14/2020 17:50:50 INFO 139639315720000] #th

In [28]:
estimator.latest_training_job.name

'deepar-erp-notebook-2020-06-14-17-42-06-954'

In [21]:
job_name = estimator.latest_training_job.name

endpoint_name = sagemaker_session.endpoint_from_job(
    job_name=job_name,
    initial_instance_count=1,
    instance_type = 'ml.c4.xlarge',
    deployment_image=image_name,
    role=role
)


-------------!

In [99]:
inference_data = data.loc["2018-04-18" : "2018-12-17"]
prediction_data = data.loc["2018-12-17":"2019-04-17"]

inference_target = data.loc["2018-04-18" : "2018-12-17"]["ASPFWR5"]
prediction_target = data.loc["2018-12-17":"2019-04-17"]["ASPFWR5"]

dummy_target = data.loc["2018-04-18" : "2019-04-17"]

inference_features = test_features
inference_dynamic_feat = []
prediction_dynamic_feat = []

for col in inference_features:
    inference_data[col] = inference_data[col].fillna(method='ffill').fillna(method='bfill')
    prediction_data[col] = prediction_data[col].fillna(method='ffill').fillna(method='bfill')
    inference_dynamic_feat.append(inference_data[col].tolist())
    prediction_dynamic_feat.append(prediction_data[col].tolist())
    


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [127]:
dynamic_feat_2 = []
for i in range(2):
    dynamic_feat_2.append(inference_dynamic_feat)

In [101]:
predictor = sagemaker.predictor.RealTimePredictor(
    endpoint=endpoint_name,
    sagemaker_session=sagemaker_session,
    content_type="application/json"
)


# predictor.set_prediction_parameters(freq, len(prediction_target))

In [147]:
test_feat = []
for feat in inference_dynamic_feat:
    test_feat.append(feat[0])
        


In [169]:
def getIFeature(features, i):
    l = []
    for feat in features:
        l.append([feat[i]])
    return l

In [173]:
instances = [series_to_obj(inference_target.index[i], [inference_target.tolist()[i]], getIFeature(inference_dynamic_feat, i)) for i in range(48)]

In [176]:
instances

[{'start': '2018-04-18 00:00:00',
  'target': ['-0.0256500229847967'],
  'dynamic_feat': [[2.8728],
   [112.25],
   [24.1701106023631],
   [2710.11],
   [2717.49],
   [2703.63],
   [2708.64],
   [1124.0],
   [15.6],
   [0.56],
   [14826.23],
   [89.605],
   [266.37707533419],
   [1.16493474814002],
   [1.69],
   [1605.0],
   [40.65],
   [4.58],
   [-0.005716153340876],
   [2.1629],
   [0.01841746978166],
   [3.305],
   [3.81],
   [5.47630939769732],
   [0.0],
   [0.0],
   [0.0],
   [7.53633386812832],
   [-10.0],
   [92.41],
   [61.9],
   [78.1],
   [59.3],
   [1.77],
   [1.17],
   [0.023991887296598],
   [36600.0],
   [33360.0],
   [3.66],
   [0.77],
   [1.065],
   [4.1],
   [998.141608045938],
   [1319.0],
   [-0.0104386791320378],
   [29.3515773611619],
   [28.8967947030047],
   [0.02315528401033],
   [1.0],
   [0.00173041118858439],
   [0.0],
   [-0.01758],
   [0.008537],
   [-0.006882],
   [45.85]]},
 {'start': '2018-04-19 00:00:00',
  'target': ['-0.00997376768279123'],
  'dynami

In [174]:
quantiles=["0.1", "0.5", "0.9"]
configuration = {"num_samples":169 , "output_types": ["quantiles"], "quantiles": quantiles}
http_request_data = {"instances": instances, "configuration": configuration}
req = json.dumps(http_request_data).encode('utf-8')

In [175]:
predictor.predict(req)

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from model with message "Invalid request: The field dynamic_feat needs to be provided in the full prediction range but request has dynamic_feat only for 0 time units in the prediction range when trying to predict for 48 time units.". See https://us-west-1.console.aws.amazon.com/cloudwatch/home?region=us-west-1#logEventViewer:group=/aws/sagemaker/Endpoints/deepar-erp-notebook-2020-06-14-17-42-06-954 in account 837343359523 for more information.

In [165]:
req

b'{"instances": [{"start": "2018-04-18 00:00:00", "target": ["-0.0256500229847967", "-0.00997376768279123", "-0.000521477578435857", "-0.00809321846977421", "0.00723862424770798", "-0.00197315258544207", "-0.0142967125827819", "-0.00237348747366374", "0.00872157779150301", "0.00694363313193177", "0.0233014331653793", "0.0348146368671884", "0.0249976690338699", "0.0220972935439484", "0.0151870242898807", "0.0097650650907815", "-0.000387454389533809", "-0.00593798981982079", "0.00107489839321955", "0.00520071345042862", "0.00374950668554756", "0.00254180316405317", "0.00265668911468274", "-0.0164037824184541", "-0.000364878157455655", "-0.00925787100860219", "0.00256190114099752", "0.00968784644734068", "0.0219824661399994", "0.0170511289178452", "0.023087472576429", "0.0162697763805359", "0.0128587874177642", "0.0134239370983795", "0.00190270055540126", "0.00452552012817996", "0.000302376475542157", "-0.00308614812372384", "-0.00822029554918256", "-0.00332063599994915", "-0.012124298800

In [12]:
sagemaker_session.delete_endpoint(endpoint_name)

NameError: name 'endpoint_name' is not defined