In [None]:
import sagemaker

# Key prefix shared by every S3 object this notebook writes (data and model output).
prefix = 'sagemaker/linear-time-series-forecast'

# One SageMaker session for the notebook, plus the default bucket it reports.
sess = sagemaker.Session()
bucket = sess.default_bucket()
 
# Define IAM role
import boto3
import re
from sagemaker import get_execution_role

# Resolve the execution role; it is handed to the Estimator when training is configured.
role = sagemaker.get_execution_role()



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
import os
import time
import json
import sagemaker.amazon.common as smac
import sagemaker
from sagemaker.predictor import csv_serializer, json_deserializer

In [None]:
# Read the raw dataset from a CSV file in the current working directory.
wheat_csv_path = './wheat.csv'
data = pd.read_csv(wheat_csv_path)

In [None]:
# Preview the first five rows (head()'s default) as a quick sanity check of the load.
data.head(n=5)

In [None]:
# Keep only the price column. The explicit copy makes `train` an independent
# frame, so the lag columns assigned to it in the following cell do not trigger
# pandas' SettingWithCopyWarning and can never write through to `data`.
train = data[['Modal_Price']].copy()

In [None]:
# Add one feature column per lag: price_lagK holds Modal_Price shifted down by K rows,
# for K = 1..10. The first K rows of each column are NaN because no earlier value exists.
for lag in range(1, 11):
    train['price_lag{}'.format(lag)] = train['Modal_Price'].shift(lag)

In [None]:
# Remove the rows whose lag window is incomplete (the shifts leave NaN in the
# leading rows). Dropping by NaN instead of by a fixed position makes this cell
# safe to re-run: a positional `iloc[10:]` would discard ten more rows on every
# execution. It also removes any later row containing a missing value, which
# could not be used as a training record anyway.
train = train.dropna()

# Row positions that separate the 60% / 20% / 20% train / validation / test splits.
n_rows = len(train)
split_train = int(n_rows * 0.6)
split_test = int(n_rows * 0.8)

In [None]:
# Separate the target column from the feature columns once, then cut both by
# row position at the precomputed split points.
target = train['Modal_Price']
lag_features = train.drop('Modal_Price', axis=1)

# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in pandas 0.23
# and removed in pandas 1.0; it returns the same NumPy array.
train_y = target.iloc[:split_train]
train_X = lag_features.iloc[:split_train].values

validation_y = target.iloc[split_train:split_test]
validation_X = lag_features.iloc[split_train:split_test].values

test_y = target.iloc[split_test:]
test_X = lag_features.iloc[split_test:].values

In [None]:
# Serialize the training features and labels (both as float32) into an in-memory
# buffer, then rewind it so the upload in the next cell reads from the beginning.
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(
    buf,
    np.asarray(train_X, dtype='float32'),
    np.asarray(train_y, dtype='float32')
)
buf.seek(0)

In [None]:
# Upload the serialized training buffer and remember its S3 URI for the training job.
key = 'linear_train.data'

# S3 keys always use '/' as the separator. os.path.join would use the local OS
# separator instead, so the key is built explicitly and the URI is derived from
# that same key to guarantee the two can never disagree.
train_object_key = '{}/train/{}'.format(prefix, key)
boto3.resource('s3').Bucket(bucket).Object(train_object_key).upload_fileobj(buf)

s3_train_data = 's3://{}/{}'.format(bucket, train_object_key)
print('uploaded training data location: {}'.format(s3_train_data))

In [None]:
# Serialize the validation features and labels (both as float32) into a fresh in-memory
# buffer, then rewind it so the upload in the next cell reads from the beginning.
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(
    buf,
    np.asarray(validation_X, dtype='float32'),
    np.asarray(validation_y, dtype='float32')
)
buf.seek(0)

In [None]:
# Upload the serialized validation buffer and remember its S3 URI for the training job.
key = 'linear_validation.data'

# S3 keys always use '/' as the separator. os.path.join would use the local OS
# separator instead, so the key is built explicitly and the URI is derived from
# that same key to guarantee the two can never disagree.
validation_object_key = '{}/validation/{}'.format(prefix, key)
boto3.resource('s3').Bucket(bucket).Object(validation_object_key).upload_fileobj(buf)

s3_validation_data = 's3://{}/{}'.format(bucket, validation_object_key)
print('uploaded validation data location: {}'.format(s3_validation_data))

In [None]:
from sagemaker.amazon.amazon_estimator import get_image_uri
# Look up the 'linear-learner' container image for the region of the current boto3 session.
aws_region = boto3.Session().region_name
container = get_image_uri(aws_region, 'linear-learner')

In [None]:
sess = sagemaker.Session()

# Configure a single-instance training job for the linear-learner container.
# Model artifacts are written under s3://<bucket>/<prefix>/output.
linear = sagemaker.estimator.Estimator(container,
                                       role,
                                       train_instance_count=1,
                                       train_instance_type='ml.c4.xlarge',
                                       output_path='s3://{}/{}/output'.format(bucket, prefix),
                                       sagemaker_session=sess)

# feature_dim must equal the number of feature columns in the training matrix.
# Reading it from train_X keeps it correct if the number of lag features changes,
# instead of relying on a hard-coded 10.
linear.set_hyperparameters(feature_dim=train_X.shape[1],
                           mini_batch_size=100,
                           predictor_type='regressor',
                           epochs=10,
                           num_models=32,
                           loss='absolute_loss')

# Start training, with the uploaded datasets bound to the 'train' and 'validation' channels.
linear.fit({'train': s3_train_data, 'validation': s3_validation_data})

In [None]:
# Deploy the trained model to a single ml.t2.medium instance and keep the returned predictor.
linear_predictor = linear.deploy(instance_type='ml.t2.medium', initial_instance_count=1)

In [None]:
# Requests go out serialized as CSV (content type 'text/csv');
# responses are decoded with the JSON deserializer.
linear_predictor.serializer = csv_serializer
linear_predictor.content_type = 'text/csv'
linear_predictor.deserializer = json_deserializer

In [None]:
# Send the whole test feature matrix in one request, then pull the 'score'
# field out of every entry under the response's 'predictions' key.
result = linear_predictor.predict(test_X)
predicted_scores = [prediction['score'] for prediction in result['predictions']]
one_step = np.array(predicted_scores)

In [None]:
# Median of |actual - forecast| / actual over the test split (reported as a fraction).
abs_relative_error = np.abs(test_y - one_step) / test_y
print('One-step-ahead MdAPE = ', np.median(abs_relative_error))

# Overlay the actual and forecast series on the current axes.
ax = plt.gca()
ax.plot(np.array(test_y), label='actual')
ax.plot(one_step, label='forecast')
ax.legend()
plt.show()