# 8 DeepAR
## c Prediction on cloud
Data source: [kaggle_DailyDelhiClimate](https://www.kaggle.com/sumanthvrao/daily-climate-time-series-data?select=DailyDelhiClimateTrain.csv)

In [1]:
import time
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import datetime
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
pd.set_option("display.max_rows", 6)

In [2]:
# From ???
def encode_target(ts):
    """Return the values of `ts` as a list, replacing non-finite entries.

    Every value for which `np.isfinite` is false (NaN, +inf, -inf) is
    replaced by the string "NaN"; all other values are kept unchanged.
    """
    encoded = []
    for value in ts:
        if np.isfinite(value):
            encoded.append(value)
        else:
            encoded.append("NaN")
    return encoded

def encode_dynamic_feat(dynamic_feat):
    """Convert a table of dynamic features into a list of per-column value lists.

    Parameters:
    dynamic_feat -- column-indexable table whose iteration yields column labels
                    (the callers in this notebook pass a `pandas.DataFrame`)

    Return value: list containing one plain Python list of values per column,
    in iteration order.

    Raises:
    AssertionError -- if any column contains a missing value.
    """
    encoded = []
    for col in dynamic_feat:
        column = dynamic_feat[col]
        # Raised explicitly instead of via `assert` so the check still runs
        # under `python -O`; the f-string also copes with non-string labels.
        if column.isna().any():
            raise AssertionError(f"{col} has NaN")
        encoded.append(column.tolist())
    return encoded

def series_to_obj(ts, cat=None, dynamic_feat=None):
    """Build the dictionary describing one time series.

    The result always holds "start" (string form of the first index entry of
    `ts`) and "target" (the values encoded by `encode_target`). "cat" and
    "dynamic_feat" are added only when the corresponding argument is not None;
    `dynamic_feat` is encoded with `encode_dynamic_feat`.
    """
    start_label = str(ts.index[0])
    target_values = encode_target(ts)
    result = {"start": start_label, "target": target_values}

    optional_fields = (
        ("cat", cat, lambda value: value),
        ("dynamic_feat", dynamic_feat, encode_dynamic_feat),
    )
    for key, value, encoder in optional_fields:
        if value is None:
            continue
        result[key] = encoder(value)
    return result

def series_to_jsonline(ts, cat=None, dynamic_feat=None):
    """Serialise one time series (see `series_to_obj`) to a JSON string."""
    record = series_to_obj(ts, cat, dynamic_feat)
    return json.dumps(record)

In [3]:
# From ???
class DeepARPredictor(sagemaker.predictor.RealTimePredictor):
    """Predictor that builds JSON forecast requests and decodes quantile responses.

    `set_prediction_parameters` must be called before `predict`, because both
    the forecast start time and the forecast index are derived from `freq`
    and `prediction_length`.
    """

    def set_prediction_parameters(self, freq, prediction_length):
        """Set the time frequency and prediction length parameters. This method **must** be called
        before being able to use `predict`.

        Parameters:
        freq -- string indicating the time frequency (a pandas frequency alias)
        prediction_length -- integer, number of predicted time points

        Return value: none.
        """
        self.freq = freq
        self.prediction_length = prediction_length

    def predict(self, ts, cat=None, dynamic_feat=None,
                encoding="utf-8", num_samples=100, quantiles=("0.1", "0.5", "0.9")):
        """Requests the prediction for the time series listed in `ts`, each with the (optional)
        corresponding category listed in `cat`.

        Parameters:
        ts -- list of `pandas.Series` objects, the time series to predict; each must have a
              datetime index because the forecast start is derived from its last entry
        cat -- list of integers (default: None)
        dynamic_feat -- table of dynamic features (default: None); the same table is attached
                        to every series in `ts`
        encoding -- string, encoding to use for the request (default: "utf-8")
        num_samples -- integer, number of samples to compute at prediction time (default: 100)
        quantiles -- sequence of strings specifying the quantiles to compute
                     (default: "0.1", "0.5", "0.9")

        Return value: list of `pandas.DataFrame` objects, each containing the predictions
        """
        # The forecast begins one full period of `self.freq` after the last
        # observation (a fixed one-hour step is only right for hourly data).
        step = pd.tseries.frequencies.to_offset(self.freq)
        prediction_times = [x.index[-1] + step for x in ts]

        req = self.__encode_request(ts, cat, dynamic_feat, encoding, num_samples, quantiles)
        res = super(DeepARPredictor, self).predict(req)
        return self.__decode_response(res, prediction_times, encoding)

    def __encode_request(self, ts, cat, dynamic_feat, encoding, num_samples, quantiles):
        """Serialise the series and the sampling configuration into an encoded JSON request body."""
        instances = [
            series_to_obj(series, cat[k] if cat else None, dynamic_feat)
            for k, series in enumerate(ts)
        ]

        configuration = {
            "num_samples": num_samples,
            "output_types": ["quantiles"],
            # Copied into a list so any sequence type serialises as a JSON array.
            "quantiles": list(quantiles),
        }
        http_request_data = {"instances": instances, "configuration": configuration}
        return json.dumps(http_request_data).encode(encoding)

    def __decode_response(self, response, prediction_times, encoding):
        """Turn the JSON response into one quantile DataFrame per requested series.

        The k-th frame is indexed by `self.prediction_length` timestamps spaced
        by `self.freq`, starting at `prediction_times[k]`.
        """
        response_data = json.loads(response.decode(encoding))
        list_of_df = []
        for k, first_timestamp in enumerate(prediction_times):
            prediction_index = pd.date_range(
                start=first_timestamp, freq=self.freq, periods=self.prediction_length
            )
            list_of_df.append(
                pd.DataFrame(data=response_data['predictions'][k]['quantiles'], index=prediction_index)
            )
        return list_of_df

NameError: name 'sagemaker' is not defined

In [5]:
# Read the weekly series and the test split from CSV; the `date` column is
# parsed as datetimes and the first column of each file becomes the index.
data = pd.read_csv(
    filepath_or_buffer="D:/3. Projects/AWS Sagemaker/practice/S3/sagemaker-tutorial-rnd/DeepAR/data_1w.csv",
    index_col=0,
    parse_dates=['date'],
)
test = pd.read_csv(
    filepath_or_buffer="D:/3. Projects/AWS Sagemaker/practice/S3/sagemaker-tutorial-rnd/DeepAR/test_climate.csv",
    index_col=0,
    parse_dates=['date'],
)

In [7]:
data

Unnamed: 0_level_0,meantemp,humidity,midyear
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-06,7.705556,84.077778,0.0
2013-01-13,12.343537,65.145238,0.0
2013-01-20,13.642857,81.663265,0.0
...,...,...,...
2016-12-18,17.871140,61.808297,0.0
2016-12-25,17.123006,66.346011,0.0
2017-01-01,15.085173,82.139204,0.0


In [8]:
test

Unnamed: 0_level_0,meantemp,humidity,midyear
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-07-03,32.289626,69.560204,1.0
2016-07-10,32.473810,68.526190,1.0
2016-07-17,29.991071,81.053571,1.0
...,...,...,...
2016-12-18,17.871140,61.808297,0.0
2016-12-25,17.123006,66.346011,0.0
2017-01-01,15.085173,82.139204,0.0


In [40]:
# Prepare train and test target variables.
# The boundaries are plain timestamps: the `freq` argument of `pd.Timestamp`
# was removed in pandas 2.0 and is not needed for label-based slicing.
start = pd.Timestamp("2013-01-06")  # start time is the first row in the dataset
end_train = pd.Timestamp("2015-12-31")
end_val = pd.Timestamp("2016-06-30")

# Target values as plain lists; label slices include both end points.
train_target = data.loc[start:end_train, "meantemp"].to_list()
test_target = data.loc[start:end_val, "meantemp"].to_list()

# Prepare train and test dynamic features
features = ["humidity", "midyear"]
val_features = data.loc[start:end_val, features]

In [30]:
val_features

Unnamed: 0_level_0,humidity,midyear
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-06,84.077778,0.0
2013-01-13,65.145238,0.0
2013-01-20,81.663265,0.0
...,...,...
2016-06-12,46.029592,1.0
2016-06-19,50.665901,1.0
2016-06-26,58.358660,1.0


In [None]:
# Forecast the validation window: the 26 weekly steps that follow the training period
deepar_predictor = DeepARPredictor(
    endpoint="deepar-climate",
    sagemaker_session=sagemaker.Session(),
    content_type="application/json"
)
prediction_length = 26
# "W" is the canonical spelling of the weekly frequency alias.
deepar_predictor.set_prediction_parameters(freq="W", prediction_length=prediction_length)

# `predict` takes a list of pandas Series and reads each series' index, so the
# date-indexed training slice is passed instead of the plain list `train_target`.
pred_val = deepar_predictor.predict(
    [data.loc[start:end_train, "meantemp"]],
    cat=None,
    dynamic_feat=val_features
)

In [None]:
# Display the prediction result
pred_val

In [None]:
# Save the result


### Forecast the test data

In [None]:
# `predict` takes a list of pandas Series and reads each series' index, so the
# date-indexed slice is passed instead of the plain list `test_target`.
# The dynamic features extend `prediction_length` weeks past `end_val` so that
# they also cover the forecast horizon.
pred_test = deepar_predictor.predict(
    [data.loc[start:end_val, "meantemp"]],
    cat=None,
    dynamic_feat=data.loc[start:end_val + datetime.timedelta(weeks=prediction_length), features]
)

In [None]:
# Display the prediction result
pred_test

In [None]:
# Save the result


Delete the endpoint if it is not needed anymore.