# Calling Model and Make Predictions
This Notebook will call the trained model and make predictions. The Following data sources are used: <br>
processed regression features data <br>
processed time series data <br>

In [30]:
%matplotlib inline

import sys
from urllib.request import urlretrieve
import zipfile
from dateutil.parser import parse
import json
from random import shuffle
import random
import datetime
import os

import pickle
import boto3
import s3fs
import sagemaker
import numpy as np
import pandas as pd
from sagemaker import get_execution_role
from sagemaker.predictor import csv_serializer

In [2]:
sagemaker_session = sagemaker.Session()
s3_bucket = 'wmg-streaming-prediction-dev/streaming_data_processed' 
s3_prefix = 'ts_data'   
role = sagemaker.get_execution_role()        

region = sagemaker_session.boto_region_name

s3_data_path = "s3://{}/{}/data".format(s3_bucket, s3_prefix)
s3_output_path = "s3://{}/{}/output".format(s3_bucket, s3_prefix)

In [5]:
# if the song's first week stream is bigger than 10k,
# or want to use the regression model:
# run this cell to deploy ts model
image_name = '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:1'
model_artifacts = 's3://wmg-streaming-prediction-dev/xgboost-regression-results-updated/wmg-streaming-updated-025-f393d468/output/model.tar.gz'
trained_reg_model = sagemaker.model.Model(
    model_data= model_artifacts,
    image=image_name,
    role=role)  

trained_reg_model.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


---------------!

In [7]:
# read in processed test data
test_data = pd.read_csv('validation_updated.csv') # replace the path with your processed data path

In [15]:
end_point_name = 'xgboost-2020-08-17-00-23-27-595' # replace the endpoint with the endpoint created from last step deloyment (check sagemaker console/endpoint)
predictor_reg = sagemaker.predictor.RealTimePredictor(
    endpoint=end_point_name,
    content_type='csv'
    )

In [16]:
predictor_reg.content_type = 'text/csv'
predictor_reg.serializer = csv_serializer
predictor_reg.deserializer = None

def predict(data, rows=500):
    split_array = np.array_split(data, int(data.shape[0] / float(rows) + 1))
    predictions = ''
    for array in split_array:
        predictions = ','.join([predictions, predictor_reg.predict(array).decode('utf-8')])

    return np.fromstring(predictions[1:], sep=',')

predictions_reg = predict(test_data.values[:, 1:])

In [18]:
predictions_reg[0]

956.836425781

In [23]:
# helper function for TS model
class DeepARPredictor(sagemaker.predictor.RealTimePredictor):
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, content_type=sagemaker.content_types.CONTENT_TYPE_JSON, **kwargs)
        
    def predict(self, ts, cat=None, dynamic_feat=None, 
                num_samples=100, return_samples=False, quantiles=["0.1", "0.5", "0.9"]):
        """Requests the prediction of for the time series listed in `ts`, each with the (optional)
        corresponding category listed in `cat`.
        
        ts -- `pandas.Series` object, the time series to predict
        cat -- integer, the group associated to the time series (default: None)
        num_samples -- integer, number of samples to compute at prediction time (default: 100)
        return_samples -- boolean indicating whether to include samples in the response (default: False)
        quantiles -- list of strings specifying the quantiles to compute (default: ["0.1", "0.5", "0.9"])
        
        Return value: list of `pandas.DataFrame` objects, each containing the predictions
        """
        prediction_time = ts.index[-1] + 1*datetime.timedelta(days =1)
        quantiles = [str(q) for q in quantiles]
        req = self.__encode_request(ts, cat, dynamic_feat, num_samples, return_samples, quantiles)
        res = super(DeepARPredictor, self).predict(req)
        return self.__decode_response(res, ts.index.freq, prediction_time, return_samples)
    
    def __encode_request(self, ts, cat, dynamic_feat, num_samples, return_samples, quantiles):
        instance = series_to_dict(ts, cat if cat is not None else None, dynamic_feat if dynamic_feat else None)

        configuration = {
            "num_samples": num_samples,
            "output_types": ["quantiles", "samples"] if return_samples else ["quantiles"],
            "quantiles": quantiles
        }
        
        http_request_data = {
            "instances": [instance],
            "configuration": configuration
        }
        
        return json.dumps(http_request_data).encode('utf-8')
    
    def __decode_response(self, response, freq, prediction_time, return_samples):
        # we only sent one time series so we only receive one in return
        # however, if possible one will pass multiple time series as predictions will then be faster
        predictions = json.loads(response.decode('utf-8'))['predictions'][0]
        prediction_length = len(next(iter(predictions['quantiles'].values())))
        #print(predictions)
        prediction_index = pd.DatetimeIndex(pd.date_range(start=prediction_time, freq=freq, periods=prediction_length))        
        if return_samples:
            dict_of_samples = {'sample_' + str(i): s for i, s in enumerate(predictions['samples'])}
        else:
            dict_of_samples = {}
        return pd.DataFrame(data={**predictions['quantiles'], **dict_of_samples}, index=prediction_index)

    def set_frequency(self, freq):
        self.freq = freq
        
def encode_target(ts):
    return [x if np.isfinite(x) else "NaN" for x in ts]        

def series_to_dict(ts, cat=None, dynamic_feat=None):
    """Given a pandas.Series object, returns a dictionary encoding the time series.

    ts -- a pands.Series object with the target time series
    cat -- an integer indicating the time series category

    Return value: a dictionary
    """
    obj = {"start": str(ts.index[0]), "target": encode_target(ts)}
    if cat is not None:
        obj["cat"] = cat
    if dynamic_feat is not None:
        obj["dynamic_feat"] = dynamic_feat        
    return obj

In [24]:
# if the song's first week stream is less than 10k,
# or want to use the Time series model to look into further future:
# run this cell to deploy ts model
image_name = sagemaker.amazon.amazon_estimator.get_image_uri(region, "forecasting-deepar", "latest")
model_artifacts = 's3://wmg-streaming-prediction-dev/streaming_data_processed/ts_data-new-features/data/wmg-streaming-deepar-014-e2e6ec13/output/model.tar.gz'
trained_ts_model = sagemaker.model.Model(
    model_data= model_artifacts,
    image=image_name,
    role=role)  

trained_ts_model.deploy(initial_instance_count=1, 
                        instance_type='ml.c4.xlarge')

'get_image_uri' method will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.
Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


---------------!

In [25]:
# set ts model parameter
freq = 'D'
prediction_length = 7 

In [28]:
## replace the endpoint with the endpoint created from last step deloyment (check sagemaker console/endpoint)
endpoint_name_ts = 'forecasting-deepar-2020-08-17-00-45-50-348' 
predictor_ts = DeepARPredictor(
    endpoint = endpoint_name_ts,
    sagemaker_session=sagemaker_session
)
#predictor_ts.set_prediction_parameters(freq, prediction_length)

In [31]:
#read in test file
with open("ts_cut_off.txt", "rb") as fp:   # Unpickling
    timeseries_cutoff = pickle.load(fp)
with open("ts_cat_cut_off.txt", "rb") as fp:   # Unpickling
    ts_cat_cutoff = pickle.load(fp)

In [32]:
#generate predictions
pred_test_3408 = predictor_ts.predict(ts=timeseries_cutoff[3408][:3], cat =ts_cat_cutoff[3408].tolist(),  quantiles=[0.10, 0.5, 0.90])

In [33]:
pred_test_3408

Unnamed: 0,0.1,0.9,0.5
2019-06-24,56.030914,112.300713,83.285385
2019-06-25,45.667892,119.654633,75.221924
2019-06-26,23.479815,108.269173,67.669724
2019-06-27,22.158871,110.212738,57.509518
2019-06-28,19.131807,90.380646,55.812595
2019-06-29,15.017368,84.437759,50.351746
2019-06-30,12.908667,80.282097,36.983978


In [7]:
## delete all endpoints if you are not using them.
# you can also do this through sagemaker console/endpoints
import boto3
client = boto3.client('sagemaker')
client.delete_endpoint(EndpointName = endpoint_name_ts)
client.delete_endpoint(EndpointName = end_point_name)

NameError: name 'endpoint_name_ts' is not defined