# DeepAR Deployment on Slugging data

### Author: Ridha Alkhabaz 
## Imports:

In [1]:
import warnings
# Silence the noisiest warning categories for the remainder of the notebook.
for _category in (DeprecationWarning, FutureWarning, UserWarning, RuntimeWarning):
    warnings.filterwarnings("ignore", category=_category)

In [8]:
import sys
import zipfile
from dateutil.parser import parse
import json
from random import shuffle
import random
import datetime
import os
import math
import boto3
import s3fs
import sagemaker
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from ipywidgets import IntSlider, FloatSlider, Checkbox

In [9]:
import tensorflow.python.keras.utils as generic_utils
from tensorflow.keras.models import Sequential, load_model

In [10]:
import boto3
import sagemaker
from sagemaker import get_execution_role

## Data formatting and cleaning:

In [27]:
# Convert the sequence of seconds into datetime array
def parser(x):
    """Convert an array of second offsets into pandas datetimes.

    Parameters
    ----------
    x : array-like exposing ``astype``
        Values (numbers or numeric strings) interpreted as seconds.

    Returns
    -------
    The result of ``pd.to_datetime(..., unit='s')`` applied to the values
    converted to float.
    """
    # Constant offset (in seconds) added to every value; zero leaves the
    # input untouched.
    now_seconds = 0
    # Use the builtin float: the ``np.float`` alias was removed in
    # NumPy 1.24 and raises AttributeError there.
    y = x.astype(float) + now_seconds
    z = pd.to_datetime(y, unit='s')
    return z

# Read the data from fname and optionally plot them
def read_data(fname, plot_data = False):
    """Load a time series from CSV, resample it to 1 s and normalize it.

    The first CSV column is taken as the time in seconds and becomes the
    index; columns 1..7 of the resulting array are treated as pressure
    readings and share one common normalization.

    Parameters
    ----------
    fname : str or path-like
        CSV file with a header row.
    plot_data : bool, optional
        When true, plot columns 1..7 against time and save the figure to
        'pressure_readings.pdf'.

    Returns
    -------
    tuple
        ``(data, scaler)``: the transformed 2-D array (time in column 0,
        left unscaled by setting its scale factor to 1) and the fitted
        scaler.
    """
    # Read the time series. The index is converted explicitly instead of via
    # ``parse_dates``/``date_parser``/``squeeze``: ``squeeze`` was removed in
    # pandas 2.0 and ``date_parser`` is deprecated.
    datats = pd.read_csv(fname, header=0, index_col=0)
    datats.index = pd.DatetimeIndex(parser(datats.index.values), name=datats.index.name)

    headers = [datats.index.name, *datats.columns.values]

    # Resample the data using a uniform one-second timestep
    datats = datats.resample('s').mean()
    datats = datats.interpolate(method='linear')

    # Convert data to numpy array
    data = datats.reset_index().values

    # Replace timestamps with seconds
    time_sec = np.array([stamp.timestamp() for stamp in data[:, 0]])
    data = np.c_[time_sec, data[:, 1:]]

    # Plot the pressure readings
    if plot_data:
        plt.plot(data[:, 0], data[:, 1:8])
        plt.xlabel(headers[0])
        plt.ylabel('Pressure (bar)')
        # Build legend entries of the form $p(x_{i}<snippet>)$, where the
        # snippet is the slice [-8:-2] of each pressure column header.
        snippets = [w[-8:-2] for w in headers[1:8]]
        legend_labels = ['$p(x_{%d}%s)$' % (i, snippet)
                         for i, snippet in enumerate(snippets, start=1)]
        plt.legend(legend_labels)
        plt.title('Distributed pressure readings')
        # Save before showing: some backends discard the figure once it has
        # been shown, which would leave an empty PDF.
        plt.savefig('pressure_readings.pdf')
        plt.show(block=False)

    # Normalize the data
    # NOTE(review): MinMaxScaler is not imported in the visible cells;
    # presumably sklearn.preprocessing.MinMaxScaler - confirm and import it.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler = scaler.fit(data)
    scaler.scale_[0] = 1    # Do not normalize time

    # Apply the same normalization to all pressure readings
    pind = list(range(1, 8))  # Indices of pressure readings
    pmin = scaler.data_min_[pind].min()
    pmax = scaler.data_max_[pind].max()
    scaler.scale_[pind] = ((scaler.feature_range[1] - scaler.feature_range[0]) / (pmax - pmin))
    scaler.min_[pind] = scaler.feature_range[0] - pmin * scaler.scale_[pind]

    data = scaler.transform(data)
    return data, scaler

# Generate Nts sample input and output sequences from time series in data.
def generate_samples(data, features, Nts, step, length, shift):
    """Cut Nts fixed-length windows out of a 2-D time-series array.

    Window k starts at row ``shift + k * step`` and spans ``length`` rows.

    Returns
    -------
    tuple
        ``(X, tX)`` where ``X`` has shape ``(Nts, length, len(features))``
        and holds the selected feature columns of each window, and ``tX``
        has shape ``(Nts, length)`` and holds column 0 of each window.
    """
    n_features = len(features)
    X = np.zeros((Nts, length, n_features))
    tX = np.zeros((Nts, length))

    for k in range(Nts):
        first_row = shift + k * step
        window = data[first_row : first_row + length]
        X[k] = window[:, features]
        tX[k] = window[:, 0]

    return X, tX

# X, tX, Y, tY assumed to be normalized to [0, 1]
def visualize(X, tX, Y, tY):
    """Plot input and output sequences and mark the time span of each.

    X, tX, Y, tY are assumed to be normalized to [0, 1].

    Parameters
    ----------
    X, Y : arrays indexed as ``[sequence, time step, feature]``
        Input and output sequences; inputs are drawn in blue, outputs in red.
    tX, tY : arrays indexed as ``[sequence, time step]``
        Time values matching ``X`` and ``Y``.
    """
    Ns = X.shape[0]    # Number of sequences
    Nif = X.shape[2]    # Number of input features
    Nof = Y.shape[2]    # Number of output features

    # Geometry of the span bars, stacked around y = 0.5
    squeeze = 0.9
    barheight = squeeze * np.minimum(1 / Ns, 0.1)
    interbar = 0.1 * barheight
    starty = 0.5 + (barheight + interbar) * Ns / 2

    # Use the imported ``plt`` alias; the name ``pyplot`` is never imported
    # in the visible cells.
    _, ax = plt.subplots(1, sharex=True)
    ax.set_xlim(0, 1)   # Fix the x range to (0, 1)

    # Plot input and output sequences
    for i in range(Ns):
        for j in range(Nif):
            ax.plot(tX[i, :], X[i, :, j], 'b')
        for j in range(Nof):
            ax.plot(tY[i, :], Y[i, :, j], 'r')

    # Add bars to indicate the span of data sequences. axhspan takes
    # xmin/xmax as axes fractions, which match the data values here only
    # because the x range is fixed to (0, 1).
    startybar = starty
    for i in range(Ns):
        endybar = startybar - barheight
        ax.axhspan(startybar, endybar, xmin=min(tX[i, :]), xmax=max(tX[i, :]), facecolor='g', alpha=0.5)  # Input
        ax.axhspan(startybar, endybar, xmin=min(tY[i, :]), xmax=max(tY[i, :]), facecolor='r', alpha=0.5)  # Output
        startybar = endybar - interbar

    ax.set_title('Data sequences', fontweight='bold')
    plt.show(block=False)



In [28]:
# read_data returns a (data, scaler) tuple, so `dataset` holds both the
# transformed array and the fitted scaler, not the array alone.
dataset = read_data('riser_pq_uni.csv')

In [29]:
# NOTE(review): in the recorded run this call failed with the ValueError shown
# in the cell output; per that message, the local AWS configuration must
# specify a region supported by SageMaker before the session can be created.
sagemaker_session = sagemaker.Session()

ValueError: Must setup local AWS configuration with a region supported by SageMaker.