# Essential imports 

In [None]:
import requests
import pandas as pd
import json
import numpy as np
import time
import os
from dotenv import load_dotenv

# Prepare the *headers* for the API request call with credentials

In [None]:

# Let python-dotenv populate the process environment (typically from a local
# .env file) so the credentials never have to be hard-coded in the notebook.
load_dotenv()

# os.environ.get yields None when a variable is not set.
my_id = os.environ.get('IBM_CLIENT_ID')
my_secret = os.environ.get('IBM_CLIENT_SECRET')

# Headers attached to every request made further down: ask for a JSON reply
# and identify the caller with the client id / secret pair.
headers = {"accept": "application/json"}
headers["X-IBM-Client-Id"] = my_id
headers["X-IBM-Client-Secret"] = my_secret

# Set the service URLs and the data file variable

In [None]:
# Root of the API gateway; every endpoint path below is appended to it.
base_url = "https://api.ibm.com/"

# Common path prefix of the time series classification service, followed by
# the individual endpoints built on top of it.
tsc_service = "timeseriesclass/run/timeseriesclassification/"
health_check = "".join([tsc_service, "health_check"])
nnrocket_svc = "".join([tsc_service, "nnrocket"])
status_check_svc = "".join([tsc_service, "status/"])

# CSV file that is validated locally and then uploaded to the service.
data_file = "sample01.csv"

# Preliminary *health check* to make sure your credentials are OK and you can access the service!

In [None]:

# Build the absolute health-check URL and echo it, so a wrong base URL or
# path is easy to spot.
full_health_check_url = base_url + health_check
print(full_health_check_url)

# requests waits indefinitely by default; an explicit timeout (seconds) keeps
# this cell from hanging forever when the gateway is unreachable.
response = requests.get(full_health_check_url, headers=headers, timeout=30)

# The body is printed unmodified, whether it reports success or an error, so
# that credential problems are visible right here.
print(response.text)

# Prepare *parameters* and *files* inputs for the HTTP request for accessing the NN Rocket service

In [None]:
# Request parameters for the NN Rocket service, kept as a tuple of
# (name, value) pairs. Every value is a string; list-valued settings
# ('target columns', 'categorical columns') are written as JSON arrays.
params = tuple({
    'time column': 'Time',
    'time string format': '%Y-%m-%d %H:%M:%S',
    'target columns': '["Value1","Value2","Value3","Value4","Value5"]',
    'categorical columns': '["Category1","Category2","Category3"]',
    'label column': 'Label',
    'snapshot column': 'Snapshot',
    'train test split': '0.5',
    'result type': 'accuracy',
}.items())

# File payload for the upload: the CSV opened in binary mode under the
# 'data file' field. The handle stays open because the POST cell reads from it.
files = {'data file': open(data_file, mode="rb")}

# Validate your CSV data file with various checks prior to making the HTTP request
> ## The *validator* function below makes several basic sanity checks to help you avoid surprises

In [None]:

# Index the (name, value) pairs by name so single settings can be looked up.
params_dict = {name: value for name, value in params}

# Plain string settings needed by the local validator.
time_column, time_format, label_column, snapshot_column = (
    params_dict[key]
    for key in ('time column', 'time string format', 'label column', 'snapshot column')
)

# 'target columns' is stored as a JSON array string; decode it into a list.
target_columns = json.loads(params_dict['target columns'])

def validator(data_file, time_column, time_format, target_columns, label_column, snapshot_column):
    """Run basic sanity checks on the CSV before it is sent to the service.

    Args:
        data_file: Path (or file-like object) handed to ``pandas.read_csv``.
        time_column: Name of the column holding the timestamps.
        time_format: ``strftime``-style format the timestamps must follow.
        target_columns: Names of the columns that must exist and be numeric.
        label_column: Name of the column holding the class label.
        snapshot_column: Name of the column identifying each snapshot.

    Returns:
        The parsed DataFrame, with ``time_column`` converted to datetimes.

    Raises:
        FileNotFoundError: The CSV file does not exist.
        pandas.errors.EmptyDataError: The CSV has no columns or no data rows.
        pandas.errors.ParserError: The CSV could not be parsed.
        ValueError: A required column is missing, a target column is not
            numeric, the timestamps do not match ``time_format``, a snapshot
            contains duplicate timestamps, or a label occurs in fewer than
            five snapshots.
    """
    # The pandas exception classes live in pd.errors; referring to them by
    # bare name would itself fail with NameError while handling the error.
    try:
        userdf = pd.read_csv(data_file)
    except FileNotFoundError as err:
        raise FileNotFoundError("Submitted csv file not found.") from err
    except pd.errors.EmptyDataError as err:
        raise pd.errors.EmptyDataError("There is no data in the submitted csv") from err
    except pd.errors.ParserError as err:
        raise pd.errors.ParserError("There is an issue parsing the submitted csv") from err

    # A header-only file parses fine but leaves nothing to validate; report it
    # the same way as a completely empty file.
    if userdf.empty:
        raise pd.errors.EmptyDataError("There is no data in the submitted csv")

    # check column headings
    column_names = userdf.columns
    for required in (time_column, label_column, snapshot_column):
        if required not in column_names:
            raise ValueError('{}  not found in submitted csv file'.format(required))

    # check that every target column exists and is numeric
    for t in target_columns:
        if t not in column_names:
            raise ValueError('{}  not found in submitted csv file'.format(t))
        # A column containing any non-numeric text is read as object dtype,
        # so a dtype test is enough and avoids np.isreal's documented
        # unreliable behaviour on strings.
        if not pd.api.types.is_numeric_dtype(userdf[t]):
            raise ValueError('Non numeric values found in column {}'.format(t))

    # check time formatting
    try:
        userdf[time_column] = pd.to_datetime(userdf[time_column], format=time_format)
    except ValueError as e:
        raise ValueError('Specified time column not recognized: {}'.format(e)) from e

    # check distinct timestamps: within each snapshot the number of unique
    # timestamps must equal the number of rows
    per_snapshot = userdf[[snapshot_column, time_column]].groupby([snapshot_column])
    uniquetscount = per_snapshot.nunique()[time_column].tolist()
    numtscount = per_snapshot.size().tolist()
    if uniquetscount != numtscount:
        raise ValueError('Duplicate timestamps detected in the csv')

    # check label column for single cases: collapse every snapshot to one
    # label value, then count how many snapshots carry each label.
    # NOTE(review): mean() only works for numeric labels - confirm that the
    # service does not expect string labels.
    groupbylabel = userdf[[snapshot_column, label_column]].groupby(snapshot_column).mean()
    labelid = groupbylabel[label_column]
    label_counts = labelid.value_counts()
    # No countable label at all is treated as insufficient data too, instead
    # of failing with an IndexError.
    if label_counts.empty or label_counts.min() < 5:
        raise ValueError('One or more labels have less than five snapshot instances and thus insufficient training sample')

    return userdf

# Validate the CSV locally before uploading it; the returned DataFrame is
# kept in user_df for inspection.
user_df = validator(
    data_file=data_file,
    time_column=time_column,
    time_format=time_format,
    target_columns=target_columns,
    label_column=label_column,
    snapshot_column=snapshot_column,
)

# Inspect data prior to sending it across

In [None]:
# Show the first five rows of the validated data as a quick visual check.
user_df.head(n=5)

# Make the HTTP request using the *requests* module
> ## It returns a *task_id*

In [None]:


# Build the absolute URL of the NN Rocket endpoint and echo it.
full_svc_url = base_url + nnrocket_svc
print(full_svc_url)

# Rewind the upload handle: after a first POST it sits at end-of-file, so
# re-running this cell would otherwise upload an empty file.
files['data file'].seek(0)

# requests waits indefinitely by default; the timeout (seconds) bounds how
# long the cell may block while waiting for the service.
response = requests.post(full_svc_url, headers=headers, params=params, files=files, timeout=120)

# Do not mistake an error body for a task id: show what the service said and
# stop on any 4xx/5xx status.
if not response.ok:
    print(response.text)
    response.raise_for_status()

# The body appears to be the task id wrapped in double quotes (the status
# cell strips '"' as well); remove surrounding whitespace and quotes.
task_id = response.text.strip().strip('"')
print(task_id)


# Make a status check call against the above returned *task_id* 

In [None]:

# Append the task id (without any double quotes) to the status endpoint.
full_status_check_url = base_url + status_check_svc + task_id.replace('"', '')
print(full_status_check_url)

# requests waits indefinitely by default; bound the wait with a timeout
# (seconds) so an unreachable service cannot hang the cell.
response = requests.get(full_status_check_url, headers=headers, timeout=30)
print(response.text)

# Check Again! Using the same *task_id* 

In [None]:
# Poll the same status URL again. The timeout (seconds) keeps the cell from
# hanging, since requests waits indefinitely by default.
response = requests.get(full_status_check_url, headers=headers, timeout=30)
print(response.text)