In [15]:
import pandas as pd
import numpy as np
from sklearn.metrics import root_mean_squared_error

#import time
import cdsw
import cmlapi
import os
import json, requests

In [2]:
# Forecast geometry shared by the cells below.
# `window` sizes the history slice sent to the model (window + 1 rows are sent);
# `horizon` is presumably the number of forecast steps returned — TODO confirm.
window, horizon = 24, 6

# simulating get predictions

In [33]:
# Load the synthetic data set (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='data/simple_synthetic_data.csv')

In [34]:
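# NOTE: 'time' is left as plain strings; the horizon lookup further down matches on string equality.
#df['time'] = pd.to_datetime(df['time'])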

In [35]:
# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,link,location,time,rx_gbs,tx_gbs
0,Verizon,US-DA3,2024-05-01 00:00:00,2.46522,9.689268
1,Verizon,US-DA3,2024-05-01 01:00:00,2.280575,11.636575
2,Verizon,US-DA3,2024-05-01 02:00:00,1.702963,3.688097
3,Verizon,US-DA3,2024-05-01 03:00:00,1.565919,10.631742
4,Verizon,US-DA3,2024-05-01 04:00:00,0.976097,4.585088


In [36]:
# function(s)
def dataframe_to_json(df, access_key=None, n_obs=None):
    """Build the request payload for the model endpoint from a slice of observations.

    The result is a plain dictionary intended to be serialised with
    ``json.dumps``. It carries the trailing ``n_obs`` values of ``rx_gbs`` and
    ``tx_gbs`` plus the ``time``, ``link`` and ``location`` of the final row,
    each wrapped in a single-element list.

    Parameters
    ----------
    df : pandas.DataFrame
        Observations with columns ``rx_gbs``, ``tx_gbs``, ``time``, ``link``
        and ``location``. The last row is treated as the most recent one.
    access_key : str, optional
        Access key of the target endpoint. When omitted, the notebook-level
        ``model_key`` is used (the original behaviour).
    n_obs : int, optional
        Number of trailing observations to include. When omitted,
        ``window + 1`` is used, based on the notebook-level ``window``
        (the original behaviour).

    Returns
    -------
    dict
        ``{"accessKey": <key>, "request": {...}}`` with ``accessKey`` first.

    Raises
    ------
    ValueError
        If ``n_obs`` is smaller than 1.
    IndexError
        If ``df`` has no rows.
    """
    if access_key is None:
        # access key is endpoint specific; fall back to the notebook global
        access_key = model_key
    if n_obs is None:
        n_obs = window + 1
    if n_obs < 1:
        # iloc[-0:] would silently return the whole frame instead of nothing
        raise ValueError("n_obs must be at least 1")

    data = {
        'rx_gbs': df['rx_gbs'].iloc[-n_obs:].tolist(),
        'tx_gbs': df['tx_gbs'].iloc[-n_obs:].tolist(),
        'time': [df['time'].iloc[-1]],  # Only the last time entry
        'link': [df['link'].iloc[-1]],  # Only the last link entry
        'location': [df['location'].iloc[-1]],  # Only the last location entry
    }

    # The access key goes first, followed by the embedded request dictionary.
    return {"accessKey": access_key, "request": data}

In [37]:
# model parameter configuration
model_name = "lstm-test2"
project_name = 'ibm_poc'

# Both the API client URL and the model-serving URL are derived from CDSW_API_URL.
cdsw_api_url = os.getenv("CDSW_API_URL")
client = cmlapi.default_client(
    url=cdsw_api_url.replace("/api/v1", ""),
    cml_api_key=os.getenv("CDSW_APIV2_KEY"),
)

# Look the model up by name; the first match supplies the access key and the id.
target_model = client.list_all_models(search_filter=json.dumps({"name": model_name}))
model_record = target_model.models[0]
model_key = model_record.access_key
model_url = (
    cdsw_api_url
    .replace("https://", "https://modelservice.")
    .replace("/api/v1", "/model")
)

# Walk project -> model -> build -> deployment to obtain the deployment CRN.
# The last build in the 'created_at'-sorted list is taken as the most recent one
# (assumes ascending sort order — TODO confirm against the cmlapi docs).
project_matches = client.list_projects(search_filter=json.dumps({"name":project_name }))
proj_id = project_matches.projects[0].id
mod_id = model_record.id
build_list = client.list_model_builds(project_id=proj_id, model_id=mod_id, sort='created_at')
build_id = build_list.model_builds[-1].id
model_deployments = client.list_model_deployments(
    project_id=proj_id,
    model_id=mod_id,
    build_id=build_id,
)
cr_number = model_deployments.model_deployments[0].crn

In [38]:
# Scheduling / monitoring knobs.
m_window = 10       # monitoring window size
load_frequency = 2  # (observations between load jobs) / (observations between forecast requests)
                    # e.g., ground truth job every 24 hours, forecast request every 12 -> load_frequency is 2
stride = 12         # determines frequency of model inference:
                    # how many observations between forecast requests
#load_lag = 1       # number of observation sets to load each load
                    # this will determine heap size
    

In [39]:
# pick a segment
# create a future ts
# match that future ts with the existing ts 'time' field

In [40]:
# Draw one (link, location) pair at random from the distinct pairs in the data.
sample_df = df[['link', 'location']].drop_duplicates().sample(n=1)
link_pick = sample_df['link'].iloc[0]
location_pick = sample_df['location'].iloc[0]
print(sample_df)

      link location
0  Verizon   US-DA3


In [41]:
# test_df holds only the rows for the single (link_pick, location_pick) combination
test_df = df.loc[df['link'].eq(link_pick) & df['location'].eq(location_pick)]

In [42]:
# pick a random contiguous segment of size window + 1 from the selected link/location
# randint's upper bound is exclusive, so add 1 to keep the final valid start position reachable
start_row = np.random.randint(0, len(test_df) - (window + 1) + 1)
# Slice test_df (not df): start_row is a position within test_df, and slicing df
# could return rows that belong to a different link/location.
random_segment = test_df.iloc[start_row:start_row + (window + 1)]

In [43]:
# Build the request payload (access key + observations) from the sampled segment.
request_dict = dataframe_to_json(df=random_segment)

In [45]:
# POST the payload to the model endpoint.
# The timeout keeps the cell from hanging indefinitely if the endpoint never answers
# (requests applies no timeout by default); raise_for_status surfaces HTTP errors here
# instead of as an opaque KeyError when the response body is parsed later.
r = requests.post(
    model_url,
    data=json.dumps(request_dict),
    headers={'Content-Type': 'application/json'},
    timeout=60,
)
r.raise_for_status()

In [46]:
# The forecast timestamps sit under response -> prediction -> time in the JSON reply.
response_body = r.json()
horizon_time = response_body['response']['prediction']['time']

In [48]:
# Display the forecast-horizon timestamps returned by the endpoint.
horizon_time

['2024-05-07T01:00:00',
 '2024-05-07T02:00:00',
 '2024-05-07T03:00:00',
 '2024-05-07T04:00:00',
 '2024-05-07T05:00:00',
 '2024-05-07T06:00:00']

In [49]:
# Check the column dtypes: 'time' is still object (string), not datetime.
df.dtypes

link         object
location     object
time         object
rx_gbs      float64
tx_gbs      float64
dtype: object

In [50]:
# The endpoint returns timestamps with a 'T' separator, whereas the 'time' column
# holds strings with a space, so normalise the separator before comparing.
times = list(map(lambda stamp: stamp.replace('T', ' '), horizon_time))

# Keep only the rows whose 'time' string matches one of the forecast steps
filtered_df = df.loc[df['time'].isin(times)]


In [51]:
# Display the observed rows that fall on the forecast-horizon timestamps.
filtered_df

Unnamed: 0,link,location,time,rx_gbs,tx_gbs
145,Verizon,US-DA3,2024-05-07 01:00:00,6.484178,1.660921
146,Verizon,US-DA3,2024-05-07 02:00:00,0.571595,11.119694
147,Verizon,US-DA3,2024-05-07 03:00:00,8.352953,2.791839
148,Verizon,US-DA3,2024-05-07 04:00:00,0.743763,3.162689
149,Verizon,US-DA3,2024-05-07 05:00:00,2.153168,4.5878
150,Verizon,US-DA3,2024-05-07 06:00:00,11.490688,4.855042


In [52]:
# Matching on the raw endpoint strings (with the 'T' separator) finds no rows,
# because the 'time' column uses a space separator instead.
df[df['time'].isin(horizon_time)]

Unnamed: 0,link,location,time,rx_gbs,tx_gbs
