In [1]:
import numpy as np
import json, requests
import pandas as pd
import random
import cmlapi
import cdsw
import os
import time
from sklearn.metrics import root_mean_squared_error

In [2]:
import uuid

In [3]:

# this will be run as a job 

# if os.environ.get("MODEL_NAME") == "":
#     os.environ["MODEL_NAME"] = "lstm"
# if os.environ.get("PROJECT_NAME") == "":
#     os.environ["PROJECT_NAME"] = "SDWAN"
    
window = 24

# function(s)
def dataframe_to_json(df):
    """Create a dictionary that will later be converted to a JSON object
    ensure that 
    """
    data = {
        'rx_gbs': df['rx_gbs'].iloc[-(window+1):].tolist(),
        'tx_gbs': df['tx_gbs'].iloc[-(window+1):].tolist(),
        'time': [df['time'].iloc[-1]],  # Only the last time entry
        'link': [df['link'].iloc[-1]],  # Only the last link entry
        'location': [df['location'].iloc[-1]]  # Only the last location entry
    }
    
    # build embedded dictionary step 1
    request_dict = {"request":data}

    # access key will be end point specific
    BackDict = {"accessKey":model_key}
    BackDict.update(request_dict)
    request_dict=BackDict
    
    return request_dict

# read data frame (or table data)
df = pd.read_csv('data/simple_synthetic_data.csv')

# sort data by time 
df.sort_values(by=['time'],inplace=True)

In [4]:
df.shape

(721, 5)

In [5]:
##########################################################################
### CML Model API 
# model parameter configuration
model_name = "LSTM-2"
#os.environ["MODEL_NAME"]
project_name = "SDWAN"
#os.environ["PROJECT_NAME"]
client = cmlapi.default_client(url=os.getenv("CDSW_API_URL").replace("/api/v1", ""), cml_api_key=os.getenv("CDSW_APIV2_KEY"))
target_model = client.list_all_models(search_filter=json.dumps({"name": model_name}))
model_key = target_model.models[0].access_key
model_url = os.getenv("CDSW_API_URL").replace("https://", "https://modelservice.").replace("/api/v1", "/model")

# lines below get at the most recent deployment of the model to get deployment crn
proj_id = client.list_projects(search_filter=json.dumps({"name":project_name })).projects[0].id
mod_id = target_model.models[0].id
build_list = client.list_model_builds(project_id = proj_id, model_id = mod_id,sort='created_at')
build_id = build_list.model_builds[-1].id
model_deployments = client.list_model_deployments(project_id=proj_id,model_id=mod_id,build_id=build_id)
cr_number = model_deployments.model_deployments[0].crn
##########################################################################

In [8]:
job_name = 'check_model'

In [9]:
target_job = client.list_jobs(proj_id, search_filter=json.dumps({"name": job_name}))

In [10]:
target_job.jobs[0].id

'y3he-t8ld-281f-qwdz'

In [13]:
job_run = client.create_job_run(
            cmlapi.CreateJobRunRequest(),
            project_id = proj_id, 
            job_id = target_job.jobs[0].id
            )

In [7]:
horizon = 6

In [119]:


stride = 12        # determines frequecy of model inference
                   # how many observations between forecast requests
load_frequency = 2 # ratio of number of observations between load jobs
                   # number of observations between forecast requests
                   # e.g., ground truth job every 24 hours, forecast request every 12, therefore load_frequency is 2 
#load_lag = 1       # number of observation sets to load each load
                   # this will determine heap size
    
m_window = 10      # monitoring window size 

# code below slides through new data

In [120]:
# offset = horizon + (df.shape[0]-horizon)%window

In [121]:
# #for j,i in enumerate(range(0,df.shape[0]-(window),stride)):
# for j,i in enumerate(range(0,df.shape[0]-(horizon) - (offset),stride)):
# #for j,i in enumerate(range(0,df.shape[0]-(horizon)-(window),stride)):
#     temp_df = df.iloc[i:window+1+i,:] # 
#     #print(i)
#     # convert input to json

In [122]:
for j,i in enumerate(range(0,df.shape[0]-(horizon)-(window),stride)):
    temp_df = df.iloc[i:window+1+i,:] # 
    if j == len(range(0, df.shape[0] - (horizon)-(window), stride)) - 1:
        print("This is the last iteration",i)

This is the last iteration 684


In [123]:
df.shape[0]-(horizon)-(window)

691

In [124]:
(691//12)*12

684

In [125]:
print(i)

684


In [126]:
df.shape[0]-(horizon)-(window)

691

In [127]:
response_labels_sample = []
for j,i in enumerate(range(0,df.shape[0]-(horizon)-(window),stride)):
    temp_df = df.iloc[i:window+1+i,:] # 
    # convert input to json
    request_dict = dataframe_to_json(temp_df)
    
    # response = requests.post(model_url, data=json.dumps(request_dict), headers={'Content-Type': 'application/json'})
    # #time.sleep(1)
    
    start_ts = pd.to_datetime(temp_df.time[:1].values[0]) + pd.Timedelta('1H')  # Start from the next timestamp, 1 hour later
    future_ts = pd.date_range(start=start_ts, periods=horizon, freq='1H')  # Generate timestamps in 1-hour increments
    
    horizon_time = [time.isoformat() for time in future_ts]
    
    rx_rand = [random.randint(5, 10) for _ in range(6)]
    tx_rand = [random.randint(5, 10) for _ in range(6)]

    random_id = str(uuid.uuid4())
    
    new_item = {
        "uuid": random_id,
        "rx_bytes_forecast": rx_rand,
        "tx_bytes_forecast": tx_rand,
        "horizon_time": horizon_time,
        "timestamp_ms": int(round(time.time() * 1000)),
        "rx_gbs_values": None,
        "tx_gbs_values": None
    }
    response_labels_sample.append(new_item)

# Load Ground Truth phase
    if (j + 1) % load_frequency == 0:
        load_flag = True
        offset = load_frequency
    elif j == len(range(0, df.shape[0] - (horizon) - (window), stride)) - 1:
        load_flag = True
        offset = (j + 1) % load_frequency
    else:
        load_flag = False

    if load_flag:
        #print(f"Iteration {j+1}: Checking load frequency condition")
        recent_items = response_labels_sample[-offset:]  # Get last offset elements
        #print(f"Recent items (last {load_frequency}): {recent_items}")

        for val in recent_items:
            # Extract the horizon times for this entry
            horizon_times = val['horizon_time']
            #print(f"UUID: {val['uuid']}, Horizon times: {horizon_times}")

            # Filter the DataFrame rows where the 'time' column matches any of the horizon times
            filtered_horizon_times = [t.replace('T', ' ') for t in horizon_times]
            matched_rows = df[df['time'].isin(filtered_horizon_times)]
            #print(f"Matched rows for horizon times {filtered_horizon_times}: {matched_rows}")

            # Extract 'rx_gbs' and 'tx_gbs' values for these rows
            rx_gbs_values = matched_rows['rx_gbs'].tolist()
            tx_gbs_values = matched_rows['tx_gbs'].tolist()

            # Update the corresponding item in response_labels_sample
            for item in response_labels_sample:
                if item['uuid'] == val['uuid']:
                    item['rx_gbs_values'] = rx_gbs_values
                    item['tx_gbs_values'] = tx_gbs_values
                    break  # Exit the loop once the matching item is found and updated
        
            
#     if (j+1)% m_window == 0:
#         m_recent_items = response_labels_sample[-m_window:]  # Get last m_window elements

#         print('length of m recent items', len(m_recent_items))
#         print('iteration',j)
        
#         start_timestamp_ms = m_recent_items[0]["timestamp_ms"]
#         end_timestamp_ms = m_recent_items[-1]["timestamp_ms"]
        
#         # Aggregate all timestamps and predictions from recent items
#         all_horizon_times = []
#         all_rx_predictions = []
#         all_tx_predictions = []

#         for item in m_recent_items:
#             all_horizon_times.extend(item['horizon_time'])
#             all_rx_predictions.extend(item['rx_bytes_forecast'])
#             all_tx_predictions.extend(item['tx_bytes_forecast'])
            
#         # Filter rows that match any of the collected timestamps
#         filtered_all_horizon_times = [t.replace('T', ' ') for t in all_horizon_times]
#         matched_rows = df[df['time'].isin(filtered_all_horizon_times)]
        
#         # Get actual 'rx_gbs' and 'tx_gbs' values
#         actual_rx = matched_rows['rx_gbs'].values #.tolist()
#         actual_tx = matched_rows['tx_gbs'].values #.tolist()
        
#         # convert predictions to np array
#         all_rx_predictions = np.array(all_rx_predictions)
#         all_tx_predictions = np.array(all_tx_predictions)
        
#         # simplified RMSE calc ' please update as required'
#         rmse_rx = root_mean_squared_error(actual_rx,all_rx_predictions)
#         rmse_tx = root_mean_squared_error(actual_tx,all_tx_predictions)

#         rmse = (rmse_rx + rmse_tx)/2

#         cdsw.track_aggregate_metrics(
#                 {"rmse": rmse},
#                 start_timestamp_ms,
#                 end_timestamp_ms,
#                 model_deployment_crn=cr_number,
#             )

In [128]:
# del(response_labels_sample)
# del(response_df)
print(j)

57


In [129]:
response_df = pd.json_normalize(response_labels_sample)

In [130]:
response_df.tail()

Unnamed: 0,uuid,rx_bytes_forecast,tx_bytes_forecast,horizon_time,timestamp_ms,rx_gbs_values,tx_gbs_values
53,5e31ee0b-f4ed-4838-8bd1-042f9d50227b,"[6, 10, 9, 5, 9, 5]","[5, 8, 7, 9, 8, 6]","[2024-05-27T13:00:00, 2024-05-27T14:00:00, 202...",1716249392085,"[0.616966009, 0.371091108, 11.169648003, 1.099...","[0.0, 1.965731814, 9.898407986, 2.954697861, 7..."
54,0cd2f75c-30c3-48a2-9641-89bc1aa0b597,"[7, 6, 5, 5, 6, 10]","[6, 5, 8, 6, 6, 8]","[2024-05-28T01:00:00, 2024-05-28T02:00:00, 202...",1716249392088,"[7.513855262, 3.441458023, 0.741191405, 3.2789...","[6.478874172, 3.605046383, 1.710514911, 10.638..."
55,50f91d05-da9d-4192-bbaa-e4e1a184ea0f,"[10, 5, 8, 5, 10, 8]","[6, 6, 5, 8, 8, 9]","[2024-05-28T13:00:00, 2024-05-28T14:00:00, 202...",1716249392089,"[4.359039237, 4.299071228, 2.362060309, 4.2183...","[11.316515315, 0.0, 11.161147976, 3.894878814,..."
56,9fffeb68-ce62-490f-8f80-a4b6931d7cc9,"[7, 6, 6, 8, 8, 7]","[9, 10, 10, 5, 10, 9]","[2024-05-29T01:00:00, 2024-05-29T02:00:00, 202...",1716249392091,"[1.445444321, 25.41586434, 1.383489557, 2.1851...","[11.085551135, 0.0, 4.559966045, 2.033256693, ..."
57,1e0fd61d-0036-41cf-9bb3-b4ec7bb3beda,"[7, 10, 9, 9, 7, 6]","[8, 7, 7, 6, 9, 5]","[2024-05-29T13:00:00, 2024-05-29T14:00:00, 202...",1716249392092,"[7.88396159, 3.540820261, 1.110646899, 1.40179...","[14.283415862, 0.833865873, 2.033834142, 5.780..."


In [91]:
response_df[response_df.isna().any(axis=1)]

Unnamed: 0,uuid,rx_bytes_forecast,tx_bytes_forecast,horizon_time,timestamp_ms,rx_gbs_values,tx_gbs_values
690,e90f6363-f5eb-4fb8-8a4b-cc5c163bb87e,"[9, 8, 8, 5, 5, 6]","[10, 6, 7, 8, 10, 6]","[2024-05-29T19:00:00, 2024-05-29T20:00:00, 202...",1716247716038,,
