## MLO - Single Concept

## Brain Name: 
## Brain Version: 

In this notebook, we will use the Bonsai Custom Assessments to analyze our results. To learn more about Bonsai custom assessments, use [these documents](https://docs.microsoft.com/en-us/bonsai/guides/assess-brain).

### Imports

In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import ast
import matplotlib.pyplot as plt
from matplotlib import animation
from IPython.display import Video
# import mplcursors
%matplotlib notebook

### Load Kqlmagic Extension

In [None]:
# (Re)load the Kqlmagic IPython extension and cap how many result rows are displayed.
%reload_ext Kqlmagic
%config Kqlmagic.display_limit = 5 #limiting the number of rows displayed (full rows will still be stored)

### Login to the log analytics workspace

In [None]:
# Connection settings referenced by name in the %kql connection string below.
# TODO: LOG_ANALYTICS_WORKSPACE_ID is left empty in this template and has to be filled in.
LOG_ANALYTICS_WORKSPACE_ID = "" 
ALIAS = 'MLO' # add your alias. Can be anything, but should not be empty
%kql loganalytics://code;workspace=LOG_ANALYTICS_WORKSPACE_ID;alias=ALIAS

## Import custom assessment data

In [None]:
# Identify which assessment run to query; all three values are used as filters in the KQL cell.
brain_name = "".lower() # provide brain name here
brain_version = 1 # provide brain version (a number; 1 is only a placeholder so that the cell parses)
assessment_name = "custom_assessment_1".lower()

In [None]:
%%kql
let _brain_name = brain_name;
let _brain_version = brain_version;
let _assessment_name = assessment_name;
// Select the matching episodes, inner-join their iteration rows on EpisodeId_g, then rename and parse the columns.
EpisodeLog_CL
  | where BrainName_s == _brain_name and BrainVersion_d == _brain_version and AssessmentName_s == _assessment_name
  | join kind=inner (
      IterationLog_CL
      | sort by Timestamp_t desc
  ) on EpisodeId_g
  | project 
      AssessmentName = AssessmentName_s,
      EpisodeId = EpisodeId_g,
      IterationIndex = IterationIndex_d,
      Timestamp = Timestamp_t,
      SimConfig = parse_json(SimConfig_s),
      SimState = parse_json(SimState_s),
      SimAction = parse_json(SimAction_s),
      Reward = Reward_d,
      CumulativeReward = CumulativeReward_d,
      GoalMetrics = parse_json(GoalMetrics_s),
      Terminal = Terminal_b,
      FinishReason = FinishReason_s,
      LessonIndex = LessonIndex_d,
      EpisodeType = EpisodeType_s
  | order by EpisodeId asc, IterationIndex asc


In [None]:
# Convert the KQL query result into a pandas dataframe.
# NOTE: `_kql_raw_result_` is not defined in this notebook's Python code; presumably Kqlmagic
# sets it when the %%kql cell above runs, so that cell has to be executed first.
assessment_data = _kql_raw_result_.to_dataframe()

In [None]:
def format_kql_logs(df: pd.DataFrame) -> pd.DataFrame:
    ''' Function to format a dataframe obtained from KQL query.
        Output format: flattens the nested columns [SimState, SimAction, SimConfig] so that
        every key becomes its own column, next to a fixed set of scalar columns.

        Parameters
        ----------
        df : DataFrame
            dataframe obtained from running KQL query then exporting `_kql_raw_result_.to_dataframe()`.
            Must contain the columns Timestamp, IterationIndex, Reward, CumulativeReward, Terminal,
            SimState, SimAction, SimConfig and EpisodeId. It is not modified.

        Returns
        -------
        DataFrame
            Columns EpisodeId, IterationIndex, Reward, Terminal followed by the flattened
            SimState, SimAction and SimConfig columns (Timestamp and CumulativeReward are only
            required as inputs, they are not part of the output). Rows are ordered by
            EpisodeId, then IterationIndex, with Timestamp as tie-breaker, and the index is a
            fresh 0..n-1 range that matches that row order.

        Raises
        ------
        KeyError
            If a required column is missing from `df`.
    '''
    base_columns = ["EpisodeId", "IterationIndex", "Reward", "Terminal"]
    nested_columns = ["SimState", "SimAction", "SimConfig"]
    required_columns = base_columns + ["Timestamp", "CumulativeReward"] + nested_columns

    missing_columns = [name for name in required_columns if name not in df.columns]
    if missing_columns:
        raise KeyError(f"format_kql_logs: missing required columns {missing_columns}")

    # `Series.apply(pd.Series)` yields a Series (without `.columns`) for empty input,
    # so an empty query result is answered directly with the fixed columns.
    if df.empty:
        return df[base_columns].reset_index(drop=True)

    # One wide frame per nested column; the keys of the nested values become column names.
    expanded_frames = [df[name].apply(pd.Series) for name in nested_columns]
    ordered_columns = list(base_columns)
    for frame in expanded_frames:
        ordered_columns.extend(frame.columns.values.tolist())

    formated_df = pd.concat(expanded_frames + [df[base_columns + ["Timestamp"]]], axis=1)
    # Timestamp is only needed to order rows that share EpisodeId and IterationIndex.
    formated_df["Timestamp"] = pd.to_datetime(formated_df["Timestamp"])
    formated_df = formated_df.sort_values(by=["EpisodeId", "IterationIndex", "Timestamp"])

    # Re-index AFTER the final sort so that index labels and row positions agree.
    return formated_df[ordered_columns].reset_index(drop=True)


In [None]:
# Flatten the nested SimState / SimAction / SimConfig columns of the raw query result.
assessment_df_flattened = format_kql_logs(assessment_data)

In [None]:
# Summary statistics of the flattened dataframe, shown as the cell output.
assessment_df_flattened.describe()

In [None]:
# Columns of the flattened log used by the analysis cells that follow.
selected_columns = [
    'IterationIndex',
    'machines_actual_speed',
    'machines_state',
    'Reward',
    'all_conveyor_levels',
    'initial_bin_level',
    'sink_throughput_absolute_sum'
]

# Take an explicit copy: the loop below assigns columns, and doing that on a bare
# column subset of assessment_df_flattened raises pandas' SettingWithCopyWarning.
df_assessment = assessment_df_flattened[selected_columns].copy()

# Parse the string form of every value back into a Python literal (number, list, ...).
for col in selected_columns:
    df_assessment[col] = df_assessment[col].apply(lambda x: ast.literal_eval(str(x)))

In [None]:
# flatten the columns
def flatten_column(df_assessment, col_lst):
    """Expand the listed columns into numbered columns and keep the other columns.

    Each column `x` in `col_lst` is turned into columns `x0`, `x1`, ...: one per list
    position when the values are lists, or a single `x0` when the values are scalars.

    Parameters
    ----------
    df_assessment : DataFrame
        Input dataframe; it is not modified.
    col_lst : list of str
        Names of the columns to expand. They are removed from the result.

    Returns
    -------
    DataFrame
        The expanded columns (in `col_lst` order) followed by the remaining columns,
        carrying the same index as `df_assessment`.
    """
    # Build every part on the caller's index: with the default 0..n-1 index the
    # column-wise concat would align on labels and misplace (or NaN-pad) rows
    # whenever `df_assessment` is not indexed 0..n-1.
    flattened_parts = [
        pd.DataFrame(df_assessment[col].values.tolist(), index=df_assessment.index).add_prefix(col)
        for col in col_lst
    ]
    remaining_columns = df_assessment.drop(col_lst, axis=1)
    return pd.concat(flattened_parts + [remaining_columns], axis=1)

In [None]:
# Expand every selected column of df_assessment into numbered columns
# (for example 'machines_state0' ... or 'IterationIndex0').
selected_columns = [
    'IterationIndex', 'machines_actual_speed', 'machines_state', 'Reward',
    'all_conveyor_levels', 'initial_bin_level', 'sink_throughput_absolute_sum',
]
df_assessment_flattened = flatten_column(df_assessment, selected_columns)

In [None]:
# Drop the rows whose iteration index is listed in `remove_iteration`.
# NOTE(review): this cell used to be described as removing the 1st and 2nd iterations,
# but only index 1 is listed - confirm which of the two is intended.
remove_iteration = [1]
df_throughput = df_assessment_flattened[~df_assessment_flattened['IterationIndex0'].isin(remove_iteration)]

### Total number of cans generated across 100 episodes

In [None]:
# Sum of column 'machines_actual_speed5' over all rows kept in df_throughput.
# NOTE(review): the "100 episodes" in the message is fixed text, it is not computed from the data.
Total_cans = df_throughput['machines_actual_speed5'].sum()
print(f"The total number of cans generated across 100 episodes is: {Total_cans}")

In [None]:
# One histogram per machine (columns machines_state0 .. machines_state5) on a 3x2 grid.
fig, axes = plt.subplots(3, 2, figsize=(8,8))
# axes.flat walks the grid row by row, so machine i lands on axes[i//2, i%2]
for i, ax in enumerate(axes.flat):
    ax.hist(df_assessment_flattened[f'machines_state{i}'])
    ax.set_title(f"Machine {i}")
    ax.set_xticks([-1, 0, 1])
fig.suptitle('Machine State')
plt.tight_layout()
# mplcursors.cursor(hover=True)

### Distribution of idle, down, and running counts across 100 episodes for all machines

In [None]:
# Tally, over the six machines_state columns, how many rows report each state:
# -1 is counted as down, 1 as run, and every remaining row as idle.
index = df_assessment_flattened.index
total_iterations = len(index)
down_count = run_count = idle_count = 0
for machine in range(6):
    states = df_assessment_flattened['machines_state' + str(machine)]
    # Sums of the selected values: +1 per running row, -1 per down row (so down_sum is <= 0).
    down_sum = states[states == -1].sum(axis = 0)
    run_sum = states[states == 1].sum(axis = 0)
    # Adding the negative down_sum subtracts the number of down rows.
    idle_sum = total_iterations - run_sum + down_sum
    down_count += down_sum
    run_count += run_sum
    idle_count += idle_sum
# down_count has accumulated negative values, hence the sign flip when printing.
print('Total number of down occurances is', -down_count)
print('Total number of run occurances is', run_count)
print('Total number of idle occurances is', idle_count)

In [None]:
# One histogram per machine (columns machines_actual_speed0 .. machines_actual_speed5) on a 3x2 grid.
fig, axes = plt.subplots(3, 2, figsize=(8,8))
# axes.flat walks the grid row by row, so machine i lands on axes[i//2, i%2]
for i, ax in enumerate(axes.flat):
    ax.hist(df_assessment_flattened[f'machines_actual_speed{i}'])
    ax.set_title(f"Machine {i}")
fig.suptitle('Machine Speed')
plt.tight_layout()
# mplcursors.cursor(hover=True)

## Episode Level Assessment

In [None]:
# pick episode with specific bin level
# you can change which episode's results you'd like to view with this parameter
bin_level = 18
matching_rows = assessment_df_flattened[assessment_df_flattened['initial_bin_level'] == bin_level]
if matching_rows.empty:
    raise ValueError(f"No assessment episode has initial_bin_level == {bin_level}")
# Several episodes can share the same initial bin level. Keep only the first one, so the
# cells below look at a single episode with one row per IterationIndex.
selected_episode_id = matching_rows['EpisodeId'].iloc[0]
single_episode_df = matching_rows[matching_rows['EpisodeId'] == selected_episode_id]

In [None]:
# Read the bin level back from the first selected row and report it.
# NOTE: .iloc[0] raises IndexError when single_episode_df has no rows.
bin_level = single_episode_df['initial_bin_level'].iloc[0]
print(f"The initial bin level is: {bin_level}")

### Machines and buffers visualization

In [None]:
def plot_line_for_iter(iter_count, df, position, node_sizes, ax, G):
    """Draw the manufacturing line for one iteration (one animation frame).

    Everything is drawn on the `ax` that is passed in, so the result does not depend
    on which matplotlib figure happens to be current.

    Parameters
    ----------
    iter_count :
        Index label of the row of `df` to draw.
    df : DataFrame
        Indexed by iteration; the columns "machines_actual_speed" and "conveyor_buffers"
        are indexed per machine / per conveyor.
    position : dict
        Maps node name ("source1", "m0".."m5", "sink") to its (x, y) position.
    node_sizes : list
        Node sizes forwarded to `nx.draw`.
    ax : matplotlib Axes
        Axes that is cleared and redrawn.
    G : networkx graph
        Graph whose nodes carry a 'pos' attribute.
    """
    ax.clear()

    # Look the row up once instead of once per annotation.
    row = df.loc[iter_count]
    speeds = row["machines_actual_speed"]

    for key, (x, y) in position.items():
        if key == "source1":
            continue
        if key == "sink":
            # throughput at the end: the last entry of machines_actual_speed
            ax.text(x - 1.2, y + 0.009,
                    'Throughput: ' + str(speeds[-1]), fontsize=7, color="green")
            continue

        machine_id = int(key[1:])
        ax.text(x - 1.2, y + 0.009,
                'Speed: ' + str(speeds[machine_id]), fontsize=7, color="green")  # machine speed
        if key != "m5":  # there isn't a conveyor between sink and m5
            conveyor_buffer = row["conveyor_buffers"][machine_id]
            ax.text(x + 1.2, y - 0.009,
                    'Buffer: ' + str(int(np.sum(conveyor_buffer))), fontsize=7, color="blue")

    nx.draw(G, nx.get_node_attributes(G, 'pos'), ax=ax,
            with_labels=True, node_size=node_sizes, font_size=8)

    ax.set_title(f"Iteration: {iter_count}")
    ax.figure.tight_layout()


In [None]:
def simple_animation(df, output_path='animation.mp4'):
    """Render one episode as an animation of the manufacturing line and save it.

    One frame is drawn per iteration 1..max(IterationIndex) with `plot_line_for_iter`.

    Parameters
    ----------
    df : DataFrame
        Rows of a single episode. Needs the column "IterationIndex" plus the columns
        read by `plot_line_for_iter`. It is not modified.
    output_path : str
        File the animation is written to (default 'animation.mp4').

    Raises
    ------
    ValueError
        If `df` has no rows.
    """
    if df.empty:
        raise ValueError("simple_animation: the episode dataframe is empty")

    max_iter = int(df["IterationIndex"].max())
    frames_df = df.set_index("IterationIndex")  # returns a new frame, df stays untouched

    # position of the nodes in the graph
    position = {'source1': (0, 0.02), 'm0': (5, 0.02), 'm1': (10, 0.02), 'm2': (15, 0.02),
                'm3': (20, 0.02), 'm4': (25, 0.02), 'm5': (30, 0.02), 'sink': (35, 0.02)}

    # graph that will represent the manufacturing line
    G = nx.Graph()
    # connections between the nodes (machines)
    G.add_edges_from([("source1", "m0"), ("m0", "m1"), ("m1", "m2"),
                      ("m2", "m3"), ("m3", "m4"), ("m4", "m5"), ("m5", "sink")])
    for key, val in position.items():
        G.add_node(str(key), pos=val)
    node_sizes = [7500 / 8] * len(position)

    # Build plot
    fig, ax = plt.subplots(figsize=(6,4), dpi=100)
    try:
        ani = animation.FuncAnimation(fig, plot_line_for_iter, frames=range(1, max_iter + 1),
                                      fargs=(frames_df, position, node_sizes, ax, G))
        ani.save(output_path)
    finally:
        # Close exactly this figure (not whichever one is current), even if saving failed.
        plt.close(fig)

simple_animation(single_episode_df)

In [None]:
# Display the video file written by simple_animation (Video is also imported in the imports cell).
from IPython.display import Video
Video('animation.mp4')

### Throughput and cumulative reward

In [None]:
reward = single_episode_df["Reward"]
cumulative_rew = np.cumsum(reward.to_numpy())
# Draw on a dedicated figure: with the `%matplotlib notebook` backend a bare plt.plot()
# draws into an earlier figure that is still open instead of starting a new one.
fig, ax = plt.subplots()
ax.plot(cumulative_rew)
ax.set_title("Cumulative Sum of Reward")
ax.set_xlabel("Step within the episode")
ax.set_ylabel("Cumulative reward")
print(f"The cumulative reward at the end of the episode is: {cumulative_rew[-1]}")

In [None]:
machines_speeds = np.array(single_episode_df["machines_actual_speed"].to_list())
# Draw on a dedicated figure: with the `%matplotlib notebook` backend a bare plt.plot()
# draws into an earlier figure that is still open instead of starting a new one.
fig, ax = plt.subplots()
ax.plot(machines_speeds[:, -1])  # last entry of machines_actual_speed in every row
ax.set_title("Throughput")
ax.set_xlabel("Step within the episode")
ax.set_ylabel("Speed of the last machine")

In [None]:
# Sum of the last machines_actual_speed entry over every row of the episode
# (relies on `machines_speeds` computed in the throughput cell).
print(f"The total number of products at the output of the line throughout the episode is: {np.sum(machines_speeds[:, -1])}")

### Analysis of machine speeds and buffer sizes

In [None]:
# Speed of each of the six machines over the episode, one panel per machine.
speeds = np.array(single_episode_df["machines_actual_speed"].to_list())
fig, axes = plt.subplots(3, 2, figsize=(8,8))
# axes.flat walks the grid row by row, so machine i lands on axes[i//2, i%2]
for i, ax in enumerate(axes.flat):
    ax.plot(speeds[:,i])
    ax.set_title(f"Machine {i}")
fig.suptitle('Machine Speed')
plt.tight_layout()
# mplcursors.cursor(hover=True)

In [None]:
# State of each of the six machines over the episode, one panel per machine.
machine_states = np.array(single_episode_df["machines_state"].to_list())
fig, axes = plt.subplots(3, 2, figsize=(8,8))
# axes.flat walks the grid row by row, so machine i lands on axes[i//2, i%2]
for i, ax in enumerate(axes.flat):
    ax.plot(machine_states[:,i])
    ax.set_title(f"Machine {i}")
    ax.set_yticks([-1, 0, 1])
fig.suptitle('Machine State')
plt.tight_layout()
# mplcursors.cursor(hover=True)

In [None]:
# Conveyor levels over the episode compared with their estimates, one panel per conveyor.
buffers = np.array(single_episode_df["all_conveyor_levels"].to_list())
buffers_estimates = np.array(single_episode_df["all_conveyor_levels_estimate"].to_list())
fig, axes = plt.subplots(3, 2, figsize=(8,8))
for i in range(5):
    ax = axes[i//2,i%2]
    ax.plot(buffers[:,i], color='r', label='Real')
    ax.plot(buffers_estimates[:,i], color='b', label='Estimation')
    ax.set_title(f"Machine {i}")
    ax.legend()  # without a legend the 'Real' / 'Estimation' labels are never shown
# Only five series are drawn on the 3x2 grid; hide the unused sixth panel.
axes[2, 1].set_visible(False)
fig.suptitle('Buffer Levels')
plt.tight_layout()
# mplcursors.cursor(hover=True)

In [None]:
# Distribution of the speed of each of the six machines within the episode.
speeds = np.array(single_episode_df["machines_actual_speed"].to_list())
fig, axes = plt.subplots(3, 2, figsize=(8,8))
# axes.flat walks the grid row by row, so machine i lands on axes[i//2, i%2]
for i, ax in enumerate(axes.flat):
    ax.hist(speeds[:,i])
    ax.set_title(f"Machine {i}")
fig.suptitle('Machine Speed Distribution')
plt.tight_layout()

In [None]:
# Distribution of the summed conveyor_buffers entries, one panel for each of the first five buffers.
buffers = np.array(single_episode_df["conveyor_buffers"].to_list())
fig, axes = plt.subplots(3, 2, figsize=(8,8))
# Five buffers on a 3x2 grid: zip stops after the fifth panel, the sixth stays empty.
for i, ax in zip(range(5), axes.flat):
    buffer_totals = np.sum(buffers[:,i,:],axis=1)  # one total per row of the episode
    ax.hist(buffer_totals)
    ax.set_title(f"Buffer {i}")
fig.suptitle('Conveyor Buffer Distribution')
plt.tight_layout()

In [None]:
# Distribution of the state of each of the six machines within the episode.
machine_states = np.array(single_episode_df["machines_state"].to_list())
fig, axes = plt.subplots(3, 2, figsize=(8,8))
# axes.flat walks the grid row by row, so machine i lands on axes[i//2, i%2]
for i, ax in enumerate(axes.flat):
    ax.hist(machine_states[:,i], range=[-1.1, 1.1], align="mid")
    ax.set_title(f"Machine {i}")
    ax.set_xticks([-1, 0, 1])
fig.suptitle('Machine State Distribution')
plt.tight_layout()

In [None]:
# Distribution of the values in column conveyor_infeed_m1_prox_empty, one panel per buffer.
prox_column = "conveyor_infeed_m1_prox_empty"
prox = np.array(single_episode_df[prox_column].to_list())
fig, axes = plt.subplots(3, 2, figsize=(8,8))
# Five buffers on a 3x2 grid: zip stops after the fifth panel, the sixth stays empty.
for i, ax in zip(range(5), axes.flat):
    ax.hist(prox[:,i], range=[-0, 1.1], align="left")
    ax.set_title(f"Buffer {i}")
    ax.set_xticks([0, 1])
fig.suptitle(prox_column)
plt.tight_layout()

In [None]:
# Distribution of the values in column conveyor_infeed_m2_prox_empty, one panel per buffer.
prox_column = "conveyor_infeed_m2_prox_empty"
prox = np.array(single_episode_df[prox_column].to_list())
fig, axes = plt.subplots(3, 2, figsize=(8,8))
# Five buffers on a 3x2 grid: zip stops after the fifth panel, the sixth stays empty.
for i, ax in zip(range(5), axes.flat):
    ax.hist(prox[:,i], range=[-0, 1.1], align="left")
    ax.set_title(f"Buffer {i}")
    ax.set_xticks([0, 1])
fig.suptitle(prox_column)
plt.tight_layout()

In [None]:
# Distribution of the values in column conveyor_discharge_p1_prox_full, one panel per buffer.
prox_column = "conveyor_discharge_p1_prox_full"
prox = np.array(single_episode_df[prox_column].to_list())
fig, axes = plt.subplots(3, 2, figsize=(8,8))
# Five buffers on a 3x2 grid: zip stops after the fifth panel, the sixth stays empty.
for i, ax in zip(range(5), axes.flat):
    ax.hist(prox[:,i], range=[-0, 1.1], align="left")
    ax.set_title(f"Buffer {i}")
    ax.set_xticks([0, 1])
fig.suptitle(prox_column)
plt.tight_layout()

In [None]:
# Distribution of the values in column conveyor_discharge_p2_prox_full, one panel per buffer.
prox_column = "conveyor_discharge_p2_prox_full"
prox = np.array(single_episode_df[prox_column].to_list())
fig, axes = plt.subplots(3, 2, figsize=(8,8))
# Five buffers on a 3x2 grid: zip stops after the fifth panel, the sixth stays empty.
for i, ax in zip(range(5), axes.flat):
    ax.hist(prox[:,i], range=[-0, 1.1], align="left")
    ax.set_title(f"Buffer {i}")
    ax.set_xticks([0, 1])
fig.suptitle(prox_column)
plt.tight_layout()