# AWS EFA Heatmap

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo

from IPython.display import display, HTML, Markdown
from plotly.subplots import make_subplots

import nsys_display

# pandas rendering: passing None removes the limit on displayed rows and on
# the width of each column's contents.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Inject CSS so elements with the "container" class use 95% of the width.
display(HTML('<style>.container { width:95% !important; }</style>'))

# Prepare plotly's offline mode for rendering figures inside the notebook.
pyo.init_notebook_mode()

This line graph summarizes the data read and write rates across all profiled network devices:
* The x axis represents the rank duration, ranging from 0 to the maximum duration across all ranks.
* The y axis represents the mean value of the data read and written across all ranks.

In [None]:
# Read the analysis results from the parquet file.
df = pd.read_parquet('analysis.parquet')

# Derive the columns used by the plots below:
#   Name     - "<Rank>/<DeviceId>" label for each row.
#              TODO: this naming scheme is provisional and will need to change.
#   Duration - scaled by 1e-9 to convert nanoseconds to seconds.
df = df.assign(
    Name=lambda frame: (
        frame['Rank'].astype(str) + '/' + frame['DeviceId'].astype(int).astype(str)
    ),
    Duration=lambda frame: frame['Duration'] * 1e-9,
)

# Column names of the RDMA rate metrics to plot.
metrics = [
    "rdma_read_bytes per second",
    "rdma_read_resp_bytes per second",
    "rdma_write_bytes per second",
    "rdma_write_recv_bytes per second",
]

# Summary line graph for the selected metrics.
nsys_display.display_summary_graph(
    df,
    metrics,
    xaxis_title="Duration (s)",
    yaxis_title="Value",
    title="RDMA Metrics Summary (bins=30)",
)

These heatmaps display AWS EFA network metrics, which are collected using NVTX counters via the efa_metrics plugin:
* The x axis represents the rank duration, ranging from 0 to the maximum duration across all ranks.
* The y axis represents the set of Rank/DeviceId names for which metrics were collected.

The heatmaps present:
* rdma_write_recv_bytes per second
* rdma_write_bytes per second
* rdma_read_bytes per second
* rdma_read_resp_bytes per second

In [None]:
# Render one heatmap per metric: x is the duration in seconds, y is the
# Rank/DeviceId label, and colour is the metric value. Metrics that are not
# present as DataFrame columns are reported and skipped.
for metric in metrics:
    if metric not in df.columns:
        print(f"Metric '{metric}' is not in the DataFrame.")
        continue

    fig = make_subplots(rows=1, cols=1, subplot_titles=[metric])

    # Hover text. The '%{...}' placeholders are substituted by plotly at
    # render time, so the metric name is concatenated in directly instead of
    # going through str.format (which would require escaping every brace).
    hover_template = '<br>'.join([
        'Duration (s): %{x}',
        'Rank/DeviceID: %{y}',
        metric + ' (Value): %{z}',
    ]) + '<extra></extra>'

    # Colour range spans the observed minimum and maximum of this metric.
    fig.add_trace(
        go.Heatmap(
            x=df['Duration'],
            y=df['Name'],
            z=df[metric],
            colorscale='Viridis',
            zmin=df[metric].min(),
            zmax=df[metric].max(),
            colorbar=dict(title='Value'),
            hovertemplate=hover_template,
        ),
        row=1,
        col=1,
    )

    # Axis titles are given once, in the nested title dict; the 'titlefont'
    # attribute is deprecated in favour of title.font.
    fig.update_layout(
        height=500,
        title=f'Heatmap of {metric}',
        xaxis=dict(
            title=dict(text='Duration (s)', font=dict(size=14)),
            tickfont=dict(size=12),
        ),
        yaxis=dict(
            title=dict(text='Rank/DeviceID', font=dict(size=14)),
            tickfont=dict(size=12),
            categoryorder="category descending",
        ),
    )

    fig.show()

# Files

The table associates each rank number with the original filename. Ranks are assigned assuming that the file names include the rank with sufficient zero padding for proper sorting. Otherwise, the actual rank may differ from the assigned ID.

In [None]:
# Load the table stored in files.parquet and render it in the notebook.
files_df = pd.read_parquet("files.parquet")
display(files_df)