In [1]:
from collections import defaultdict
import csv
from itertools import product
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
def get_parallelism_style(dp, hp, pp):
    """Return a short label naming which parallelism degrees exceed 1.

    Each degree contributes one letter when it is greater than 1:
    ``dp`` -> "D", ``hp`` -> "T", ``pp`` -> "P". The letters are joined with
    "/" in that fixed order (e.g. "D/T", "T/P", "D/P", "D/T/P"). When all
    three degrees equal 1 the label is "Base".

    Raises:
        ValueError: if any degree is neither equal to 1 nor greater than 1
            (for example 0 or a negative value).
    """
    active_letters = []
    for letter, degree in (("D", dp), ("T", hp), ("P", pp)):
        if degree > 1:
            active_letters.append(letter)
        elif degree != 1:
            # Neither "off" (== 1) nor "on" (> 1): not a usable degree.
            raise ValueError(f"Invalid degree combination dp={dp}, hp={hp}, pp={pp}")
    # An empty list means every degree was exactly 1.
    return "/".join(active_letters) or "Base"

In [8]:
# Load the simulated grid-search results; the next plotting cell reads `df`.
# NOTE(review): absolute, machine-specific path — TODO make this configurable.
df = pd.read_csv(
    filepath_or_buffer='/home/t-sikris/win/Downloads/mlp_dgx_simulated_grid_search_results_v2.csv',
)

In [9]:
# Throughput vs. peak per-device memory for the "mlp-small" runs, colored by
# batch size.
fig = px.scatter(
    # Scale peak memory so the values match the "(GB)" axis label.
    # NOTE(review): assumes the raw column is in bytes, as implied by
    # plot_memory_usage_vs_metric dividing the same column by 1e9 — TODO confirm.
    df.query("model_size=='mlp-small'").assign(
        peak_memory=lambda runs: runs["peak_memory"] / 1e9
    ),
    x="throughput",
    y="peak_memory",
    color="batch_size",
    labels={
        # Same unit wording the other throughput plots in this notebook use.
        "throughput": "Simulated Throughput (samples / second)",
        "peak_memory": "Simulated Peak Per-Device Memory (GB)",
        "batch_size": "Batch Size",
    },
    hover_data=["model_size", "dp_degree", "hp_degree", "pp_degree", "num_microbatches"],
    title="MLP training",
)
fig.show()

In [35]:
def plot_memory_usage_vs_metric(
    simulation_filename,
    model_size,
    min_batch_size,
    metric,
    xlabel,
    ylabel,
    memory_limit_gb=32,
):
    """Scatter-plot log peak memory against ``metric`` for one model size.

    Reads the simulation CSV, keeps the rows for ``model_size`` whose batch
    size is at least ``min_batch_size / 2``, derives plotting columns, shows
    an interactive plotly scatter and returns the prepared frame.

    Args:
        simulation_filename: Path of the CSV to read. It must contain the
            columns ``model_size``, ``batch_size``, ``peak_memory``,
            ``dp_degree``, ``hp_degree``, ``pp_degree``, ``num_microbatches``
            and the column named by ``metric``.
        model_size: Value of the ``model_size`` column to keep.
        min_batch_size: Rows with ``batch_size >= min_batch_size / 2`` are
            kept. NOTE(review): the halving is carried over unchanged —
            confirm it is intended.
        metric: Name of the column plotted on the x axis.
        xlabel: Axis label for ``metric``.
        ylabel: Description of the un-logged memory quantity; the y axis
            shows its natural log and is labelled ``log(<ylabel>)``.
        memory_limit_gb: Position (in the same units as the scaled
            ``peak_memory``) of the horizontal cutoff line; ``None`` omits
            the line. Defaults to 32.

    Returns:
        The filtered ``DataFrame`` with ``peak_memory`` divided by 1e9 and the
        added columns ``dummy_column_for_size``, ``parallelism_style``,
        ``config_name`` and ``log_peak_memory``.

    Raises:
        ValueError: if ``metric`` is not a column of the CSV, or if no rows
            remain after filtering.
    """
    df = pd.read_csv(simulation_filename)
    if metric not in df.columns:
        raise ValueError(f"Column {metric!r} not found in {simulation_filename}")

    selected = (df["model_size"] == model_size) & (
        df["batch_size"] >= (min_batch_size / 2)
    )
    # Copy so the column assignments below act on an independent frame instead
    # of a slice of the frame read from disk (avoids SettingWithCopyWarning).
    df = df[selected].copy()
    if df.empty:
        raise ValueError(
            f"No rows for model_size={model_size!r} with "
            f"batch_size >= {min_batch_size / 2}"
        )

    # Presumably bytes -> GB (the axis labels say GB) — TODO confirm raw units.
    df["peak_memory"] = df["peak_memory"] / 1e9
    # Constant column so every marker gets the same size (see size_max below).
    df["dummy_column_for_size"] = 1.0
    df["parallelism_style"] = [
        get_parallelism_style(dp, hp, pp)
        for (dp, hp, pp) in df[["dp_degree", "hp_degree", "pp_degree"]].values
    ]
    df["config_name"] = [
        f"{d}/{h}/{p}/{k}"
        for (d, h, p, k) in df[
            ["dp_degree", "hp_degree", "pp_degree", "num_microbatches"]
        ].values
    ]
    df["log_peak_memory"] = np.log(df["peak_memory"])

    fig = px.scatter(
        df,
        x=metric,
        y="log_peak_memory",
        symbol="parallelism_style",
        color="parallelism_style",
        hover_name="config_name",
        hover_data=["batch_size", "dp_degree", "hp_degree", "pp_degree", "num_microbatches"],
        labels={
            metric: xlabel,
            "log_peak_memory": f"log({ylabel})",
            "parallelism_style": "Parallelism style",
        },
        color_discrete_sequence=[
            "#1f77b4",  # muted blue
            "#ff7f0e",  # safety orange
            "#2ca02c",  # cooked asparagus green
            "#d62728",  # brick red
            "#9467bd",  # muted purple
            "#8c564b",  # chestnut brown
            "#e377c2",  # raspberry yogurt pink
            "#7f7f7f",  # middle gray
            "#bcbd22",  # curry yellow-green
            "#17becf",  # blue-teal
        ],
        category_orders={
            # "Base" is the label get_parallelism_style gives to the 1/1/1
            # configuration; it is listed last, after the parallel styles.
            "parallelism_style": [
                "D",
                "T",
                "P",
                "D/T",
                "T/P",
                "D/P",
                "D/T/P",
                "Base",
            ],
        },
        size="dummy_column_for_size",
        size_max=10,
        range_x=[0, df[metric].max()],
        # NOTE(review): the lower bound of 0 hides points whose scaled peak
        # memory is below 1 (negative log) — confirm this is intended.
        range_y=[0, df["log_peak_memory"].max()],
    )
    if memory_limit_gb is not None:
        # Memory cutoff line, drawn in the same log scale as the y axis.
        fig.add_hline(y=np.log(memory_limit_gb), line_width=2)
    fig.show()
    return df

In [36]:
# Plot the "mlp-large" runs and keep the prepared frame for inspection below.
df = plot_memory_usage_vs_metric(
    simulation_filename='/home/t-sikris/win/Downloads/mlp_dgx_simulated_grid_search_results_v2.csv',
    model_size="mlp-large",
    min_batch_size=1,
    metric="throughput",
    xlabel="Throughput (samples / second)",
    ylabel="Peak Memory (GB)",
)

In [None]:
# Show every row whose dp/hp/pp/num_microbatches configuration is 2/2/4/2.
df.loc[df["config_name"].eq("2/2/4/2")]

In [None]:
# Why aren't some points moving with batch size?

In [22]:
# NOTE(review): scratch arithmetic with unexplained constants — TODO document
# what 8, 1035 and 60 stand for, or remove this cell.
8*1035/60

138.0

In [None]:
# Throughput vs. peak per-device memory for the "mlp-large" rows of `df`,
# with both axes starting at zero.
data = df[df["model_size"] == "mlp-large"]
fig = px.scatter(
    data,
    x="throughput",
    y="peak_memory",
    title="MLP training",
    labels=dict(
        throughput="Simulated Throughput (samples/second)",
        peak_memory="Simulated Peak Per-Device Memory (GB)",
    ),
    hover_data=["model_size", "dp_degree", "hp_degree", "pp_degree", "num_microbatches"],
    # Axis ranges span from 0 up to the largest observed value.
    range_x=[0, max(data["throughput"])],
    range_y=[0, max(data["peak_memory"])],
)
fig.show()