In [35]:
import re
import pandas as pd
import seaborn as sns
import copy
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from pprint import pprint
# Global matplotlib text styling: 12pt bold for every figure drawn afterwards.
plt.rcParams.update({'font.size': 12, 'font.weight': 'bold'})

# Hatch pattern strings (presumably for telling bars apart in plots drawn
# elsewhere in the notebook -- not used in this section).
hatches = ['..', '\\\\', 'O', 'x', '\\', '--', 'o']

# Pick four entries of seaborn's "colorblind" palette, in this fixed order.
orig_colors = sns.color_palette("colorblind")
colors = [orig_colors[idx] for idx in (2, 4, 0, 1)]

GPU_PER_NODE = 4
num_steps = 10

# One entry per model configuration. parse_log() reads 'm' (written as
# "<m>B" in the log file name), and 'N', 'H', 'L', 'U', which fill the
# "-l", "-h", "-a" and "-sl" fields of the log file name respectively.
# NOTE(review): the remaining keys ('n', 'F', 'S', 'K', 'mbs', 'ckpt_size')
# are not read in this section; their meaning/units should be confirmed
# against the code that consumes them.
models = [
    dict(n=1, m=3, H=2560, F=11008, N=30, L=32,
         U=2048, S=4, K=5, mbs=16, ckpt_size=45),
    dict(n=2, m=7, H=4096, F=11008, N=32, L=32,
         U=2048, S=4, K=5, mbs=16, ckpt_size=83),
    dict(n=4, m=13, H=5120, F=13824, N=40, L=40,
         U=2048, S=4, K=5, mbs=16, ckpt_size=166),
    dict(n=8, m=30, H=6656, F=17920, N=60, L=52,
         U=2048, S=4, K=5, mbs=16, ckpt_size=444),
    dict(n=20, m=70, H=8192, F=28672, N=80, L=64,
         U=2048, S=4, K=5, mbs=16, ckpt_size=1065),
    dict(n=70, m=175, H=14336, F=28672, N=70, L=112,
         U=2048, S=4, K=5, mbs=8, ckpt_size=2754),
]
# Display names of the three approaches being compared.
DEFAULT_DEEPSPEED = "DeepSpeed"
FULLY_OPTIMIZED = "Deep Optimizer States"
PARTLY_OPTIMIZED = "Part Deep Optimizer States"

# Integer code per approach: 0 = default, 1 = partly, 2 = fully optimized.
approach_code = {
    name: code
    for code, name in enumerate(
        (DEFAULT_DEEPSPEED, PARTLY_OPTIMIZED, FULLY_OPTIMIZED)
    )
}

# Baseline run configuration; callers deep-copy it and override fields.
base_config = dict(
    basepath="./",
    approach=DEFAULT_DEEPSPEED,
    dp=1,
    tp=1,
    ga=1,
    tf_ratio=1,
    act_ckpt=True,
    mbs=4,
    gbs=4,
    subg=int(100 * 1e6),
    opt_gaps=5,
    num_steps=10,
)

# Metric columns extracted from each log file, in DataFrame column order.
df_columns = [
    # per-iteration summary metrics
    'elapsed_time_per_iteration_ms',
    'samples_per_second',
    'TFLOPs',
    # timing breakdown entries
    'fwd',
    'bwd',
    'bwd_inner_microstep',
    'bwd_allreduce_microstep',
    'step_microstep',
]

# One pre-compiled pattern per metric column. Each captures a decimal number
# (digits, a dot, digits) that follows "<label>: " anywhere in a log line.
_METRIC_PATTERNS = {
    column: re.compile(label + r': (\d+\.\d+)')
    for column, label in (
        ('elapsed_time_per_iteration_ms', r'elapsed time per iteration \(ms\)'),
        ('samples_per_second', 'samples per second'),
        ('TFLOPs', 'TFLOPs'),
        ('fwd', 'fwd'),
        ('bwd', 'bwd'),
        ('bwd_inner_microstep', 'bwd_inner_microstep'),
        ('bwd_allreduce_microstep', 'bwd_allreduce_microstep'),
        ('step_microstep', 'step_microstep'),
    )
}


def parse_log(m, c, ret=None, warmup_steps=4):
    """Parse one training log into a DataFrame of per-iteration metrics.

    The log file name is derived from the model and run configuration, the
    file is scanned line by line for every metric in ``df_columns``, and the
    first ``warmup_steps`` rows are dropped before returning.

    Args:
        m: Model description; the keys 'm', 'N', 'H', 'L' and 'U' are read
            to build the log file name.
        c: Run configuration; the keys 'basepath', 'approach', 'tp', 'dp',
            'gbs', 'mbs', 'tf_ratio', 'subg' and 'opt_gaps' are read.
        ret: Optional column label (or list of labels) to select from the
            result. ``None`` returns every column.
        warmup_steps: Number of leading rows to discard. With the default
            of 4 and 10 logged iterations, the last 6 rows are kept.

    Returns:
        The trimmed DataFrame, or the result of indexing it with ``ret``
        (a Series when ``ret`` is a single column label).

    Raises:
        FileNotFoundError: If the derived log file does not exist.
        ValueError: Raised by pandas when the metrics were not all logged
            the same number of times (columns of unequal length).
    """
    # The approach decides the prefetch / flush_async flags in the file name.
    is_prefetch = 1 if c['approach'] == FULLY_OPTIMIZED else 0
    is_async = 1 if c['approach'] in (PARTLY_OPTIMIZED, FULLY_OPTIMIZED) else 0
    log_file = (
            f"{c['basepath']}/log-{m['m']}B-tp{c['tp']}-dp{c['dp']}-l{m['N']}-h{m['H']}-a{m['L']}-sl{m['U']}-"
            f"gbs{c['gbs']}-mbs{c['mbs']}-ratio{c['tf_ratio']}-subg{c['subg']}-prefetch{is_prefetch}-"
            f"flush_async{is_async}-opt_gaps{c['opt_gaps']}.log"
    )

    data = {k: [] for k in df_columns}
    with open(log_file, 'r') as file:
        for line in file:
            # A single line may carry several metrics; test every pattern.
            for column, pattern in _METRIC_PATTERNS.items():
                match = pattern.search(line)
                if match:
                    data[column].append(float(match.group(1)))

    df = pd.DataFrame(data, columns=df_columns)
    # Drop the leading warm-up rows (positional slice, same as df[4:] by
    # default) so they do not skew the averages computed by callers.
    df = df.iloc[warmup_steps:]
    if ret is not None:
        df = df[ret]

    return df

def get_avg(arr):
    """Return the arithmetic mean of ``arr``.

    ``arr`` must work with both ``sum()`` and ``len()``. An empty input
    raises ``ZeroDivisionError``.
    """
    total = sum(arr)
    count = len(arr)
    return total / count

# Sweep the 'ga' setting for the 13B model and print, for each value, the
# mean TFLOPs of the three approaches (default, partly, fully optimized).
m = [model for model in models if model.get('m') == 13][0]
config = copy.deepcopy(base_config)
for ga in (1, 2, 4, 8, 16):
    # Global batch size is the micro batch size scaled by the GA value.
    config.update(
        basepath=f"./act-output-{m['m']}B-scale-GA-single-GPU",
        ga=ga,
        gbs=config['mbs'] * ga,
    )
    averages = []
    for approach in (DEFAULT_DEEPSPEED, PARTLY_OPTIMIZED, FULLY_OPTIMIZED):
        config['approach'] = approach
        averages.append(get_avg(parse_log(m, config, ret="TFLOPs")))
    df_default, df_partly, df_fully = averages
    print(ga, df_default, df_partly, df_fully)
    

1 45.026666666666664 62.65166666666667 66.20666666666666
2 65.02499999999999 94.485 101.09333333333332
4 101.685 128.23166666666665 132.975
8 129.5533333333333 152.91166666666666 160.68
16 157.71833333333333 172.30833333333337 177.15166666666667
