# MLCommons Earthquake GPU Event Time Analysis
- Creates plots of the GPU power usage during execution
- Annotates key times during execution

In [None]:
import os
import re
import glob
import pickle
import warnings
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
from datetime import time
from datetime import timedelta
import matplotlib.dates as md
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker

In [None]:
import matplotlib as mpl
# Set the Agg renderer's path chunk size for every figure drawn in this notebook.
# NOTE(review): presumably needed so the dense GPU power traces render without
# Agg failing on very long paths — confirm against the matplotlib rcParams docs.
mpl.rcParams['agg.path.chunksize'] = 10000

### Plotting Functions

In [None]:
# Styling table for the event plot.
# Each key is a timer name as it appears in the timer dataframe; each value
# holds the keyword arguments handed to ``ax.axvspan`` for that timer, plus
# ``rename``: the legend label to use instead of the (long) timer name, or
# None to show the timer name unchanged.
timers_dict = {
    'EVAL': dict(
        hatch=None, facecolor="none", edgecolor=None,
        rename=None, color='tab:blue', alpha=0.15,
    ),
    'CELL_READ_DATA': dict(
        hatch='//', facecolor="none", edgecolor='black',
        rename=None, color=None, alpha=0.7,
    ),
    'data head setup': dict(
        hatch=None, facecolor="none", edgecolor=None,
        rename=None, color='tab:green', alpha=0.15,
    ),
    'legal sampling location': dict(
        hatch='\\\\', facecolor="none", edgecolor='black',
        rename=None, color=None, alpha=0.7,
    ),
    'RunTFTCustomVersion bestfit finalize TFTTestpredict': dict(
        hatch=None, facecolor="none", edgecolor=None,
        rename='TFTTestpredict', color='tab:cyan', alpha=0.15,
    ),
    'RunTFTCustomVersion bestfit finalize VisualizeTFT TFTSaveandInterpret setFFFFmapping': dict(
        hatch=None, facecolor="none", edgecolor=None,
        rename='setFFFFmapping', color='tab:purple', alpha=0.15,
    ),
    'RunTFTCustomVersion bestfit finalize VisualizeTFT DLprediction': dict(
        hatch=None, facecolor="none", edgecolor=None,
        rename='DLprediction', color='tab:orange', alpha=0.15,
    ),
    # Disabled entry, kept for reference:
    # 'DLResults_Graphs': dict(
    #     hatch=None, facecolor="none", edgecolor=None,
    #     rename='DLResults_Graphs', color='tab:olive', alpha=0.15,
    # ),
}

In [None]:
def _collapse_epoch_timers(timer_df, num_epochs):
    """Collapse each epoch's timers into one ``Epoch:<n>`` row (in place).

    For every epoch ``n`` the ``RunTFTCustomVersion train Epoch:<n>`` row is
    renamed to ``Epoch:<n>`` and its ``end`` is set to the latest ``end`` of
    any timer whose name mentions that epoch.

    Args:
        timer_df: timer dataframe with ``timer`` and ``end`` columns; modified.
        num_epochs: number of epochs to process, starting at epoch 0.

    Returns:
        The list of ``Epoch:<n>`` names, or None as soon as an epoch has no
        timer at all.
    """
    epoch_timers = []
    for epoch in range(num_epochs):
        # The negative lookahead stops ``Epoch:1`` from also matching
        # ``Epoch:10`` .. ``Epoch:19``, which a plain substring test does.
        in_epoch = timer_df['timer'].str.contains(
            rf'Epoch:{epoch}(?!\d)', regex=True, na=False)
        if not in_epoch.any():
            return None
        train_rows = timer_df['timer'] == f'RunTFTCustomVersion train Epoch:{epoch}'
        timer_df.loc[train_rows, 'end'] = timer_df.loc[in_epoch, 'end'].max()
        timer_df.loc[train_rows, 'timer'] = f'Epoch:{epoch}'
        epoch_timers.append(f'Epoch:{epoch}')
    return epoch_timers


def plot_gpu_events(timer_df, gpu_df, epochs, name, path, zoom=False):
    """Plot GPU power draw over time with timed events shaded, and save it.

    The x axis is elapsed hours since the first plotted event. The GPU log is
    shifted by the offset between the two clocks, rounded to a whole hour,
    before it is put on the same axis. The input dataframes are not modified.

    Args:
        timer_df: timer dataframe with ``timer``, ``start`` and ``end`` columns.
        gpu_df: gpu log dataframe with ``time`` and ``power_draw W`` columns.
        epochs: number of epochs.
        name: run name; used in the title and the output file name.
        path: output path; figures go to ``<path>/event_times/<view>/``.
        zoom: False (or any falsy value) for the full run, ``'validation'`` to
            zoom on the validation/bestfit timer of epoch ``epochs - 2``, or
            ``'DLResults'`` to zoom on the ``DLResults_Graphs`` timer.

    Returns:
        None. Nothing is plotted (and a warning is issued) when one of the
        expected epochs has no timers.

    Raises:
        ValueError: if ``zoom`` is not a supported value, if no event timers
            are present, or if the timer to zoom on is missing.
    """
    zoom_dirs = {'validation': 'validation_zoom', 'DLResults': 'DLResults_zoom'}
    full_view = not zoom
    if not full_view and zoom not in zoom_dirs:
        raise ValueError(
            f'unsupported zoom {zoom!r}; expected False or one of {sorted(zoom_dirs)}')

    num_epochs = int(epochs)

    # Work on copies so the caller's frames can be reused for further plots.
    timer_df = timer_df.copy()
    gpu_df = gpu_df.copy()

    epoch_timers = _collapse_epoch_timers(timer_df, num_epochs)
    if epoch_timers is None:
        warnings.warn(
            f'plot_gpu_events: run {name!r} is missing timers for one of its '
            f'{num_epochs} epochs; skipping plot')
        return

    # rows that get shaded: the styled timers plus the collapsed epochs
    is_event = timer_df['timer'].isin(list(timers_dict) + epoch_timers)
    if not is_event.any():
        raise ValueError(f'run {name!r} has no event timers to plot')

    # Offset between the timer clock and the GPU log clock, rounded to the hour.
    min_time = timer_df.loc[is_event, 'start'].min()
    clock_offset = (min_time - gpu_df['time'].min()).round('1h')

    def hours_since(origin):
        return lambda stamp: abs(stamp - origin).total_seconds() / 3600

    for column in ('start', 'end'):
        timer_df[column] = timer_df[column].apply(hours_since(min_time))
    gpu_df['time'] = gpu_df['time'].apply(hours_since(min_time - clock_offset))
    event_df = timer_df[is_event]

    # Resolve the zoom window (in hours) before any figure is created.
    zoom_window = None
    if not full_view:
        if zoom == 'validation':
            zoom_timer = f'RunTFTCustomVersion validation bestfit Epoch:{num_epochs - 2}'
        else:
            zoom_timer = 'DLResults_Graphs'
        zoom_rows = timer_df[timer_df['timer'] == zoom_timer]
        if zoom_rows.empty:
            raise ValueError(f'timer {zoom_timer!r} not found; cannot zoom on {zoom!r}')
        zoom_window = (zoom_rows['start'].iloc[0], zoom_rows['end'].iloc[0])

    event_times_dir = os.path.join(path, 'event_times')
    if full_view:
        figsize = (10, 7)
        save_dir = os.path.join(event_times_dir, 'full')
    else:
        name = f'{name}_zoomed_{zoom}'
        figsize = (7, 7)
        save_dir = os.path.join(event_times_dir, zoom_dirs[zoom])

    minute = 1 / 60  # one minute expressed in the axis unit (hours)
    fig = plt.figure(figsize=figsize, facecolor='white', dpi=360)
    try:
        ax = fig.add_subplot(111)
        # Passing both color and facecolor/edgecolor makes matplotlib warn
        # about overriding plot properties; silence that for this figure only.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')

            ax.plot(gpu_df['time'], gpu_df['power_draw W'],
                    color='black', linewidth=0.75)

            # Epoch spans get progressively darker; one of them carries the
            # shared 'Epochs' legend entry (the second-to-last, or the only one).
            epoch_alpha = 0.2
            alpha_inc = 0.7 / num_epochs if num_epochs else 0.0
            legend_epoch = max(num_epochs - 2, 0)
            epochs_drawn = 0
            for _, row in event_df.iterrows():
                if 'Epoch:' in row['timer']:
                    if full_view:
                        label = 'Epochs' if epochs_drawn == legend_epoch else '_nolegend_'
                        ax.axvspan(row['start'], row['end'],
                                   alpha=min(epoch_alpha, 1.0),
                                   label=label,
                                   color='tab:red')
                        epochs_drawn += 1
                        epoch_alpha += alpha_inc
                    continue
                timer_style = timers_dict[row['timer']]
                if not full_view:
                    label = '_nolegend_'
                elif timer_style['rename'] is not None:
                    label = timer_style['rename']
                else:
                    label = row['timer']
                ax.axvspan(row['start'], row['end'],
                           alpha=timer_style['alpha'], label=label,
                           hatch=timer_style['hatch'],
                           facecolor=timer_style['facecolor'],
                           edgecolor=timer_style['edgecolor'],
                           color=timer_style['color'])

            if full_view:
                ax.set_title(f'{name} Event Times', y=1.08)
                ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
            elif zoom == 'validation':
                window_start, window_end = zoom_window
                ax.axvspan(window_start, window_end,
                           label='Validation',
                           alpha=0.3,
                           color='tab:olive')
                ax.set_xlim(window_start - 3.5 * minute, window_end + 3.5 * minute)
                # one major tick per minute on the hour-valued axis
                ax.xaxis.set_major_locator(mticker.MultipleLocator(minute))
                ax.set_title('Epoch Validation Fit', y=1.08)
                ax.legend(loc='upper right')
            else:  # zoom == 'DLResults'
                window_start, window_end = zoom_window
                ax.set_xlim(window_start - 3.5 * minute, window_end + 0.5 * minute)
                ax.set_title('DLResults', y=1.08)
                ax.legend(loc='upper right')

            ax.set_ylabel('Watts')
            ax.set_xlabel('Execution Time (Hours)')
            ax.set_ylim(0, 235)
            if full_view:
                first_hour = timer_df['start'].min()
                last_hour = timer_df['end'].max()
                run_hours = last_hour - first_hour
                # widen the tick spacing as the run gets longer
                if run_hours < 8.5:
                    tick_step = 0.5
                elif run_hours < 17:
                    tick_step = 1
                elif run_hours < 34:
                    tick_step = 2
                else:
                    tick_step = 4
                ax.xaxis.set_ticks(np.arange(first_hour, last_hour, tick_step))
            ax.grid(False)

            # Save before showing: some backends discard the figure on show().
            os.makedirs(save_dir, exist_ok=True)
            SAVEFIG(fig, name, save_dir)
            plt.show()
    finally:
        plt.close(fig)

In [None]:
def SAVEFIG(fig, filename, path=None, formats=('png', 'pdf')):
    """Save a figure once per requested format.

    Args:
        fig: object with a matplotlib-style ``savefig`` method.
        filename: output file name without extension.
        path: directory to write into; when None the file name is used as
            given (relative to the current working directory).
        formats: iterable of file formats; each is used as the extension.

    Returns:
        ``(1, files)`` where ``files`` is a tuple of the file names written,
        in the order of ``formats``.
    """
    # Materialize once so a one-shot iterable is not exhausted by the loop.
    formats = tuple(formats)
    fileout = filename if path is None else os.path.join(path, filename)
    written = []
    for my_format in formats:
        target = f"{fileout}.{my_format}"
        fig.savefig(target, format=my_format, bbox_inches="tight")
        written.append(target)
    return 1, tuple(written)

### Load Pickle

In [None]:
# Load the experiment dictionary from 'experiment_data.pkl' in the current
# working directory into ``loaded_dict``.
# NOTE(review): unpickling can execute arbitrary code; only load a pickle
# file that comes from a trusted source.
cwd = os.getcwd()
pickle_file = os.path.join(cwd,'experiment_data.pkl')
with open(pickle_file, 'rb') as f:
    loaded_dict = pickle.load(f)

### Display Available Data

In [None]:
# Count the loaded runs per "<gpu>-<system>-<filesystem>" combination.
# The rows are collected first and the frame is built once; the explicit
# column list keeps the groupby below working when no experiments are loaded.
records = []
for experiment_data in loaded_dict.values():
    run_info = experiment_data['run_info']
    records.append({
        'count': 1,
        'sys': f"{run_info['gpu']}-{run_info['system']}-{run_info['filesystem']}",
    })
data = pd.DataFrame(records, columns=['count', 'sys'])
data.groupby('sys').count()

### Create Analysis Outputs

In [None]:
# Write one full-run GPU event plot per experiment under ./analysis.
analysis_path = os.path.join(os.getcwd(),'analysis')
# Create the output directory once, up front, instead of on every iteration.
os.makedirs(analysis_path, exist_ok=True)
for experiment, experiment_data in loaded_dict.items():
    # experiments whose key starts with 'mar2022' are not plotted
    if experiment.startswith('mar2022'):
        continue
    epochs = experiment_data['run_info']['epochs']
    # copy so the loaded data stays untouched by the plotting code
    timer_df = experiment_data['timer_df'].copy()
    gpu_df = experiment_data['gpu_df'].copy()
    # keep only the text before the first '.' of each timestamp, then parse it
    gpu_df['time'] = pd.to_datetime(gpu_df['time'].str.split(".").str[0],format='%Y-%m-%d:%H:%M:%S')
    plot_gpu_events(timer_df, gpu_df, epochs, experiment, analysis_path)
    #plot_gpu_events(timer_df, gpu_df, epochs, experiment, analysis_path, 'validation')
    #plot_gpu_events(timer_df, gpu_df, epochs, experiment, plot_path, 'DLResults')
