# MLCommons Earthquake GPU Power Analysis
- Generates GPU Power Usage Graphics

In [None]:
import os
import re
import glob
import pickle
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
from datetime import timedelta
import matplotlib.dates as md
import matplotlib.pyplot as plt

### Plotting Functions

In [None]:
def plot_power_usage(df, path, span='total', normalize=True):
    """Create a power usage bar plot, save it, and display it.

    The figure is saved through ``SAVEFIG`` into ``<path>/power_usage``
    (created on demand, including missing parent directories), then shown
    and closed.

    Args:
        df: power dataframe. The columns ``epochs``, ``gpu``, ``kWh_total``
            and ``kWh_fit`` are read. The caller's dataframe is left
            untouched; derived columns are added to a private copy.
        path: output path; plots go into its ``power_usage`` subdirectory.
        span: 'total' or 'train' or 'avg'.
        normalize: bool argument to normalize to number of epochs.
            ``span='avg'`` is only available with ``normalize=True``.

    Raises:
        ValueError: if the ``span``/``normalize`` combination is not one of
            the supported plots.
    """
    # (span, normalize) -> (file stem, y column, title, y label, whitegrid?)
    # Only the normalized 'train' plot switches seaborn to "whitegrid".
    per_epoch_plots = {
        ('total', False): ('total_epoch_vs_watts', 'kWh_total',
                           'Total Notebook: Epochs vs. kWh', 'kWh', False),
        ('total', True): ('total_kWh_per_epoch', 'kWh/epoch_total',
                          'Total Notebook: Epochs vs. kWh/Epoch',
                          'kWh/epoch', False),
        ('train', False): ('model_fit_epoch_vs_watts', 'kWh_fit',
                           'Model Fit: Epochs vs. kWh', 'kWh', False),
        ('train', True): ('model_fit_kWh_per_epoch', 'kWh/epoch_fit',
                          'Model Fit: Epochs vs. kWh/Epoch',
                          'kWh/epoch', True),
    }
    plot_key = (span, bool(normalize))
    is_average_plot = plot_key == ('avg', True)

    # Fail early with a clear message instead of reaching the save step with
    # no file name defined.
    if not is_average_plot and plot_key not in per_epoch_plots:
        raise ValueError(
            f"unsupported plot: span={span!r}, normalize={normalize!r}"
        )

    # makedirs also creates `path` itself when it does not exist yet.
    power_usage_dir = os.path.join(path, 'power_usage')
    os.makedirs(power_usage_dir, exist_ok=True)

    # augment data on a copy so repeated calls never alter the caller's frame
    df = df.copy()
    df['epochs'] = df['epochs'].astype(int)
    df = df.sort_values('epochs')  # sort_values returns a new frame
    df['kWh/epoch_total'] = df['kWh_total'] / df['epochs']
    df['kWh/epoch_fit'] = df['kWh_fit'] / df['epochs']

    fig = plt.figure(figsize=(8, 6), dpi=360)
    try:
        if is_average_plot:
            # plot average model fit kWh per Epoch
            save_name = 'average_kWh_per_epoch'
            # Select the column before averaging so that non-numeric columns
            # in df cannot break the mean.
            grouped = df.groupby('gpu')['kWh/epoch_fit'].mean().reset_index()
            # NOTE: set_style changes seaborn's global style, so it also
            # applies to plots created after this call.
            sns.set_style("whitegrid")
            sns.barplot(x="gpu", y="kWh/epoch_fit", data=grouped)
            plt.title('Average kWh/Epoch per GPU', fontsize=14)
            plt.ylabel('kWh/epoch', fontsize=12)
            plt.xlabel('GPU', fontsize=12)
            # No hue is used here, so there is nothing to put in a legend.
        else:
            save_name, y_column, title, y_label, whitegrid = \
                per_epoch_plots[plot_key]
            if whitegrid:
                sns.set_style("whitegrid")
            sns.barplot(x='epochs', y=y_column, hue='gpu', data=df)
            plt.title(title)
            plt.ylabel(y_label)
            # Place the GPU legend outside the axes, to the right.
            plt.legend(bbox_to_anchor=(1.01, 1), loc='upper left')

        SAVEFIG(fig, save_name, power_usage_dir)
        plt.show()
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)

In [None]:
def SAVEFIG(fig, filename, path=None, formats=('png', 'pdf')):
    """Save a figure once per requested format.

    Args:
        fig: object exposing ``savefig(target, format=..., bbox_inches=...)``.
        filename: output file name without extension.
        path: directory to save into. When ``None`` the file name is used
            as given, i.e. relative to the current working directory.
        formats: iterable of format names; each one is used both as the
            file extension and as the ``format`` argument of ``savefig``.

    Returns:
        A tuple ``(1, paths)`` where ``paths`` is a tuple with the path of
        every file written, in the order of ``formats``.
    """
    # os.path.join(None, ...) raises TypeError, so handle the default here.
    fileout = filename if path is None else os.path.join(path, filename)

    written = []
    # Single pass over `formats` so one-shot iterables are reported correctly.
    for my_format in formats:
        target = f"{fileout}.{my_format}"
        fig.savefig(target, format=my_format, bbox_inches="tight")
        written.append(target)
    return 1, tuple(written)

### Load Pickle

In [None]:
# The experiment results are expected in 'experiment_data.pkl' inside the
# directory the notebook is run from.
cwd = os.getcwd()
pickle_file = os.path.join(cwd, 'experiment_data.pkl')

# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(pickle_file, mode='rb') as f:
    loaded_dict = pickle.load(f)

### Display Available Data

In [None]:
# Build one row per experiment, labelled "<gpu>-<system>-<filesystem>", then
# count how many experiments exist for each label.
records = []
for experiment_data in loaded_dict.values():
    run_info = experiment_data['run_info']
    records.append({
        'count': 1,
        'sys': f"{run_info['gpu']}-{run_info['system']}-{run_info['filesystem']}",
    })

# Create the frame in one step instead of concatenating inside the loop;
# naming the columns keeps the groupby valid when there are no experiments.
data = pd.DataFrame(records, columns=['count', 'sys'])
data.groupby('sys').count()

### Create Analysis Outputs

In [None]:
# All analysis artifacts are written below ./analysis.
analysis_path = os.path.join(os.getcwd(),'analysis')
os.makedirs(analysis_path, exist_ok=True)

# Gather the per-experiment power frames (experiments without power data
# store None) and concatenate them once rather than once per experiment.
power_frames = [
    experiment_data['power_df']
    for experiment_data in loaded_dict.values()
    if experiment_data['power_df'] is not None
]
if not power_frames:
    raise ValueError("no experiment provides power data; nothing to plot")
power_df = pd.concat(power_frames)

# power usage plots
for span, normalize in (
    ('total', False),
    ('total', True),
    ('train', False),
    ('train', True),
    ('avg', True),
):
    plot_power_usage(power_df, analysis_path, span=span, normalize=normalize)