# MLCommons Earthquake NNSE Analysis Notebook
- Generate markdown tables for earthquake predictions across epochs

In [None]:
import os
import re
import ast
import glob
import pickle
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
from datetime import timedelta
import matplotlib.dates as md
import matplotlib.pyplot as plt

In [None]:
# Raise pandas' 'display.max_rows' option to 500 so long tables are not
# truncated when displayed in the notebook.
pd.set_option('display.max_rows', 500)

### Plotting Functions

In [None]:
def plot_nnse(data_dict):
    """Scatter-plot each run's summed validation NNSE against its epoch count.

    For every experiment whose ``timer_df`` is not ``None``, the rows with
    ``timer == 'NNSE_1774'`` are looked up. The ``msg`` value of the first
    such row is parsed as a Python literal (after replacing ``;`` with ``,``)
    and its ``'Summed_NNSE_val'`` entry becomes the y-value. Points are
    coloured by a ``gpu-system-filesystem-date`` label built from
    ``run_info``.

    Args:
        data_dict: Mapping of experiment name to a dict holding
            ``'timer_df'`` (a DataFrame with ``timer`` and ``msg`` columns,
            or ``None``) and ``'run_info'`` (with ``gpu``, ``system``,
            ``filesystem``, ``epochs`` and ``date`` entries).

    Returns:
        None. The figure is displayed with ``plt.show()``. When no experiment
        provides an ``NNSE_1774`` entry, a message is printed and no figure
        is created.
    """
    rows = []
    for entry in data_dict.values():
        timer_df = entry['timer_df']
        if timer_df is None:
            continue

        run_info = entry['run_info']
        run = {
            'gpu': run_info['gpu'],
            'system': run_info['system'],
            'filesystem': run_info['filesystem'],
            'epochs': int(run_info['epochs']),
            'date': run_info['date'],
        }
        run['sys'] = f"{run['gpu']}-{run['system']}-{run['filesystem']}-{run['date']}"

        # Query once and reuse the result for both the emptiness check and
        # the value lookup.
        nnse_rows = timer_df.query("timer == 'NNSE_1774'")
        if nnse_rows.empty:
            continue

        # The message stores a dict literal with ';' as the item separator;
        # swap in ',' so ast.literal_eval can parse it.
        dict_str = nnse_rows['msg'].values[0].replace(';', ',')
        run['NNSE'] = ast.literal_eval(dict_str)['Summed_NNSE_val']
        rows.append(run)

    if not rows:
        # An empty frame has no 'epochs'/'NNSE'/'sys' columns for seaborn to
        # plot, so stop here with a readable message instead.
        print("No 'NNSE_1774' timer entries found; nothing to plot.")
        return

    # Build the frame once instead of concatenating inside the loop.
    data = pd.DataFrame(rows)

    plt.figure(figsize=(8, 6), dpi=360)
    sns.scatterplot(x='epochs',
                    y='NNSE',
                    hue='sys',
                    s=50,
                    data=data)

    plt.ylabel('NNSE Value')
    plt.xlabel('Epochs')
    # Anchor the legend just outside the right edge of the axes.
    plt.legend(bbox_to_anchor=(1.01, 1), loc='upper left')

    plt.show()
    

In [None]:
def plot_nnse_bar(data_dict):
    """Bar-plot each run's summed validation NNSE grouped by epoch count.

    For every experiment whose ``timer_df`` is not ``None``, the rows with
    ``timer == 'NNSE_1774'`` are looked up. The ``msg`` value of the first
    such row is parsed as a Python literal (after replacing ``;`` with ``,``)
    and its ``'Summed_NNSE_val'`` entry becomes the bar height. Bars are
    coloured by a ``gpu-system-filesystem-date`` label built from
    ``run_info``.

    Args:
        data_dict: Mapping of experiment name to a dict holding
            ``'timer_df'`` (a DataFrame with ``timer`` and ``msg`` columns,
            or ``None``) and ``'run_info'`` (with ``gpu``, ``system``,
            ``filesystem``, ``epochs`` and ``date`` entries).

    Returns:
        None. The figure is displayed with ``plt.show()``. When no experiment
        provides an ``NNSE_1774`` entry, a message is printed and no figure
        is created.
    """
    rows = []
    for entry in data_dict.values():
        timer_df = entry['timer_df']
        if timer_df is None:
            continue

        run_info = entry['run_info']
        run = {
            'gpu': run_info['gpu'],
            'system': run_info['system'],
            'filesystem': run_info['filesystem'],
            'epochs': int(run_info['epochs']),
            'date': run_info['date'],
        }
        run['sys'] = f"{run['gpu']}-{run['system']}-{run['filesystem']}-{run['date']}"

        # Query once and reuse the result for both the emptiness check and
        # the value lookup.
        nnse_rows = timer_df.query("timer == 'NNSE_1774'")
        if nnse_rows.empty:
            continue

        # The message stores a dict literal with ';' as the item separator;
        # swap in ',' so ast.literal_eval can parse it.
        dict_str = nnse_rows['msg'].values[0].replace(';', ',')
        run['NNSE'] = ast.literal_eval(dict_str)['Summed_NNSE_val']
        rows.append(run)

    if not rows:
        # An empty frame has no 'epochs'/'NNSE'/'sys' columns for seaborn to
        # plot, so stop here with a readable message instead.
        print("No 'NNSE_1774' timer entries found; nothing to plot.")
        return

    # Build the frame once instead of concatenating inside the loop.
    data = pd.DataFrame(rows)

    plt.figure(figsize=(8, 6), dpi=360)
    sns.barplot(x='epochs', y='NNSE', hue='sys', data=data)

    plt.ylabel('NNSE Value')
    plt.xlabel('Epochs')
    # Anchor the legend just outside the right edge of the axes.
    plt.legend(bbox_to_anchor=(1.01, 1), loc='upper left')

    plt.show()

In [None]:
def SAVEFIG(fig, filename, path=None, formats=('png', 'pdf')):
    """Save ``fig`` once per requested format and report the files written.

    Args:
        fig: Object exposing ``savefig(fname, format=..., bbox_inches=...)``,
            e.g. a matplotlib figure.
        filename: Output file name without extension.
        path: Directory to prepend to ``filename``. When ``None`` (the
            default) ``filename`` is used as given, i.e. relative to the
            current working directory.
        formats: Iterable of format names; each is used both as the file
            extension and as the ``format`` argument to ``savefig``.

    Returns:
        A tuple ``(1, paths)`` where ``paths`` is a tuple of the file names
        passed to ``savefig``, in the order of ``formats``.
    """
    # os.path.join(None, ...) raises TypeError, so only join when a directory
    # was actually supplied.
    fileout = filename if path is None else os.path.join(path, filename)

    # Materialise once so one-shot iterables are not exhausted before the
    # return value is built.
    formats = tuple(formats)
    written = tuple(f"{fileout}.{my_format}" for my_format in formats)

    for my_format, target in zip(formats, written):
        fig.savefig(target, format=my_format, bbox_inches="tight")
    return 1, written

### Load Pickle

In [None]:
# Locate 'experiment_data.pkl' in the current working directory and load it
# into `loaded_dict`.
cwd = os.getcwd()
pickle_file = os.path.join(cwd,'experiment_data.pkl')
# NOTE(review): unpickling can execute arbitrary code; only load a pickle
# file that comes from a trusted source.
with open(pickle_file, 'rb') as f:
    loaded_dict = pickle.load(f)

### Display Available Data

In [None]:
# Count how many loaded experiments exist per gpu-system-filesystem label.
# Rows are collected in a list and converted to a DataFrame once, instead of
# growing a frame with pd.concat on every iteration.
rows = []
for experiment in loaded_dict:
    run_info = loaded_dict[experiment]['run_info']
    rows.append({
        'count': 1,
        'sys': f"{run_info['gpu']}-{run_info['system']}-{run_info['filesystem']}",
    })

# Explicit columns keep the groupby below valid even when no experiments
# were loaded (the result is then simply an empty table).
data = pd.DataFrame(rows, columns=['count', 'sys'])
data.groupby('sys').count()

### Create Analysis Output

In [None]:
# Collect the per-experiment NNSE tables, skipping experiments whose name
# starts with 'mar2022', and concatenate them in a single call.
nnse_frames = [
    loaded_dict[experiment]['NNSE_df']
    for experiment in loaded_dict
    if not experiment.startswith('mar2022')
]
NNSE_data = pd.concat(nnse_frames, ignore_index=True) if nnse_frames else pd.DataFrame()

# Average the numeric columns for every (epochs, time) pair.
# NOTE: the previous call passed ['averaged', 'summed'] positionally, which
# pandas binds to `numeric_only` (a truthy value), not to a column selection.
# Spelling it as numeric_only=True keeps the same output and makes that
# explicit.
epoch_grouped = (
    NNSE_data.groupby(['epochs', 'time'])
    .mean(numeric_only=True)
    .reset_index()
)

# Sort numerically by epoch count rather than by its original representation.
epoch_grouped['epochs'] = epoch_grouped['epochs'].astype(int)
epoch_grouped = epoch_grouped.sort_values('epochs')

# Print one markdown table per distinct 'time' value, indexed by epochs.
# DataFrame.to_markdown relies on the optional 'tabulate' package.
for time_label in epoch_grouped['time'].unique():
    filtered = epoch_grouped[epoch_grouped['time'] == time_label]
    filtered = filtered.set_index('epochs')
    print(f'{time_label}')
    print(filtered.to_markdown())
    print('\n')