In [5]:
import json
import jinja2
import os, glob
from pprint import pformat
import numpy as np
import pandas as pd
import copy

# Some nice colors from https://sashat.me/2017/01/11/list-of-20-simple-distinct-colors/
colors = ((230, 25, 75), (60, 180, 75), (255, 225, 25), (0, 130, 200), (245, 130, 48), (145, 30, 180), 
          (70, 240, 240), (240, 50, 230), (210, 245, 60), (250, 190, 190), (0, 128, 128), (230, 190, 255),
          (170, 110, 40), (255, 250, 200), (128, 0, 0), (170, 255, 195), (128, 128, 0), (255, 215, 180))

def render(tpl_path, dataframes_list=None, **kwargs):
    
    if dataframes_list is None:
        dataframes_list = []

    path, filename = os.path.split(tpl_path)

    environment = jinja2.Environment(undefined=jinja2.StrictUndefined,
                                      loader=jinja2.FileSystemLoader(path or './'))
    
    loaded_dataframes=load_data(dataframes_list,efficiency='electrons')
    # If there are additional keyword arguments, add them to the context
    context = kwargs
    context['dataframes_list'] = []


    return environment.get_template(filename).render(context)

def load_data(dataframes_list,**kwargs):
    
    loaded_dataframes = {}

    for num, df_dict in enumerate(dataframes_list, start=1):
        df_name = f'df_{num}'
        dataframe = df_dict
        file = df_dict['file']
        time_unit = df_dict['time_unit']
        loaded_dataframes[df_name] = copy.deepcopy(pd.read_csv(file))  
        
        column_name = df_dict['column_name']
        if column_name not in loaded_dataframes[df_name].columns:
            raise ValueError(f"Error: Column '{column_name}' does not exist in DataFrame '{df_name}'.")
            
        
        efficiency = df_dict['efficiency']
        if efficiency not in loaded_dataframes[df_name].columns:
            raise ValueError(f"Error: Column '{efficiency}' does not exist in DataFrame '{df_name}'.")
            
        x_axis = df_dict['x_axis']
        if x_axis not in loaded_dataframes[df_name].columns:
            raise ValueError(f"Error: Column '{x_axis}' does not exist in DataFrame '{df_name}'.")
            
     
        loaded_dataframes[df_name][f'other components ({df_name})'] = loaded_dataframes[df_name]['walltime'] - loaded_dataframes[df_name][column_name]
        loaded_dataframes[df_name][f'{column_name} of ({df_name})'] = loaded_dataframes[df_name][column_name]
        
        if efficiency in loaded_dataframes[df_name].columns:
                        
            T1 = loaded_dataframes[df_name][efficiency][loaded_dataframes[df_name][x_axis]== loaded_dataframes[df_name][x_axis].min()].values[0]
            Tp = loaded_dataframes[df_name][efficiency].values
            # Calculate speedup and efficiency
            loaded_dataframes[df_name]['speedup'] = T1 / Tp
            loaded_dataframes[df_name][f'efficiency of {efficiency}'] = (T1 / Tp) *( loaded_dataframes[df_name][x_axis].min()/loaded_dataframes[df_name][x_axis]) * 100


        if time_unit == 'second':
            loaded_dataframes[df_name].iloc[:, 3:] /= 1
        elif time_unit == 'minute':
            loaded_dataframes[df_name].iloc[:, 3:] /= 60
        elif time_unit == 'hour':
            loaded_dataframes[df_name].iloc[:, 3:] /= 3600
        else:
            print(f"Error: Invalid time unit '{time_unit}' specified.")
                
    return loaded_dataframes

dataframes_list = [
    {'file': '/home/mandanas/1-CINECA-projects/benchmark/benchmark_10it/bench_7.2dev_gpua_iter/000000/result/result.dat', 'column_name': 'electrons', 'efficiency':'electrons', 'x_axis':'Nodes', 'time_unit':'second'},
    {'file': '/home/mandanas/1-CINECA-projects/benchmark/benchmark_10it/bench_7.2dev_nogpua_iter/000000/result/result.dat', 'column_name': 'electrons', 'efficiency':'electrons', 'x_axis':'Nodes','time_unit':'second'},
    #{'filename': 'results.dat', 'column_name': 'sth_kernel', 'efficiency':'sth_kernel', 'x_axis':'Nodes', 'time_unit':'second'}
    ]

loaded_dataframes = load_data(dataframes_list, efficiency='electrons')
dataframes_list[0]['file']
#loaded_dataframes.keys()

'/home/mandanas/1-CINECA-projects/benchmark/benchmark_10it/bench_7.2dev_gpua_iter/000000/result/result.dat'

In [4]:
def add_to_plots(plots, key, value, accuracy):
    for k, plot in plots.items():
        if k == key:
            plot['data'].append(value)
            if accuracy:
                plot['accuracy'].append(accuracy)
        else:
            plot['data'].append('NaN')
            if accuracy:
                plot['accuracy'].append(0)

def gen_plots(dataframes_list):
    loaded_dataframes = load_data(dataframes_list, efficiency='electrons')
    labels = []
    datasets = {'efficiency': {'label': ''},
                'runtime_component': {'label': ''},
                'speedup': {'label': ''},
                'efficiency_runtime': {'label': ''},
               }

    descriptions = set()

    for df_name in loaded_dataframes.keys():
        # Update labels with current df_name
        labels.append(df_name)
        # Update descriptions with unique descriptions from the dataframe
        descriptions.update(loaded_dataframes[df_name].get('description_column', []))
        # Process dataframe and add to plots
        process_dataframe(df_name, loaded_dataframes[df_name], datasets)
            
    file = dataframes_list[0]['file']
    _, basename = os.path.split(file)
    name, _ = os.path.splitext(basename)
    with open(name + '.html', 'w') as f:
        f.write(render('plot.tmpl', {'title': name, 'labels': labels, 'datasets': datasets, 'descriptions': descriptions}))


        
def process_dataframe(df_name, df, datasets):
    labels = []

    labels.append(df_name)
    for k in list(datasets.keys()):
        if k in df.columns:
            # create data if it does not exist
            if not datasets[k].get('plots', False):
                datasets[k]['plots'] = {}
                # Assuming descriptions and colors are defined somewhere in your code
            #    for i, init_desc in enumerate(descriptions):
             #       datasets[k]['plots'][init_desc] = {'data': [], 'accuracy': [], 'color': 'rgba({}, {}, {}, 1)'.format(*colors[i])}

            # Add to reference or test
            add_to_plots(datasets[k]['plots'], df_name, df[k].mean(), None)
            # Assuming no accuracy column in the dataframe for now

            #datasets[k]['min'] = min(datasets[k].get('min', 1e12), df[k].min())
            #datasets[k]['max'] = max(datasets[k].get('max', -1e12), df[k].max())
        else:
            datasets.pop(k, None)

        #if not datasets[k]['label']:
         #   try:
          #      datasets[k]['label'] = k + ' (' + df[k + '_units'] + ')'
           # except KeyError:
            #    print("Warning, no units for " + k)



def gen_index(flist):
    entries = {}
    for filename in flist:
        _, basename = os.path.split(filename)
        name, _ = os.path.splitext(basename)
        entries[name + '.html'] = name

    with open('index.html', 'w') as f:
        f.write(render('index.tmpl', {'entries': entries}))


    

    gen_plots(dataframes_list)

    for file in dataframes_list['file'].values:
        
        flist.append(file)

    gen_index(flist)
    
    
if __name__ == "__main__":
    flist = []

    # Define dataframes_list as a list of dictionaries
   # dataframes_list = [
    #    {'filename': 'dat_file1', 'column_name': 'any_column', 'efficiency': 'any_column',
     #    'x_axis': 'any_column', 'time_unit': 'minute'},
      #  {'filename': 'dat_file2', 'column_name': 'any_column', 'efficiency': 'any_column',
       #  'x_axis': 'any_column', 'time_unit': 'minute'},
        #{'filename': 'dat_file3', 'column_name': 'any_column', 'efficiency': 'any_column',
         #'x_axis': 'any_column', 'time_unit': 'minute'}
    #]
    
    dataframes_list = [
    {'file': '/home/mandanas/1-CINECA-projects/benchmark/benchmark_10it/bench_7.2dev_gpua_iter/000000/result/result.dat', 'column_name': 'electrons', 'efficiency':'electrons', 'x_axis':'Nodes', 'time_unit':'second'},
    {'file': '/home/mandanas/1-CINECA-projects/benchmark/benchmark_10it/bench_7.2dev_nogpua_iter/000000/result/result.dat', 'column_name': 'electrons', 'efficiency':'electrons', 'x_axis':'Nodes','time_unit':'second'},
    #{'filename': 'results.dat', 'column_name': 'sth_kernel', 'efficiency':'sth_kernel', 'x_axis':'Nodes', 'time_unit':'second'}
        ]
    # Call gen_plots with the correct variable
    gen_plots(dataframes_list)

    for entry in dataframes_list:
        flist.append(entry['file'])

    # Call gen_index with the correct variable
    gen_index(flist)

TypeError: string indices must be integers, not 'str'