In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

from utils import extract_job_data, get_slurm_data, format_node_names


In [2]:

def preprocess_slurm(df: object) -> object:
    """processing the slurm data

    Args:
        df (object): a data frame with job id and the feature

    Returns:
        object: a dataframe with extracted features as its columns
    """

    df['feature'] = df['feature'].str.split('\n')
    df['length_of_feature'] = [len(l) for l in df['feature'].tolist()]
    # df['length_of_feature'].value_counts()[0:10]
    
    
    
 
    lower_bound = 0
    upper_bound = len(df)
    data_processed = []

    for n in range(lower_bound, upper_bound):

        len_feature = df.iloc[n, :]['length_of_feature']
        if len_feature > 2:
            job_id =int( df.iloc[n, :]['job_id'])
            query_name = df.iloc[n, :]['feature'][0]
            signal = df.iloc[n, :]['feature'][1:-1]
            
            
            data = {'job_id': [job_id] * len(signal),
                    'query_name': [query_name] * len(signal),
                    'signal': signal}

            data_processed.append(pd.DataFrame(data))

    df = pd.concat(data_processed, axis=0)
    df['query_name'] = df['query_name'].str.split('|')
    df['signal'] = df['signal'].str.split('|')
    # get the length of signal name column
    df['length_of_query'] = [len(l) for l in df['query_name'].tolist()]
    df['length_of_signal'] = [len(l) for l in df['signal'].tolist()]
    
    


    signal_names = df['query_name'].iloc[0][0:-1]
        # for the 13 signals
    for i, signal_name in enumerate(signal_names):
        df[signal_name] = df['signal'].apply(lambda x:x[i])
        
        
        
    df['formatted_node_names'] = df['NodeList'].apply(format_node_names)
    df.drop(['query_name','signal', 'length_of_query',
                 'length_of_signal', 'JobName'], axis=1, inplace=True)

    df.rename(columns={"JobID":"Slurm_job_id"}, inplace=True)

 
    df.sort_values(by='job_id', inplace=True)


    return df

In [3]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
job_folder_path =  Path('/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data')
job_file_paths = list(Path.glob(job_folder_path, "*.json"))
file_path_writing = job_folder_path / 'benchmark_job_data_cleaned.csv'

display(job_file_paths)


[PosixPath('/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-12-11-09-32.json'),
 PosixPath('/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-12-11-10-06.json'),
 PosixPath('/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-11-20-12-42.json'),
 PosixPath('/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-11-20-14-47.json'),
 PosixPath('/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-11-20-13-45.json'),
 PosixPath('/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-11-20-11-54.json'),
 PosixPath('/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-11-20-14-28.

In [4]:


main_key_list = ['jobid', 'name', 'system', 'result', 'nodelist', 'fail_reason', 
            'fail_phase', 'environment', 'time_compile', 'time_performance', 'time_run', 
            'time_sanity', 'time_setup', 'time_total']

pref_vars_key_list = ["name", "reference", "thres_lower", "thres_upper","unit","value"]



check_vars_key_list = ['valid_prog_environs', 'valid_systems', 'executable', 'executable_opts', 
                  'num_tasks', 'num_tasks_per_node', 'num_cpus_per_task', 'time_limit', 'prerun_cmds',
                  'postrun_cmds','modules', 'env_vars','readonly_files'
                  ]


df_list = []
for file_path in job_file_paths:
    print(file_path)
    all_job_data = extract_job_data(file_path, main_key_list, pref_vars_key_list, check_vars_key_list)
    # print([len(job) for job in all_job_data])
    # _ = pd.DataFrame(all_job_data)
    # if _.empty or _.isna().all().all():
    #     display(_.sample(n=10))
    df_list.append(pd.DataFrame(all_job_data))
    
df = pd.concat(df_list, axis=0, ignore_index=True)
# display(df.sample(n=10))


display(df.sample(n=10))


/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-12-11-09-32.json
/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-12-11-10-06.json
/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-11-20-12-42.json
/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-11-20-14-47.json
/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-11-20-13-45.json
/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-11-20-11-54.json
/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-11-20-14-28.json
/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_ru

/projects/2/prjs1098/system_analytics_2024/prom_data/benchmark_single_node_job_data/AI_student_runs_single_2024-12-11-10-39.json


Unnamed: 0,jobid,name,system,result,nodelist,fail_reason,fail_phase,environment,time_compile,time_performance,time_run,time_sanity,time_setup,time_total,pref_name,pref_reference,pref_thres_lower,pref_thres_upper,pref_unit,pref_value,valid_prog_environs,valid_systems,executable,executable_opts,num_tasks,num_tasks_per_node,num_cpus_per_task,time_limit,prerun_cmds,postrun_cmds,modules,env_vars,readonly_files
350,,HemePure_CPUBuildTest ~snellius:gpu_h100+eb-foss,snellius:gpu_h100,success,[],,,eb-foss,740.142676,0.007025,0.267144,0.007058,0.024316,917.256948,no_data,no_data,no_data,no_data,no_data,no_data,[eb-foss],[snellius:gpu_h100],./HemePure_CPUBuildTest_fad8439d,[],1,,,1800.0,[],[],"[Python-bare-2.7.18, libtirpc]",{},[]
165,8618142.0,Palabos_CPU_WS %nnodes=1 %ppn=full,snellius:genoa,success,[tcn650],,,eb-foss,0.08541,0.065114,548.816145,0.033488,0.735287,554.503631,MLUPS,0,,,MLUPS,215.439,[eb-foss],"[snellius:rome, snellius:fat, snellius:gpu_a10...",/scratch-shared/benjamic/reframe_output/stagin...,[/scratch-shared/benjamic/reframe_output/stagi...,192,192.0,,1800.0,[sed -i 's/<referenceResolution> 400 </<refere...,[],[],{},[]
250,8618958.0,HPCG_MPIOnly %nnodes=1,snellius:genoa,success,[tcn755],,,eb-intel-mpi,53.794621,0.03624,265.069486,0.125959,0.051595,555.047575,gflops,0,,,Gflop/s,66.2172,"[eb-foss-mpi, eb-intel-mpi]","[lisa:silver, lisa:gold, snellius:rome, snelli...",hpcg/bin/xhpcg,"[--nx=104, --ny=104, --nz=104, -t2]",192,192.0,1.0,3600.0,[],[],[],{},[]
207,8616988.0,VASP %version=6.4.2,snellius:rome,aborted,[],aborted due to KeyboardInterrupt,run,eb-foss,0.028758,,1065.922933,,0.046753,1091.496335,no_data,no_data,no_data,no_data,no_data,no_data,[eb-foss],"[snellius:rome, snellius:genoa]",vasp_std,[],256,,,86400.0,[],[],[vasp-foss-6.4.2],{},[]
347,,HemePure_CPUBuildTest ~snellius:genoa+eb-foss,snellius:genoa,success,[],,,eb-foss,808.143786,0.054098,138.546298,0.051725,0.022796,976.831596,no_data,no_data,no_data,no_data,no_data,no_data,[eb-foss],[snellius:genoa],./HemePure_CPUBuildTest_2a752034,[],1,,,1800.0,[],[],"[Python-bare-2.7.18, libtirpc]",{},[]
307,,Palabos_CPU_WS %nnodes=1 %ppn=full,snellius:gpu_h100,failure,[],spawned process error: command 'sbatch rfm_job...,run,eb-foss,0.117863,,6.261048,,0.146674,17.310231,no_data,no_data,no_data,no_data,no_data,no_data,[eb-foss],"[snellius:rome, snellius:fat, snellius:gpu_a10...",/scratch-shared/benjamic/reframe_output/stagin...,[/scratch-shared/benjamic/reframe_output/stagi...,64,64.0,,1800.0,[sed -i 's/<referenceResolution> 400 </<refere...,[],[],{},[]
358,8618820.0,HemePure_CPU_SS_small %nnodes=1 %ppn=half,snellius:fat,success,[fcn40],,,eb-foss,0.201681,0.029072,98.782633,0.090209,0.222703,105.018171,NumSites,0,,,s,18450.0,[eb-foss],"[snellius:rome, snellius:fat, snellius:gpu_a10...",/scratch-shared/benjamic/reframe_output/stagin...,[ -in /gpfs/work4/1/reframe0/resourcesdir/Hem...,64,64.0,,3540.0,[],[cp results/report.txt /gpfs/home3/benjamic/Re...,[],{},[]
262,,Palabos_CPUBuildTest ~snellius:fat+eb-foss,snellius:fat,success,[],,,eb-foss,140.493884,0.030935,527.950191,0.03418,0.023578,690.318531,no_data,no_data,no_data,no_data,no_data,no_data,[eb-foss],[snellius:fat],./Palabos_CPUBuildTest_00f7d1bf,[],1,,,,[],[],[],{},[]
379,8995983.0,Palabos_CPU_WS %nnodes=1 %ppn=full,snellius:rome,success,[tcn330],,,eb-foss,0.09399,0.029506,1629.064777,0.11319,0.070935,1636.837,MLUPS,0,,,MLUPS,37.7511,[eb-foss],"[snellius:rome, snellius:fat, snellius:gpu_a10...",/scratch-shared/benjamic/reframe_output/stagin...,[/scratch-shared/benjamic/reframe_output/stagi...,128,128.0,,1800.0,[sed -i 's/<referenceResolution> 400 </<refere...,[],[],{},[]
283,,HemePure_CPU_SS_small %nnodes=1 %ppn=half,snellius:gpu_a100,failure,[],spawned process error: command 'sbatch rfm_job...,run,eb-foss,0.02861,,1.744501,,0.046593,3.718218,no_data,no_data,no_data,no_data,no_data,no_data,[eb-foss],"[snellius:rome, snellius:fat, snellius:gpu_a10...",/scratch-shared/benjamic/reframe_output/stagin...,[ -in /gpfs/work4/1/reframe0/resourcesdir/Hem...,36,36.0,,3540.0,[],[cp results/report.txt /gpfs/home3/benjamic/Re...,[],{},[]


In [5]:

mask = (df['jobid']=='None') | (df['jobid'].isna())

df_benckmark= df[~(mask)].copy()
df_benckmark['jobid'] = pd.to_numeric(df_benckmark['jobid']).astype('Int64')

print(f"How many potential jobs: {len(df)} and How many jobids are available: {len(df[~(mask)])}")


How many potential jobs: 405 and How many jobids are available: 235


In [6]:
""" 
Get the start time, end time and nodelist from slurm for the selected jobs
and merge them with the benchmark data
"""
df_slurm = get_slurm_data(df_benckmark['jobid'])
df_slurm_preprocessed = preprocess_slurm(df_slurm)

df = pd.merge(left=df_slurm_preprocessed, right=df_benckmark, how='inner',
              left_on='job_id', right_on='jobid')
display(df_slurm_preprocessed.head(n=5), df.head())



Unnamed: 0,job_id,Submit,Eligible,Start,End,Elapsed,Slurm_job_id,State,AllocCPUS,TotalCPU,NodeList,formatted_node_names
0,8616980,2024-11-20T11:54:35,2024-11-20T11:54:35,2024-11-20T11:54:52,2024-11-20T12:07:43,00:12:51,8616980,COMPLETED,72,00:00:00,gcn49,gcn49
0,8616982,2024-11-20T11:54:38,2024-11-20T11:54:38,2024-11-20T11:54:41,2024-11-20T12:07:18,00:12:37,8616982,COMPLETED,64,00:00:00,gcn149,gcn149
0,8616984,2024-11-20T11:54:40,2024-11-20T11:54:40,2024-11-20T11:54:52,2024-11-20T12:08:01,00:13:09,8616984,COMPLETED,128,00:00:00,tcn109,tcn109
0,8616986,2024-11-20T11:54:43,2024-11-20T11:54:43,2024-11-20T11:54:52,2024-11-20T12:07:56,00:13:04,8616986,COMPLETED,192,00:00:00,tcn575,tcn575
0,8616988,2024-11-20T11:55:00,2024-11-20T11:55:00,2024-11-20T11:55:22,2024-11-20T12:12:54,00:17:32,8616988,CANCELLED by 54971,256,00:00:00,"tcn[149,151]","tcn149,tcn151"


Unnamed: 0,job_id,Submit,Eligible,Start,End,Elapsed,Slurm_job_id,State,AllocCPUS,TotalCPU,NodeList,formatted_node_names,jobid,name,system,result,nodelist,fail_reason,fail_phase,environment,time_compile,time_performance,time_run,time_sanity,time_setup,time_total,pref_name,pref_reference,pref_thres_lower,pref_thres_upper,pref_unit,pref_value,valid_prog_environs,valid_systems,executable,executable_opts,num_tasks,num_tasks_per_node,num_cpus_per_task,time_limit,prerun_cmds,postrun_cmds,modules,env_vars,readonly_files
0,8616980,2024-11-20T11:54:35,2024-11-20T11:54:35,2024-11-20T11:54:52,2024-11-20T12:07:43,00:12:51,8616980,COMPLETED,72,00:00:00,gcn49,gcn49,8616980,GROMACS_GPU %n_gpus=1,snellius:gpu_a100,success,[gcn49],,,eb-foss,0.029076,0.227419,792.55326,0.096275,0.527797,794.377215,perf,0,,,ns/day,16.529,[eb-foss],"[snellius:gpu_a100, snellius:gpu_h100]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,1,1.0,18.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss-gpu],{},[]
1,8616982,2024-11-20T11:54:38,2024-11-20T11:54:38,2024-11-20T11:54:41,2024-11-20T12:07:18,00:12:37,8616982,COMPLETED,64,00:00:00,gcn149,gcn149,8616982,GROMACS_GPU %n_gpus=1,snellius:gpu_h100,success,[gcn149],,,eb-foss,0.028724,0.073034,766.922158,0.097536,0.04874,770.778406,perf,0,,,ns/day,25.705,[eb-foss],"[snellius:gpu_a100, snellius:gpu_h100]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,1,1.0,16.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss-gpu],{},[]
2,8616984,2024-11-20T11:54:40,2024-11-20T11:54:40,2024-11-20T11:54:52,2024-11-20T12:08:01,00:13:09,8616984,COMPLETED,128,00:00:00,tcn109,tcn109,8616984,GROMACS_CPU %n_cores=32,snellius:rome,success,[tcn109],,,eb-foss-mpi,0.028928,0.179587,806.839507,0.187784,0.047647,813.222719,perf,0,,,ns/day,3.262,[eb-foss-mpi],"[snellius:rome, snellius:genoa]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,32,32.0,1.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss],{},[]
3,8616986,2024-11-20T11:54:43,2024-11-20T11:54:43,2024-11-20T11:54:52,2024-11-20T12:07:56,00:13:04,8616986,COMPLETED,192,00:00:00,tcn575,tcn575,8616986,GROMACS_CPU %n_cores=32,snellius:genoa,success,[tcn575],,,eb-foss-mpi,0.029197,0.046268,798.165663,0.052712,0.047641,806.888496,perf,0,,,ns/day,3.343,[eb-foss-mpi],"[snellius:rome, snellius:genoa]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,32,32.0,1.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss],{},[]
4,8616988,2024-11-20T11:55:00,2024-11-20T11:55:00,2024-11-20T11:55:22,2024-11-20T12:12:54,00:17:32,8616988,CANCELLED by 54971,256,00:00:00,"tcn[149,151]","tcn149,tcn151",8616988,VASP %version=6.4.2,snellius:rome,aborted,[],aborted due to KeyboardInterrupt,run,eb-foss,0.028758,,1065.922933,,0.046753,1091.496335,no_data,no_data,no_data,no_data,no_data,no_data,[eb-foss],"[snellius:rome, snellius:genoa]",vasp_std,[],256,,,86400.0,[],[],[vasp-foss-6.4.2],{},[]


In [7]:
""" 
For some jobs the start time is None! 
they have job ids, but no record comes out
"""
mask = (df['Start']=='None') 
display(df[mask])
df = df[~mask].copy()




Unnamed: 0,job_id,Submit,Eligible,Start,End,Elapsed,Slurm_job_id,State,AllocCPUS,TotalCPU,NodeList,formatted_node_names,jobid,name,system,result,nodelist,fail_reason,fail_phase,environment,time_compile,time_performance,time_run,time_sanity,time_setup,time_total,pref_name,pref_reference,pref_thres_lower,pref_thres_upper,pref_unit,pref_value,valid_prog_environs,valid_systems,executable,executable_opts,num_tasks,num_tasks_per_node,num_cpus_per_task,time_limit,prerun_cmds,postrun_cmds,modules,env_vars,readonly_files


In [8]:
# removing all the step ids
df = df[(df['Slurm_job_id'].str.contains(pat='^\\d+$', regex=True))]
df.head()

Unnamed: 0,job_id,Submit,Eligible,Start,End,Elapsed,Slurm_job_id,State,AllocCPUS,TotalCPU,NodeList,formatted_node_names,jobid,name,system,result,nodelist,fail_reason,fail_phase,environment,time_compile,time_performance,time_run,time_sanity,time_setup,time_total,pref_name,pref_reference,pref_thres_lower,pref_thres_upper,pref_unit,pref_value,valid_prog_environs,valid_systems,executable,executable_opts,num_tasks,num_tasks_per_node,num_cpus_per_task,time_limit,prerun_cmds,postrun_cmds,modules,env_vars,readonly_files
0,8616980,2024-11-20T11:54:35,2024-11-20T11:54:35,2024-11-20T11:54:52,2024-11-20T12:07:43,00:12:51,8616980,COMPLETED,72,00:00:00,gcn49,gcn49,8616980,GROMACS_GPU %n_gpus=1,snellius:gpu_a100,success,[gcn49],,,eb-foss,0.029076,0.227419,792.55326,0.096275,0.527797,794.377215,perf,0,,,ns/day,16.529,[eb-foss],"[snellius:gpu_a100, snellius:gpu_h100]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,1,1.0,18.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss-gpu],{},[]
1,8616982,2024-11-20T11:54:38,2024-11-20T11:54:38,2024-11-20T11:54:41,2024-11-20T12:07:18,00:12:37,8616982,COMPLETED,64,00:00:00,gcn149,gcn149,8616982,GROMACS_GPU %n_gpus=1,snellius:gpu_h100,success,[gcn149],,,eb-foss,0.028724,0.073034,766.922158,0.097536,0.04874,770.778406,perf,0,,,ns/day,25.705,[eb-foss],"[snellius:gpu_a100, snellius:gpu_h100]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,1,1.0,16.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss-gpu],{},[]
2,8616984,2024-11-20T11:54:40,2024-11-20T11:54:40,2024-11-20T11:54:52,2024-11-20T12:08:01,00:13:09,8616984,COMPLETED,128,00:00:00,tcn109,tcn109,8616984,GROMACS_CPU %n_cores=32,snellius:rome,success,[tcn109],,,eb-foss-mpi,0.028928,0.179587,806.839507,0.187784,0.047647,813.222719,perf,0,,,ns/day,3.262,[eb-foss-mpi],"[snellius:rome, snellius:genoa]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,32,32.0,1.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss],{},[]
3,8616986,2024-11-20T11:54:43,2024-11-20T11:54:43,2024-11-20T11:54:52,2024-11-20T12:07:56,00:13:04,8616986,COMPLETED,192,00:00:00,tcn575,tcn575,8616986,GROMACS_CPU %n_cores=32,snellius:genoa,success,[tcn575],,,eb-foss-mpi,0.029197,0.046268,798.165663,0.052712,0.047641,806.888496,perf,0,,,ns/day,3.343,[eb-foss-mpi],"[snellius:rome, snellius:genoa]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,32,32.0,1.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss],{},[]
4,8616988,2024-11-20T11:55:00,2024-11-20T11:55:00,2024-11-20T11:55:22,2024-11-20T12:12:54,00:17:32,8616988,CANCELLED by 54971,256,00:00:00,"tcn[149,151]","tcn149,tcn151",8616988,VASP %version=6.4.2,snellius:rome,aborted,[],aborted due to KeyboardInterrupt,run,eb-foss,0.028758,,1065.922933,,0.046753,1091.496335,no_data,no_data,no_data,no_data,no_data,no_data,[eb-foss],"[snellius:rome, snellius:genoa]",vasp_std,[],256,,,86400.0,[],[],[vasp-foss-6.4.2],{},[]


In [9]:
# remove all the rows that are not completed
mask = ~(df['State']=='COMPLETED')
display(df[mask])
df = df[~mask].copy()

Unnamed: 0,job_id,Submit,Eligible,Start,End,Elapsed,Slurm_job_id,State,AllocCPUS,TotalCPU,NodeList,formatted_node_names,jobid,name,system,result,nodelist,fail_reason,fail_phase,environment,time_compile,time_performance,time_run,time_sanity,time_setup,time_total,pref_name,pref_reference,pref_thres_lower,pref_thres_upper,pref_unit,pref_value,valid_prog_environs,valid_systems,executable,executable_opts,num_tasks,num_tasks_per_node,num_cpus_per_task,time_limit,prerun_cmds,postrun_cmds,modules,env_vars,readonly_files
4,8616988,2024-11-20T11:55:00,2024-11-20T11:55:00,2024-11-20T11:55:22,2024-11-20T12:12:54,00:17:32,8616988,CANCELLED by 54971,256,00:00:00,"tcn[149,151]","tcn149,tcn151",8616988,VASP %version=6.4.2,snellius:rome,aborted,[],aborted due to KeyboardInterrupt,run,eb-foss,0.028758,,1065.922933,,0.046753,1091.496335,no_data,no_data,no_data,no_data,no_data,no_data,[eb-foss],"[snellius:rome, snellius:genoa]",vasp_std,[],256,,,86400.0,[],[],[vasp-foss-6.4.2],{},[]
5,8616990,2024-11-20T11:55:03,2024-11-20T11:55:03,2024-11-20T11:55:06,2024-11-20T12:12:54,00:17:48,8616990,CANCELLED by 54971,384,00:00:00,"tcn[888,893]","tcn888,tcn893",8616990,VASP %version=6.4.2,snellius:genoa,aborted,[],aborted due to KeyboardInterrupt,run,eb-foss,0.028725,,1063.542924,,0.047807,1091.422901,no_data,no_data,no_data,no_data,no_data,no_data,[eb-foss],"[snellius:rome, snellius:genoa]",vasp_std,[],384,,,86400.0,[],[],[vasp-foss-6.4.2],{},[]
6,8617010,2024-11-20T11:57:03,2024-11-20T11:57:03,2024-11-20T11:57:22,2024-11-20T11:57:40,00:00:18,8617010,FAILED,128,00:00:00,tcn306,tcn306,8617010,HPCG_MPIOnly %nnodes=1,snellius:rome,failure,[tcn306],sanity error: index out of bounds: 0,sanity,eb-intel-mpi,115.829539,,40.982624,0.250703,0.052086,190.08914,no_data,no_data,no_data,no_data,no_data,no_data,"[eb-foss-mpi, eb-intel-mpi]","[lisa:silver, lisa:gold, snellius:rome, snelli...",hpcg/bin/xhpcg,"[--nx=104, --ny=104, --nz=104, -t2]",128,128.0,1.0,3600.0,[],[],[],{},[]
12,8617019,2024-11-20T11:57:36,2024-11-20T11:57:36,2024-11-20T11:57:38,2024-11-20T11:58:26,00:00:48,8617019,FAILED,192,00:00:00,tcn1005,tcn1005,8617019,HPCG_MPIOnly %nnodes=1,snellius:genoa,failure,[tcn1005],sanity error: index out of bounds: 0,sanity,eb-intel-mpi,32.440213,,52.282237,0.033061,0.051539,234.739804,no_data,no_data,no_data,no_data,no_data,no_data,"[eb-foss-mpi, eb-intel-mpi]","[lisa:silver, lisa:gold, snellius:rome, snelli...",hpcg/bin/xhpcg,"[--nx=104, --ny=104, --nz=104, -t2]",192,192.0,1.0,3600.0,[],[],[],{},[]
14,8617022,2024-11-20T11:57:42,2024-11-20T11:57:42,2024-11-20T11:57:43,2024-11-20T11:58:20,00:00:37,8617022,FAILED,128,00:00:00,fcn62,fcn62,8617022,HPCG_MPIOnly %nnodes=1,snellius:fat,failure,[fcn62],sanity error: index out of bounds: 0,sanity,eb-intel-mpi,32.372833,,41.648577,0.031247,0.051604,229.765781,no_data,no_data,no_data,no_data,no_data,no_data,"[eb-foss-mpi, eb-intel-mpi]","[lisa:silver, lisa:gold, snellius:rome, snelli...",hpcg/bin/xhpcg,"[--nx=104, --ny=104, --nz=104, -t2]",128,128.0,1.0,3600.0,[],[],[],{},[]
25,8617372,2024-11-20T12:47:44,2024-11-20T12:47:44,2024-11-20T12:47:56,2024-11-20T12:48:13,00:00:17,8617372,FAILED,128,00:00:00,tcn306,tcn306,8617372,HPCG_MPIOnly %nnodes=1,snellius:rome,failure,[tcn306],sanity error: index out of bounds: 0,sanity,eb-intel-mpi,57.932809,,35.176644,0.100945,0.223092,308.427281,no_data,no_data,no_data,no_data,no_data,no_data,"[eb-foss-mpi, eb-intel-mpi]","[lisa:silver, lisa:gold, snellius:rome, snelli...",hpcg/bin/xhpcg,"[--nx=104, --ny=104, --nz=104, -t2]",128,128.0,1.0,3600.0,[],[],[],{},[]
28,8617376,2024-11-20T12:48:32,2024-11-20T12:48:32,2024-11-20T12:48:39,2024-11-20T12:49:27,00:00:48,8617376,FAILED,192,00:00:00,tcn1014,tcn1014,8617376,HPCG_MPIOnly %nnodes=1,snellius:genoa,failure,[tcn1014],sanity error: index out of bounds: 0,sanity,eb-intel-mpi,52.118239,,58.920915,0.086645,0.051461,381.053181,no_data,no_data,no_data,no_data,no_data,no_data,"[eb-foss-mpi, eb-intel-mpi]","[lisa:silver, lisa:gold, snellius:rome, snelli...",hpcg/bin/xhpcg,"[--nx=104, --ny=104, --nz=104, -t2]",192,192.0,1.0,3600.0,[],[],[],{},[]
29,8617377,2024-11-20T12:48:58,2024-11-20T12:48:58,2024-11-20T12:49:00,2024-11-20T12:49:18,00:00:18,8617377,FAILED,128,00:00:00,fcn40,fcn40,8617377,HPCG_MPIOnly %nnodes=1,snellius:fat,failure,[fcn40],sanity error: index out of bounds: 0,sanity,eb-intel-mpi,49.913301,,25.175953,0.124615,0.050967,372.578818,no_data,no_data,no_data,no_data,no_data,no_data,"[eb-foss-mpi, eb-intel-mpi]","[lisa:silver, lisa:gold, snellius:rome, snelli...",hpcg/bin/xhpcg,"[--nx=104, --ny=104, --nz=104, -t2]",128,128.0,1.0,3600.0,[],[],[],{},[]
113,8619477,2024-11-20T14:51:58,2024-11-20T14:51:58,2024-11-20T14:52:09,2024-11-20T14:52:29,00:00:20,8619477,FAILED,128,00:00:00,tcn431,tcn431,8619477,HPCG_MPIOnly %nnodes=1,snellius:rome,failure,[tcn431],sanity error: index out of bounds: 0,sanity,eb-intel-mpi,63.662785,,37.014676,0.062711,0.057766,281.118705,no_data,no_data,no_data,no_data,no_data,no_data,"[eb-foss-mpi, eb-intel-mpi]","[lisa:silver, lisa:gold, snellius:rome, snelli...",hpcg/bin/xhpcg,"[--nx=104, --ny=104, --nz=104, -t2]",128,128.0,1.0,3600.0,[],[],[],{},[]
116,8619502,2024-11-20T14:52:55,2024-11-20T14:52:55,2024-11-20T14:52:56,2024-11-20T14:53:27,00:00:31,8619502,FAILED,192,00:00:00,tcn586,tcn586,8619502,HPCG_MPIOnly %nnodes=1,snellius:genoa,failure,[tcn586],sanity error: index out of bounds: 0,sanity,eb-intel-mpi,60.390557,,37.524774,0.059154,0.058495,338.917252,no_data,no_data,no_data,no_data,no_data,no_data,"[eb-foss-mpi, eb-intel-mpi]","[lisa:silver, lisa:gold, snellius:rome, snelli...",hpcg/bin/xhpcg,"[--nx=104, --ny=104, --nz=104, -t2]",192,192.0,1.0,3600.0,[],[],[],{},[]


In [10]:

# drop not useful columns
df.drop(['NodeList', 'jobid', 'nodelist'], inplace=True, axis=1)
df.rename(columns={"formatted_node_names":"node_list"}, inplace=True)
df.columns = df.columns.str.lower()
display(df.head())
print(len(df))

Unnamed: 0,job_id,submit,eligible,start,end,elapsed,slurm_job_id,state,alloccpus,totalcpu,node_list,name,system,result,fail_reason,fail_phase,environment,time_compile,time_performance,time_run,time_sanity,time_setup,time_total,pref_name,pref_reference,pref_thres_lower,pref_thres_upper,pref_unit,pref_value,valid_prog_environs,valid_systems,executable,executable_opts,num_tasks,num_tasks_per_node,num_cpus_per_task,time_limit,prerun_cmds,postrun_cmds,modules,env_vars,readonly_files
0,8616980,2024-11-20T11:54:35,2024-11-20T11:54:35,2024-11-20T11:54:52,2024-11-20T12:07:43,00:12:51,8616980,COMPLETED,72,00:00:00,gcn49,GROMACS_GPU %n_gpus=1,snellius:gpu_a100,success,,,eb-foss,0.029076,0.227419,792.55326,0.096275,0.527797,794.377215,perf,0,,,ns/day,16.529,[eb-foss],"[snellius:gpu_a100, snellius:gpu_h100]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,1,1.0,18.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss-gpu],{},[]
1,8616982,2024-11-20T11:54:38,2024-11-20T11:54:38,2024-11-20T11:54:41,2024-11-20T12:07:18,00:12:37,8616982,COMPLETED,64,00:00:00,gcn149,GROMACS_GPU %n_gpus=1,snellius:gpu_h100,success,,,eb-foss,0.028724,0.073034,766.922158,0.097536,0.04874,770.778406,perf,0,,,ns/day,25.705,[eb-foss],"[snellius:gpu_a100, snellius:gpu_h100]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,1,1.0,16.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss-gpu],{},[]
2,8616984,2024-11-20T11:54:40,2024-11-20T11:54:40,2024-11-20T11:54:52,2024-11-20T12:08:01,00:13:09,8616984,COMPLETED,128,00:00:00,tcn109,GROMACS_CPU %n_cores=32,snellius:rome,success,,,eb-foss-mpi,0.028928,0.179587,806.839507,0.187784,0.047647,813.222719,perf,0,,,ns/day,3.262,[eb-foss-mpi],"[snellius:rome, snellius:genoa]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,32,32.0,1.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss],{},[]
3,8616986,2024-11-20T11:54:43,2024-11-20T11:54:43,2024-11-20T11:54:52,2024-11-20T12:07:56,00:13:04,8616986,COMPLETED,192,00:00:00,tcn575,GROMACS_CPU %n_cores=32,snellius:genoa,success,,,eb-foss-mpi,0.029197,0.046268,798.165663,0.052712,0.047641,806.888496,perf,0,,,ns/day,3.343,[eb-foss-mpi],"[snellius:rome, snellius:genoa]",gmx_mpi,[mdrun -v -pin on -dlb yes -resethway -deffnm ...,32,32.0,1.0,1800.0,[cp /gpfs/work4/1/reframe0/resourcesdir/gromac...,[],[gromacs-foss],{},[]
7,8617011,2024-11-20T11:57:08,2024-11-20T11:57:08,2024-11-20T11:57:22,2024-11-20T12:02:20,00:04:58,8617011,COMPLETED,128,00:00:00,tcn111,HPCG_MPIOnly %nnodes=1,snellius:rome,success,,,eb-foss-mpi,120.110956,0.027074,318.363357,0.167167,0.051292,472.05297,gflops,0,,,Gflop/s,37.848,"[eb-foss-mpi, eb-intel-mpi]","[lisa:silver, lisa:gold, snellius:rome, snelli...",hpcg/bin/xhpcg,"[--nx=104, --ny=104, --nz=104, -t2]",128,128.0,1.0,3600.0,[],[],[],{},[]


220


In [11]:
df['node_list'].value_counts()

node_list
fcn40      24
tcn738     10
fcn25      10
tcn575      9
fcn45       8
tcn607      6
tcn745      5
tcn306      5
tcn109      5
tcn281      5
tcn262      5
tcn354      5
tcn22       5
tcn561      5
tcn589      4
tcn552      4
tcn111      4
fcn62       4
tcn356      4
fcn49       4
gcn22       4
tcn747      4
tcn146      3
gcn49       3
tcn408      3
fcn52       3
gcn108      3
tcn291      3
tcn755      3
fcn41       3
tcn114      2
tcn750      2
fcn60       2
tcn597      2
gcn131      2
tcn558      2
tcn77       2
tcn650      2
gcn69       2
tcn417      2
tcn6        2
tcn593      2
tcn808      2
tcn349      2
gcn149      1
tcn584      1
tcn135      1
tcn329      1
gcn111      1
tcn1005     1
tcn105      1
gcn122      1
tcn83       1
tcn86       1
tcn569      1
fcn64       1
tcn144      1
tcn331      1
gcn17       1
tcn90       1
tcn664      1
tcn645      1
tcn586      1
tcn527      1
tcn1014     1
gcn128      1
tcn334      1
gcn119      1
gcn134      1
tcn178      1
gcn46     

In [12]:

# df.to_csv(file_path_writing, index=False)