In [1]:
import pandas as pd
from datetime import datetime as dt
import numpy as np
import re

In [2]:
# Load the raw slurm wrapper log for CE5. The file has no header row and its
# fields are separated by ' - '. A multi-character separator needs the
# (slower) python parsing engine.
# Tip: add nrows=1000000 to load only a sample while iterating.
ce5 = pd.read_csv(
    '../data/slurm_wrapper_ce5.log',
    sep=' - ',
    header=None,
    engine='python',
)

In [3]:
# Project-local helper used to convert the END column (defined in carb_funcs,
# not shown in this notebook).
from carb_funcs import optional_ms

# NOTE: this cell rewrites ce5's columns in place, so it is not re-runnable on
# its own -- reload ce5 with the read_csv cell before running it again.

# Name the six ' - '-separated fields of each log record.
colnames = ['END', 'USER', 'RETRY', 'TIME', 'RETURNCODE', 'COMMAND']
ce5.columns = colnames

# Apply the parsing function to the 'END' column.
ce5['END'] = optional_ms(ce5['END'])

# Each remaining field carries a textual label (e.g. 'user', 'time'); remove
# the label and cast what is left to a numeric dtype.
ce5['TIME'] = ce5['TIME'].str.replace('time', '', regex=False).astype(float)
ce5['USER'] = ce5['USER'].str.replace('user', '', regex=False).astype(int)
ce5['RETRY'] = ce5['RETRY'].str.replace('retry', '', regex=False).astype(int)
ce5['RETURNCODE'] = ce5['RETURNCODE'].str.replace('returncode', '', regex=False).astype(float)

# COMMAND: turn the logged list text into a list of tokens.
#  * n=1 removes only the first 'command' (the field label); without it every
#    later occurrence inside an argument or path would be deleted as well.
#  * The character class strips quotes, square brackets and spaces, then the
#    text is split on commas. Arguments that themselves contain spaces or
#    commas are therefore not preserved verbatim.
ce5['COMMAND'] = (
    ce5['COMMAND']
    .str.replace('command', '', n=1, regex=False)
    .str.replace(r"['\[\] ]", '', regex=True)
    .str.split(',')
)

# Display the updated DataFrame
ce5.head(2)

Unnamed: 0,END,USER,RETRY,TIME,RETURNCODE,COMMAND
0,2020-10-16 08:15:39.278699,0,0,0.073476,0.0,"[/usr/bin/sacct, -u, appelte1, -S, 2020-10-10]"
1,2020-10-16 08:18:08.313309,0,0,0.183632,0.0,"[/usr/bin/sacct, -u, appelte1, -S, 2020-10-10]"


In [4]:
# Sanity check on END: print the value that sorts last. Missing values are
# explicitly placed at the end of the sort, so seeing a real timestamp here
# (rather than a missing value) indicates that no END entry is missing.
print(ce5['END'].sort_values(ascending=True, na_position='last').iloc[-1])

2021-10-07 21:59:17.693458


To identify periods of unresponsiveness, use the two log files. Look for records of the "sbatch" command issued by user 9204 (the test user) that have a return code of 1 and an execution time greater than 15 seconds.

In [5]:
# Criteria that mark a record as evidence of scheduler unresponsiveness.
TEST_USER_ID = 9204          # the test user
FAILED_RETURNCODE = 1.0      # RETURNCODE is stored as float
MIN_RUNTIME_SECONDS = 15     # execution time must exceed this

# Step 1: records from the test user that failed and ran too long.
is_slow_failure = (
    (ce5['USER'] == TEST_USER_ID)
    & (ce5['RETURNCODE'] == FAILED_RETURNCODE)
    & (ce5['TIME'] > MIN_RUNTIME_SECONDS)
)
ce5_filtered = ce5[is_slow_failure]

# Step 2: keep rows whose COMMAND token list mentions 'sbatch' in any token.
# astype(bool) guarantees a boolean mask even when ce5_filtered is empty
# (an empty .map() result would otherwise be object dtype).
mentions_sbatch = ce5_filtered['COMMAND'].map(
    lambda tokens: any('sbatch' in token for token in tokens)
).astype(bool)
ce5_unresponsive_df = ce5_filtered[mentions_sbatch]

display(ce5_unresponsive_df.head())
# info() writes its own report and returns None, so it must not be wrapped in
# print() -- doing so emitted a stray "None" before the shape.
ce5_unresponsive_df.info()
print(ce5_unresponsive_df.shape)

Unnamed: 0,END,USER,RETRY,TIME,RETURNCODE,COMMAND
49958,2020-10-18 06:53:44.272915,9204,0,20.038464,1.0,"[/usr/bin/sbatch, /tmp/condor_g_scratch.0x55c1..."
49972,2020-10-18 06:54:04.322412,9204,1,20.048906,1.0,"[/usr/bin/sbatch, /tmp/condor_g_scratch.0x55c1..."
50467,2020-10-18 07:47:25.825172,9204,0,20.082628,1.0,"[/usr/bin/sbatch, /tmp/condor_g_scratch.0x55c1..."
50473,2020-10-18 07:47:45.871008,9204,1,20.045221,1.0,"[/usr/bin/sbatch, /tmp/condor_g_scratch.0x55c1..."
50582,2020-10-18 07:53:33.972840,9204,0,20.041486,1.0,"[/usr/bin/sbatch, /tmp/condor_g_scratch.0x55c1..."


<class 'pandas.core.frame.DataFrame'>
Index: 1811 entries, 49958 to 4731181
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   END         1811 non-null   datetime64[ns]
 1   USER        1811 non-null   int64         
 2   RETRY       1811 non-null   int64         
 3   TIME        1811 non-null   float64       
 4   RETURNCODE  1811 non-null   float64       
 5   COMMAND     1811 non-null   object        
dtypes: datetime64[ns](1), float64(2), int64(2), object(1)
memory usage: 99.0+ KB
None (1811, 6)


In [7]:
# Persist the unresponsive records for later use; the DataFrame index is not
# written to the file.
ce5_unresponsive_df.to_csv(
    path_or_buf='../data/ce5_unresponsive.csv',
    index=False,
)