# accre_jobs_2020

In [1]:
import pandas as pd

## Importing the Data

Check the `accre-jobs-2020--datasource-preprocessing.ipynb` file for the data pre-processing steps

In [2]:
accre_jobs_2020 = pd.read_csv('../data/accre-jobs-2020-processed.csv')
accre_jobs_2020.head()

Unnamed: 0,JOBID,ACCOUNT,USER,REQMEM,USEDMEM,REQTIME,USEDTIME,NODES,CPUS,PARTITION,EXITCODE,STATE,NODELIST
0,15925210,treviso,arabella,122880Mn,65973.49M,13-18:00:00,13-18:00:28,1,24,production,0:0,COMPLETED,cn1531
1,15861126,treviso,arabella,122880Mn,67181.12M,13-18:00:00,12-14:50:56,1,24,production,0:0,COMPLETED,cn1441
2,15861125,treviso,arabella,122880Mn,69111.86M,13-18:00:00,13-18:00:20,1,24,production,0:0,COMPLETED,cn1464
3,16251645,treviso,arabella,122880Mn,65317.33M,13-18:00:00,12-03:50:32,1,24,production,0:0,COMPLETED,cn1473
4,16251646,treviso,arabella,122880Mn,65876.11M,13-18:00:00,13-18:00:03,1,24,production,0:0,COMPLETED,cn1440


## Data Preparation

### Change headers to all lowercase

In [3]:
accre_jobs_2020.columns = accre_jobs_2020.columns.map(str.lower)
accre_jobs_2020.head()

Unnamed: 0,jobid,account,user,reqmem,usedmem,reqtime,usedtime,nodes,cpus,partition,exitcode,state,nodelist
0,15925210,treviso,arabella,122880Mn,65973.49M,13-18:00:00,13-18:00:28,1,24,production,0:0,COMPLETED,cn1531
1,15861126,treviso,arabella,122880Mn,67181.12M,13-18:00:00,12-14:50:56,1,24,production,0:0,COMPLETED,cn1441
2,15861125,treviso,arabella,122880Mn,69111.86M,13-18:00:00,13-18:00:20,1,24,production,0:0,COMPLETED,cn1464
3,16251645,treviso,arabella,122880Mn,65317.33M,13-18:00:00,12-03:50:32,1,24,production,0:0,COMPLETED,cn1473
4,16251646,treviso,arabella,122880Mn,65876.11M,13-18:00:00,13-18:00:03,1,24,production,0:0,COMPLETED,cn1440


First, let's look at the quick summary

In [4]:
accre_jobs_2020.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3816290 entries, 0 to 3816289
Data columns (total 13 columns):
 #   Column     Dtype 
---  ------     ----- 
 0   jobid      object
 1   account    object
 2   user       object
 3   reqmem     object
 4   usedmem    object
 5   reqtime    object
 6   usedtime   object
 7   nodes      int64 
 8   cpus       int64 
 9   partition  object
 10  exitcode   object
 11  state      object
 12  nodelist   object
dtypes: int64(2), object(11)
memory usage: 378.5+ MB


### Focusing on "production" partition only

We really only want to look at the "production" partition so rows with other partitions should be removed.

In [5]:
accre_jobs_2020_prod = accre_jobs_2020.loc[accre_jobs_2020["partition"] == "production"]
accre_jobs_2020_prod.head()

Unnamed: 0,jobid,account,user,reqmem,usedmem,reqtime,usedtime,nodes,cpus,partition,exitcode,state,nodelist
0,15925210,treviso,arabella,122880Mn,65973.49M,13-18:00:00,13-18:00:28,1,24,production,0:0,COMPLETED,cn1531
1,15861126,treviso,arabella,122880Mn,67181.12M,13-18:00:00,12-14:50:56,1,24,production,0:0,COMPLETED,cn1441
2,15861125,treviso,arabella,122880Mn,69111.86M,13-18:00:00,13-18:00:20,1,24,production,0:0,COMPLETED,cn1464
3,16251645,treviso,arabella,122880Mn,65317.33M,13-18:00:00,12-03:50:32,1,24,production,0:0,COMPLETED,cn1473
4,16251646,treviso,arabella,122880Mn,65876.11M,13-18:00:00,13-18:00:03,1,24,production,0:0,COMPLETED,cn1440


In [6]:
accre_jobs_2020_prod.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3311788 entries, 0 to 3816289
Data columns (total 13 columns):
 #   Column     Dtype 
---  ------     ----- 
 0   jobid      object
 1   account    object
 2   user       object
 3   reqmem     object
 4   usedmem    object
 5   reqtime    object
 6   usedtime   object
 7   nodes      int64 
 8   cpus       int64 
 9   partition  object
 10  exitcode   object
 11  state      object
 12  nodelist   object
dtypes: int64(2), object(11)
memory usage: 353.7+ MB


### How many are production nodes? Non-production nodes?

In [7]:
accre_jobs_2020["partition"].value_counts()

production              3311788
nogpfs                   327652
sam                       79151
pascal                    48004
turing                    39406
debug                      6738
maxwell                    3348
cgw-capra1                   83
cgw-dougherty1               72
cgw-cqs1                     34
cgw-vm-qa-flatearth1          4
cgw-cqs3                      4
devel                         2
cgw-rocksteady                2
cgw-tbi01                     2
Name: partition, dtype: int64

### Converting Job times to total seconds

Job time is in a format of either `d-hh:mm:ss` or `hh:mm:ss`, it needs to be converted to total seconds

In [8]:
def convert_time_to_seconds(time_str):
    """Convert the given time string d-hh:mm:ss or hh:mm:ss to total seconds."""
    
    # Account for possible errors
    try:
        
        # Initialize all time components to 0
        days = hours = minutes = seconds = 0

        # Split the time string at the dash mark
        time_dash_split = time_str.split("-") # => Either ["d", "hh:mm:ss"] or ["hh:mm:ss"]

        # Assign each time element to variables
        if len(time_dash_split) > 1: 
            # Remove the first element from the list and convert to integer and add to days
            days += int(time_dash_split.pop(0))

        # If here, len(time_dash_split) == 1
        # Split the time at the colon marks
        time_colon_split = time_dash_split[0].split(":") # => ["hh", "mm", "ss"]
        seconds += int(time_colon_split.pop())
        minutes += int(time_colon_split.pop())
        hours += int(time_colon_split.pop())

        # Now combine everything together
        return (((((days * 24) + hours) * 60) + minutes) * 60) + seconds
        
    except AttributeError as error:
        # Skip everything if the column is already in seconds (assuming one single digit)
        return time_str

Now, we can convert the time columns to seconds

In [9]:
new_reqtime = accre_jobs_2020["reqtime"].map(convert_time_to_seconds)
new_usedtime = accre_jobs_2020["usedtime"].map(convert_time_to_seconds)

accre_jobs_2020 = accre_jobs_2020.assign(reqtime = new_reqtime)
accre_jobs_2020 = accre_jobs_2020.assign(usedtime = new_usedtime)

accre_jobs_2020.head()

Unnamed: 0,jobid,account,user,reqmem,usedmem,reqtime,usedtime,nodes,cpus,partition,exitcode,state,nodelist
0,15925210,treviso,arabella,122880Mn,65973.49M,1188000,1188028,1,24,production,0:0,COMPLETED,cn1531
1,15861126,treviso,arabella,122880Mn,67181.12M,1188000,1090256,1,24,production,0:0,COMPLETED,cn1441
2,15861125,treviso,arabella,122880Mn,69111.86M,1188000,1188020,1,24,production,0:0,COMPLETED,cn1464
3,16251645,treviso,arabella,122880Mn,65317.33M,1188000,1050632,1,24,production,0:0,COMPLETED,cn1473
4,16251646,treviso,arabella,122880Mn,65876.11M,1188000,1188003,1,24,production,0:0,COMPLETED,cn1440


In [10]:
accre_jobs_2020['usedmem'].str[-1].value_counts()

M    3213713
0     602577
Name: usedmem, dtype: int64

In [11]:
accre_mem_zeros = accre_jobs_2020[accre_jobs_2020['usedmem'] == '0'] # Why would they have used memory == 0
accre_mem_zeros

Unnamed: 0,jobid,account,user,reqmem,usedmem,reqtime,usedtime,nodes,cpus,partition,exitcode,state,nodelist
4805,17072928,endive,bennett,8192Mn,0,432000,25,1,1,turing,0:0,COMPLETED,gpu0039
4806,17072929,endive,bennett,8192Mn,0,432000,34,1,1,turing,0:0,COMPLETED,gpu0039
4807,17072930,endive,bennett,8192Mn,0,432000,23,1,1,turing,0:0,COMPLETED,gpu0039
4809,17072932,endive,bennett,8192Mn,0,432000,65,1,1,turing,0:0,COMPLETED,gpu0039
4810,17072933,endive,bennett,8192Mn,0,432000,27,1,1,turing,0:0,COMPLETED,gpu0039
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3816275,24173810,cms,uscmslocal,5000Mn,0,172800,244,1,1,production,0:0,RUNNING,cn407
3816276,24173811,cms,uscmslocal,28000Mn,0,172800,245,1,4,production,0:0,RUNNING,cn1507
3816277,24173812,cms,uscmslocal,186648Mn,0,172800,267,1,32,nogpfs,0:0,RUNNING,cn1557
3816288,24173817,boysenberries,hortensia,100000Mn,0,86400,185,1,8,production,0:0,RUNNING,cn1509


In [12]:
accre_jobs_2020["account"].value_counts() # Combine CANCELLED BY into CANCELLED

cms         807408
cep         609282
summer      380840
galia       198180
carrot      192551
             ...  
with             1
flamingo         1
purple           1
tree             1
hass             1
Name: account, Length: 175, dtype: int64

### Remove `STATE == RUNNING`

Because these computers are still in their running state, their `usedmem` and `usedtime` are not accurate and could throw off our analysis

In [13]:
computers_state_running = accre_jobs_2020[accre_jobs_2020['state'] == 'RUNNING']
computers_state_running.shape

(3108, 13)

In [14]:
accre_jobs_2020 = accre_jobs_2020[accre_jobs_2020['state'] != 'RUNNING']
accre_jobs_2020.shape

(3813182, 13)

In [15]:
accre_mem_zeros = accre_jobs_2020[accre_jobs_2020['usedmem'] == '0'] # Why would they have used memory == 0
accre_mem_zeros

Unnamed: 0,jobid,account,user,reqmem,usedmem,reqtime,usedtime,nodes,cpus,partition,exitcode,state,nodelist
4805,17072928,endive,bennett,8192Mn,0,432000,25,1,1,turing,0:0,COMPLETED,gpu0039
4806,17072929,endive,bennett,8192Mn,0,432000,34,1,1,turing,0:0,COMPLETED,gpu0039
4807,17072930,endive,bennett,8192Mn,0,432000,23,1,1,turing,0:0,COMPLETED,gpu0039
4809,17072932,endive,bennett,8192Mn,0,432000,65,1,1,turing,0:0,COMPLETED,gpu0039
4810,17072933,endive,bennett,8192Mn,0,432000,27,1,1,turing,0:0,COMPLETED,gpu0039
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3816245,24173761_4,portabella,vennie,16384Mn,0,86400,36,1,1,production,0:0,COMPLETED,cn1135
3816258,24173800_3,portabella,vennie,16384Mn,0,86400,37,1,1,production,0:0,COMPLETED,cn1133
3816259,24173800_4,portabella,vennie,16384Mn,0,86400,37,1,1,production,0:0,COMPLETED,cn1092
3816260,24173800_5,portabella,vennie,16384Mn,0,86400,37,1,1,production,0:0,COMPLETED,cn1135


In [16]:
# Split exit code
accre_jobs_2020[["exitcode_user", "exitcode_error"]] = accre_jobs_2020["exitcode"].str.split(":", expand=True) # exitcode_user , exitcode_error
accre_jobs_2020

Unnamed: 0,jobid,account,user,reqmem,usedmem,reqtime,usedtime,nodes,cpus,partition,exitcode,state,nodelist,exitcode_user,exitcode_error
0,15925210,treviso,arabella,122880Mn,65973.49M,1188000,1188028,1,24,production,0:0,COMPLETED,cn1531,0,0
1,15861126,treviso,arabella,122880Mn,67181.12M,1188000,1090256,1,24,production,0:0,COMPLETED,cn1441,0,0
2,15861125,treviso,arabella,122880Mn,69111.86M,1188000,1188020,1,24,production,0:0,COMPLETED,cn1464,0,0
3,16251645,treviso,arabella,122880Mn,65317.33M,1188000,1050632,1,24,production,0:0,COMPLETED,cn1473,0,0
4,16251646,treviso,arabella,122880Mn,65876.11M,1188000,1188003,1,24,production,0:0,COMPLETED,cn1440,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3816283,24173815_6,portabella,vennie,32768Mn,23269.08M,86400,96,1,2,production,0:0,COMPLETED,cn432,0,0
3816284,24173815_7,portabella,vennie,32768Mn,20879.81M,86400,97,1,2,production,0:0,COMPLETED,cn440,0,0
3816285,24173815_8,portabella,vennie,32768Mn,30042.68M,86400,147,1,2,production,0:0,COMPLETED,cn312,0,0
3816286,24173815_9,portabella,vennie,32768Mn,31067.75M,86400,147,1,2,production,0:0,COMPLETED,cn312,0,0


In [17]:
# Fix used memory to Mb per node
accre_jobs_2020['used_mb_per_node'] = accre_jobs_2020['usedmem'].str.strip('M').astype(float)
accre_jobs_2020

Unnamed: 0,jobid,account,user,reqmem,usedmem,reqtime,usedtime,nodes,cpus,partition,exitcode,state,nodelist,exitcode_user,exitcode_error,used_mb_per_node
0,15925210,treviso,arabella,122880Mn,65973.49M,1188000,1188028,1,24,production,0:0,COMPLETED,cn1531,0,0,65973.49
1,15861126,treviso,arabella,122880Mn,67181.12M,1188000,1090256,1,24,production,0:0,COMPLETED,cn1441,0,0,67181.12
2,15861125,treviso,arabella,122880Mn,69111.86M,1188000,1188020,1,24,production,0:0,COMPLETED,cn1464,0,0,69111.86
3,16251645,treviso,arabella,122880Mn,65317.33M,1188000,1050632,1,24,production,0:0,COMPLETED,cn1473,0,0,65317.33
4,16251646,treviso,arabella,122880Mn,65876.11M,1188000,1188003,1,24,production,0:0,COMPLETED,cn1440,0,0,65876.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3816283,24173815_6,portabella,vennie,32768Mn,23269.08M,86400,96,1,2,production,0:0,COMPLETED,cn432,0,0,23269.08
3816284,24173815_7,portabella,vennie,32768Mn,20879.81M,86400,97,1,2,production,0:0,COMPLETED,cn440,0,0,20879.81
3816285,24173815_8,portabella,vennie,32768Mn,30042.68M,86400,147,1,2,production,0:0,COMPLETED,cn312,0,0,30042.68
3816286,24173815_9,portabella,vennie,32768Mn,31067.75M,86400,147,1,2,production,0:0,COMPLETED,cn312,0,0,31067.75


In [18]:
accre_jobs_2020['used_mb_per_core'] = (accre_jobs_2020['used_mb_per_node']) / (accre_jobs_2020['cpus'] * accre_jobs_2020['nodes'])
accre_jobs_2020


Unnamed: 0,jobid,account,user,reqmem,usedmem,reqtime,usedtime,nodes,cpus,partition,exitcode,state,nodelist,exitcode_user,exitcode_error,used_mb_per_node,used_mb_per_core
0,15925210,treviso,arabella,122880Mn,65973.49M,1188000,1188028,1,24,production,0:0,COMPLETED,cn1531,0,0,65973.49,2748.895417
1,15861126,treviso,arabella,122880Mn,67181.12M,1188000,1090256,1,24,production,0:0,COMPLETED,cn1441,0,0,67181.12,2799.213333
2,15861125,treviso,arabella,122880Mn,69111.86M,1188000,1188020,1,24,production,0:0,COMPLETED,cn1464,0,0,69111.86,2879.660833
3,16251645,treviso,arabella,122880Mn,65317.33M,1188000,1050632,1,24,production,0:0,COMPLETED,cn1473,0,0,65317.33,2721.555417
4,16251646,treviso,arabella,122880Mn,65876.11M,1188000,1188003,1,24,production,0:0,COMPLETED,cn1440,0,0,65876.11,2744.837917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3816283,24173815_6,portabella,vennie,32768Mn,23269.08M,86400,96,1,2,production,0:0,COMPLETED,cn432,0,0,23269.08,11634.540000
3816284,24173815_7,portabella,vennie,32768Mn,20879.81M,86400,97,1,2,production,0:0,COMPLETED,cn440,0,0,20879.81,10439.905000
3816285,24173815_8,portabella,vennie,32768Mn,30042.68M,86400,147,1,2,production,0:0,COMPLETED,cn312,0,0,30042.68,15021.340000
3816286,24173815_9,portabella,vennie,32768Mn,31067.75M,86400,147,1,2,production,0:0,COMPLETED,cn312,0,0,31067.75,15533.875000


In [26]:
accre_jobs_2020["reqmem_mc"] = accre_jobs_2020[accre_jobs_2020["reqmem"].str.contains("Mc")]
accre_jobs_2020

Unnamed: 0,jobid,account,user,reqmem,usedmem,reqtime,usedtime,nodes,cpus,partition,exitcode,state,nodelist,exitcode_user,exitcode_error,used_mb_per_node,used_mb_per_core,reqmem_mc
0,15925210,treviso,arabella,122880Mn,65973.49M,1188000,1188028,1,24,production,0:0,COMPLETED,cn1531,0,0,65973.49,2748.895417,
1,15861126,treviso,arabella,122880Mn,67181.12M,1188000,1090256,1,24,production,0:0,COMPLETED,cn1441,0,0,67181.12,2799.213333,
2,15861125,treviso,arabella,122880Mn,69111.86M,1188000,1188020,1,24,production,0:0,COMPLETED,cn1464,0,0,69111.86,2879.660833,
3,16251645,treviso,arabella,122880Mn,65317.33M,1188000,1050632,1,24,production,0:0,COMPLETED,cn1473,0,0,65317.33,2721.555417,
4,16251646,treviso,arabella,122880Mn,65876.11M,1188000,1188003,1,24,production,0:0,COMPLETED,cn1440,0,0,65876.11,2744.837917,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3816283,24173815_6,portabella,vennie,32768Mn,23269.08M,86400,96,1,2,production,0:0,COMPLETED,cn432,0,0,23269.08,11634.540000,
3816284,24173815_7,portabella,vennie,32768Mn,20879.81M,86400,97,1,2,production,0:0,COMPLETED,cn440,0,0,20879.81,10439.905000,
3816285,24173815_8,portabella,vennie,32768Mn,30042.68M,86400,147,1,2,production,0:0,COMPLETED,cn312,0,0,30042.68,15021.340000,
3816286,24173815_9,portabella,vennie,32768Mn,31067.75M,86400,147,1,2,production,0:0,COMPLETED,cn312,0,0,31067.75,15533.875000,


In [30]:
accre_jobs_2020["reqmem_mn"] = accre_jobs_2020[accre_jobs_2020["reqmem"].str.contains("Mn")]
accre_jobs_2020

Unnamed: 0,jobid,account,user,reqmem,usedmem,reqtime,usedtime,nodes,cpus,partition,exitcode,state,nodelist,exitcode_user,exitcode_error,used_mb_per_node,used_mb_per_core,reqmem_mc,reqmem_mn
0,15925210,treviso,arabella,122880Mn,65973.49M,1188000,1188028,1,24,production,0:0,COMPLETED,cn1531,0,0,65973.49,2748.895417,,15925210
1,15861126,treviso,arabella,122880Mn,67181.12M,1188000,1090256,1,24,production,0:0,COMPLETED,cn1441,0,0,67181.12,2799.213333,,15861126
2,15861125,treviso,arabella,122880Mn,69111.86M,1188000,1188020,1,24,production,0:0,COMPLETED,cn1464,0,0,69111.86,2879.660833,,15861125
3,16251645,treviso,arabella,122880Mn,65317.33M,1188000,1050632,1,24,production,0:0,COMPLETED,cn1473,0,0,65317.33,2721.555417,,16251645
4,16251646,treviso,arabella,122880Mn,65876.11M,1188000,1188003,1,24,production,0:0,COMPLETED,cn1440,0,0,65876.11,2744.837917,,16251646
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3816283,24173815_6,portabella,vennie,32768Mn,23269.08M,86400,96,1,2,production,0:0,COMPLETED,cn432,0,0,23269.08,11634.540000,,24173815_6
3816284,24173815_7,portabella,vennie,32768Mn,20879.81M,86400,97,1,2,production,0:0,COMPLETED,cn440,0,0,20879.81,10439.905000,,24173815_7
3816285,24173815_8,portabella,vennie,32768Mn,30042.68M,86400,147,1,2,production,0:0,COMPLETED,cn312,0,0,30042.68,15021.340000,,24173815_8
3816286,24173815_9,portabella,vennie,32768Mn,31067.75M,86400,147,1,2,production,0:0,COMPLETED,cn312,0,0,31067.75,15533.875000,,24173815_9
