In [1]:
import pandas as pd
import numpy as np
from io import StringIO
import re

import random
from datetime import datetime
from datetime import timedelta
import time
%matplotlib inline

In [2]:
# Raise pandas' display limits so wide/long job tables are not truncated as
# aggressively when rendered in the notebook.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

Q4. Optimizing memory is more important for longer-running jobs than for shorter-running jobs, as the resources are tied up for longer. If jobs are weighted by runtime, what is the average percent of memory used of the requested memory for each group? (i.e., what is the weighted average of memory usage vs. requested memory for each group?)

To do:  
-group data frame by user groups  
-understand sequential jobid numbering (and if that is a proxy for arrays)  
-create new columns for weighted average requested and weighted average used per group  
-focus on "production" partitions (but if there's time, look at others as well)  
-compare requested time to used time and completed jobs to failed jobs; is there a buffer around run time? does the job cancel the second that used time exceeds requested time?  

In [3]:
# Load the jobs CSV; nrows caps the read at the first 5,000 rows, so everything
# downstream describes that sample rather than the whole file.
accre = pd.read_csv(
    filepath_or_buffer='../data/accre-jobs-2020-clean.csv',
    nrows=5000,
)

In [4]:
# A notebook cell only renders its final expression, so a bare `accre.head()`
# followed by `accre.tail()` silently discards the head preview. Passing both
# to display() (available as a builtin inside an IPython kernel) renders the
# first and the last rows of the loaded sample.
display(accre.head(), accre.tail())

Unnamed: 0,JOBID,ACCOUNT,USER,REQMEM,USEDMEM,REQTIME,USEDTIME,NODES,CPUS,PARTITION,EXITCODE,STATE,NODELIST,REQMEM_NO,REQMEM_TYPE,REQMEM_PER_CORE,USEDMEM_TYPE,USEDMEM_NO,USEDMEM_PER_CORE,REQTIME_DAY_SEC,REQTIME_T,REQTIME_SEC,USEDTIME_DAY_SEC,USEDTIME_T,USEDTIME_SEC
4995,18625828,cms,cmspilot,28000Mn,11730.53M,2-00:00:00,1-18:30:30,1,4,production,0:0,COMPLETED,cn1212,28000,Mn,7000.0,M,11730,2932.5,172800.0,00:00:00,172800.0,86400.0,18:30:30,153030.0
4996,18625829,cms,cmspilot,28000Mn,4993.34M,2-00:00:00,03:02:21,1,4,production,0:0,COMPLETED,cn1215,28000,Mn,7000.0,M,4993,1248.25,172800.0,00:00:00,172800.0,0.0,03:02:21,10941.0
4997,18625830,cms,cmspilot,28000Mn,9823.94M,2-00:00:00,16:47:29,1,4,production,0:0,COMPLETED,cn1216,28000,Mn,7000.0,M,9823,2455.75,172800.0,00:00:00,172800.0,0.0,16:47:29,60449.0
4998,18625831,cms,cmspilot,28000Mn,5074.64M,2-00:00:00,06:07:29,1,4,production,0:0,COMPLETED,cn1217,28000,Mn,7000.0,M,5074,1268.5,172800.0,00:00:00,172800.0,0.0,06:07:29,22049.0
4999,18625832,cms,cmspilot,28000Mn,9819.73M,2-00:00:00,23:22:20,1,4,production,0:0,COMPLETED,cn1205,28000,Mn,7000.0,M,9819,2454.75,172800.0,00:00:00,172800.0,0.0,23:22:20,84140.0


Examine 'production' partitions only.

In [5]:
# Keep only the rows whose PARTITION value is 'production', then preview them.
accre_prod_only = accre.loc[accre['PARTITION'] == 'production']
accre_prod_only

Unnamed: 0,JOBID,ACCOUNT,USER,REQMEM,USEDMEM,REQTIME,USEDTIME,NODES,CPUS,PARTITION,EXITCODE,STATE,NODELIST,REQMEM_NO,REQMEM_TYPE,REQMEM_PER_CORE,USEDMEM_TYPE,USEDMEM_NO,USEDMEM_PER_CORE,REQTIME_DAY_SEC,REQTIME_T,REQTIME_SEC,USEDTIME_DAY_SEC,USEDTIME_T,USEDTIME_SEC
0,18616746,cms,cmslocal,5000Mn,1284.14M,2-00:00:00,00:15:35,1,1,production,0:0,COMPLETED,cn1423,5000,Mn,5000.0,M,1284,1284.00,172800.0,00:00:00,172800.0,0.0,00:15:35,935.0
1,18616752,cms,cmslocal,5000Mn,1275.16M,2-00:00:00,00:11:41,1,1,production,0:0,COMPLETED,cn1424,5000,Mn,5000.0,M,1275,1275.00,172800.0,00:00:00,172800.0,0.0,00:11:41,701.0
2,18616754,cms,cmslocal,5000Mn,1259.60M,2-00:00:00,00:15:35,1,1,production,0:0,COMPLETED,cn1311,5000,Mn,5000.0,M,1259,1259.00,172800.0,00:00:00,172800.0,0.0,00:15:35,935.0
3,18616757,cms,cmslocal,5000Mn,1271.18M,2-00:00:00,00:15:36,1,1,production,0:0,COMPLETED,cn1311,5000,Mn,5000.0,M,1271,1271.00,172800.0,00:00:00,172800.0,0.0,00:15:36,936.0
4,18616758,cms,cmslocal,5000Mn,1260.83M,2-00:00:00,00:15:35,1,1,production,0:0,COMPLETED,cn372,5000,Mn,5000.0,M,1260,1260.00,172800.0,00:00:00,172800.0,0.0,00:15:35,935.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,18625828,cms,cmspilot,28000Mn,11730.53M,2-00:00:00,1-18:30:30,1,4,production,0:0,COMPLETED,cn1212,28000,Mn,7000.0,M,11730,2932.50,172800.0,00:00:00,172800.0,86400.0,18:30:30,153030.0
4996,18625829,cms,cmspilot,28000Mn,4993.34M,2-00:00:00,03:02:21,1,4,production,0:0,COMPLETED,cn1215,28000,Mn,7000.0,M,4993,1248.25,172800.0,00:00:00,172800.0,0.0,03:02:21,10941.0
4997,18625830,cms,cmspilot,28000Mn,9823.94M,2-00:00:00,16:47:29,1,4,production,0:0,COMPLETED,cn1216,28000,Mn,7000.0,M,9823,2455.75,172800.0,00:00:00,172800.0,0.0,16:47:29,60449.0
4998,18625831,cms,cmspilot,28000Mn,5074.64M,2-00:00:00,06:07:29,1,4,production,0:0,COMPLETED,cn1217,28000,Mn,7000.0,M,5074,1268.50,172800.0,00:00:00,172800.0,0.0,06:07:29,22049.0


Sort by used time in seconds, from most to least;
keep only the columns of interest.

In [7]:
# Show the production jobs ordered from longest to shortest used time.
# The sorted frame is only displayed; accre_prod_only itself is left unchanged.
accre_prod_only.sort_values(by='USEDTIME_SEC', ascending=False)

Unnamed: 0,JOBID,ACCOUNT,USER,REQMEM,USEDMEM,REQTIME,USEDTIME,NODES,CPUS,PARTITION,EXITCODE,STATE,NODELIST,REQMEM_NO,REQMEM_TYPE,REQMEM_PER_CORE,USEDMEM_TYPE,USEDMEM_NO,USEDMEM_PER_CORE,REQTIME_DAY_SEC,REQTIME_T,REQTIME_SEC,USEDTIME_DAY_SEC,USEDTIME_T,USEDTIME_SEC
1500,18619452,cms,cmspilot,28000Mn,11840.07M,2-00:00:00,1-23:37:44,1,4,production,0:0,COMPLETED,cn1375,28000,Mn,7000.0,M,11840,2960.00,172800.0,00:00:00,172800.0,86400.0,23:37:44,171464.0
1472,18619251,cms,cmspilot,28000Mn,10205.76M,2-00:00:00,1-23:26:42,1,4,production,0:0,COMPLETED,cn1461,28000,Mn,7000.0,M,10205,2551.25,172800.0,00:00:00,172800.0,86400.0,23:26:42,170802.0
4979,18625812,cms,cmspilot,28000Mn,9746.30M,2-00:00:00,1-23:20:20,1,4,production,0:0,COMPLETED,cn491,28000,Mn,7000.0,M,9746,2436.50,172800.0,00:00:00,172800.0,86400.0,23:20:20,170420.0
846,18618515,cms,cmspilot,28000Mn,10226.02M,2-00:00:00,1-23:09:54,1,4,production,0:0,COMPLETED,cn1241,28000,Mn,7000.0,M,10226,2556.50,172800.0,00:00:00,172800.0,86400.0,23:09:54,169794.0
11,18616862,cms,cmspilot,28000Mn,9478.09M,2-00:00:00,1-22:58:16,1,4,production,0:0,COMPLETED,cn1384,28000,Mn,7000.0,M,9478,2369.50,172800.0,00:00:00,172800.0,86400.0,22:58:16,169096.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4872,18625524_86,cep,gemma,5120Mc,0,1-15:00:00,00:00:04,1,1,production,0:0,COMPLETED,cn1369,5120,Mc,5120.0,,0,0.00,86400.0,15:00:00,140400.0,0.0,00:00:04,4.0
4874,18625524_88,cep,gemma,5120Mc,0,1-15:00:00,00:00:04,1,1,production,0:0,COMPLETED,cn1260,5120,Mc,5120.0,,0,0.00,86400.0,15:00:00,140400.0,0.0,00:00:04,4.0
4875,18625524_89,cep,gemma,5120Mc,0,1-15:00:00,00:00:04,1,1,production,0:0,COMPLETED,cn1260,5120,Mc,5120.0,,0,0.00,86400.0,15:00:00,140400.0,0.0,00:00:04,4.0
4876,18625524_90,cep,gemma,5120Mc,0,1-15:00:00,00:00:04,1,1,production,0:0,COMPLETED,cn1260,5120,Mc,5120.0,,0,0.00,86400.0,15:00:00,140400.0,0.0,00:00:04,4.0


In [9]:
# Narrow the production jobs down to the columns of interest: job identity and
# state, per-core memory, times in seconds, and the resource/placement columns.
accre_prod_coi = accre_prod_only[[
    'JOBID', 'ACCOUNT', 'USER', 'STATE',
    'REQMEM_PER_CORE', 'USEDMEM_PER_CORE',
    'REQTIME_SEC', 'USEDTIME_SEC',
    'NODES', 'CPUS', 'PARTITION', 'NODELIST',
]]

In [10]:
# Display the production-only frame restricted to the columns of interest.
accre_prod_coi

Unnamed: 0,JOBID,ACCOUNT,USER,STATE,REQMEM_PER_CORE,USEDMEM_PER_CORE,REQTIME_SEC,USEDTIME_SEC,NODES,CPUS,PARTITION,NODELIST
0,18616746,cms,cmslocal,COMPLETED,5000.0,1284.00,172800.0,935.0,1,1,production,cn1423
1,18616752,cms,cmslocal,COMPLETED,5000.0,1275.00,172800.0,701.0,1,1,production,cn1424
2,18616754,cms,cmslocal,COMPLETED,5000.0,1259.00,172800.0,935.0,1,1,production,cn1311
3,18616757,cms,cmslocal,COMPLETED,5000.0,1271.00,172800.0,936.0,1,1,production,cn1311
4,18616758,cms,cmslocal,COMPLETED,5000.0,1260.00,172800.0,935.0,1,1,production,cn372
...,...,...,...,...,...,...,...,...,...,...,...,...
4995,18625828,cms,cmspilot,COMPLETED,7000.0,2932.50,172800.0,153030.0,1,4,production,cn1212
4996,18625829,cms,cmspilot,COMPLETED,7000.0,1248.25,172800.0,10941.0,1,4,production,cn1215
4997,18625830,cms,cmspilot,COMPLETED,7000.0,2455.75,172800.0,60449.0,1,4,production,cn1216
4998,18625831,cms,cmspilot,COMPLETED,7000.0,1268.50,172800.0,22049.0,1,4,production,cn1217


In [11]:
# Show the trimmed production frame ordered from longest to shortest used time.
# The sorted frame is only displayed; accre_prod_coi itself is left unchanged.
accre_prod_coi.sort_values(by='USEDTIME_SEC', ascending=False)

Unnamed: 0,JOBID,ACCOUNT,USER,STATE,REQMEM_PER_CORE,USEDMEM_PER_CORE,REQTIME_SEC,USEDTIME_SEC,NODES,CPUS,PARTITION,NODELIST
1500,18619452,cms,cmspilot,COMPLETED,7000.0,2960.00,172800.0,171464.0,1,4,production,cn1375
1472,18619251,cms,cmspilot,COMPLETED,7000.0,2551.25,172800.0,170802.0,1,4,production,cn1461
4979,18625812,cms,cmspilot,COMPLETED,7000.0,2436.50,172800.0,170420.0,1,4,production,cn491
846,18618515,cms,cmspilot,COMPLETED,7000.0,2556.50,172800.0,169794.0,1,4,production,cn1241
11,18616862,cms,cmspilot,COMPLETED,7000.0,2369.50,172800.0,169096.0,1,4,production,cn1384
...,...,...,...,...,...,...,...,...,...,...,...,...
4872,18625524_86,cep,gemma,COMPLETED,5120.0,0.00,140400.0,4.0,1,1,production,cn1369
4874,18625524_88,cep,gemma,COMPLETED,5120.0,0.00,140400.0,4.0,1,1,production,cn1260
4875,18625524_89,cep,gemma,COMPLETED,5120.0,0.00,140400.0,4.0,1,1,production,cn1260
4876,18625524_90,cep,gemma,COMPLETED,5120.0,0.00,140400.0,4.0,1,1,production,cn1260
