In [33]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.cluster import KMeans

In [34]:
# Load a 1000-row sample of the pipe-delimited SLURM usage CSV.
slurm = pd.read_csv(
    "/mnt/research/CMSE495-SS24-ICER/slurm_usage/DID_FINAL_SLURM_OCT_2023.csv",
    sep="|",
    nrows=1000,
)

In [35]:
# Remove the two "Unnamed" columns (presumably index columns left over from
# earlier CSV round-trips -- confirm against the export script).
# errors="ignore" keeps this cell idempotent: because it reassigns `slurm`,
# a second run would otherwise raise KeyError once the columns are gone.
slurm = slurm.drop(columns=["Unnamed: 0.1", "Unnamed: 0"], errors="ignore")

In [38]:
# Display the frame; pandas elides the middle rows and columns of a large frame.
slurm

Unnamed: 0,JobID,User,Group,Submit,Start,End,Elapsed,State,Account,AssocID,...,CPUTimeRAW,ReqCPUS,AllocCPUS,ReqMem,MaxRSS,ReqNodes,NNodes,NodeList,ReqTRES,AllocTRES
0,31496544,user_679,group_121,2023-03-21T11:13:45,Unknown,Unknown,00:00:00,PENDING,account_017,assocID_489,...,0,28,0,21000M,,1,1,None assigned,"billing=3192,cpu=28,gres/gpu=4,mem=21000M,node=1",
1,31497932,user_679,group_121,2023-03-21T11:31:18,Unknown,Unknown,00:00:00,PENDING,account_017,assocID_489,...,0,28,0,21000M,,1,1,None assigned,"billing=3192,cpu=28,gres/gpu=4,mem=21000M,node=1",
2,31993628,user_105,group_114,2023-03-22T18:19:12,Unknown,Unknown,00:00:00,PENDING,account_017,assocID_661,...,0,12,0,150G,,1,1,None assigned,"billing=23347,cpu=12,gres/gpu=8,mem=150G,node=1",
3,39087660,user_652,group_054,2023-04-04T13:09:10,Unknown,Unknown,00:00:00,PENDING,account_017,assocID_557,...,0,640,0,20G,,10,10,None assigned,"billing=3112,cpu=640,mem=20G,node=10",
4,59062820,user_188,group_046,2023-05-08T09:58:20,2024-01-01T00:58:57,2024-01-01T00:59:06,00:00:09,COMPLETED,account_017,assocID_676,...,360,40,40,8G,,1,1,skl-029,"billing=1245,cpu=40,mem=8G,node=1","billing=1245,cpu=40,mem=8G,node=1"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,90366424,user_258,group_010,2023-09-29T00:54:50,2023-09-29T00:54:50,2023-10-02T09:48:18,3-08:53:28,FAILED,account_017,assocID_153,...,7280200,25,25,,421032K,1,1,amr-234,,"cpu=25,mem=200G,node=1"
996,90366424,user_258,group_010,2023-09-29T00:54:50,2023-09-29T00:54:50,2023-10-02T09:48:19,3-08:53:29,COMPLETED,account_017,assocID_153,...,7280225,25,25,,0,1,1,amr-234,,"billing=31129,cpu=25,mem=200G,node=1"
997,90375532,user_082,group_127,2023-09-25T15:15:32,2023-09-29T02:05:46,2023-10-02T09:48:27,3-07:42:41,FAILED,account_017,assocID_153,...,7174025,5,25,200G,,1,1,amr-208,"billing=31129,cpu=5,mem=200G,node=1","billing=31129,cpu=25,mem=200G,node=1"
998,90375532,user_258,group_010,2023-09-29T02:05:46,2023-09-29T02:05:46,2023-10-02T09:48:27,3-07:42:41,FAILED,account_017,assocID_153,...,7174025,25,25,,548156K,1,1,amr-208,,"cpu=25,mem=200G,node=1"


In [11]:
# List the column labels of the frame.
slurm.columns

Index(['JobID', 'User', 'Group', 'Submit', 'Start', 'End', 'Elapsed', 'State',
       'Account', 'AssocID', 'Partition', 'Timelimit', 'UserCPU', 'SystemCPU',
       'TotalCPU', 'CPUTime', 'CPUTimeRAW', 'ReqCPUS', 'AllocCPUS', 'ReqMem',
       'MaxRSS', 'ReqNodes', 'NNodes', 'NodeList', 'ReqTRES', 'AllocTRES'],
      dtype='object')

## Identify ghostUsers

**Not having allocated memory makes sense: a user can make a huge request that never goes through, which results in a NaN allocation.
What I didn't understand was why there would be jobs with a NaN ReqTRES but a populated AllocTRES. I investigated further
and found that a single user ID is the cause of all of them. Users that are doing this: {'user_258'}.
Number of times AllocTRES was given to a NaN request: 369.**

In [12]:
# Keep just the requested (ReqTRES) and allocated (AllocTRES) resource
# strings of each job so the two can be compared side by side.
slurm.loc[:, ["ReqTRES", "AllocTRES"]]

Unnamed: 0,ReqTRES,AllocTRES
0,"billing=3192,cpu=28,gres/gpu=4,mem=21000M,node=1",
1,"billing=3192,cpu=28,gres/gpu=4,mem=21000M,node=1",
2,"billing=23347,cpu=12,gres/gpu=8,mem=150G,node=1",
3,"billing=3112,cpu=640,mem=20G,node=10",
4,"billing=1245,cpu=40,mem=8G,node=1","billing=1245,cpu=40,mem=8G,node=1"
...,...,...
995,,"cpu=25,mem=200G,node=1"
996,,"billing=31129,cpu=25,mem=200G,node=1"
997,"billing=31129,cpu=5,mem=200G,node=1","billing=31129,cpu=25,mem=200G,node=1"
998,,"cpu=25,mem=200G,node=1"


In [40]:
# Owners of every row whose ReqTRES is missing (AllocTRES is not checked here).
users_missing_request = slurm.loc[slurm["ReqTRES"].isnull(), "User"]

# How many such rows exist, then which distinct users they belong to.
print("Count of Resource requested =  NaN:", len(users_missing_request))

print("Users that are doing this:", set(users_missing_request))

Count of Resource requested =  NaN: 369
Users that are doing this: {'user_258'}


In [39]:
# Identify users whose jobs have AllocTRES populated while ReqTRES is NaN.
def identify_ghostUsers(data):
    """Report jobs that were allocated resources without a recorded request.

    A row is counted only when ``ReqTRES`` is null AND ``AllocTRES`` is not
    null. Filtering on ``ReqTRES`` alone would also count jobs that have no
    allocation at all, which contradicts the printed message.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``ReqTRES``, ``AllocTRES`` and ``User``.

    Returns
    -------
    None
        The number of matching rows and the set of distinct users owning
        them are printed to stdout.
    """
    # Build the filter once and reuse it for both the count and the user set.
    allocated_without_request = data["ReqTRES"].isnull() & data["AllocTRES"].notnull()
    ghost_users = data.loc[allocated_without_request, "User"]

    print("Number of times allocationTRES was given to a NaN Request:", len(ghost_users))
    print("Users that are doing this:", set(ghost_users))


# Run the check on the loaded SLURM sample; it prints a count and a set of users.
identify_ghostUsers(slurm)

Number of times allocationTRES was given to a NaN Request: 369
Users that are doing this: {'user_258'}
