# Slurm queue wait times per partition

This assumes that you have run

```bash
sacct -r pli-c --allusers --json > pli_c_7.json
sacct -r pli --allusers --json > pli_7.json
```

on the server, printing out all jobs for the `pli` account.

Or, for a larger time window, add `-S 2024-01-01` to each command.

In [18]:
%load_ext autoreload
%autoreload 2

import json
from pathlib import Path
from pandas import DataFrame as DF
from slurm_analyzer import SLURMAnalyzer
import pandas as pd
from datetime import datetime
import tabulate

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
# Data directory, resolved relative to the notebook's working directory.
dpath = Path("..") / "data"
# Stop here if the directory is missing, before any cell tries to read from it.
assert dpath.is_dir()


In [20]:
# Parse each JSON dump with a fresh SLURMAnalyzer and stack the resulting frames
# in the same order as before (pli first, then pli-c).
# NOTE(review): presumably these are the `sacct --json` dumps described at the top — confirm.
df = pd.concat(
    [
        SLURMAnalyzer().parse(json.loads((dpath / fname).read_text()))
        for fname in ("pli_7.json", "pli_c_7.json")
    ]
)

In [21]:
# Cleaning step: drop jobs with strange values (negative wait times etc.) by
# keeping only jobs whose `elapsed` exceeds 600, i.e. jobs that ran for more than 10 minutes.
df = df.loc[df["elapsed"] > 600]

In [22]:
def by_time(df, title="", periods=(7, 30, 60)):
    """Print a table of mean wait time and job count for several look-back windows.

    Args:
        df: DataFrame of jobs. Must provide an ``age_days`` column (used to
            select the window) and a ``wait_time_h`` column (averaged per window).
        title: Optional heading printed above the table; nothing is printed
            for an empty string.
        periods: Look-back windows in days. One table row is produced per
            entry, covering the jobs with ``age_days <= period``. Defaults to
            the 7/30/60-day windows.

    Returns:
        None. The table is written to stdout.
    """
    rows = []
    for days in periods:
        # Windows are cumulative: the 30-day row also contains the 7-day jobs.
        recent = df[df["age_days"] <= days]
        # The mean of an empty selection is NaN, which is shown as-is in the table.
        rows.append((f"Last {days} days", recent["wait_time_h"].mean(), len(recent)))

    if title:
        print(title)
    print(tabulate.tabulate(rows, headers=["Period", "Wait time (h)", "Jobs"]))


# Core partition

## Large jobs 

In [23]:
# Wait times for long-running jobs on the core partition, bucketed by minimum node count.
for nodes in [1, 2, 4]:
    by_time(
        df.query(f"partition == 'pli-c' and n_nodes >= {nodes} and elapsed_h > 24"),
        # The runtime filter is strict (> 24), so the heading says "> 24h" rather than ">= 24h".
        f">= {nodes} nodes, > 24h runtime",
    )
    print()

>= 1 nodes, >= 24h runtime
Period          Wait time (h)    Jobs
------------  ---------------  ------
Last 7 days           5.10251      61
Last 30 days          4.65745     154
Last 60 days          7.12816     443

>= 2 nodes, >= 24h runtime
Period          Wait time (h)    Jobs
------------  ---------------  ------
Last 7 days           17.2819      15
Last 30 days          14.4519      46
Last 60 days          19.0333     109

>= 4 nodes, >= 24h runtime
Period          Wait time (h)    Jobs
------------  ---------------  ------
Last 7 days        0.00486111       2
Last 30 days      12.1466          21
Last 60 days       8.69215         37



## Smaller jobs

In [24]:
# Wait times on the core partition for jobs that used at least `t` GPU hours.
# The heading uses ">=" to match the filter; it previously read "<=" although
# the query selects a lower bound.
# NOTE(review): if an upper bound was intended for this section, flip the
# operator in the query instead of the heading.
for t in [1, 10, 24]:
    by_time(
        df.query(f"partition == 'pli-c' and gpu_time_h >= {t}"),
        f">= {t} GPU hours",
    )
    print()

<= 1 GPU hours
Period          Wait time (h)    Jobs
------------  ---------------  ------
Last 7 days           2.90843     794
Last 30 days          9.07535    7444
Last 60 days          9.86872   13412

<= 10 GPU hours
Period          Wait time (h)    Jobs
------------  ---------------  ------
Last 7 days           7.33164     248
Last 30 days          4.45846    1006
Last 60 days          4.30354    1962

<= 24 GPU hours
Period          Wait time (h)    Jobs
------------  ---------------  ------
Last 7 days           7.4221      149
Last 30 days          5.98635     544
Last 60 days          5.79894    1083



# Campus partition

In [25]:
# Wait times on the campus partition for jobs that used at least `t` GPU hours.
# The heading uses ">=" to match the filter; it previously read "<=" although
# the query selects a lower bound.
for t in [1, 10, 24]:
    by_time(
        df.query(f"partition == 'pli' and gpu_time_h >= {t}"),
        f">= {t} GPU hours",
    )
    print()

<= 1 GPU hours
Period          Wait time (h)    Jobs
------------  ---------------  ------
Last 7 days          0.524585     201
Last 30 days         1.16109     1870
Last 60 days         1.72412     3033

<= 10 GPU hours
Period          Wait time (h)    Jobs
------------  ---------------  ------
Last 7 days          0.563236      91
Last 30 days         1.64013      295
Last 60 days         2.74021      547

<= 24 GPU hours
Period          Wait time (h)    Jobs
------------  ---------------  ------
Last 7 days         0.0486792      53
Last 30 days        2.19757       171
Last 60 days        3.52922       260

