In [1]:
import pandas as pd
import datetime
import shutil
import cpmapi
import glob
import math
import re
import os

from pcp import pmapi, pmi
from cpmapi import (
    PM_SEM_DISCRETE, PM_SEM_INSTANT, PM_SEM_COUNTER,
    PM_SPACE_KBYTE, PM_SPACE_BYTE, PM_TIME_SEC, PM_TIME_USEC, PM_TIME_MSEC,
    PM_TYPE_FLOAT, PM_TYPE_U32, PM_TYPE_U64, PM_TYPE_STRING, PM_TYPE_DOUBLE,
    PM_ID_NULL, PM_IN_NULL, PM_INDOM_NULL)
from cpmi import (PMI_ERR_DUPMETRICNAME, PMI_ERR_DUPINSTNAME)

Helper for analysing time spent loading and transforming

In [2]:
from time import process_time, perf_counter

def start_timer():
    """Snapshot the process CPU clock and the wall clock.

    Returns:
        A ``(cpu, wall)`` tuple holding the current ``process_time()`` and
        ``perf_counter()`` readings, to be handed to ``stop_timer`` later.
    """
    return process_time(), perf_counter()

def stop_timer(t0, c0):
    """Report how much CPU and elapsed time passed since ``start_timer``.

    Args:
        t0: ``process_time()`` reading taken at the start.
        c0: ``perf_counter()`` reading taken at the start.

    Returns:
        A human-readable summary string with both durations in seconds.
    """
    cpu_spent = process_time() - t0
    wall_spent = perf_counter() - c0
    template = 'Completed in %.5f seconds CPU time, %.5f elapsed time'
    return template % (cpu_spent, wall_spent)

Helper for discarding some PM100 information (e.g. weather details)

In [3]:
def ignore_metric(plugin, name):
    """Mapping stub for dataset metrics that are deliberately not converted.

    Args:
        plugin: name of the dataset plugin the metric belongs to (unused).
        name: name of the metric within that plugin (unused).

    Returns:
        Always None, which callers treat as "no PCP metric for this file".
    """
    return None

### Functions providing PCP metric metadata for IPMI metrics

In [4]:
# Ordered (regex, PCP metric name) rules for IPMI sensor names.  The first
# matching rule wins, so more specific suffixes (e.g. "_io_power") must be
# listed before the generic ones they would otherwise be swallowed by
# (e.g. "_power").  A rule with a capture group yields an instance name.
_IPMI_RULES = (
    (r'^pci$', 'ipmi.pci'),
    (r'^pcie$', 'ipmi.pcie'),
    (r'^ambient$', 'ipmi.ambient.temperature'),
    (r'^total_power$', 'ipmi.total_power'),
    (r'^fan_disk_power$', 'ipmi.fan_disk_power'),
    # greedy tail: keep the whole fan identifier ("fan0_1"), not just "fan0"
    (r'^(fan[0-9].*)', 'ipmi.fan'),
    (r'(ps[0-9].*?)_input_power', 'ipmi.input_power'),
    # "_io_power"/"_mem_power" must precede "_power" or they never match
    (r'(p[0-9].*?)_io_power', 'ipmi.io.power'),
    (r'(p[0-9].*?)_mem_power', 'ipmi.memory.power'),
    (r'(p[0-9].*?)_power', 'ipmi.cpu.power'),
    (r'(ps[0-9].*?)_input_volta', 'ipmi.input_voltage'),
    (r'(ps[0-9].*?)_output_curre', 'ipmi.output_current'),
    (r'(ps[0-9].*?)_output_volta', 'ipmi.output_voltage'),
    (r'(p[0-9].*?)_temp', 'ipmi.cpu.temperature'),
    (r'(gpu[0-9].*?)_core_temp', 'ipmi.gpu.core.temperature'),
    # greedy tail: keep multi-digit card numbers ("card12"), not just "card1"
    (r'gv100(card[0-9].*)', 'ipmi.gpu.gv100'),
    (r'(gpu[0-9].*?)_mem_temp', 'ipmi.gpu.memory.temperature'),
    (r'(dimm[0-9].*?)_temp', 'ipmi.dimm.temperature'),
)


def ipmi_metric(name):
    """Translate an IPMI sensor name into a PCP metric name and instance.

    Args:
        name: sensor name as it appears in the dataset (e.g. 'p0_io_power').

    Returns:
        A ``(metric_name, instance_name)`` tuple where ``instance_name`` is
        None for sensors without instances, or None (after printing a
        diagnostic) when no rule recognises the sensor.
    """
    for pattern, metric in _IPMI_RULES:
        scan = re.search(pattern, name)
        if scan:
            # rules without a capture group describe singular metrics
            instance = scan.group(1) if scan.re.groups else None
            return metric, instance

    print('Missing IPMI metric details for', name)
    return None

### Functions providing PCP metric metadata for all GPU metrics

In [5]:
def generic_gpu_metric(log, metricname, instid, instname):
    """Build the metric descriptor shared by all per-GPU metrics.

    Args:
        log: object providing ``pmiInDom`` and ``pmiUnits``.
        metricname: metric name without the leading 'gpu.' (added here).
        instid: numeric instance identifier for the GPU.
        instname: instance name for the GPU.

    Returns:
        A dict with the keys name, pmid, indom, instid, instname, type, sem
        and units; the PMID is PM_ID_NULL and the indom is 120.0.
    """
    return dict(
        name='gpu.' + metricname,
        pmid=PM_ID_NULL,
        indom=log.pmiInDom(120, 0),
        instid=instid,
        instname=instname,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, 0, 0, 0, 0, 0),
    )

def file_gpu_metric(log, metricname):
    """Map a per-GPU dataset metric name onto a PCP metric descriptor.

    Input such as "Gpu2_xid_errors" becomes PCP metric gpu.xid_errors with
    instance GPU2 (instance id 2).

    Args:
        log: object passed through to ``generic_gpu_metric``.
        metricname: dataset metric name of the form 'Gpu<N>_<field>'.

    Returns:
        The descriptor dict from ``generic_gpu_metric``, or None when the
        name does not follow the 'Gpu<N>_<field>' shape.
    """
    # digits only for the GPU index so int() below cannot fail
    scan = re.search(r'Gpu([0-9]+)_(.+)', metricname)
    if not scan:
        return None
    gpu_index, gpu_field = scan.group(1, 2)
    # generic_gpu_metric adds the 'gpu.' prefix itself; adding it here as
    # well would produce names like 'gpu.gpu.xid_errors'
    return generic_gpu_metric(log, gpu_field, int(gpu_index), 'GPU' + gpu_index)

### Functions providing PCP metric metadata for all kernel metrics

In [6]:
def hinv_ncpu(log):
    """Descriptor for hinv.ncpu (PMID 60.0.32): singular U32, discrete."""
    return dict(
        name='hinv.ncpu',
        pmid=log.pmiID(60, 0, 32),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_U32,
        sem=PM_SEM_DISCRETE,
        units=log.pmiUnits(0, 0, 0, 0, 0, 0),
    )

def kernel_all_load(log, instid, instname):
    """Descriptor for one instance of kernel.all.load (PMID 60.2.0).

    Args:
        log: object providing ``pmiID``, ``pmiInDom`` and ``pmiUnits``.
        instid: numeric instance identifier within indom 60.2.
        instname: instance name within indom 60.2.

    Returns:
        A descriptor dict for a FLOAT metric with instantaneous semantics.
    """
    return dict(
        name='kernel.all.load',
        pmid=log.pmiID(60, 2, 0),
        indom=log.pmiInDom(60, 2),
        instid=instid,
        instname=instname,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, 0, 0, 0, 0, 0),
    )


def kernel_all_load1(log):
    """kernel.all.load descriptor for the '1 minute' instance (id 1)."""
    return kernel_all_load(log, instid=1, instname='1 minute')


def kernel_all_load5(log):
    """kernel.all.load descriptor for the '5 minute' instance (id 5)."""
    return kernel_all_load(log, instid=5, instname='5 minute')


def kernel_all_load15(log):
    """kernel.all.load descriptor for the '15 minute' instance (id 15)."""
    return kernel_all_load(log, instid=15, instname='15 minute')

def mem_util_bufmem(log):
    """Descriptor for mem.util.bufmem (PMID 60.1.4): singular U64, instantaneous, kilobytes."""
    return dict(
        name='mem.util.bufmem',
        pmid=log.pmiID(60, 1, 4),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_U64,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(1, 0, 0, PM_SPACE_KBYTE, 0, 0),
    )

def mem_util_cached(log):
    """Descriptor for mem.util.cached (PMID 60.1.5): singular U64, instantaneous, kilobytes."""
    return dict(
        name='mem.util.cached',
        pmid=log.pmiID(60, 1, 5),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_U64,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(1, 0, 0, PM_SPACE_KBYTE, 0, 0),
    )

def mem_util_free(log):
    """Descriptor for mem.util.free (PMID 60.1.2): singular U64, instantaneous, kilobytes."""
    return dict(
        name='mem.util.free',
        pmid=log.pmiID(60, 1, 2),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_U64,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(1, 0, 0, PM_SPACE_KBYTE, 0, 0),
    )

def mem_util_shared(log):
    """Descriptor for mem.util.shared (PMID 60.1.3): singular U64, instantaneous, kilobytes."""
    return dict(
        name='mem.util.shared',
        pmid=log.pmiID(60, 1, 3),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_U64,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(1, 0, 0, PM_SPACE_KBYTE, 0, 0),
    )

def mem_physmem(log):
    """Descriptor for mem.physmem (PMID 60.1.0): singular U64, discrete, kilobytes."""
    return dict(
        name='mem.physmem',
        pmid=log.pmiID(60, 1, 0),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_U64,
        sem=PM_SEM_DISCRETE,
        units=log.pmiUnits(1, 0, 0, PM_SPACE_KBYTE, 0, 0),
    )

def kernel_all_nprocs(log):
    """Descriptor for kernel.all.nprocs (PMID 60.2.3): singular U64, instantaneous."""
    return dict(
        name='kernel.all.nprocs',
        pmid=log.pmiID(60, 2, 3),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_U64,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, 0, 0, 0, 0, 0),
    )

def kernel_all_running(log):
    """Descriptor for kernel.all.running (PMID 60.0.15): singular U64, instantaneous."""
    return dict(
        name='kernel.all.running',
        pmid=log.pmiID(60, 0, 15),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_U64,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, 0, 0, 0, 0, 0),
    )

def swap_length(log):
    """Descriptor for swap.length (PMID 60.1.6): singular U64, instantaneous, bytes."""
    return dict(
        name='swap.length',
        pmid=log.pmiID(60, 1, 6),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_U64,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(1, 0, 0, PM_SPACE_BYTE, 0, 0),
    )
    
def swap_free(log):
    """Descriptor for swap.free (PMID 60.1.8): singular U64, instantaneous, bytes."""
    return dict(
        name='swap.free',
        pmid=log.pmiID(60, 1, 8),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_U64,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(1, 0, 0, PM_SPACE_BYTE, 0, 0),
    )
    
def kernel_uname_sysname(log):
    """Descriptor for kernel.uname.sysname (PMID 60.12.2): singular string, discrete."""
    return dict(
        name='kernel.uname.sysname',
        pmid=log.pmiID(60, 12, 2),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_STRING,
        sem=PM_SEM_DISCRETE,
        units=log.pmiUnits(0, 0, 0, 0, 0, 0),
    )
    
def kernel_uname_release(log):
    """Descriptor for kernel.uname.release (PMID 60.12.0): singular string, discrete."""
    return dict(
        name='kernel.uname.release',
        pmid=log.pmiID(60, 12, 0),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_STRING,
        sem=PM_SEM_DISCRETE,
        units=log.pmiUnits(0, 0, 0, 0, 0, 0),
    )
    
def kernel_uname_machine(log):
    """Descriptor for kernel.uname.machine (PMID 60.12.3): singular string, discrete."""
    return dict(
        name='kernel.uname.machine',
        pmid=log.pmiID(60, 12, 3),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_STRING,
        sem=PM_SEM_DISCRETE,
        units=log.pmiUnits(0, 0, 0, 0, 0, 0),
    )
    
def kernel_all_boottime(log):
    """Descriptor for kernel.all.boottime (PMID 60.0.17): singular U64, discrete, seconds."""
    return dict(
        name='kernel.all.boottime',
        pmid=log.pmiID(60, 0, 17),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_U64,
        sem=PM_SEM_DISCRETE,
        units=log.pmiUnits(0, 1, 0, 0, PM_TIME_SEC, 0),
    )
    
def kernel_all_idletime(log):
    """Descriptor for kernel.all.idletime (PMID 60.26.1): singular double, instantaneous.

    Units are built with a time dimension of -1 scaled in seconds.
    """
    return dict(
        name='kernel.all.idletime',
        pmid=log.pmiID(60, 26, 1),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_DOUBLE,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, -1, 0, 0, PM_TIME_SEC, 0),
    )

def hinv_all_cpu_clock(log):
    """Descriptor for hinv.all.cpu.clock: singular float, discrete, no fixed PMID.

    The PMID is PM_ID_NULL and the units use a time dimension of -1 scaled
    in microseconds.
    """
    return dict(
        name='hinv.all.cpu.clock',
        pmid=PM_ID_NULL,
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_DISCRETE,
        units=log.pmiUnits(0, -1, 0, 0, PM_TIME_USEC, 0),
    )
    
def kernel_all_cpu_user(log):
    """Descriptor for kernel.all.cpu.user (PMID 60.0.20): singular float, instantaneous."""
    return dict(
        name='kernel.all.cpu.user',
        pmid=log.pmiID(60, 0, 20),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, -1, 0, 0, PM_TIME_SEC, 0),
    )

def kernel_all_cpu_wait_total(log):
    """Descriptor for kernel.all.cpu.wait.total (PMID 60.0.35): singular float, instantaneous."""
    return dict(
        name='kernel.all.cpu.wait.total',
        pmid=log.pmiID(60, 0, 35),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, -1, 0, 0, PM_TIME_SEC, 0),
    )
    
def kernel_all_cpu_steal(log):
    """Descriptor for kernel.all.cpu.steal (PMID 60.0.55): singular float, instantaneous."""
    return dict(
        name='kernel.all.cpu.steal',
        pmid=log.pmiID(60, 0, 55),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, -1, 0, 0, PM_TIME_SEC, 0),
    )
    
def kernel_all_cpu_nice(log):
    """Descriptor for kernel.all.cpu.nice (PMID 60.0.21): singular float, instantaneous."""
    return dict(
        name='kernel.all.cpu.nice',
        pmid=log.pmiID(60, 0, 21),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, -1, 0, 0, PM_TIME_SEC, 0),
    )
    
def kernel_all_cpu_idle(log):
    """Descriptor for kernel.all.cpu.idle (PMID 60.0.23): singular float, instantaneous."""
    return dict(
        name='kernel.all.cpu.idle',
        pmid=log.pmiID(60, 0, 23),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, -1, 0, 0, PM_TIME_SEC, 0),
    )
    
def kernel_all_cpu_sys(log):
    """Descriptor for kernel.all.cpu.sys (PMID 60.0.22): singular float, instantaneous."""
    return dict(
        name='kernel.all.cpu.sys',
        pmid=log.pmiID(60, 0, 22),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, -1, 0, 0, PM_TIME_SEC, 0),
    )
    
def network_all_out_packets(log):
    """Descriptor for network.all.out.packets (PMID 60.90.5): singular float, instantaneous.

    NOTE(review): the units use a count dimension of -1 with count scale 1;
    if the source values are packets per second, a count dimension of +1
    with a time dimension of -1 would be expected - confirm against the
    dataset documentation.
    """
    return dict(
        name='network.all.out.packets',
        pmid=log.pmiID(60, 90, 5),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, 0, -1, 0, 0, 1),
    )

def network_all_in_packets(log):
    """Descriptor for network.all.in.packets (PMID 60.90.1): singular float, instantaneous.

    NOTE(review): the units use a count dimension of -1 with count scale 1;
    if the source values are packets per second, a count dimension of +1
    with a time dimension of -1 would be expected - confirm against the
    dataset documentation.
    """
    return dict(
        name='network.all.in.packets',
        pmid=log.pmiID(60, 90, 1),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(0, 0, -1, 0, 0, 1),
    )
    
def network_all_out_bytes(log):
    """Descriptor for network.all.out.bytes (PMID 60.90.4): singular float, instantaneous.

    NOTE(review): the units use a space dimension of -1 scaled in bytes,
    which reads as "per byte"; if the source values are bytes per second,
    a space dimension of +1 with a time dimension of -1 would be expected -
    confirm against the dataset documentation.
    """
    return dict(
        name='network.all.out.bytes',
        pmid=log.pmiID(60, 90, 4),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(-1, 0, 0, PM_SPACE_BYTE, 0, 0),
    )
    
def network_all_in_bytes(log):
    """Descriptor for network.all.in.bytes (PMID 60.90.0): singular float, instantaneous.

    NOTE(review): the units use a space dimension of -1 scaled in bytes,
    which reads as "per byte"; if the source values are bytes per second,
    a space dimension of +1 with a time dimension of -1 would be expected -
    confirm against the dataset documentation.
    """
    return dict(
        name='network.all.in.bytes',
        pmid=log.pmiID(60, 90, 0),
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(-1, 0, 0, PM_SPACE_BYTE, 0, 0),
    )

def filesys_all_free(log):
    """Descriptor for filesys.all.free: singular float, instantaneous, kilobytes, PMID PM_ID_NULL."""
    return dict(
        name='filesys.all.free',
        pmid=PM_ID_NULL,
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(1, 0, 0, PM_SPACE_KBYTE, 0, 0),
    )
    
def filesys_all_capacity(log):
    """Descriptor for filesys.all.capacity: singular float, instantaneous, kilobytes, PMID PM_ID_NULL."""
    return dict(
        name='filesys.all.capacity',
        pmid=PM_ID_NULL,
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(1, 0, 0, PM_SPACE_KBYTE, 0, 0),
    )
    
def filesys_all_maxused(log):
    """Descriptor for filesys.all.maxused: singular float, instantaneous, kilobytes, PMID PM_ID_NULL."""
    return dict(
        name='filesys.all.maxused',
        pmid=PM_ID_NULL,
        indom=None, instid=None, instname=None,
        type=PM_TYPE_FLOAT,
        sem=PM_SEM_INSTANT,
        units=log.pmiUnits(1, 0, 0, PM_SPACE_KBYTE, 0, 0),
    )

In [7]:
# Mapping individual PM100 file names to specific PCP metrics.
# Keys are the dataset metric names found in the "metric=<name>" path
# component; values are descriptor-builder functions taking the log object
# as their only argument, or None for metrics that are known but have no
# PCP equivalent (file_kernel_metric reports those as missing).
file_kernel_metrics = {
    'cpu_num': hinv_ncpu,
    'mem_buffers': mem_util_bufmem,
    'mem_cached': mem_util_cached,
    'mem_free': mem_util_free,
    'mem_shared': mem_util_shared,
    'mem_total': mem_physmem,
    'proc_total': kernel_all_nprocs,
    'proc_run': kernel_all_running,
    'swap_total': swap_length,
    'swap_free': swap_free,
    'os_name': kernel_uname_sysname,
    'os_release': kernel_uname_release,
    'machine_type': kernel_uname_machine,
    'boottime': kernel_all_boottime,
    'cpu_aidle': kernel_all_idletime,
    'cpu_speed': hinv_all_cpu_clock,
    'cpu_user': kernel_all_cpu_user,
    'cpu_wio': kernel_all_cpu_wait_total,
    'cpu_steal': kernel_all_cpu_steal,
    'cpu_nice': kernel_all_cpu_nice,
    'cpu_idle': kernel_all_cpu_idle,
    'cpu_system': kernel_all_cpu_sys,
    # the three load averages share one metric and differ only by instance
    'load_one': kernel_all_load1,
    'load_five': kernel_all_load5,
    'load_fifteen': kernel_all_load15,
    'pkts_out': network_all_out_packets,
    'pkts_in': network_all_in_packets,
    'bytes_out': network_all_out_bytes,
    'bytes_in': network_all_in_bytes,
    'disk_free': filesys_all_free,
    'disk_total': filesys_all_capacity,
    'part_max_used': filesys_all_maxused,
    'gexec': None, # "scalable cluster remote execution system"
}

def file_kernel_metric(log, filename):
    """Look up and build the PCP descriptor for a kernel-level dataset metric.

    Args:
        log: object handed to the descriptor-builder function.
        filename: dataset metric name, used as key into file_kernel_metrics.

    Returns:
        The descriptor dict produced by the mapped builder, or None (after
        printing a diagnostic) when the name is unknown or is mapped to None.

    Errors raised by the builder itself are no longer swallowed and reported
    as a "missing" metric; they propagate to the caller.
    """
    kernel_metric = file_kernel_metrics.get(filename)
    if kernel_metric is None:
        print('Kernel metric missing:', filename)
        return None
    return kernel_metric(log)

In [8]:
def get_file_mapping(log, file):
    """Decide which PCP metric (if any) a dataset parquet file maps to.

    The plugin and metric names are taken from the 'plugin=<p>' and
    'metric=<m>' components of the path; either '/' or '\\' is accepted as
    the separator.

    Args:
        log: object handed to the metric descriptor builders.
        file: path of a parquet file within the dataset layout.

    Returns:
        A metric descriptor dict, or None when the file is ignored, has no
        known mapping, or its path does not follow the expected layout.
    """
    result = re.search(r'[/\\]plugin=(.*?)[/\\]metric=(.*?)[/\\]', file)
    if result is None:
        # previously this fell through to None.group() and raised
        print('Unrecognised dataset path:', file)
        return None
    plugin, metric = result.group(1, 2)

    if plugin == 'ganglia_pub':
        if metric.startswith('Gpu'):
            return file_gpu_metric(log, metric)
        return file_kernel_metric(log, metric)

    # ignore these subsystems for now
    ignored_plugins = (
        'ipmi_pub', 'weather_pub',      # environmental info
        'job_table', 'slurm_pub',       # HPC job scheduler info
        'logics_pub', 'nagios_pub', 'vertiv_pub', 'schneider_pub',
    )
    if plugin in ignored_plugins:
        return ignore_metric(plugin, metric)

    print('Plugin handler missing:', plugin)
    return None

In [9]:
def get_node(df, node, column, datestring):
    """Extract one node's samples for one day as a timestamp-indexed frame.

    Args:
        df: frame with 'node', 'timestamp' and 'value' columns.
        node: value to match in the 'node' column.
        column: name to give the 'value' column in the result.
        datestring: day to select, e.g. '2022-09-01'.

    Returns:
        A new frame indexed by timestamp (sorted), with 'value' renamed to
        ``column`` and the 'node' column dropped, or None when the node has
        no samples on that day (e.g. GPU metrics).  ``df`` is not modified.
    """
    # build a fresh frame rather than mutating a filtered slice in place
    node_df = df.loc[df['node'] == node].set_index('timestamp').sort_index()
    if node_df.empty:
        return None
    try:
        # an explicit slice always selects the whole day, whatever the
        # resolution of the index, and needs the sorted index from above
        node_df = node_df.loc[datestring:datestring]
    except KeyError:
        return None
    if node_df.empty:  # there may be no data on this day
        return None
    return node_df.rename(columns={'value': column}).drop(columns='node')

In [10]:
def _merge_node_frames(merged, nodedf):
    """Fold one metric's per-node frame into the node's accumulated frame."""
    if merged is None:
        return nodedf
    if len(merged.columns.intersection(nodedf.columns)):
        # same metric seen again (e.g. another parquet file): fill gaps in
        # the existing column instead of creating '_x'/'_y' suffixed copies
        # that would have no entry in the column map
        return merged.combine_first(nodedf)
    return merged.merge(nodedf, how='outer', left_index=True, right_index=True)


def get_node_dataframes(host='marconi100', year=2022, month=9, day=1, nodes=None):
    """
    Reads PM100 dataset for a single day.  Returns a tuple with two dictionaries:
    1. dataframes with metric columns indexed by timestamp, for each node
    2. mapping dataframe column names back to PCP metric metadata

    Parquet files are located relative to the current directory using the
    'year_month=*/plugin=*/metric=*/*.parquet' layout.  When ``nodes`` is
    empty or None, every node present in each file is used (determined per
    file, so nodes missing from the first file are still picked up).
    """
    node_dfs = {}
    column_map = {}
    datestring = '%d-%02d-%02d' % (year, month, day)

    files = glob.glob('year_month=*/plugin=*/metric=*/*.parquet')
    base = pmi.pmiLogImport(host) # for static functions

    try:
        for file in files:
            pcp_metric = get_file_mapping(base, file)
            if not pcp_metric:
                continue

            tt, cc = start_timer()
            df = pd.read_parquet(file)
            ss = stop_timer(tt, cc)

            if 'node' not in df.columns:
                print('No node column in', file)
                continue
            print('*** Loaded', file)
            print(ss)

            # the column name depends only on the metric, not on the node
            column = pcp_metric['name']   # regular PCP name
            if pcp_metric['instname']:
                column += '[' + pcp_metric['instname'] + ']'
            column_map[column] = pcp_metric

            # no subset given means all nodes found in *this* file
            file_nodes = nodes if nodes else sorted(df.node.unique())

            for node in file_nodes:
                nodedf = get_node(df, node, column, datestring)
                if nodedf is None:   # e.g. missing GPU metrics
                    continue
                node_dfs[node] = _merge_node_frames(node_dfs.get(node), nodedf)
    finally:
        # finished with temporary log (hack for using static functions);
        # released even when loading fails part-way through
        del base

    return (node_dfs, column_map)

In [11]:
def _format_pmi_value(value):
    """Render a sample value as the string handed to pmiPutValue.

    Strings pass through unchanged; whole numbers are written without a
    fractional part ('100', not '100.0'); everything else uses str().
    """
    if isinstance(value, str):
        return value
    try:
        whole = int(value)
    except (TypeError, ValueError, OverflowError):
        return str(value)
    return str(whole) if whole == value else str(value)


def put_node_dataframes(archive, logpath, host, node_dfs, column_map, timezone=None):
    """
    Write out one PCP archive per node for a single day of data.

    Args:
        archive: archive base name (e.g. '20220901').
        logpath: directory under which per-node archive directories are made.
        host: cluster name used to build node and host names.
        node_dfs: dict of node -> timestamp-indexed frame of metric columns.
        column_map: dict of column name -> metric descriptor dict.
        timezone: timezone string recorded in each archive; when None, the
            notebook-level ``timezone`` variable is used if defined,
            otherwise 'UTC'.

    Returns:
        The number of archives written.
    """
    if timezone is None:
        # keep working for callers that rely on the notebook global
        timezone = globals().get('timezone', 'UTC')
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    nlogs = 0

    for node, node_df in node_dfs.items():
        # str() so integer node identifiers work as well as string ones
        nodename = '%s_node%s' % (host, str(node).zfill(3))
        hostname = '%s.cineca.it' % (nodename)
        filename = '%s/%s/%s' % (logpath, nodename, archive)

        os.makedirs(os.path.dirname(filename), exist_ok=True)
        print('Host:', hostname)
        print('File:', filename)

        log = pmi.pmiLogImport(filename)
        log.pmiSetHostname(hostname)
        log.pmiSetTimezone(timezone)

        # add metric/indom metadata to the archive
        for column in node_df.columns:
            metric = column_map[column]
            indom = metric['indom']
            if not indom:
                indom = PM_INDOM_NULL

            print('AddMetric:', metric['name'])
            try:
                log.pmiAddMetric(metric['name'], metric['pmid'], metric['type'],
                                 indom, metric['sem'], metric['units'])
            except pmi.pmiErr as error:
                # a metric with several instances is added once per column,
                # so duplicates are expected; anything else is reported
                if error.code != PMI_ERR_DUPMETRICNAME:
                    print('AddMetric failed:', metric['name'], error)
            if indom != PM_INDOM_NULL:
                print('AddInstance:', metric['instname'])
                try:
                    log.pmiAddInstance(indom, metric['instname'], metric['instid'])
                except pmi.pmiErr as error:
                    # instances are shared by all metrics over the same indom
                    if error.code != PMI_ERR_DUPINSTNAME:
                        print('AddInstance failed:', metric['instname'], error)

        # add values from each metric to the archive
        columns = list(node_df.columns)
        for row in node_df.itertuples(index=True, name=None):
            seconds = int((row[0] - epoch).total_seconds())
            count = 0
            for column, value in zip(columns, row[1:]):
                # outer merges leave NaN/None where a metric has no sample
                if not isinstance(value, str) and pd.isna(value):
                    continue
                metric = column_map[column]
                instname = metric['instname'] or ''
                # NB: str.rstrip('.0') strips a *set* of characters and
                # turned 100.0 into '1'; format whole numbers explicitly
                log.pmiPutValue(metric['name'], instname, _format_pmi_value(value))
                count += 1

            if count == 0:
                continue  # nothing was put for this timestamp; no record
            log.pmiWrite(seconds, 0)
            print('Wrote:', count, 'metric values at offset', seconds, row[0])

        nlogs += 1
        del log

    return nlogs

In [12]:
# Conversion settings: which days and nodes to export, and where to write.
days = [
    (2022, 9, 1),
    (2022, 9, 2),
]
host = 'marconi100'
logpath = 'archives'
timezone = 'UTC'  # 'CET'?
nodes = ['0', '1', '2']  # use None for all nodes

# Start from a clean output tree (isdir is False for a missing path too).
if os.path.isdir(logpath):
    shutil.rmtree(logpath)

# One pass per day: load that day's samples, then write one archive per node.
for year, month, day in days:
    archive = '%4d%02d%02d' % (year, month, day)
    loaded = get_node_dataframes(host, year, month, day, nodes)
    node_dataframes, column_mapping = loaded
    logs = put_node_dataframes(archive, logpath, host,
                               node_dataframes, column_mapping)
    print('=== Wrote %d archives for %d-%d-%d' % (logs, day, month, year))

*** Loaded year_month=22-09/plugin=ganglia_pub/metric=mem_buffers/a_0.parquet
Completed in 6.16678 seconds CPU time, 2.17194 elapsed time
*** Loaded year_month=22-09/plugin=ganglia_pub/metric=Gpu0_sync_boost_violation/a_0.parquet
Completed in 9.24342 seconds CPU time, 3.38758 elapsed time
*** Loaded year_month=22-09/plugin=ganglia_pub/metric=Gpu2_memory_temp/a_0.parquet
Completed in 10.41895 seconds CPU time, 4.13755 elapsed time
*** Loaded year_month=22-09/plugin=ganglia_pub/metric=Gpu1_retired_pages_sbe/a_0.parquet
Completed in 8.88962 seconds CPU time, 4.46733 elapsed time
*** Loaded year_month=22-09/plugin=ganglia_pub/metric=Gpu3_retired_pages_pending/a_0.parquet
Completed in 7.60724 seconds CPU time, 3.14901 elapsed time
*** Loaded year_month=22-09/plugin=ganglia_pub/metric=Gpu1_thermal_violation/a_0.parquet
Completed in 10.53990 seconds CPU time, 4.44259 elapsed time
*** Loaded year_month=22-09/plugin=ganglia_pub/metric=Gpu3_low_util_violation/a_0.parquet
Completed in 8.74839 sec