## GPU Information Dataframe

<br>

### Development Environment

In [None]:
!pip install pynvml

In [7]:
import pynvml
import numpy as np
import pandas as pd
from pynvml.smi import nvidia_smi
from subprocess import check_output

### GPU Information

In [6]:
columns = ['timestamp', 'name', 'pci.bus_id', 'driver_version', 'pstate',
           'pcie.link.gen.max', 'pcie.link.gen.current',
           'temperature.gpu', 'utilization.gpu', 'utilization.memory',
           'memory.total', 'memory.free', 'memory.used']

qstring = ','.join(columns)
command = ['nvidia-smi', f'--query-gpu={qstring}', '--format=csv']

output = check_output(command, encoding='utf-8').strip()
lines = output.split('\n')

rows = []
keys = [column.replace('.', '_') for column in lines[0].split(', ')]
for line in lines[1:]:
    values = line.split(', ')
    rows.append(dict(tuple(zip(keys, values))))
    
gpu_df = pd.DataFrame(rows)
gpu_df.to_excel("workspace/gpu_df.xlsx", index=False)

In [7]:
gpu_df

Unnamed: 0,timestamp,name,pci_bus_id,driver_version,pstate,pcie_link_gen_max,pcie_link_gen_current,temperature_gpu,utilization_gpu [%],utilization_memory [%],memory_total [MiB],memory_free [MiB],memory_used [MiB]
0,2023/09/07 09:07:40.010,NVIDIA GeForce RTX 4090,00000000:01:00.0,525.125.06,P2,4,4,76,99 %,52 %,24564 MiB,1927 MiB,22289 MiB
1,2023/09/07 09:07:40.010,NVIDIA GeForce RTX 4090,00000000:21:00.0,525.125.06,P2,4,4,74,94 %,47 %,24564 MiB,1064 MiB,23152 MiB


In [8]:
pynvml.nvmlInit()
result = []
for dev_id in range(pynvml.nvmlDeviceGetCount()):
    handle = pynvml.nvmlDeviceGetHandleByIndex(dev_id)
    for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
        result.append([proc.pid, proc.usedGpuMemory, dev_id])

In [9]:
gpu_usage = pd.DataFrame(result,columns=["pid","bytes of memory", "device"])
gpu_usage["MB of memory"] = gpu_usage["bytes of memory"] / (1024*1024)
gpu_usage["GB of memory"] = gpu_usage["bytes of memory"] / (1024*1024*1024)

In [23]:
gpu_usage

Unnamed: 0,pid,bytes of memory,device,MB of memory,GB of memory
0,105277,32939966464,0,31414.0,30.677734
1,105468,32979812352,1,31452.0,30.714844


### PID Information

In [10]:
gpu_usage_by_id_r = gpu_usage.groupby("pid").apply(lambda x : ", ".join([str(i) for i in x["device"].tolist()])).reset_index(drop=False)
gpu_usage_by_id_r.columns = ["pid","device_list"]
gpu_usage_by_id_l = gpu_usage.groupby("pid").agg({"MB of memory" : "sum","GB of memory" : "sum",}).reset_index(drop=False)
gpu_usage_by_id = gpu_usage_by_id_l.merge(gpu_usage_by_id_r,on="pid",how="left")

In [11]:
gpu_usage_by_id

Unnamed: 0,pid,MB of memory,GB of memory,device_list
0,105277,31414.0,30.677734,0
1,105468,31452.0,30.714844,1


### Device Information

In [36]:
nvsmi = nvidia_smi.getInstance()
total_gpu_usage = pd.DataFrame([list(i.values())[0] for i in nvsmi.DeviceQuery('memory.free, memory.total')["gpu"]])
total_gpu_usage["device"] = np.arange(len(total_gpu_usage))

d = gpu_usage.groupby(["device","device_name"]).agg({"MB of memory" : "sum","GB of memory" : "sum",})
device_gpusage = d.reset_index(drop=False)

In [26]:
device_gpusage

Unnamed: 0,index,MB of memory,GB of memory
0,device,31414.0,30.677734
1,device_name,31452.0,30.714844


### Reference

<b>Stackoverflow</b>
<br>[How do I customize nvidia-smi 's output to show PID username?](https://stackoverflow.com/questions/50264491/how-do-i-customize-nvidia-smi-s-output-to-show-pid-username)

<br><b>Blog</b>
<br>Dandyrillra
<br>[nvidia-smi 출력 결과물을 pandas 데이터프레임으로 받아보자](https://dandyrilla.github.io/2023-07-10/nvidia-smi-pandas-dataframe/)
<br><br>data-newbie
<br>[Ubuntu GPU 확인 방법](https://data-newbie.tistory.com/771/)
