In [64]:
import pandas as pd
import numpy as np

# Load the raw performance data. Columns kept: timestamp, processor id,
# counter value, performance event name (file columns 0, 1, 2 and 4).
# NOTE(review): absolute local path; consider a configurable data directory.
raw_data_path = "/home/tongyu/project/hperf/tmp/20221212_test001/perf_result"
raw_data = pd.read_csv(
    raw_data_path,
    usecols=[0, 1, 2, 4],
    names=["timestamp", "unit", "value", "event"],
    header=None,
)
raw_data

Unnamed: 0,timestamp,unit,value,event
0,1.004951,CPU0,1004.92,cpu-clock
1,1.004951,CPU1,1004.92,cpu-clock
2,1.004951,CPU2,1004.92,cpu-clock
3,1.004951,CPU3,1004.92,cpu-clock
4,1.004951,CPU4,1004.92,cpu-clock
...,...,...,...,...
18555,28.428993,CPU59,7.00,r00c5
18556,28.428993,CPU60,13.00,r00c5
18557,28.428993,CPU61,10.00,r00c5
18558,28.428993,CPU62,387.00,r00c5


In [65]:
# Aggregate over the whole measurement window: for every CPU and every
# performance event, sum the counter values of all samples.
event_per_cpu = (
    raw_data
    .groupby(["unit", "event"])
    # The string "sum" selects the built-in groupby reduction. Passing np.sum
    # here is deprecated in recent pandas and emits a FutureWarning.
    .agg(event_total=("value", "sum"))
)

# Show the result for one CPU.
# Several CPUs at once: event_per_cpu.loc[["CPU5", "CPU6"], :]
event_per_cpu.loc["CPU5", :]

Unnamed: 0_level_0,event_total
event,Unnamed: 1_level_1
cpu-clock,28428.12
cycles:D,93558350000.0
instructions:D,183480700000.0
msr/tsc/,82247750000.0
r00c4,10518880000.0
r00c5,5470665.0
r08d1,4412791000.0
r10d1,3953493000.0
r20d1,496043400.0
ref-cycles:D,82241090000.0


In [66]:
# Turn the remaining index level (the event name) into a regular column.
# Several CPUs at once: event_per_cpu.loc[["CPU5", "CPU6"], :].reset_index()
scoped_event_per_cpu = event_per_cpu.loc["CPU5"].reset_index()
scoped_event_per_cpu

Unnamed: 0,event,event_total
0,cpu-clock,28428.12
1,cycles:D,93558350000.0
2,instructions:D,183480700000.0
3,msr/tsc/,82247750000.0
4,r00c4,10518880000.0
5,r00c5,5470665.0
6,r08d1,4412791000.0
7,r10d1,3953493000.0
8,r20d1,496043400.0
9,ref-cycles:D,82241090000.0


In [67]:
# Rename the events: every perf event name gets a custom display name.
# Each row below is (id, perf_name, name).
events = [
    {"id": event_id, "perf_name": perf_name, "name": display_name}
    for event_id, perf_name, display_name in (
        (0, "cpu-clock", "CPU TIME"),
        (10, "msr/tsc/", "TSC"),
        (20, "cycles", "CYCLES"),
        (21, "instructions", "INSTRUCTIONS"),
        (22, "ref-cycles", "REFERENCE CYCLES"),
        (30, "r08d1", "L1 CACHE MISSES"),
        (31, "r10d1", "L2 CACHE MISSES"),
        (32, "r20d1", "L3 CACHE MISSES"),
        (33, "r00c4", "BRANCHES"),
        (34, "r00c5", "BRANCH MISSES"),
    )
]
events

[{'id': 0, 'perf_name': 'cpu-clock', 'name': 'CPU TIME'},
 {'id': 10, 'perf_name': 'msr/tsc/', 'name': 'TSC'},
 {'id': 20, 'perf_name': 'cycles', 'name': 'CYCLES'},
 {'id': 21, 'perf_name': 'instructions', 'name': 'INSTRUCTIONS'},
 {'id': 22, 'perf_name': 'ref-cycles', 'name': 'REFERENCE CYCLES'},
 {'id': 30, 'perf_name': 'r08d1', 'name': 'L1 CACHE MISSES'},
 {'id': 31, 'perf_name': 'r10d1', 'name': 'L2 CACHE MISSES'},
 {'id': 32, 'perf_name': 'r20d1', 'name': 'L3 CACHE MISSES'},
 {'id': 33, 'perf_name': 'r00c4', 'name': 'BRANCHES'},
 {'id': 34, 'perf_name': 'r00c5', 'name': 'BRANCH MISSES'}]

In [68]:
# perf event name -> custom display name.
mapping = {item["perf_name"]: item["name"] for item in events}

# Drop any perf modifier suffix (e.g. "cycles:D" -> "cycles") before the lookup.
# A name without a mapping entry is kept as-is, so re-running this cell on the
# already-renamed column is a no-op instead of raising KeyError.
scoped_event_per_cpu["event"] = scoped_event_per_cpu["event"].map(
    lambda perf_event: mapping.get(perf_event.split(":")[0], perf_event)
)
scoped_event_per_cpu


Unnamed: 0,event,event_total
0,CPU TIME,28428.12
1,CYCLES,93558350000.0
2,INSTRUCTIONS,183480700000.0
3,TSC,82247750000.0
4,BRANCHES,10518880000.0
5,BRANCH MISSES,5470665.0
6,L1 CACHE MISSES,4412791000.0
7,L2 CACHE MISSES,3953493000.0
8,L3 CACHE MISSES,496043400.0
9,REFERENCE CYCLES,82241090000.0


In [69]:
# Scratch check: add extra rows with pd.concat. The older spelling was
# scoped_event_per_cpu.append({"event": "xxx", "event_total": 15}, ignore_index=True)
pd.concat(
    [
        scoped_event_per_cpu,
        pd.DataFrame(
            [
                {"event": "xxx", "event_total": 15},
                {"event": "yyy", "event_total": 16},
            ]
        ),
    ],
    ignore_index=True,
)


Unnamed: 0,event,event_total
0,CPU TIME,28428.12
1,CYCLES,93558350000.0
2,INSTRUCTIONS,183480700000.0
3,TSC,82247750000.0
4,BRANCHES,10518880000.0
5,BRANCH MISSES,5470665.0
6,L1 CACHE MISSES,4412791000.0
7,L2 CACHE MISSES,3953493000.0
8,L3 CACHE MISSES,496043400.0
9,REFERENCE CYCLES,82241090000.0


In [70]:
# Derived metrics. Each row below is (metric name, expression); the
# expressions are arithmetic over variables named e<number>.
metrics = [
    {"metric": label, "expression": formula}
    for label, formula in (
        ("CPU UTILIZATION", "e22 / e10"),
        ("CPI", "e20 / e21"),
        ("L1 CACHE MPKI", "(1000 * e30) / e21"),
        ("L2 CACHE MPKI", "(1000 * e31) / e21"),
        ("L3 CACHE MPKI", "(1000 * e32) / e21"),
        ("BRANCH MISS RATE", "e34 / e33"),
    )
]

In [71]:
# Fetch the total of one event (here TSC) by its display name.
scoped_event_per_cpu.loc[scoped_event_per_cpu["event"] == "TSC", "event_total"].iloc[0]

82247754894.0

In [72]:
# Display name -> numeric event id.
mapping_name_id = {}
for item in events:
    mapping_name_id[item["name"]] = item["id"]

# "e<id>" -> measured total of that event; these keys are the variable names
# available to the metric expressions.
mapping_id_value = {}
for item in events:
    # .iloc[0] raises IndexError when the event is missing from the table.
    val = scoped_event_per_cpu.loc[
        scoped_event_per_cpu["event"] == item["name"], "event_total"
    ].iloc[0]
    mapping_id_value[f"e{item['id']}"] = val

metric_results = {"event": [], "event_total": []}

for metric in metrics:
    # eval() inserts a "__builtins__" entry into the globals dict it is given,
    # so the values are passed as locals next to a throwaway globals dict;
    # mapping_id_value then keeps only the e<id> keys.
    # NOTE(review): eval() executes arbitrary code; only evaluate expressions
    # that come from a trusted source.
    val = eval(metric["expression"], {}, mapping_id_value)
    # Record name and value together, after a successful evaluation, so the
    # two result lists always have the same length.
    metric_results["event"].append(metric["metric"])
    metric_results["event_total"].append(val)

metric_results

{'event': ['CPU UTILIZATION',
  'CPI',
  'L1 CACHE MPKI',
  'L2 CACHE MPKI',
  'L3 CACHE MPKI',
  'BRANCH MISS RATE'],
 'event_total': [0.9999189278539141,
  0.5099084766184462,
  24.05043979853046,
  21.547188654724106,
  2.703518485774917,
  0.0005200806189419652]}

In [73]:
# Stack the rows built from metric_results below the per-event totals.
pd.concat(
    [scoped_event_per_cpu, pd.DataFrame(metric_results)],
    ignore_index=True,
)

Unnamed: 0,event,event_total
0,CPU TIME,28428.12
1,CYCLES,93558350000.0
2,INSTRUCTIONS,183480700000.0
3,TSC,82247750000.0
4,BRANCHES,10518880000.0
5,BRANCH MISSES,5470665.0
6,L1 CACHE MISSES,4412791000.0
7,L2 CACHE MISSES,3953493000.0
8,L3 CACHE MISSES,496043400.0
9,REFERENCE CYCLES,82241090000.0


In [76]:
# Scratch check: range() excludes its stop value, so this prints 4 and 5.
for i in range(4, 6):
    print(i)

4
5


In [83]:
def get_cpu_id_list(lst):
    """Expand a CPU list string such as "2,4-8,7-9" into a list of CPU ids.

    Parameters
    ----------
    lst : str
        Comma-separated items. Each item is either a single id ("2") or an
        inclusive range written as "start-end" ("4-8").

    Returns
    -------
    list of int
        The CPU ids in order of first appearance, with duplicates removed.

    Raises
    ------
    ValueError
        If an item is not an integer or a "start-end" pair of integers, or
        if the start of a range is greater than its end.
    """
    cpu_ids = []
    for item in lst.split(","):
        # Split on the first dash only, so an item with extra dashes such as
        # "1-2-3" fails in int() below instead of being silently truncated.
        start_text, dash, end_text = item.partition("-")
        start_cpu_id = int(start_text)
        if not dash:
            cpu_ids.append(start_cpu_id)
            continue
        end_cpu_id = int(end_text)
        if start_cpu_id > end_cpu_id:
            # A reversed range would otherwise expand to nothing without notice.
            raise ValueError(
                f"invalid CPU range {item!r}: start is greater than end"
            )
        cpu_ids.extend(range(start_cpu_id, end_cpu_id + 1))
    # dict.fromkeys keeps the first occurrence of each id in linear time.
    return list(dict.fromkeys(cpu_ids))

get_cpu_id_list("2,4-8,7-9")

[2, 4, 5, 6, 7, 8, 9]

In [84]:
# Demonstrates that int() rejects non-numeric text with ValueError. The error
# is caught and printed so the cell no longer aborts Restart Kernel -> Run All.
try:
    int("asd")
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: invalid literal for int() with base 10: 'asd'