# 用DTW度量不同方法内存带宽测量的差距

## watch point 方法的read/write带宽

得到两种方式的实时带宽序列

In [5]:
def get_DMC_series(perf_raw_data_DMC, read_or_write = "read"):
    """Return the DMC memory-bandwidth series (GB/s), one value per timestamp.

    The caller's DataFrame is left untouched: the read/write classification
    is computed in a separate Series instead of being written back as new
    columns.

    Parameters
    ----------
    perf_raw_data_DMC : pandas.DataFrame
        Raw samples with at least the columns 'timestamp', 'value' and
        'event'.
    read_or_write : str
        "read" keeps the events whose second-to-last character is '0';
        "write" keeps every other event. Any other value selects nothing.

    Returns
    -------
    pandas.Series
        Series named 'memory bandwidth (GB/s)', ordered by ascending
        timestamp. It is empty when no event matches the selection.
    """
    # Classify each event by its second-to-last character.
    event_type = perf_raw_data_DMC['event'].apply(
        lambda name: 'read' if name[-2:-1] == '0' else 'write'
    )
    selected = perf_raw_data_DMC.loc[event_type == read_or_write, ['timestamp', 'value']]

    # Sum the counters of all matching events that share a timestamp.
    bw_all_DMC = selected.groupby(['timestamp']).agg(value=('value', 'sum')).reset_index()

    # The first sample has no predecessor, so its interval is its own
    # timestamp (assumes timestamps count from the start of the measurement
    # -- TODO confirm). fillna() also works on an empty frame, where the
    # former `.loc[0, ...]` assignment raised KeyError.
    bw_all_DMC['interval'] = bw_all_DMC['timestamp'].diff().fillna(bw_all_DMC['timestamp'])

    # Each counted event is scaled by 64 (presumably bytes per access --
    # verify), then divided by the interval and 1e9 to get GB/s.
    bw_all_DMC['memory bandwidth (GB/s)'] = (bw_all_DMC['value'] * 64) / (bw_all_DMC['interval'] * 1000000000)

    return bw_all_DMC['memory bandwidth (GB/s)']


def get_WP_series(perf_raw_data_WP, up_or_down = "up"):
    """Return the watch-point memory-bandwidth series (GB/s), one value per timestamp.

    Parameters
    ----------
    perf_raw_data_WP : pandas.DataFrame
        Raw samples with at least the columns 'timestamp', 'value' and
        'event'. It is not modified.
    up_or_down : str
        Suffix appended to "watchpoint_"; only events whose name contains
        the resulting pattern are kept. The pattern is interpreted as a
        regular expression, as before.

    Returns
    -------
    pandas.Series
        Series named 'memory bandwidth (GB/s)', ordered by ascending
        timestamp. It is empty when no event matches the pattern.
    """
    # na=False treats rows with a missing event name as non-matching instead
    # of producing a NaN mask, which boolean indexing rejects.
    matches = perf_raw_data_WP["event"].str.contains("watchpoint_" + up_or_down, na=False)
    selected = perf_raw_data_WP.loc[matches, ['timestamp', 'value']]

    # Sum the counters of all matching events that share a timestamp.
    bw_all_WP = selected.groupby(['timestamp']).agg(value=('value', 'sum')).reset_index()

    # The first sample has no predecessor, so its interval is its own
    # timestamp (assumes timestamps count from the start of the measurement
    # -- TODO confirm). fillna() also works on an empty frame, where the
    # former `.loc[0, ...]` assignment raised KeyError.
    bw_all_WP['interval'] = bw_all_WP['timestamp'].diff().fillna(bw_all_WP['timestamp'])

    # Each counted event is scaled by 32 (presumably bytes per event --
    # verify), then divided by the interval and 1e9 to get GB/s.
    bw_all_WP['memory bandwidth (GB/s)'] = (bw_all_WP['value'] * 32) / (bw_all_WP['interval'] * 1000000000)

    return bw_all_WP['memory bandwidth (GB/s)']

In [20]:
import numpy as np
from dtaidistance import dtw

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Sample per-benchmark CSV files, one for each measurement method.
csv_path_WP = '../data/csv/watch_point/500.perlbench_r_iter0_1732727831.csv'
# Fixed: this path started with './data/', unlike every other path in the
# notebook, so it did not point inside DMC_csv_dir.
csv_path_DMC = '../data/csv/dmc_620/500.perlbench_r_iter0_1732726637.csv'

# Directories holding the raw CSV files of each measurement method.
DMC_csv_dir = "../data/csv/dmc_620/"
WP_csv_dir = "../data/csv/watch_point/"

In [None]:
import os
import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
from dtaidistance import dtw

# Directory that receives one DTW alignment figure per benchmark.
diff_image_path = '../data/diff_to_dmc_620/watch_point/image/write'

# Directories that are listed for the DMC and watch-point CSV files.
DMC_csv_dir = "../data/csv/dmc_620/"
WP_csv_dir = "../data/csv/watch_point/"

# TODO: define an output file here; creating a CSV file in the current directory is enough
# NOTE(review): the file name spells "DWT"; presumably "DTW" was meant --
# confirm before renaming, since other tooling may read this path.
output_file = '../data/diff_to_dmc_620/watch_point/DWT_diffs_write.csv'

def min_max_normalize(data):
    """Rescale *data* to the range [0, 1] along axis 0.

    Parameters
    ----------
    data : array-like
        One-dimensional series, or a 2-D array whose columns are normalized
        independently.

    Returns
    -------
    Normalized values with the same shape as *data*. A constant series (or
    column) maps to all zeros instead of NaN/inf, and empty input yields an
    empty float array instead of raising.
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        return values

    lowest = np.min(values, axis=0)
    span = np.max(values, axis=0) - lowest
    # A zero range would divide by zero; use 1 so those entries become 0.
    span = np.where(span == 0, 1.0, span)
    # Operate on `data` itself so the result type for a given input type
    # stays what it was before (e.g. a pandas Series stays a Series).
    return (data - lowest) / span

# Keep only CSV files. Other directory entries (for example hidden files
# whose name starts with '.') would produce an empty benchmark id, and an
# empty prefix matches every file name. Sorting makes the order deterministic.
WP_files = sorted(name for name in os.listdir(WP_csv_dir) if name.endswith('.csv'))
print(WP_files)

DMC_files = sorted(name for name in os.listdir(DMC_csv_dir) if name.endswith('.csv'))
print(DMC_files)

# The benchmark id is the part of the file name before the first '.'
# (e.g. '500' for '500.perlbench_r_iter0_1732727831.csv').
bench_nums = sorted({file_name.split('.')[0] for file_name in WP_files})

print(bench_nums)

def _load_bandwidth_series(csv_dir, file_names, extract_series, selector):
    """Concatenate the bandwidth series of every CSV in *file_names*, in order."""
    combined = []
    for file_name in file_names:
        raw_data = pd.read_csv(os.path.join(csv_dir, file_name),
                               sep='|',
                               header=None,
                               names=["timestamp", "value", "event"],
                               usecols=[0, 1, 3])
        combined.extend(extract_series(raw_data, selector))
    return combined


# Create the output locations up front so that neither open() nor
# plt.savefig() fails on a fresh checkout.
os.makedirs(diff_image_path, exist_ok=True)
os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)

# The `with` block closes the output file automatically.
with open(output_file, 'w', encoding='utf-8') as f_out:
    # CSV header
    f_out.write("bench_num,distance_origin,distance_normalized\n")
    for bench_num in bench_nums:
        # Match on "<id>." so one id can never match a longer id that merely
        # starts with the same characters, and ignore non-CSV entries.
        bench_prefix = bench_num + '.'
        WP_bench_files = sorted(name for name in WP_files
                                if name.startswith(bench_prefix) and name.endswith('.csv'))
        DMC_bench_files = sorted(name for name in DMC_files
                                 if name.startswith(bench_prefix) and name.endswith('.csv'))

        WP_series = _load_bandwidth_series(WP_csv_dir, WP_bench_files, get_WP_series, "down")
        DMC_series = _load_bandwidth_series(DMC_csv_dir, DMC_bench_files, get_DMC_series, "write")

        # A benchmark measured with only one method cannot be compared;
        # skip it instead of aborting the whole run.
        if not WP_series or not DMC_series:
            print(f"Skipping benchmark {bench_num}: "
                  f"{len(DMC_series)} DMC samples, {len(WP_series)} watch-point samples")
            continue

        # Arrays of doubles hold the same values as the lists.
        DMC_series = np.asarray(DMC_series, dtype=np.double)
        WP_series = np.asarray(WP_series, dtype=np.double)

        # Min-max normalize each series independently.
        DMC_series_normalized = min_max_normalize(DMC_series)
        WP_series_normalized = min_max_normalize(WP_series)

        # DTW distance on the raw data
        distance_origin = dtw.distance(DMC_series, WP_series)
        print(f"DTW distance: {distance_origin}")

        # DTW distance on the normalized data
        distance_normalized = dtw.distance(DMC_series_normalized, WP_series_normalized)
        print(f"norm DTW distance: {distance_normalized}")

        # Warping path between the raw series, used to draw the alignment.
        alignment = dtw.warping_path(DMC_series, WP_series)

        plt.figure(figsize=(8, 4))
        plt.plot(DMC_series, label="DMC_curve")
        plt.plot(WP_series, label="Watch_Point_curve")
        plt.xlabel('Timestamp')
        plt.ylabel('Memory Bandwidth (GB/s)')
        # One dashed segment per aligned pair of sample indices.
        for (i, j) in alignment:
            plt.plot([i, j], [DMC_series[i], WP_series[j]], color="gray", linestyle="--")
        plt.legend()
        plt.title(f"DTW Alignment on benchmark {bench_num}")
        image_path = os.path.join(diff_image_path, f"{bench_num}.png")
        plt.savefig(image_path)
        plt.close()
        print(f"Alignment figure saved to {image_path}")

        # Append this benchmark's row to the output file.
        f_out.write(f"{bench_num},{distance_origin},{distance_normalized}\n")


['500.perlbench_r_iter0_1732727831.csv', '500.perlbench_r_iter0_1732728050.csv', '500.perlbench_r_iter0_1732728160.csv', '502.gcc_r_iter0_1732730960.csv', '502.gcc_r_iter0_1732731046.csv', '502.gcc_r_iter0_1732731162.csv', '502.gcc_r_iter0_1732731290.csv', '502.gcc_r_iter0_1732731439.csv', '503.bwaves_r_iter0_1732760425.csv', '503.bwaves_r_iter0_1732760831.csv', '503.bwaves_r_iter0_1732761489.csv', '503.bwaves_r_iter0_1732761996.csv', '505.mcf_r_iter0_1732735580.csv', '507.cactuBSSN_r_iter0_1732764322.csv', '508.namd_r_iter0_1732765783.csv', '510.parest_r_iter0_1732772409.csv', '511.povray_r_iter0_1732776073.csv', '519.lbm_r_iter0_1732783693.csv', '520.omnetpp_r_iter0_1732740779.csv', '521.wrf_r_iter0_1732791309.csv', '523.xalancbmk_r_iter0_1732743836.csv', '525.x264_r_iter0_1732745586.csv', '525.x264_r_iter0_1732745632.csv', '525.x264_r_iter0_1732745755.csv', '526.blender_r_iter0_1732793840.csv', '527.cam4_r_iter0_1732796372.csv', '531.deepsjeng_r_iter0_1732747080.csv', '538.imagick_r