# 用DTW度量不同方法内存带宽测量的差距

## cmn_mc方法和dmc_620方法

封装一下得到两种时间序列的方法：
输入都是已经读到pandas里的时间序列。

In [None]:
def get_DMC_series(perf_raw_data_DMC):
    """Build the total memory-bandwidth series from memory-controller counters.

    Counter values that share a timestamp are summed over all events, and
    each sum is turned into a rate using the time elapsed since the previous
    sample. The input DataFrame is not modified.

    Args:
        perf_raw_data_DMC: DataFrame providing at least the columns
            ``timestamp`` and ``value``; any other column is ignored.

    Returns:
        pandas Series named ``'memory bandwidth (GB/s)'`` holding one entry
        per distinct timestamp, in ascending timestamp order. It is empty
        when the input has no rows.
    """
    # Every event contributes to the total, so only the per-timestamp sum of
    # the counters is needed; no per-event classification is required.
    bw_all_DMC = (
        perf_raw_data_DMC.groupby(['timestamp'])
        .agg(value=('value', 'sum'))
        .reset_index()
    )

    bw_all_DMC['interval'] = bw_all_DMC['timestamp'].diff()
    if not bw_all_DMC.empty:
        # The first sample has no predecessor: its interval is measured from
        # time zero, i.e. it equals its own timestamp.
        bw_all_DMC.loc[0, 'interval'] = bw_all_DMC.loc[0, 'timestamp']

    # NOTE(review): the GB/s label assumes timestamps are in seconds and that
    # each counted event transfers 64 bytes -- confirm against the PMU docs.
    bw_all_DMC['memory bandwidth (GB/s)'] = (
        (bw_all_DMC['value'] * 64) / (bw_all_DMC['interval'] * 1000000000)
    )

    return bw_all_DMC['memory bandwidth (GB/s)']

def get_CMN_series(perf_raw_data_CMN):
    """Build the total memory-bandwidth series from HN-F node counters.

    Events whose name contains ``'reqs'`` count positively; every other
    event is treated as a retry counter and subtracted. The signed values
    are summed per timestamp and converted to a rate using the time elapsed
    since the previous sample. The input DataFrame is not modified.

    Args:
        perf_raw_data_CMN: DataFrame providing at least the columns
            ``timestamp``, ``value`` and ``event`` (event names as strings).

    Returns:
        pandas Series named ``'memory bandwidth (GB/s)'`` holding one entry
        per distinct timestamp, in ascending timestamp order. It is empty
        when the input has no rows.
    """
    # Anything that is not a request counter is considered a retry counter.
    is_retry = perf_raw_data_CMN['event'].apply(
        lambda name: 'reqs' not in name
    ).astype(bool)

    # Negate retries on a fresh frame so the per-timestamp sum becomes
    # (requests - retries) without writing into the caller's data.
    raw_value = perf_raw_data_CMN['value']
    signed_samples = perf_raw_data_CMN[['timestamp']].assign(
        value=raw_value.where(~is_retry, -raw_value)
    )

    bw_all_CMN = (
        signed_samples.groupby(['timestamp'])
        .agg(value=('value', 'sum'))
        .reset_index()
    )

    bw_all_CMN['interval'] = bw_all_CMN['timestamp'].diff()
    if not bw_all_CMN.empty:
        # The first sample has no predecessor: its interval is measured from
        # time zero, i.e. it equals its own timestamp.
        bw_all_CMN.loc[0, 'interval'] = bw_all_CMN.loc[0, 'timestamp']

    # NOTE(review): the GB/s label assumes timestamps are in seconds and that
    # each counted request transfers 64 bytes -- confirm against the PMU docs.
    bw_all_CMN['memory bandwidth (GB/s)'] = (
        (bw_all_CMN['value'] * 64) / (bw_all_CMN['interval'] * 1000000000)
    )

    return bw_all_CMN['memory bandwidth (GB/s)']

In [5]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dtaidistance import dtw

In [6]:
def plot_alignment(series1, series2, alignment):
    """Show two series in one figure with a dashed gray link per aligned pair.

    Args:
        series1: First sequence to plot; indexed by the first element of
            each alignment pair.
        series2: Second sequence to plot; indexed by the second element of
            each alignment pair.
        alignment: Iterable of ``(i, j)`` pairs linking ``series1[i]`` to
            ``series2[j]``.
    """
    link_style = {"color": "gray", "linestyle": "--"}

    plt.figure(figsize=(8, 4))
    plt.plot(series1, label="Series 1")
    plt.plot(series2, label="Series 2")
    for pair in alignment:
        idx1, idx2 = pair
        # x-coordinates are the two indices, y-coordinates the linked values.
        plt.plot([idx1, idx2], [series1[idx1], series2[idx2]], **link_style)
    plt.title("DTW Alignment")
    plt.legend()
    plt.show()

批处理计算每个benchmark的DTW距离

In [None]:
# Input directories holding the CSV files of each measurement method.
CMN_csv_dir = "../data/csv/cmn_mc/"
DMC_csv_dir = "../data/csv/dmc_620/"

# Outputs: the CSV of per-benchmark distances and the directory that
# receives one alignment figure per benchmark.
output_file = '../data/diff_to_dmc_620/cmn_mc/DWT_diffs.csv'
diff_image_path = '../data/diff_to_dmc_620/cmn_mc/image'

def min_max_normalize(data):
    """Rescale ``data`` linearly so its values along axis 0 span [0, 1].

    Args:
        data: Non-empty array-like of numbers (a list or a NumPy array).

    Returns:
        ``(data - min) / (max - min)`` computed along axis 0. Where the
        range is zero (all values equal) the result is 0 rather than the
        NaN a 0/0 division would produce.

    Raises:
        ValueError: If ``data`` is empty (raised by ``np.min``).
    """
    lowest = np.min(data, axis=0)
    value_range = np.max(data, axis=0) - lowest
    # A zero range means the numerator is zero as well; dividing by 1
    # there yields 0 instead of NaN plus a RuntimeWarning.
    safe_range = np.where(value_range == 0, 1, value_range)
    return (data - lowest) / safe_range

# Raw directory listings of the two measurement methods.
CMN_files = os.listdir(CMN_csv_dir)
print(CMN_files)

DMC_files = os.listdir(DMC_csv_dir)
print(DMC_files)

# A benchmark id is the text before the first '.' of a CSV file name.
# Non-CSV entries and names starting with '.' are ignored: they would yield
# an empty id, which prefix-matches every file.
bench_nums = sorted({
    fileName.split('.')[0]
    for fileName in CMN_files
    if fileName.endswith('.csv') and not fileName.startswith('.')
})

print(bench_nums)

# Create both output locations up front so that neither the CSV nor the
# figures fail on a fresh checkout.
os.makedirs(diff_image_path, exist_ok=True)
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

with open(output_file, 'w', encoding='utf-8') as f_out:
    # Header row of the result CSV.
    f_out.write("bench_num,distance_origin,distance_normalized\n")
    for bench_num in bench_nums:
        # Match on "<id>." so that an id can never match a longer id that
        # merely starts with the same characters.
        bench_prefix = bench_num + '.'

        CMN_bench_files = sorted(
            benchName for benchName in CMN_files
            if benchName.startswith(bench_prefix) and benchName.endswith('.csv')
        )

        # Series of all files of one benchmark are concatenated in sorted
        # file-name order.
        CMN_series = []
        for CMN_bench_file in CMN_bench_files:
            perf_raw_data_CMN = pd.read_csv(os.path.join(CMN_csv_dir, CMN_bench_file),
                                            sep='|',
                                            header=None,
                                            names=["timestamp", "value", "event"],
                                            usecols=[0, 1, 3])
            CMN_series.extend(get_CMN_series(perf_raw_data_CMN))

        DMC_bench_files = sorted(
            benchName for benchName in DMC_files
            if benchName.startswith(bench_prefix) and benchName.endswith('.csv')
        )

        DMC_series = []
        for DMC_bench_file in DMC_bench_files:
            perf_raw_data_DMC = pd.read_csv(os.path.join(DMC_csv_dir, DMC_bench_file),
                                            sep='|',
                                            header=None,
                                            names=["timestamp", "value", "event"],
                                            usecols=[0, 1, 3])
            DMC_series.extend(get_DMC_series(perf_raw_data_DMC))

        # Without samples from both methods there is nothing to compare;
        # skip instead of crashing in the normalisation or in DTW.
        if not CMN_series or not DMC_series:
            print(f"Skipping benchmark {bench_num}: missing samples for one of the methods")
            continue

        # Normalise each series to [0, 1].
        DMC_series_normalized = min_max_normalize(DMC_series)
        CMN_series_normalized = min_max_normalize(CMN_series)

        # DTW distance on the raw data.
        distance_origin = dtw.distance(DMC_series, CMN_series)
        print(f"DTW distance: {distance_origin}")

        # DTW distance on the normalised data.
        distance_normalized = dtw.distance(DMC_series_normalized, CMN_series_normalized)
        print(f"norm DTW distance: {distance_normalized}")

        # Warping path on the raw data, used to draw the alignment links.
        alignment = dtw.warping_path(DMC_series, CMN_series)

        plt.figure(figsize=(8, 4))
        plt.plot(DMC_series, label="DMC_curve")
        plt.plot(CMN_series, label="CMN_curve")
        plt.xlabel('Timestamp')
        plt.ylabel('Memory Bandwidth (GB/s)')
        for (i, j) in alignment:
            plt.plot([i, j], [DMC_series[i], CMN_series[j]], color="gray", linestyle="--")
        plt.legend()
        plt.title(f"DTW Alignment on benchmark {bench_num}")

        image_path = os.path.join(diff_image_path, f"{bench_num}.png")
        plt.savefig(image_path)  # save the figure to disk
        plt.close()
        print(f"Alignment figure saved to {image_path}")

        f_out.write(f"{bench_num},{distance_origin},{distance_normalized}\n")


['500.perlbench_r_iter0_1732727231.csv', '500.perlbench_r_iter0_1732727453.csv', '500.perlbench_r_iter0_1732727561.csv', '502.gcc_r_iter0_1732730115.csv', '502.gcc_r_iter0_1732730201.csv', '502.gcc_r_iter0_1732730308.csv', '502.gcc_r_iter0_1732730427.csv', '502.gcc_r_iter0_1732730562.csv', '503.bwaves_r_iter0_1732758224.csv', '503.bwaves_r_iter0_1732758672.csv', '503.bwaves_r_iter0_1732759320.csv', '503.bwaves_r_iter0_1732759830.csv', '505.mcf_r_iter0_1732734283.csv', '507.cactuBSSN_r_iter0_1732763763.csv', '508.namd_r_iter0_1732765498.csv', '510.parest_r_iter0_1732770298.csv', '511.povray_r_iter0_1732775545.csv', '519.lbm_r_iter0_1732781328.csv', '520.omnetpp_r_iter0_1732739475.csv', '521.wrf_r_iter0_1732789542.csv', '523.xalancbmk_r_iter0_1732743238.csv', '525.x264_r_iter0_1732745202.csv', '525.x264_r_iter0_1732745249.csv', '525.x264_r_iter0_1732745373.csv', '526.blender_r_iter0_1732793389.csv', '527.cam4_r_iter0_1732795664.csv', '531.deepsjeng_r_iter0_1732746710.csv', '538.imagick_r