In [45]:
import pandas as pd
import numpy as np

In [46]:
def populate_data_time(sampled_powermodes, path, filename, offset_dict, model):
    """Collect a window of minibatch times for each power mode.

    For every power mode the CSV ``<path>/pm_<powermode>/<filename>`` is read
    without a header row, a window of up to 40 rows is selected, and the
    timing column is labelled with the settings parsed from the mode name.

    Parameters
    ----------
    sampled_powermodes : iterable of str
        Power-mode identifiers of the form ``"<cores>_<cpu>_<gpu>_<mem>"``;
        each of the four fields must parse as an integer.
    path : str
        Directory that holds one ``pm_<powermode>`` sub-directory per mode.
    filename : str
        Name of the CSV file inside each ``pm_<powermode>`` directory.
    offset_dict : dict
        Maps a power mode to the row position where its window starts.
        A missing entry (or an explicit ``-1``) selects the last 40 rows;
        an offset of ``0`` starts at row 1 instead of row 0.
    model : str
        ``'yolo'`` reads the timing from column position 2; every other value
        reads it from column position 4.

    Returns
    -------
    pandas.DataFrame
        Columns ``Minibatch_time``, ``Cores``, ``CPU_frequency``,
        ``GPU_frequency`` and ``Memory_frequency`` with a fresh integer
        index. Empty (but with these columns) when no power modes are given.
    """
    window_size = 40
    output_columns = ['Minibatch_time', 'Cores', 'CPU_frequency',
                      'GPU_frequency', 'Memory_frequency']

    # Scaling applied to the values parsed from the mode name. All are the
    # identity today; the divisions still turn the frequencies into floats.
    cores_multiplier = 1
    cpu_freq_divider = 1
    gpu_freq_divider = 1
    mem_freq_divider = 1

    # Column position of the timing value depends on the model's log layout.
    time_column = 2 if model == 'yolo' else 4

    all_data = []
    for powermode in sampled_powermodes:
        offset = offset_dict.get(powermode, -1)
        fields = powermode.split("_")
        cores = int(fields[0]) * cores_multiplier
        cpu = int(fields[1]) / cpu_freq_divider
        gpu = int(fields[2]) / gpu_freq_divider
        mem = int(fields[3]) / mem_freq_divider

        raw_df = pd.read_csv(f"{path}/pm_{powermode}/{filename}", header=None)

        if offset == -1:
            # No offset known: take the trailing window. Clamp at 0 so a file
            # shorter than the window yields all of its rows; a negative
            # start would otherwise wrap around and keep only a tail slice.
            start = max(len(raw_df) - window_size, 0)
        elif offset == 0:
            # Row 0 is skipped for a zero offset (presumably a header or
            # warm-up row -- TODO confirm against the log format).
            start = 1
        else:
            start = offset
        end = start + window_size

        times = raw_df.iloc[start:end, time_column].astype(float)
        mode_df = times.to_frame(name='Minibatch_time')
        mode_df['Cores'] = cores
        mode_df['CPU_frequency'] = cpu
        mode_df['GPU_frequency'] = gpu
        mode_df['Memory_frequency'] = mem

        all_data.append(mode_df)

    if not all_data:
        # pd.concat raises on an empty list; return an empty, typed-by-name frame.
        return pd.DataFrame(columns=output_columns)

    return pd.concat(all_data, ignore_index=True)

In [47]:
def populate_data_power(all_powermodes, path, tg_filename, offset_dict, start_dict, end_dict, required_length=500):
    """Collect a fixed number of power samples for each power mode.

    For every power mode the CSV ``<path>/pm_<powermode>/<tg_filename>`` is
    read, rows whose ``log_time`` lies inside the mode's
    ``[start_time, end_time]`` window (inclusive) are kept, and their
    ``power cur`` values are padded or truncated to ``required_length``.

    Parameters
    ----------
    all_powermodes : iterable of str
        Power-mode identifiers of the form ``"<cores>_<cpu>_<gpu>_<mem>"``.
    path : str
        Directory that holds one ``pm_<powermode>`` sub-directory per mode.
    tg_filename : str
        Name of the CSV file inside each ``pm_<powermode>`` directory; it must
        provide ``log_time`` and ``power cur`` columns.
    offset_dict : dict
        Accepted for interface compatibility; not used by this function.
    start_dict, end_dict : dict
        Map a power mode to the start / end of its time window. A missing
        entry falls back to ``-1``.
    required_length : int, optional
        Number of samples emitted per power mode (default 500). Shorter
        sample lists are repeated cyclically; longer ones are cut.

    Returns
    -------
    pandas.DataFrame
        One row per sample with columns ``cores``, ``cpu``, ``gpu``, ``mem``
        (kept as the strings parsed from the mode name) and ``power_sample``.
        Modes with no samples inside their window are reported and skipped.
    """
    rows = []
    for powermode in all_powermodes:
        start_time = start_dict.get(powermode, -1)
        end_time = end_dict.get(powermode, -1)
        tg_file = f"{path}/pm_{powermode}/{tg_filename}"

        tg_df = pd.read_csv(tg_file)
        in_window = (tg_df['log_time'] >= start_time) & (tg_df['log_time'] <= end_time)
        power_list = tg_df.loc[in_window, 'power cur'].astype(float).dropna().tolist()

        if len(power_list) == 0:
            print("No power samples found for powermode: ", powermode)
            continue

        if len(power_list) < required_length:
            # Ceiling division: how many copies are needed to reach the target.
            repeats_required = -(-required_length // len(power_list))
            power_list = power_list * repeats_required

        # Keep exactly `required_length` samples (cuts both padded and long lists).
        power_list = power_list[:required_length]

        # Split the powermode into its components.
        cores, cpu, gpu, mem = powermode.split("_")

        rows.extend(
            {
                'cores': cores,
                'cpu': cpu,
                'gpu': gpu,
                'mem': mem,
                'power_sample': sample,
            }
            for sample in power_list
        )

    return pd.DataFrame(rows)


In [48]:
def extract_offsets_from_csv(csv_file):
    """Load per-power-mode offsets and time windows from a CSV file.

    The file must provide the columns ``cores``, ``cpu``, ``gpu``, ``mem``,
    ``skip_index``, ``start_time`` and ``end_time``. Each row is keyed by the
    power-mode string ``"<cores>_<cpu>_<gpu>_<mem>"`` built from the integer
    values of the first four columns; a later row with the same key
    overwrites an earlier one.

    Returns
    -------
    tuple of dict
        ``(offsets, start_times, end_times)``: ``offsets`` maps the key to
        ``skip_index`` as an int, the other two map it to ``start_time`` and
        ``end_time`` as floats.
    """
    table = pd.read_csv(csv_file)
    key_fields = ('cores', 'cpu', 'gpu', 'mem')

    offsets_by_mode = {}
    start_by_mode = {}
    end_by_mode = {}

    for _, record in table.iterrows():
        mode_key = "_".join(str(int(record[field])) for field in key_fields)
        skip_index = int(record['skip_index'])
        window_start = float(record['start_time'])
        window_end = float(record['end_time'])

        offsets_by_mode[mode_key] = skip_index
        start_by_mode[mode_key] = window_start
        end_by_mode[mode_key] = window_end

    return offsets_by_mode, start_by_mode, end_by_mode

In [49]:
def generate_powermodes():
    """Enumerate every power-mode identifier in the sweep.

    An identifier has the form ``"<cores>_<cpu>_<gpu>_<mem>"``, for example
    ``"4_422400_114750000_665600000"``. The list holds
    3 * 7 * 7 * 3 = 441 entries, ordered with the CPU frequency varying
    slowest, then the GPU frequency, then the core count, and the memory
    frequency varying fastest.
    """
    core_counts = [4, 8, 12]  # 3 possible values
    cpu_freqs = [422400, 729600, 1036800, 1344000, 1651200, 1958400, 2201600]  # in kHz, 7 possible values
    gpu_freqs = [114750000, 318750000, 522750000, 726750000, 930750000, 1134750000, 1300500000]  # 7 possible values
    mem_freqs = [665600000, 2133000000, 3199000000]  # 3 possible values

    return [
        f"{cores}_{cpu_freq}_{gpu_freq}_{mem_freq}"
        for cpu_freq in cpu_freqs
        for gpu_freq in gpu_freqs
        for cores in core_counts
        for mem_freq in mem_freqs
    ]

In [50]:
# Model whose runs are aggregated. It selects the input directory, the
# offset files and the name of the exported CSV further down.
model = 'lstm'

# Per-power-mode input files: one with the timing rows, one with the
# power log.
time_filename = 'mn_nw4_pf2_epoch_stats.csv'
power_filename = 'mn_nw4_pf2_tegrastats.csv'

# Every power-mode identifier to load.
all_powermodes = generate_powermodes()

In [51]:
# Batch sizes to load; 'bert' uses a different set from the other models.
bs_list = [1, 4, 8, 16, 32] if model == 'bert' else [1, 4, 16, 32, 64]

In [52]:
# Load the raw timing and power samples for every batch size.
#
# Per-batch frames are collected in lists and concatenated once after the
# loop. The previous version grew `time_df` / `power_df` inside the loop and
# then repeated the final concat on two mis-indented lines after it, which
# either fails to parse or appends the last batch size a second time.
TIME_COLUMNS = ['Minibatch_time', 'Cores', 'CPU_frequency', 'GPU_frequency', 'Memory_frequency', 'bs']
POWER_COLUMNS = ['cores', 'cpu', 'gpu', 'mem', 'power_sample', 'bs']

time_frames = []
power_frames = []

for batch_size in bs_list:
    path = f"{model}_infer_bs_runs/bs_{batch_size}"
    offset_file = f"minibatch_index_list_{model}_bs{batch_size}.csv"
    offsets, start_times, end_times = extract_offsets_from_csv(offset_file)

    # Both loaders accept the full list of power modes and return the
    # concatenated result, so there is no need to call them one mode at a time.
    temp_time_df = populate_data_time(all_powermodes, path, time_filename, offsets, model)
    temp_power_df = populate_data_power(all_powermodes, path, power_filename, offsets, start_times, end_times)

    temp_time_df['bs'] = batch_size
    temp_power_df['bs'] = batch_size

    time_frames.append(temp_time_df)
    power_frames.append(temp_power_df)

# reindex pins the column order even if a batch size produced an empty frame,
# because a later cell renames the power columns by position.
time_df = pd.concat(time_frames, ignore_index=True).reindex(columns=TIME_COLUMNS)
power_df = pd.concat(power_frames, ignore_index=True).reindex(columns=POWER_COLUMNS)


In [53]:
# Make the dtypes explicit before aggregating: timings as floats, and the
# power-mode components (parsed as strings from the mode name) as integers.
time_df['Minibatch_time'] = time_df['Minibatch_time'].astype(float)
for key_column in ('cores', 'cpu', 'gpu', 'mem'):
    power_df[key_column] = power_df[key_column].astype(int)

In [54]:
# Reduce the raw samples to one median row per configuration
# (power mode + batch size).
#
# Rows are grouped by the configuration columns themselves rather than by row
# position (`index // 40`, `index // 500`): positional chunks silently mix
# neighbouring configurations as soon as one of them contributes a different
# number of rows. When every configuration has exactly 40 timing rows and 500
# power rows, the result is the same as with positional chunking.
def _median_per_configuration(samples_df, key_columns):
    """Return one row per unique `key_columns` combination holding the median
    of the remaining columns.

    Column order and first-appearance row order of `samples_df` are kept, and
    every column is returned as float, matching what a median over all
    columns produces.
    """
    column_order = list(samples_df.columns)
    medians = (
        samples_df
        .astype(float)  # also guards against object-dtype columns left by concat
        .groupby(key_columns, sort=False, as_index=False)
        .median()
    )
    return medians[column_order]


time_df = _median_per_configuration(
    time_df, ['Cores', 'CPU_frequency', 'GPU_frequency', 'Memory_frequency', 'bs']
)
power_df = _median_per_configuration(
    power_df, ['cores', 'cpu', 'gpu', 'mem', 'bs']
)

In [55]:
# Put the timing table into the same column order and naming as the power
# table so the two can be joined on shared keys.
time_df = time_df[
    ['Cores', 'CPU_frequency', 'GPU_frequency', 'Memory_frequency', 'Minibatch_time', 'bs']
].rename(columns={
    'Cores': 'cores',
    'CPU_frequency': 'cpu',
    'GPU_frequency': 'gpu',
    'Memory_frequency': 'mem',
    'Minibatch_time': 'observed_time',
})

In [56]:
# Relabel the power table by position: the fifth column becomes
# 'observed_power'. This relies on power_df having exactly six columns.
power_df.columns = pd.Index(['cores', 'cpu', 'gpu', 'mem', 'observed_power', 'bs'])

In [57]:
# Inspect the timing table (one median row per power mode and batch size).
time_df

Unnamed: 0,cores,cpu,gpu,mem,observed_time,bs
0,4.0,422400.0,1.147500e+08,6.656000e+08,12.717248,1.0
1,4.0,422400.0,1.147500e+08,2.133000e+09,15.813504,1.0
2,4.0,422400.0,1.147500e+08,3.199000e+09,12.006416,1.0
3,8.0,422400.0,1.147500e+08,6.656000e+08,11.575360,1.0
4,8.0,422400.0,1.147500e+08,2.133000e+09,11.393920,1.0
...,...,...,...,...,...,...
2200,8.0,2201600.0,1.300500e+09,2.133000e+09,2.102432,64.0
2201,8.0,2201600.0,1.300500e+09,3.199000e+09,1.991840,64.0
2202,12.0,2201600.0,1.300500e+09,6.656000e+08,3.757072,64.0
2203,12.0,2201600.0,1.300500e+09,2.133000e+09,2.098240,64.0


In [58]:
# Inspect the power table (one median row per power mode and batch size).
power_df

Unnamed: 0,cores,cpu,gpu,mem,observed_power,bs
0,4.0,422400.0,1.147500e+08,6.656000e+08,13190.0,1.0
1,4.0,422400.0,1.147500e+08,2.133000e+09,14105.0,1.0
2,4.0,422400.0,1.147500e+08,3.199000e+09,14810.0,1.0
3,8.0,422400.0,1.147500e+08,6.656000e+08,13598.0,1.0
4,8.0,422400.0,1.147500e+08,2.133000e+09,14606.0,1.0
...,...,...,...,...,...,...
2200,8.0,2201600.0,1.300500e+09,2.133000e+09,28691.0,64.0
2201,8.0,2201600.0,1.300500e+09,3.199000e+09,30605.0,64.0
2202,12.0,2201600.0,1.300500e+09,6.656000e+08,24763.0,64.0
2203,12.0,2201600.0,1.300500e+09,2.133000e+09,29589.0,64.0


In [59]:
# Join the timing and power tables on the full configuration key: cores,
# cpu, gpu, mem and batch size. The inner join keeps only configurations
# that appear in both tables.
merged_df = time_df.merge(
    power_df,
    how='inner',
    on=['cores', 'cpu', 'gpu', 'mem', 'bs'],
)

In [60]:
# The medians left the configuration columns as floats; store them as ints.
for config_column in ('cores', 'cpu', 'gpu', 'mem'):
    merged_df[config_column] = merged_df[config_column].astype(int)

# Scale the power reading down by 1000 (presumably mW -> W; TODO confirm the
# unit of the logged value). NOTE: re-running this cell divides again.
merged_df['observed_power'] = merged_df['observed_power'].div(1000.0)

In [61]:
# Inspect the merged table (timing and power side by side per configuration).
merged_df

Unnamed: 0,cores,cpu,gpu,mem,observed_time,bs,observed_power
0,4,422400,114750000,665600000,12.717248,1.0,13.190
1,4,422400,114750000,2133000000,15.813504,1.0,14.105
2,4,422400,114750000,3199000000,12.006416,1.0,14.810
3,8,422400,114750000,665600000,11.575360,1.0,13.598
4,8,422400,114750000,2133000000,11.393920,1.0,14.606
...,...,...,...,...,...,...,...
2200,8,2201600,1300500000,2133000000,2.102432,64.0,28.691
2201,8,2201600,1300500000,3199000000,1.991840,64.0,30.605
2202,12,2201600,1300500000,665600000,3.757072,64.0,24.763
2203,12,2201600,1300500000,2133000000,2.098240,64.0,29.589


In [62]:
# Write the merged table to "<model>_infer_data_final.csv" without the index.
output_csv = f"{model}_infer_data_final.csv"
merged_df.to_csv(output_csv, index=False)