In [85]:
import pandas as pd
import numpy as np

In [86]:
# Load the table of sampled power modes; its 'powermode' column is used later
# to filter the NN results.
sampled_pwds = pd.read_csv(filepath_or_buffer="tl/e24_random_50_pwmds.csv")

In [87]:
# Model whose result files are analysed; interpolated into the CSV file names
# and used to choose the upper end of the power-budget sweep.
model_name = "bert"

In [88]:
# Resolve both per-model input files first, then load them.
# NOTE(review): these are absolute, user-specific paths — consider a configurable data dir.
pareto_csv = f"/home/saisamarth/exp/tl/e24_{model_name}_NN_pareto_powermodes_pred_250_sampled.csv"
merged_csv = f"/home/saisamarth/exp/tl/e24_merged_observed_predicted_{model_name}_NN_sampled_250.csv"
nn_pareto_df = pd.read_csv(pareto_csv)
nn_df = pd.read_csv(merged_csv)

In [89]:
# Replace the loaded table with a plain list of its power mode identifiers.
# Note: this rebinds the name, so the cell cannot be re-run without reloading.
sampled_pwds = [pwd for pwd in sampled_pwds['powermode'].values]

In [90]:
# Restrict the NN table to the rows whose power mode is in the sampled list.
is_sampled = nn_df['powermode'].isin(sampled_pwds)
random_df = nn_df[is_sampled]


In [91]:
# Renumber the rows 0..n-1 after filtering. Rebinding the result (instead of
# `inplace=True`) avoids mutating a boolean-filtered slice in place.
random_df = random_df.reset_index(drop=True)

In [92]:
# The random baseline only uses observed measurements, so remove the
# model-predicted columns from it.
predicted_cols = ['predicted_time', 'predicted_power']
random_df = random_df.drop(columns=predicted_cols)

In [93]:
# Power budgets are swept in steps of 1 from 10 up to a model-dependent
# maximum (60 for "bert", 50 for any other model).
# NOTE(review): units are not stated here — presumably watts; confirm.
max_budget = 60 if model_name == "bert" else 50
power_list = np.arange(10, max_budget + 1, 1)

In [94]:
# Random baseline: for every power budget, pick the sampled power mode with
# the lowest observed time among those whose observed power fits the budget.
# Budgets with no feasible mode get a NaN row so there is one row per budget.
random_records = []
for power_budget in power_list:
    feasible = random_df[random_df['observed_power'] <= power_budget]
    if feasible.empty:
        # Explicit "nothing fits" check. This replaces a bare `except:` that
        # also silenced unrelated failures (e.g. a missing column).
        random_records.append([np.nan, np.nan, np.nan, float(power_budget)])
        continue
    best_time = feasible.sort_values(by='observed_time').iloc[0]
    random_records.append([
        best_time['powermode'],
        best_time['observed_time'],
        best_time['observed_power'],
        # Stored as float, matching the 10.0, 11.0, ... values shown for this
        # column in the merged table.
        float(power_budget),
    ])

# Build the frame once instead of growing it row by row with `.loc[len(df)]`.
result_df_random = pd.DataFrame(
    random_records,
    columns=['powermode', 'observed_time', 'observed_power', 'power_budget'],
)


In [95]:
# NN-guided selection: for every power budget, choose the power mode with the
# lowest *predicted* time among those whose *predicted* power fits the budget,
# then record that mode's *observed* time and power. Budgets with no candidate
# get a NaN row so there is one row per budget.
nn_records = []
for power_budget in power_list:
    candidates = nn_df[nn_df['predicted_power'] <= power_budget]
    if candidates.empty:
        # Explicit "nothing fits" check. This replaces a bare `except:` that
        # also silenced unrelated failures (e.g. a missing column).
        nn_records.append([np.nan, np.nan, np.nan, float(power_budget)])
        continue
    best_time = candidates.sort_values(by='predicted_time').iloc[0]
    nn_records.append([
        best_time['powermode'],
        best_time['observed_time'],
        best_time['observed_power'],
        # Stored as float, matching the 10.0, 11.0, ... values shown for this
        # column in the merged table.
        float(power_budget),
    ])

# Build the frame once instead of growing it row by row with `.loc[len(df)]`.
result_df_nn = pd.DataFrame(
    nn_records,
    columns=['powermode', 'observed_time', 'observed_power', 'power_budget'],
)


In [96]:
# Join the random-baseline and NN results side by side, one row per power
# budget; overlapping column names get the '_random' / '_nn' suffixes.
result_df = result_df_random.merge(
    result_df_nn,
    on='power_budget',
    suffixes=('_random', '_nn'),
)

In [97]:
# Preview the first rows of the merged random-vs-NN table.
result_df.head()

Unnamed: 0,powermode_random,observed_time_random,observed_power_random,power_budget,powermode_nn,observed_time_nn,observed_power_nn
0,,,,10.0,,,
1,,,,11.0,,,
2,,,,12.0,,,
3,,,,13.0,,,
4,,,,14.0,,,


In [98]:
def validate_nn_solution(data, tolerance_factor=1.1):
    """Flag whether each NN-selected power mode stays within its power budget.

    A boolean ``flag`` column is added to ``data`` in place. A row is marked
    ``False`` when ``observed_power_nn`` exceeds
    ``power_budget * tolerance_factor`` and ``True`` otherwise. Rows whose
    ``observed_power_nn`` is NaN stay ``True`` because a comparison with NaN
    is never true.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``observed_power_nn`` and ``power_budget``.
    tolerance_factor : float, default 1.1
        Multiplier applied to the budget before comparing. The default allows
        the observed power to overshoot the budget by 10%; use 1.0 for a
        strict check.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, now carrying the ``flag`` column.
    """
    data['flag'] = True
    # Allowed power is the budget scaled by the tolerance, not the raw budget.
    over_budget = data['observed_power_nn'] > (data['power_budget'] * tolerance_factor)
    data.loc[over_budget, 'flag'] = False
    return data

In [99]:
# Add the boolean 'flag' column that marks whether each NN pick's observed
# power stays within the allowance checked by validate_nn_solution.
result_df = validate_nn_solution(result_df)

In [100]:
# Keep only the budgets for which the NN selection produced a result.
# Rebinding the result (instead of `inplace=True`) avoids mutating the frame
# in place.
result_df = result_df.dropna(subset=['observed_time_nn'])

In [101]:
# Display the comparison table after flagging and dropping budgets with no NN result.
result_df

Unnamed: 0,powermode_random,observed_time_random,observed_power_random,power_budget,powermode_nn,observed_time_nn,observed_power_nn,flag
7,4_1651200_114750000_2133000000,7859.723633,16.969,17.0,12_729600_114750000_665600000,7886.872314,16.065,True
8,4_1958400_114750000_3199000000,7854.647217,17.873,18.0,8_1344000_318750000_665600000,4145.834229,19.076,True
9,4_1036800_318750000_665600000,4144.67041,18.772,19.0,4_1036800_522750000_665600000,3967.600952,19.278,True
10,8_729600_522750000_665600000,3961.261963,19.569,20.0,8_1651200_522750000_665600000,3956.740845,19.682,True
11,4_1651200_726750000_665600000,3931.531006,20.878,21.0,8_1651200_318750000_2133000000,2869.890259,22.489,True
12,4_422400_318750000_2133000000,2908.035889,21.992,22.0,8_1958400_318750000_2133000000,2870.182251,22.895,True
13,12_1651200_318750000_2133000000,2862.352051,22.89,23.0,8_1958400_318750000_2133000000,2870.182251,22.895,True
14,12_1344000_318750000_3199000000,2845.308716,23.996,24.0,8_1958400_522750000_2133000000,1785.753418,28.109,False
15,12_1651200_318750000_3199000000,2844.659302,24.397,25.0,12_2201600_522750000_2133000000,1806.018188,28.911,False
16,12_2201600_318750000_3199000000,2843.609131,25.305,26.0,12_2201600_522750000_2133000000,1806.018188,28.911,False


In [102]:
# Inspect the observed vs. predicted values for one specific power mode.
target_powermode = '4_1036800_522750000_2133000000'
nn_df[nn_df['powermode'] == target_powermode]

Unnamed: 0,observed_time,observed_power,powermode,predicted_time,predicted_power
145,1790.215393,27.307,4_1036800_522750000_2133000000,2102.1733,23.242609


In [103]:
# Mean absolute percentage error of predicted vs. observed power, expressed
# as a fraction (not multiplied by 100).
abs_power_err = (nn_pareto_df['predicted_power'] - nn_pareto_df['observed_power']).abs()
(abs_power_err / nn_pareto_df['observed_power']).mean()

0.09796135150065297

In [104]:
# Count how many NN picks passed (True) or failed (False) the budget check;
# the two counts sum to the number of rows left in result_df.
print(result_df['flag'].value_counts())

flag
True     26
False    18
Name: count, dtype: int64


In [105]:
# Write the comparison table to a per-model CSV in the working directory,
# without the row index.
output_csv = f"{model_name}_random_vs_nn_train_alt.csv"
result_df.to_csv(output_csv, index=False)