In [14]:
import json
import numpy as np

In [15]:
def load_json(filepath):
	"""
	Read a JSON document from disk and return the decoded Python object.

	:param filepath: path of the JSON file to read
	:return: whatever ``json.load`` decodes from the file (dict, list, ...)
	"""
	# JSON text is UTF-8; naming the encoding keeps the result independent of
	# the platform's default locale encoding.
	with open(filepath, 'r', encoding='utf-8') as file:
		return json.load(file)

In [16]:
# Side length of the square grid used for the "standard" efficiency table:
# it is passed as both the row count and the column count when the table is
# resampled with interpolate_2d_array.
STANDARD_SIZE = 10

In [17]:
# Source file of the efficiency table (a 2D grid, since it is later resampled
# by interpolate_2d_array).
EFFICIENCY_TABLE_PATH = '../assets/result/energy/Efficiency-Table-960.json'
EFFICIENCY_TABLE = load_json(EFFICIENCY_TABLE_PATH)
# Element-wise complement (1 - value) of the loaded table, stored as nested lists.
# NOTE(review): "inverse" here means 1 - x, not 1 / x.
EFFICIENCY_TABLE_INVERSE = (1. - np.array(EFFICIENCY_TABLE)).tolist()

In [18]:
def interpolate_2d_array(arr, new_shape):
	"""
	Resample a 2D grid to ``new_shape`` using separable linear interpolation.

	Each axis is sampled at evenly spaced positions running from the first to
	the last original index, so the border rows/columns of the result are
	linear resamplings of the border rows/columns of the input.

	:param arr: 2D list of numbers
	:param new_shape: tuple of (new_rows, new_cols)
	:return: resampled 2D list with new_rows rows and new_cols columns
	"""
	grid = np.array(arr)
	n_rows_in, n_cols_in = grid.shape
	n_rows_out, n_cols_out = new_shape

	# Original integer indices and the fractional positions sampled on each axis.
	col_index = np.arange(n_cols_in)
	row_index = np.arange(n_rows_in)
	col_targets = np.linspace(0, n_cols_in - 1, n_cols_out)
	row_targets = np.linspace(0, n_rows_in - 1, n_rows_out)

	def stretch_row(values):
		# Resample one row-like vector to the new width.
		return np.interp(col_targets, col_index, values)

	def stretch_column(values):
		# Resample one column-like vector to the new height.
		return np.interp(row_targets, row_index, values)

	# Pass 1: bring every original row to the new width.
	widened = np.zeros((n_rows_in, n_cols_out))
	for r in range(n_rows_in):
		widened[r, :] = stretch_row(grid[r, :])

	# Pass 2: bring every widened column to the new height.
	result = np.zeros((n_rows_out, n_cols_out))
	for c in range(n_cols_out):
		result[:, c] = stretch_column(widened[:, c])

	# Rewrite the border straight from the input's border: first and last row,
	# then first and last column (the order matters when an output axis has
	# length 1, because the later assignment wins).
	for edge in (0, -1):
		result[edge, :] = stretch_row(grid[edge, :])
	for edge in (0, -1):
		result[:, edge] = stretch_column(grid[:, edge])

	return result.tolist()

In [19]:
# Resample the complemented efficiency table onto a
# STANDARD_SIZE x STANDARD_SIZE grid and persist the result as indented JSON.
STANDARD_EFFICIENCY_TABLE = interpolate_2d_array(EFFICIENCY_TABLE_INVERSE, (STANDARD_SIZE, STANDARD_SIZE))
with open("../assets/result/optimizer_energy/Standard-Efficiency-Table" + ".json", 'w') as file:
    json.dump(STANDARD_EFFICIENCY_TABLE, file, indent=4)

## Dataset

In [20]:
# Measurement table keyed by "<cpu_freq>:<gpu_freq>". Each entry is read as
# [0] = time and [1], [2] = two energy terms that are summed
# (see evaluate_power / evaluate_throughput).
RESULT_PATH = '../assets/result/energy/YOLOv8-1800-960.json'
ENERGY_RESULT = load_json(RESULT_PATH)
# Divisor applied to energy / time when computing power.
# NOTE(review): presumably converts micro-units to base units — confirm.
ENERGY_SCALE = 10**6
# Frame count divided by an entry's time to obtain its throughput.
NUM_FRAME = 1800
# Spacing between consecutive REQUIRED_THROUGHPUT samples on the evaluation
# time axis. NOTE(review): unit not stated here (presumably seconds) — confirm.
RESAMPLING_INTERVAL = 15

# Candidate CPU frequencies, listed in ascending order.
# NOTE(review): unit not stated here (presumably kHz) — confirm.
CPU_FREQ = [
		115200, 192000, 268800, 345600, 422400, 499200, 576000, 652800, 729600,
		806400, 883200, 960000, 1036800, 1113600, 1190400, 1267200, 1344000,
		1420800, 1497600, 1574400, 1651200, 1728000, 1804800, 1881600, 1958400,
		2035200, 2112000, 2188800, 2201600
]

# Candidate GPU frequencies, listed in ascending order.
# NOTE(review): unit not stated here (presumably Hz) — confirm.
GPU_FREQ = [
		306000000, 408000000, 510000000, 612000000, 714000000, 816000000, 918000000,
		1020000000, 1122000000, 1224000000, 1300500000
]

# Sequence of throughput targets; the optimization loop processes one per step.
COMBINED_FPS_PATH = "../assets/result/accuracy/Combined-FPS.json"
REQUIRED_THROUGHPUT = load_json(COMBINED_FPS_PATH)

def evaluate_power(cpu_freq, gpu_freq, result_dict=ENERGY_RESULT):
	"""
	Look up the average power recorded for one CPU/GPU frequency pair.

	:param cpu_freq: CPU frequency forming the first half of the lookup key
	:param gpu_freq: GPU frequency forming the second half of the lookup key
	:param result_dict: mapping from "<cpu_freq>:<gpu_freq>" to a record whose
		item 0 is the time and whose items 1 and 2 are energy terms
	:return: (record[1] + record[2]) / record[0] / ENERGY_SCALE
	"""
	record = result_dict[f"{cpu_freq}:{gpu_freq}"]
	elapsed, total_energy = record[0], record[1] + record[2]
	return total_energy / elapsed / ENERGY_SCALE

def evaluate_throughput(cpu_freq, gpu_freq, result_dict=ENERGY_RESULT):
	"""
	Look up the throughput recorded for one CPU/GPU frequency pair.

	:param cpu_freq: CPU frequency forming the first half of the lookup key
	:param gpu_freq: GPU frequency forming the second half of the lookup key
	:param result_dict: mapping from "<cpu_freq>:<gpu_freq>" to a record whose
		item 0 is the time
	:return: NUM_FRAME divided by the recorded time
	"""
	elapsed = result_dict[f"{cpu_freq}:{gpu_freq}"][0]
	return NUM_FRAME / elapsed

## Optimization

In [21]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.preprocessing import StandardScaler
from sklearn.exceptions import NotFittedError

In [22]:
# Resample the standard table to the real frequency grid: one row per GPU
# frequency and one column per CPU frequency (indexed as [gpu_index, cpu_index]).
real_efficiency_table = interpolate_2d_array(STANDARD_EFFICIENCY_TABLE, (len(GPU_FREQ), len(CPU_FREQ)))

In [23]:
# Weight of the efficiency term in the combined score used by
# suggest_combination; (1 - w) weights closeness to the required throughput.
w = 0.375
# Seed NumPy's global RNG.
# NOTE(review): presumably for reproducible optimizer restarts in the GP fit — confirm.
np.random.seed(42)
# Efficiency grid as an array, indexed [gpu_index, cpu_index].
efficiency_array = np.array(real_efficiency_table)

# Observations collected so far: 'X' holds [cpu_freq, gpu_freq] pairs and 'y'
# the throughput measured for each pair. Re-running this cell resets them.
observed_data = {'X': [], 'y': []}

# Gaussian Process Regressor with a constant * RBF kernel (initial values 1.0
# and 1, with the given hyperparameter bounds), 10 optimizer restarts and alpha=1e-2.
# NOTE(review): the captured output of the optimization loop shows L-BFGS
# line-search warnings during fitting; the targets are not normalized here
# (normalize_y is left at its default) — worth checking whether that is the cause.
kernel = C(1.0, (1e-3, 1e3)) * RBF(1, (1e-2, 1e2))
gpr = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, alpha=1e-2)

# Standard scaler for the frequency inputs; it is refitted on all observations
# each time update_model fits the regressor.
scaler = StandardScaler()

def update_model(cpu_freq, gpu_freq, real_throughput):
	"""
	Record one measured sample and refit the throughput model.

	The sample is appended to the module-level ``observed_data``. Once more
	than one sample is stored, the module-level ``scaler`` is refitted on all
	stored frequency pairs and ``gpr`` is refitted on the scaled inputs.

	:param cpu_freq: CPU frequency of the measured sample
	:param gpu_freq: GPU frequency of the measured sample
	:param real_throughput: throughput measured at that frequency pair
	:return: None
	"""
	samples, targets = observed_data['X'], observed_data['y']
	samples.append([cpu_freq, gpu_freq])
	targets.append(real_throughput)

	# With a single stored sample nothing is fitted; fitting starts from the
	# second observation onwards.
	if len(samples) <= 1:
		return

	scaled_inputs = scaler.fit_transform(np.array(samples))  # standardization
	gpr.fit(scaled_inputs, np.array(targets))

def suggest_combination(required_throughput):
	"""
	Pick the (cpu_freq, gpu_freq) pair to run next for a throughput target.

	Selection rules, in order:
	1. No observation stored yet: the highest CPU and GPU frequencies.
	2. Exactly one observation stored: the lowest CPU and GPU frequencies.
	3. Otherwise the fitted model predicts the throughput of every pair. Among
	   the pairs predicted to reach ``required_throughput`` the one with the
	   highest score ``w * efficiency + (1 - w) * (1 - surplus)`` is chosen,
	   where ``surplus`` is the relative predicted excess capped at 1.
	4. If no pair is predicted to reach the target, the pair with the highest
	   value in ``efficiency_array`` is returned.

	Reads the module-level ``observed_data``, ``scaler``, ``gpr``,
	``efficiency_array``, ``w``, ``CPU_FREQ`` and ``GPU_FREQ``.

	:param required_throughput: throughput the chosen pair should reach
	:return: tuple (cpu_freq, gpu_freq); both values are always taken from
		CPU_FREQ / GPU_FREQ, so they are built-in ints in every branch
	"""
	n_observed = len(observed_data['X'])
	if n_observed == 0:
		return CPU_FREQ[-1], GPU_FREQ[-1]
	if n_observed == 1:
		return CPU_FREQ[0], GPU_FREQ[0]

	# Enumerate the grid by index (CPU outer, GPU inner) so every candidate
	# keeps its position in the frequency lists and in efficiency_array.
	index_pairs = [(ci, gi) for ci in range(len(CPU_FREQ)) for gi in range(len(GPU_FREQ))]
	search_space = np.array([[CPU_FREQ[ci], GPU_FREQ[gi]] for ci, gi in index_pairs])

	# Predict throughputs with the fitted model; only the mean is needed.
	search_space_scaled = scaler.transform(search_space)  # standardization
	predicted_throughputs = gpr.predict(search_space_scaled)

	# Keep the combinations predicted to meet the required throughput.
	meets_requirement = predicted_throughputs >= required_throughput

	# If no valid combination, return the most efficient combination.
	if not meets_requirement.any():
		gpu_index, cpu_index = np.unravel_index(np.argmax(efficiency_array, axis=None), efficiency_array.shape)
		return CPU_FREQ[cpu_index], GPU_FREQ[gpu_index]

	valid_pairs = [pair for pair, ok in zip(index_pairs, meets_requirement) if ok]
	valid_predictions = predicted_throughputs[meets_requirement]

	# Efficiency of each valid combination (array is indexed [gpu, cpu]).
	efficiencies = [efficiency_array[gi, ci] for ci, gi in valid_pairs]

	# Relative surplus over the target, capped at 1. A non-positive target is
	# met by every candidate, so the surplus is treated as fully capped instead
	# of dividing by zero.
	if required_throughput > 0:
		distances = [min((predicted - required_throughput) / required_throughput, 1) for predicted in valid_predictions]
	else:
		distances = [1] * len(valid_pairs)

	# Combined score: reward efficiency and a small surplus.
	scores = [w * eff + (1 - w) * (1 - dist) for eff, dist in zip(efficiencies, distances)]
	best_cpu_index, best_gpu_index = valid_pairs[int(np.argmax(scores))]
	return CPU_FREQ[best_cpu_index], GPU_FREQ[best_gpu_index]

In [24]:
# Start from an empty observation history so that re-running this cell gives
# the same sequence of suggestions as a fresh run.
observed_data['X'].clear()
observed_data['y'].clear()

# Optional warm start (disabled): seed the model with the two extreme
# operating points before the main loop.
# highest_throughput = evaluate_throughput(CPU_FREQ[-1], GPU_FREQ[-1])
# lowest_throughput = evaluate_throughput(CPU_FREQ[0], GPU_FREQ[0])
# update_model(CPU_FREQ[-1], GPU_FREQ[-1], highest_throughput)
# update_model(CPU_FREQ[0], GPU_FREQ[0], lowest_throughput)

selected_frequencies = []

# Closed loop: for each throughput target, ask for a frequency pair, look up
# the throughput recorded for that pair, and feed it back into the model.
for required_throughput in REQUIRED_THROUGHPUT:
	cpu_freq, gpu_freq = suggest_combination(required_throughput)
	selected_frequencies.append((cpu_freq, gpu_freq))
	real_throughput = evaluate_throughput(cpu_freq, gpu_freq)
	update_model(cpu_freq, gpu_freq, real_throughput)

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/st

## Evaluation

In [25]:
# Evaluate the optimizer's choices against a reference table.
OPTIMIZER_ENERGY_RESULT = selected_frequencies
EFFICIENT_POWER_RESULT_PATH = '../assets/result/energy/Dict-Per-Real-Second-960.json'
# Indexed below by str(rounded throughput); element [1] of each value is read
# as a record with [0] = time and [1], [2] = energy terms.
# NOTE(review): schema inferred from the usage below — confirm against the file.
EFFICIENT_POWER_RESULT = load_json(EFFICIENT_POWER_RESULT_PATH)

# Per-step metrics, one entry per selected frequency pair.
power_overuse_rate_list = []
output_throughput_list = []
output_throughput_surplus_list = []
output_throughput_shortage_list = []
for i, freq_tuple in enumerate(OPTIMIZER_ENERGY_RESULT):
	# Time, throughput (rounded to an integer) and power recorded for the
	# frequency pair selected at step i.
	key = f"{freq_tuple[0]}:{freq_tuple[1]}"
	output_time = ENERGY_RESULT[key][0]
	output_throughput = round(NUM_FRAME / output_time)
	output_power = (ENERGY_RESULT[key][1] + ENERGY_RESULT[key][2]) / output_time / ENERGY_SCALE

	# Reference power looked up for the same (rounded) throughput; a throughput
	# missing from the reference table raises KeyError here.
	efficient_data = EFFICIENT_POWER_RESULT[str(output_throughput)][1]
	efficient_time = efficient_data[0]
	efficient_power = (efficient_data[1] + efficient_data[2]) / efficient_time / ENERGY_SCALE

	# Surplus / shortage: the positive part of the difference between achieved
	# and target throughput, divided by the mean of the two values.
	target_throughput = REQUIRED_THROUGHPUT[i]
	output_throughput_surplus_list.append(max((output_throughput - target_throughput), 0) / (0.5 * (output_throughput + target_throughput)))
	output_throughput_shortage_list.append(max((target_throughput - output_throughput), 0) / (0.5 * (output_throughput + target_throughput)))
	
	output_throughput_list.append(output_throughput)
	# Signed difference between achieved and reference power, divided by their mean.
	power_overuse_rate_list.append( (output_power - efficient_power) / (0.5 * (output_power + efficient_power)) )

# Time axis: step index multiplied by RESAMPLING_INTERVAL, as floats.
# NOTE(review): not used within the visible cells — presumably consumed by a plot elsewhere.
time = list(np.array(range(len(REQUIRED_THROUGHPUT))) * RESAMPLING_INTERVAL * 1.)

# Report the mean of each per-step metric.
print(f"Average Power Overuse Rate: {np.average(np.array(power_overuse_rate_list))}")
print(f"Average Throughput Surplus Rate: {np.average(np.array(output_throughput_surplus_list))}")
print(f"Average Throughput Shortage Rate: {np.average(np.array(output_throughput_shortage_list))}")

Average Power Overuse Rate: 0.02814101964359242
Average Throughput Surplus Rate: 0.039693562610229285
Average Throughput Shortage Rate: 0.10857422147744727


In [26]:
# Coerce every frequency to a built-in int so json.dump never receives a
# NumPy integer scalar, then persist the selected pairs as indented JSON.
selected_frequencies = [(int(x), int(y)) for x, y in selected_frequencies]
with open("../assets/result/optimizer_energy/Real-Frequency" + ".json", 'w') as file:
		json.dump(selected_frequencies, file, indent=4)