diff --git a/Makefile b/Makefile
index c8892b2..1ce6050 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 black: FORCE
-	black checkmate experiments setup.py --line-length 127
+	black scratch checkmate experiments setup.py --line-length 127
 
 test: FORCE
 	pytest tests
 
-FORCE: ;
\ No newline at end of file
+FORCE: ;
diff --git a/checkmate/core/solvers/strategy_approx_lp.py b/checkmate/core/solvers/strategy_approx_lp.py
index 11951e3..8b649b1 100644
--- a/checkmate/core/solvers/strategy_approx_lp.py
+++ b/checkmate/core/solvers/strategy_approx_lp.py
@@ -64,8 +64,9 @@ def solve_approx_lp_deterministic_sweep(
         s_ = (s >= threshold).astype(np.int)
         r_ = solve_r_opt(g, s_)
         schedule_, aux_data_ = schedule_from_rs(g, r_, s_)
-        if (allow_return_infeasible_schedule and aux_data is None) or \
-                (aux_data_.activation_ram <= budget and (aux_data is None or aux_data_.cpu <= aux_data.cpu)):
+        if (allow_return_infeasible_schedule and aux_data is None) or (
+            aux_data_.activation_ram <= budget and (aux_data is None or aux_data_.cpu <= aux_data.cpu)
+        ):
             aux_data = aux_data_
             schedule = schedule_
             min_threshold = threshold
@@ -160,7 +161,7 @@ def solve_approx_lp_randomized(
     eps_noise=0.01,
     solver_cores=os.cpu_count(),
     num_rounds=100,
-    return_rounds=False
+    return_rounds=False,
 ):
     """Randomized rounding of LP relaxation
 
@@ -224,7 +225,7 @@
 
         if aux_data.activation_ram <= budget and (best_solution[2] is None or aux_data.cpu <= best_solution[0]):
             best_solution = (aux_data.cpu, schedule, aux_data)
-        if (i+1) % 1 == 0:
+        if (i + 1) % 1 == 0:
            print(f"Rounded relaxation argmin {i+1} / num_rounds times, best cost {best_solution[0]}")
 
     schedule, aux_data = best_solution[1], best_solution[2]
@@ -247,9 +248,8 @@
         ),
     )
     if return_rounds:
-        return (scheduled_result, {
-            "cpu": rounding_cpus,
-            "activation_ram": rounding_activation_rams,
-            "in_budget": rounding_in_budgets
-        })
+        return (
+            scheduled_result,
+            {"cpu": rounding_cpus, "activation_ram": rounding_activation_rams, "in_budget": rounding_in_budgets},
+        )
     return scheduled_result
diff --git a/experiments/experiment_approxcomparison.py b/experiments/experiment_approxcomparison.py
index 0fdd643..c05e232 100644
--- a/experiments/experiment_approxcomparison.py
+++ b/experiments/experiment_approxcomparison.py
@@ -34,11 +34,13 @@ def extract_params():
     parser.add_argument("--skip-ilp", action="store_true", help="If set, skip running the ILP during evaluation.")
     return parser.parse_args()
 
+
 def b2gb(data):
     if hasattr(data, "__iter__"):
         return [d * 1e-9 for d in data]
     return data * 1e-9
 
+
 if __name__ == "__main__":
     args = extract_params()
 
@@ -65,16 +67,11 @@ def b2gb(data):
     # load model from Keras
     print("Loading model {}".format(args.model_name))
     model = get_keras_model(args.model_name)
-    g = dfgraph_from_keras(model, batch_size=args.batch_size, cost_model=cost_model,
-                           loss_cpu_cost=0, loss_ram_cost=(4 * args.batch_size))
-
-    common_kwargs = dict(
-        g=g,
-        budget=B,
-        print_to_console=False,
-        eps_noise=0,
-        approx=False,
-    )
+    g = dfgraph_from_keras(
+        model, batch_size=args.batch_size, cost_model=cost_model, loss_cpu_cost=0, loss_ram_cost=(4 * args.batch_size)
+    )
+
+    common_kwargs = dict(g=g, budget=B, print_to_console=False, eps_noise=0, approx=False)
 
     print("Common args:", common_kwargs)
 
@@ -114,7 +111,7 @@ def b2gb(data):
             write_log_file=scratch_dir / "ilp.log",
             print_to_console=False,
             eps_noise=0,
-            approx=False
+            approx=False,
         )
         if scheduler_result_ilp.schedule_aux_data is not None:
             plot_schedule(scheduler_result_ilp, False, save_file=scratch_dir / "CHECKM8_ILP.png")
@@ -129,35 +126,33 @@ def b2gb(data):
 
     # Deterministic rounding
     scheduler_lp_det = solve_approx_lp_deterministic_05_threshold(
-        write_log_file=scratch_dir / "lp_det_05.log",
-        allow_return_infeasible_schedule=True,
-        **common_kwargs
+        write_log_file=scratch_dir / "lp_det_05.log", allow_return_infeasible_schedule=True, **common_kwargs
     )
     if scheduler_lp_det.schedule_aux_data is not None:
         plot_schedule(scheduler_lp_det, False, save_file=scratch_dir / "CHECKM8_DET_APPROX_05.png")
-        data.append({
+        data.append(
+            {
                 "Strategy": str(scheduler_lp_det.solve_strategy.value),
                 "Name": "CHECKM8_DET_APPROX_05",
                 "CPU": scheduler_lp_det.schedule_aux_data.cpu,
                 "Activation RAM": scheduler_lp_det.schedule_aux_data.activation_ram,
-        })
+            }
+        )
 
     # Randomized rounding
     scheduler_lp_rand, rounding_stats = solve_approx_lp_randomized(
-        write_log_file=scratch_dir / "lp_rand.log",
-        num_rounds=args.num_rounds,
-        return_rounds=True,
-        **common_kwargs
+        write_log_file=scratch_dir / "lp_rand.log", num_rounds=args.num_rounds, return_rounds=True, **common_kwargs
    )
     if scheduler_lp_rand.schedule_aux_data is not None:
         plot_schedule(scheduler_lp_rand, False, save_file=scratch_dir / "CHECKM8_RAND_APPROX.png")
-        data.append({
+        data.append(
+            {
                 "Strategy": str(scheduler_lp_rand.solve_strategy.value),
                 "Name": "CHECKM8_RAND_APPROX",
                 "CPU": scheduler_lp_rand.schedule_aux_data.cpu,
                 "Activation RAM": scheduler_lp_rand.schedule_aux_data.activation_ram,
-        })
-
+            }
+        )
 
     # Plot solution memory usage vs cpu scatter plot
     sns.set()
@@ -168,34 +163,56 @@ def b2gb(data):
     plt.ylabel("GPU time (ms)")
 
     color, marker, markersize = SolveStrategy.get_plot_params(scheduler_result_all.solve_strategy)
-    plt.axhline(y=scheduler_result_all.schedule_aux_data.cpu / 1000, color=color, linestyle='--', label="Checkpoint all (ideal)")
+    plt.axhline(
+        y=scheduler_result_all.schedule_aux_data.cpu / 1000, color=color, linestyle="--", label="Checkpoint all (ideal)"
+    )
 
     if args.model_name in LINEAR_MODELS:
         color, marker, markersize = SolveStrategy.get_plot_params(scheduler_result_sqrtn.solve_strategy)
-        plt.scatter([b2gb(scheduler_result_sqrtn.schedule_aux_data.activation_ram)], [scheduler_result_sqrtn.schedule_aux_data.cpu / 1000],
-                    s=markersize ** 2, color=color, marker=marker, label="Chen $\sqrt{n}$")
+        plt.scatter(
+            [b2gb(scheduler_result_sqrtn.schedule_aux_data.activation_ram)],
+            [scheduler_result_sqrtn.schedule_aux_data.cpu / 1000],
+            s=markersize ** 2,
+            color=color,
+            marker=marker,
+            label="Chen $\sqrt{n}$",
+        )
 
     _, marker, markersize = SolveStrategy.get_plot_params(scheduler_lp_rand.solve_strategy)
-    plt.scatter(b2gb(rounding_stats["activation_ram"]), np.array(rounding_stats["cpu"]) / 1000,
-                s=markersize ** 2, color="lightcoral", marker=marker, label="Randomized rounding")
-    plt.axhline(y=np.mean(rounding_stats["cpu"]) / 1000, color="lightcoral", linestyle=':')
+    plt.scatter(
+        b2gb(rounding_stats["activation_ram"]),
+        np.array(rounding_stats["cpu"]) / 1000,
+        s=markersize ** 2,
+        color="lightcoral",
+        marker=marker,
+        label="Randomized rounding",
+    )
+    plt.axhline(y=np.mean(rounding_stats["cpu"]) / 1000, color="lightcoral", linestyle=":")
 
     color, marker, markersize = SolveStrategy.get_plot_params(scheduler_lp_det.solve_strategy)
-    plt.scatter([b2gb(scheduler_lp_det.schedule_aux_data.activation_ram)], [scheduler_lp_det.schedule_aux_data.cpu / 1000],
-                s=markersize ** 2, color="royalblue", marker=marker, label="Deterministic rounding")
+    plt.scatter(
+        [b2gb(scheduler_lp_det.schedule_aux_data.activation_ram)],
+        [scheduler_lp_det.schedule_aux_data.cpu / 1000],
+        s=markersize ** 2,
+        color="royalblue",
+        marker=marker,
+        label="Deterministic rounding",
+    )
 
     if not args.skip_ilp:
         color, marker, markersize = SolveStrategy.get_plot_params(scheduler_result_ilp.solve_strategy)
-        plt.scatter([b2gb(scheduler_result_ilp.schedule_aux_data.activation_ram)], [scheduler_result_ilp.schedule_aux_data.cpu / 1000],
-                    s=markersize ** 2, color=color, marker=marker, label="ILP")
+        plt.scatter(
+            [b2gb(scheduler_result_ilp.schedule_aux_data.activation_ram)],
+            [scheduler_result_ilp.schedule_aux_data.cpu / 1000],
+            s=markersize ** 2,
+            color=color,
+            marker=marker,
+            label="ILP",
+        )
 
     plt.legend()
-    plt.savefig(scratch_dir / "scatter.pdf",
-                bbox_inches="tight",
-                format="pdf")
-    plt.savefig(scratch_dir / "scatter.png",
-                bbox_inches="tight",
-                dpi=300)
+    plt.savefig(scratch_dir / "scatter.pdf", bbox_inches="tight", format="pdf")
+    plt.savefig(scratch_dir / "scatter.png", bbox_inches="tight", dpi=300)
 
     # Save results
     df = pd.DataFrame(data)
@@ -205,7 +222,4 @@ def b2gb(data):
 
     # Save data
     with open(scratch_dir / "data.pickle", "wb") as f:
-        pickle.dump({
-            "data": data,
-            "rounding_stats": rounding_stats
-        }, f, protocol=pickle.HIGHEST_PROTOCOL)
+        pickle.dump({"data": data, "rounding_stats": rounding_stats}, f, protocol=pickle.HIGHEST_PROTOCOL)
diff --git a/scratch/tf_test.py b/scratch/tf_test.py
index 2f28b08..6fd3a41 100644
--- a/scratch/tf_test.py
+++ b/scratch/tf_test.py
@@ -6,12 +6,7 @@ from copy import deepcopy
 
 # from tensorflow.python.ops import gradients_util as tfg
 
-from checkmate.core.schedule import (
-    OperatorEvaluation,
-    AllocateRegister,
-    DeallocateRegister,
-    Schedule
-)
+from checkmate.core.schedule import OperatorEvaluation, AllocateRegister, DeallocateRegister, Schedule
 
 # tf.compat.v1.disable_eager_execution()
 logging.basicConfig(level=logging.DEBUG)
@@ -23,14 +18,8 @@ def __init__(self, units=32):
         self.units = units
 
     def build(self, input_shape):
-        self.w = self.add_weight(
-            shape=(input_shape[-1], self.units),
-            initializer="random_normal",
-            trainable=True,
-        )
-        self.b = self.add_weight(
-            shape=(self.units,), initializer="random_normal", trainable=True
-        )
+        self.w = self.add_weight(shape=(input_shape[-1], self.units), initializer="random_normal", trainable=True)
+        self.b = self.add_weight(shape=(self.units,), initializer="random_normal", trainable=True)
 
     def call(self, inputs):
         return tf.matmul(inputs, self.w) + self.b
@@ -65,34 +54,17 @@ def get_grads(f, inputs, tvars):
 grad_conc = get_grads.get_concrete_function(mlp, x, mlp.trainable_variables)
 op_list = grad_conc.graph.get_operations()
 
-exclude_list = [
-    "Placeholder",
-    "ReadVariableOp",
-    "Const",
-    "BroadcastGradientArgs",
-    "Fill",
-]
-
-
-def copy_op(
-    op, new_name
-):  # taken from "tensorflow/contrib/copy_graph/python/util/copy_elements.py"
+exclude_list = ["Placeholder", "ReadVariableOp", "Const", "BroadcastGradientArgs", "Fill"]
+
+
+def copy_op(op, new_name):  # taken from "tensorflow/contrib/copy_graph/python/util/copy_elements.py"
     nnd = deepcopy(op.node_def)
     nnd.name = new_name
     nod = deepcopy(op.op_def)
     output_types = op._output_types[:]
     input_types = op._input_types[:]
     control_inputs = op.control_inputs[:]
-    new_op = tf.Operation(
-        nnd,
-        op.graph,
-        list(op.inputs),
-        output_types,
-        control_inputs,
-        input_types,
-        op,
-        nod,
-    )
+    new_op = tf.Operation(nnd, op.graph, list(op.inputs), output_types, control_inputs, input_types, op, nod)
     return new_op
 
 
@@ -156,9 +128,7 @@ def execute(fxn, op_list, op_dict, schedule, samp_inputs):
         if type(inst) == OperatorEvaluation:
             args = [registers[i] for i in inst.arg_regs]
             op = op_list[inst.id]
-            assert (
-                len(op.outputs) == 1
-            ), "ops which output two tensors not yet supported"
+            assert len(op.outputs) == 1, "ops which output two tensors not yet supported"
             if op in output_ops:
                 new_op = op  #