From 3407acac05de86a584a8994f3df9625b6784bc0a Mon Sep 17 00:00:00 2001 From: CarlosGomes98 Date: Thu, 18 Sep 2025 14:56:18 +0100 Subject: [PATCH 1/2] add gbs 512 to flux rcp --- .../training_5.1.0/rcps_flux1.json | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json b/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json index 8b8075a..74aaaf3 100644 --- a/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json +++ b/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json @@ -1,4 +1,26 @@ { + "flux_ref_512": { + "Benchmark": "flux1", + "Creator": "NVIDIA", + "When": "Reference RCPs before v5.1", + "Platform": "8xDGX-B200", + "Precision": "BF16", + "BS": 512, + "Hyperparams": { + "opt_adamw_beta_1": 0.9, + "opt_adamw_beta_2": 0.95, + "opt_adamw_epsilon": 1e-8, + "opt_adamw_weight_decay": 0.1, + "opt_base_learning_rate": 2.0e-4, + "opt_learning_rate_warmup_steps": 0, + "opt_gradient_clip_norm": 1.0 + }, + "samples to converge": [ + 8388608, 8388608, 8388608, 8126464, 7864320, 8126464, 7864320, 7602176, + 8650752, 8126464, 8650752, 8126464, 8388608, 8126464, 8126464, 8126464, + 8126464, 8126464, 8126464, 7864320 + ] + }, "flux_ref_1024": { "Benchmark": "flux1", "Creator": "NVIDIA", From 02f3767e10f0f08efd43e045003baf78cb8a20cd Mon Sep 17 00:00:00 2001 From: CarlosGomes98 Date: Thu, 18 Sep 2025 14:58:21 +0100 Subject: [PATCH 2/2] fix hparams for rcp --- mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json b/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json index 74aaaf3..5da8269 100644 --- a/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json +++ b/mlperf_logging/rcp_checker/training_5.1.0/rcps_flux1.json @@ -12,7 +12,7 @@ "opt_adamw_epsilon": 1e-8, "opt_adamw_weight_decay": 0.1, "opt_base_learning_rate": 2.0e-4, - "opt_learning_rate_warmup_steps": 0, + "opt_learning_rate_warmup_steps": 1600, "opt_gradient_clip_norm": 1.0 }, "samples to converge": [