diff --git a/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_405b.json b/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_405b.json index 40866ea..d1a7620 100644 --- a/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_405b.json +++ b/mlperf_logging/rcp_checker/training_5.1.0/rcps_llama31_405b.json @@ -1,27 +1,9 @@ -{ - "llama31_405b_ref_1008": - { - "Benchmark": "llama31_405b", - "Creator": "NVIDIA", - "When": "Reference RCPs before 5.0 submission", - "Platform": "288xDGX-H100", - "Precision": "BF16", - "BS": 1008, - "Hyperparams": { - "opt_base_learning_rate": 7e-05, - "opt_learning_rate_warmup_steps": 9143, - "gradient_accumulation_steps": 126 - }, - "Epochs to converge": [ - 324576,324576,324576, - 324576,324576,324576 - ] - }, +{ "llama31_405b_ref_1152": { "Benchmark": "llama31_405b", "Creator": "NVIDIA", - "When": "Reference RCPs before 5.0 submission", + "When": "Reference RCPs after 5.0 submission", "Platform": "288xDGX-H100", "Precision": "BF16", "BS": 1152, @@ -31,8 +13,8 @@ "gradient_accumulation_steps": 144 }, "Epochs to converge": [ - 322560,322560,322560, - 322560,322560,322560 + 313344,313344,313344, + 331776,313344,294912 ] }, @@ -40,7 +22,7 @@ { "Benchmark": "llama31_405b", "Creator": "NVIDIA", - "When": "Reference RCPs before 5.0 submission", + "When": "Reference RCPs after 5.0 submission", "Platform": "288xDGX-H100", "Precision": "BF16", "BS": 2304, @@ -50,15 +32,16 @@ "gradient_accumulation_steps": 288 }, "Epochs to converge": [ - 368640,368640,368640, - 368640,414720,414720 + 368640,350208,387072, + 368640,368640,368640 ] }, + "llama31_405b_ref_4608": { "Benchmark": "llama31_405b", "Creator": "NVIDIA", - "When": "Reference RCPs before 5.0 submission", + "When": "Reference RCPs after 5.0 submission", "Platform": "288xDGX-H100", "Precision": "BF16", "BS": 4608, @@ -68,45 +51,10 @@ "gradient_accumulation_steps": 576 }, "Epochs to converge": [ - 460800,460800,506880, - 506880,506880,506880 - ] - }, - "llama31_405b_ref_6912": - { - "Benchmark": "llama31_405b", - "Creator": "NVIDIA", - "When": "Reference RCPs before 5.0 submission", - "Platform": "72xDGX-H100", - "Precision": "BF16", - "BS": 6912, - "Hyperparams": { - "opt_base_learning_rate": 48e-05, - "opt_learning_rate_warmup_steps": 1334, - "gradient_accumulation_steps": 3456 - }, - "Epochs to converge": [ - 580608,580608,580608, - 628992,628992,628992 - ] - }, - "llama31_405b_ref_9216": - { - "Benchmark": "llama31_405b", - "Creator": "NVIDIA", - "When": "Reference RCPs before 5.0 submission", - "Platform": "288xDGX-H100", - "Precision": "BF16", - "BS": 9216, - "Hyperparams": { - "opt_base_learning_rate": 64e-05, - "opt_learning_rate_warmup_steps": 1000, - "gradient_accumulation_steps": 1152 - }, - "Epochs to converge": [ - 645120,645120,691200, - 691200,737280,737280 + 497664,497664,460800, + 497664,479232,497664 ] } } - \ No newline at end of file + +