From 4f4982a03428a02c1ae94d3eb8c39e2874ad938f Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Fri, 21 Nov 2025 04:50:39 +0000
Subject: [PATCH 01/13] Update reference values

Signed-off-by: Ann Kuruvilla
---
 tests/finetune/reference_data.py | 334 +++++++++++++++----------------
 tests/finetune/test_finetune.py  |   2 +-
 2 files changed, 168 insertions(+), 168 deletions(-)

diff --git a/tests/finetune/reference_data.py b/tests/finetune/reference_data.py
index a2a5438f5..c94c03b0b 100644
--- a/tests/finetune/reference_data.py
+++ b/tests/finetune/reference_data.py
@@ -13,206 +13,206 @@
     "llama_3.2_1B_config_alpaca_single_device": {
         "description": "Baseline for Llama on Alpaca single-device",
         "train_step_losses": [
-            1.5112206935882568,
-            1.2211230993270874,
-            1.9942185878753662,
-            2.093623161315918,
-            0.9168124198913574,
-            1.2125635147094727,
-            0.3648962676525116,
-            1.6231939792633057,
-            0.8259601593017578,
-            0.7741442918777466,
-            1.7359141111373901,
-            2.118462085723877,
-            2.061161994934082,
-            0.8256913423538208,
-            0.8088029623031616,
-            1.761340618133545,
-            1.6828027963638306,
-            1.3538823127746582,
-            2.0672550201416016,
-            3.1532647609710693,
+            1.5110896825790405,
+            1.2206485271453857,
+            1.9950776100158691,
+            2.091615676879883,
+            0.9182446599006653,
+            1.1993569135665894,
+            0.36413607001304626,
+            1.6241482496261597,
+            0.8270177245140076,
+            0.7749958634376526,
+            1.73696768283844,
+            2.120077610015869,
+            2.061460256576538,
+            0.8267984390258789,
+            0.8105809688568115,
+            1.7627557516098022,
+            1.6819559335708618,
+            1.3528242111206055,
+            2.0654125213623047,
+            3.156151294708252,
         ],
         "eval_step_losses": [
-            1.462059736251831,
-            0.24527676403522491,
-            1.046107292175293,
-            1.6403586864471436,
-            1.395291805267334,
-            2.8664817810058594,
-            1.035412311553955,
-            1.8670039176940918,
-            3.8079662322998047,
-            0.6516809463500977,
+            1.4607517719268799,
+            0.24302150309085846,
+            1.0471211671829224,
+            1.642044186592102,
+            1.3949533700942993,
+            2.8850066661834717,
+            1.0366586446762085,
+            1.8661959171295166,
+            3.81632924079895,
+            0.6577113270759583,
         ],
         "train_step_metrics": [
-            4.532259941101074,
-            3.390994071960449,
-            7.34645938873291,
-            8.114261627197266,
-            2.5013046264648438,
-            3.3620924949645996,
-            1.4403645992279053,
-            5.069255828857422,
-            2.2840728759765625,
-            2.1687355041503906,
-            5.674112319946289,
-            8.318334579467773,
-            7.855090141296387,
-            2.283458948135376,
-            2.2452187538146973,
-            5.820234775543213,
-            5.380615711212158,
-            3.872429847717285,
-            7.903097629547119,
-            23.412376403808594,
+            4.531666278839111,
+            3.389385223388672,
+            7.352773189544678,
+            8.09798812866211,
+            2.504889488220215,
+            3.3179824352264404,
+            1.43927001953125,
+            5.074095249176025,
+            2.286489486694336,
+            2.1705832481384277,
+            5.680093288421631,
+            8.33178424835205,
+            7.857433319091797,
+            2.2859883308410645,
+            2.2492144107818604,
+            5.828476905822754,
+            5.376060962677002,
+            3.8683345317840576,
+            7.8885498046875,
+            23.480052947998047,
         ],
         "eval_step_metrics": [  # steps 0-9
-            4.31483793258667,
-            1.2779749631881714,
-            2.8465487957000732,
-            5.157018661499023,
-            4.036152362823486,
-            17.575077056884766,
-            2.816267251968384,
-            6.468885898590088,
-            45.05870819091797,
-            1.9187631607055664,
+            4.309197902679443,
+            1.27509605884552,
+            2.8494362831115723,
+            5.1657185554504395,
+            4.034786224365234,
+            17.9036865234375,
+            2.819779396057129,
+            6.463661193847656,
+            45.437110900878906,
+            1.9303690195083618,
         ],
     },
     # Scenario 2: Single-device llama 3.2-1B training on GSM8k dataset.
"llama_3.2_1B_config_gsm8k_single_device": { "description": "Baseline for Llama on GSM8k single-device", "train_step_losses": [ - 2.250276803970337, - 2.3231687545776367, - 1.9379945993423462, - 1.5981022119522095, - 1.9867562055587769, - 1.4573354721069336, - 1.8969658613204956, - 1.2177824974060059, - 1.6489791870117188, - 1.5380687713623047, - 1.4025083780288696, - 1.5301083326339722, - 1.6858205795288086, - 1.383747935295105, - 1.7968919277191162, - 1.4075607061386108, - 1.6447738409042358, - 1.2807793617248535, - 0.8450672030448914, - 1.5795941352844238, + 2.250361204147339, + 2.3252110481262207, + 1.9360781908035278, + 1.5984115600585938, + 1.9874038696289062, + 1.4579044580459595, + 1.8975679874420166, + 1.2175723314285278, + 1.6473736763000488, + 1.537960410118103, + 1.4019465446472168, + 1.5310447216033936, + 1.6878201961517334, + 1.3849903345108032, + 1.7976438999176025, + 1.4060133695602417, + 1.646375060081482, + 1.2835280895233154, + 0.8465587496757507, + 1.5783095359802246, ], "eval_step_losses": [ - 1.7081595659255981, - 1.719305157661438, - 1.153528094291687, - 2.0051634311676025, - 1.3372926712036133, - 1.3009852170944214, - 1.2207027673721313, - 1.3452664613723755, - 1.329830288887024, - 1.307450532913208, + 1.707140326499939, + 1.7226355075836182, + 1.1531383991241455, + 2.0035903453826904, + 1.3362350463867188, + 1.3013248443603516, + 1.2195535898208618, + 1.3454742431640625, + 1.3299248218536377, + 1.3073854446411133, ], "train_step_metrics": [ - 9.490362167358398, - 10.207969665527344, - 6.944809913635254, - 4.943641662597656, - 7.291841506958008, - 4.294501304626465, - 6.6656389236450195, - 3.3796849250793457, - 5.201667308807373, - 4.655590534210205, - 4.065384864807129, - 4.618677139282227, - 5.396877765655518, - 3.989826202392578, - 6.030873775482178, - 4.0859761238098145, - 5.179838180541992, - 3.5994436740875244, - 2.328134298324585, - 4.852985858917236, + 9.49116325378418, + 10.228837966918945, + 6.93151330947876, + 4.945170879364014, + 7.296566009521484, + 4.296945571899414, + 6.66965389251709, + 3.378974676132202, + 5.193322658538818, + 4.655086040496826, + 4.063101291656494, + 4.623003959655762, + 5.407680034637451, + 3.994786262512207, + 6.0354108810424805, + 4.0796589851379395, + 5.188138961791992, + 3.60935115814209, + 2.3316092491149902, + 4.846755504608154, ], "eval_step_metrics": [ # steps 0-9 - 5.518795013427734, - 5.580649375915527, - 3.1693549156188965, - 7.42730712890625, - 3.8087174892425537, - 3.672913074493408, - 3.38956880569458, - 3.8392088413238525, - 3.7804012298583984, - 3.6967368125915527, + 5.5131731033325195, + 5.599266052246094, + 3.1681201457977295, + 7.415632247924805, + 3.8046915531158447, + 3.674160957336426, + 3.3856759071350098, + 3.8400065898895264, + 3.7807586193084717, + 3.69649600982666, ], }, # Scenario 3: Single-device google-bert/bert-base-uncased training on IMDB dataset. 
"bert_base_uncased_config_imdb_single_device": { "description": "Baseline for google-bert/bert-base-uncased on IMDB single-device", "train_step_losses": [ - 0.357421875, - 0.546875, - 0.98486328125, - 0.35302734375, - 1.23828125, - 0.60791015625, - 0.44384765625, - 0.791015625, - 0.7861328125, - 0.51318359375, - 0.50244140625, - 0.90087890625, - 0.8818359375, - 0.86279296875, - 0.6396484375, - 0.49267578125, - 0.97119140625, - 0.7451171875, - 0.798828125, - 0.7080078125, + 0.390625, + 0.51220703125, + 0.9208984375, + 0.4052734375, + 1.1640625, + 0.6533203125, + 0.5087890625, + 0.76171875, + 0.63525390625, + 0.50146484375, + 0.5439453125, + 0.947265625, + 0.89013671875, + 0.80419921875, + 0.6533203125, + 0.4580078125, + 0.92041015625, + 0.7412109375, + 0.7197265625, + 0.62158203125, ], "eval_step_losses": [ - 0.634765625, - 0.8173828125, + 0.6044921875, + 0.798828125, 0.9072265625, - 0.7177734375, - 0.59423828125, - 0.69921875, - 0.7109375, - 0.7216796875, - 0.6064453125, - 0.7041015625, + 0.70361328125, + 0.59912109375, + 0.66357421875, + 0.6962890625, + 0.75390625, + 0.61328125, + 0.6806640625, ], "train_step_metrics": [ 1.0, 1.0, 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.449951171875, - 0.4091796875, + 0.49999988079071045, + 0.49999988079071045, + 0.5, + 0.5000002384185791, + 0.5000002384185791, + 0.6250002384185791, + 0.6249998807907104, + 0.625, + 0.6000000238418579, + 0.5833332538604736, + 0.5714285373687744, + 0.5714285373687744, + 0.5714285373687744, + 0.5625, + 0.555555522441864, + 0.5055557489395142, + 0.5101010203361511, ], - "eval_step_metrics": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0], + "eval_step_metrics": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], }, # Scenario 4: Distributed google-bert/bert-base-uncased training (world_size=2) "bert_base_uncased_config_imdb_distributed_ws2": { diff --git a/tests/finetune/test_finetune.py b/tests/finetune/test_finetune.py index 300ade704..b174d6fa0 100644 --- a/tests/finetune/test_finetune.py +++ b/tests/finetune/test_finetune.py @@ -140,7 +140,7 @@ def assert_list_close(ref_list, actual_list, atol, name, scenario_key, current_w ] -@pytest.mark.skip() # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes) +# @pytest.mark.skip() # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes) @pytest.mark.cli @pytest.mark.on_qaic @pytest.mark.finetune From 951ded4c462696c495bbfb4c56295b25e9159438 Mon Sep 17 00:00:00 2001 From: Ann Kuruvilla Date: Fri, 21 Nov 2025 10:29:23 +0000 Subject: [PATCH 02/13] jenkins env changed Signed-off-by: Ann Kuruvilla --- scripts/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile index 683ef5018..5f6c880c8 100644 --- a/scripts/Jenkinsfile +++ b/scripts/Jenkinsfile @@ -25,7 +25,6 @@ pipeline { pip install junitparser pytest-xdist && pip install librosa==0.10.2 soundfile==0.13.1 && #packages needed to load example for whisper testing pip install --extra-index-url https://download.pytorch.org/whl/cpu timm==1.0.14 torchvision==0.22.0+cpu einops==0.8.1 && #packages to load VLMs - pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl && # For finetuning tests rm -rf QEfficient" ''' } @@ -166,8 +165,9 @@ pipeline { timeout(time: 5, unit: 'MINUTES') { sh ''' sudo docker exec 
From 951ded4c462696c495bbfb4c56295b25e9159438 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Fri, 21 Nov 2025 10:29:23 +0000
Subject: [PATCH 02/13] Update Jenkins env

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 683ef5018..5f6c880c8 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -25,7 +25,6 @@ pipeline {
                 pip install junitparser pytest-xdist &&
                 pip install librosa==0.10.2 soundfile==0.13.1 && #packages needed to load example for whisper testing
                 pip install --extra-index-url https://download.pytorch.org/whl/cpu timm==1.0.14 torchvision==0.22.0+cpu einops==0.8.1 && #packages to load VLMs
-                pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl && # For finetuning tests
                 rm -rf QEfficient"
             '''
         }
@@ -166,8 +165,9 @@ pipeline {
                 timeout(time: 5, unit: 'MINUTES') {
                     sh '''
                     sudo docker exec ${BUILD_TAG} bash -c "
+                    source /opt/torch-qaic-env/bin/activate &&
                     cd /efficient-transformers &&
-                    . preflight_qeff/bin/activate &&
+                    pip install .[test] &&
                     mkdir -p $PWD/cli_qaic_finetuning &&
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/cli_qaic_finetuning &&

From 359e98bfc6e0c0429662344ec1efc1e591f112c8 Mon Sep 17 00:00:00 2001
From: Tanisha
Date: Mon, 24 Nov 2025 06:54:32 +0000
Subject: [PATCH 03/13] Separate the finetune test into functional and assertion cases

Signed-off-by: Tanisha
---
 tests/finetune/constants.py     |   4 +-
 tests/finetune/test_finetune.py | 220 ++++++++++++++++++++++----------
 2 files changed, 157 insertions(+), 67 deletions(-)

diff --git a/tests/finetune/constants.py b/tests/finetune/constants.py
index 23219ce2e..2709b2986 100644
--- a/tests/finetune/constants.py
+++ b/tests/finetune/constants.py
@@ -6,5 +6,5 @@
 # -----------------------------------------------------------------------------
 
 # Finetuning Test Constants
-LOSS_ATOL = 1e-3
-METRIC_ATOL = 1e-3
+LOSS_ATOL = 2e-2
+METRIC_ATOL = 3e-2

diff --git a/tests/finetune/test_finetune.py b/tests/finetune/test_finetune.py
index b174d6fa0..ad51e779c 100644
--- a/tests/finetune/test_finetune.py
+++ b/tests/finetune/test_finetune.py
@@ -140,15 +140,7 @@ def assert_list_close(ref_list, actual_list, atol, name, scenario_key, current_w
 ]
 
 
-# @pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
-@pytest.mark.cli
-@pytest.mark.on_qaic
-@pytest.mark.finetune
-@pytest.mark.parametrize(
-    "model_name,task_mode,max_eval_step,max_train_step,dataset_name,data_path,intermediate_step_save,context_length,run_validation,use_peft,device,scenario_key",  # This parameter will be used to look up reference data
-    configs,
-)
-def test_finetune(
+def train_function(
     model_name,
     task_mode,
     max_eval_step,
@@ -211,93 +203,191 @@ def test_finetune(
         download_alpaca()
 
     results = finetune(**kwargs)
+    all_ref_metrices = {
+        "ref_train_losses": ref_train_losses,
+        "ref_eval_losses": ref_eval_losses,
+        "ref_train_metrics": ref_train_metrics,
+        "ref_eval_metrics": ref_eval_metrics,
+    }
 
-    # Assertions for step-level values using the helper function
-    assert_list_close(
-        ref_train_losses,
-        results["train_step_loss"],
-        constant.LOSS_ATOL,
-        "Train Step Losses",
-        scenario_key,
-        current_world_size,
-        current_rank,
-    )
-    assert_list_close(
-        ref_eval_losses,
-        results["eval_step_loss"],
-        constant.LOSS_ATOL,
-        "Eval Step Losses",
-        scenario_key,
-        current_world_size,
-        current_rank,
-    )
-    assert_list_close(
-        ref_train_metrics,
-        results["train_step_metric"],
-        constant.METRIC_ATOL,
-        "Train Step Metrics",
-        scenario_key,
-        current_world_size,
-        current_rank,
-    )
-    assert_list_close(
-        ref_eval_metrics,
-        results["eval_step_metric"],
-        constant.METRIC_ATOL,
-        "Eval Step Metrics",
-        scenario_key,
-        current_world_size,
-        current_rank,
-    )
+    all_config_spy = {
+        "train_config_spy": train_config_spy,
+        "generate_dataset_config_spy": generate_dataset_config_spy,
+        "generate_peft_config_spy": generate_peft_config_spy,
+        "get_dataloader_kwargs_spy": get_dataloader_kwargs_spy,
+        "update_config_spy": update_config_spy,
+        "get_custom_data_collator_spy": get_custom_data_collator_spy,
+        "get_preprocessed_dataset_spy": get_preprocessed_dataset_spy,
+        "get_longest_seq_length_spy": get_longest_seq_length_spy,
+        "print_model_size_spy": print_model_size_spy,
+        "train_spy": train_spy,
+        "current_world_size": current_world_size,
+        "current_rank": current_rank,
+    }
+    return results, all_ref_metrices, all_config_spy
+
+
+@pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
+@pytest.mark.cli
+@pytest.mark.on_qaic
+@pytest.mark.finetune
+@pytest.mark.parametrize(
+    "model_name,task_mode,max_eval_step,max_train_step,dataset_name,data_path,intermediate_step_save,context_length,run_validation,use_peft,device,scenario_key",  # This parameter will be used to look up reference data
+    configs,
+)
+def test_finetune_functional(
+    model_name,
+    task_mode,
+    max_eval_step,
+    max_train_step,
+    dataset_name,
+    data_path,
+    intermediate_step_save,
+    context_length,
+    run_validation,
+    use_peft,
+    device,
+    scenario_key,
+    mocker,
+):
+    results, all_ref_metrices, all_config_spy = train_function(
+        model_name,
+        task_mode,
+        max_eval_step,
+        max_train_step,
+        dataset_name,
+        data_path,
+        intermediate_step_save,
+        context_length,
+        run_validation,
+        use_peft,
+        device,
+        scenario_key,
+        mocker,
+    )
 
+    # Assertions for step-level values using the helper function
     assert results["avg_epoch_time"] < 60, "Training should complete within 60 seconds."
-
-    train_config_spy.assert_called_once()
-    generate_dataset_config_spy.assert_called_once()
+    all_config_spy["train_config_spy"].assert_called_once()
+    all_config_spy["generate_dataset_config_spy"].assert_called_once()
     if task_mode == Task_Mode.GENERATION:
-        generate_peft_config_spy.assert_called_once()
-    get_longest_seq_length_spy.assert_called_once()
-    print_model_size_spy.assert_called_once()
-    train_spy.assert_called_once()
-
-    assert update_config_spy.call_count == 1
-    assert get_custom_data_collator_spy.call_count == 2
-    assert get_dataloader_kwargs_spy.call_count == 2
-    assert get_preprocessed_dataset_spy.call_count == 2
-
-    args, kwargs = train_spy.call_args
+        all_config_spy["generate_peft_config_spy"].assert_called_once()
+    all_config_spy["get_longest_seq_length_spy"].assert_called_once()
+    all_config_spy["print_model_size_spy"].assert_called_once()
+    all_config_spy["train_spy"].assert_called_once()
+    assert all_config_spy["update_config_spy"].call_count == 1
+    assert all_config_spy["get_custom_data_collator_spy"].call_count == 2
+    assert all_config_spy["get_dataloader_kwargs_spy"].call_count == 2
+    assert all_config_spy["get_preprocessed_dataset_spy"].call_count == 2
+    args, kwargs = all_config_spy["train_spy"].call_args
     train_dataloader = args[2]
     eval_dataloader = args[3]
     optimizer = args[4]
-
     batch = next(iter(train_dataloader))
     assert "labels" in batch.keys()
     assert "input_ids" in batch.keys()
     assert "attention_mask" in batch.keys()
-
     assert isinstance(optimizer, optim.AdamW)
-
     assert isinstance(train_dataloader, DataLoader)
     if run_validation:
         assert isinstance(eval_dataloader, DataLoader)
     else:
         assert eval_dataloader is None
-
-    args, kwargs = update_config_spy.call_args_list[0]
+    args, kwargs = all_config_spy["update_config_spy"].call_args_list[0]
     train_config = args[0]
     assert max_train_step >= train_config.gradient_accumulation_steps, (
         "Total training step should be more than "
         f"{train_config.gradient_accumulation_steps} which is gradient accumulation steps."
     )
-
     if use_peft:
         saved_file = os.path.join(train_config.output_dir, "complete_epoch_1/adapter_model.safetensors")
     else:
         saved_file = os.path.join(train_config.output_dir, "complete_epoch_1/model.safetensors")
     assert os.path.isfile(saved_file)
-
     clean_up(train_config.output_dir)
     clean_up("qaic-dumps")
 
     if dataset_name == "alpaca_dataset":
         clean_up(alpaca_json_path)
+
+
+@pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
+@pytest.mark.cli
+@pytest.mark.on_qaic
+@pytest.mark.finetune
+@pytest.mark.parametrize(
+    "model_name,task_mode,max_eval_step,max_train_step,dataset_name,data_path,intermediate_step_save,context_length,run_validation,use_peft,device,scenario_key",  # This parameter will be used to look up reference data
+    configs,
+)
+def test_finetune_assert(
+    model_name,
+    task_mode,
+    max_eval_step,
+    max_train_step,
+    dataset_name,
+    data_path,
+    intermediate_step_save,
+    context_length,
+    run_validation,
+    use_peft,
+    device,
+    scenario_key,
+    mocker,
+):
+    results, all_ref_metrices, all_config_spy = train_function(
+        model_name,
+        task_mode,
+        max_eval_step,
+        max_train_step,
+        dataset_name,
+        data_path,
+        intermediate_step_save,
+        context_length,
+        run_validation,
+        use_peft,
+        device,
+        scenario_key,
+        mocker,
+    )
+
+    # Assertions for step-level values using the helper function
+    assert_list_close(
+        all_ref_metrices["ref_train_losses"],
+        results["train_step_loss"],
+        constant.LOSS_ATOL,
+        "Train Step Losses",
+        scenario_key,
+        all_config_spy["current_world_size"],
+        all_config_spy["current_rank"],
+    )
+    assert_list_close(
+        all_ref_metrices["ref_eval_losses"],
+        results["eval_step_loss"],
+        constant.LOSS_ATOL,
+        "Eval Step Losses",
+        scenario_key,
+        all_config_spy["current_world_size"],
+        all_config_spy["current_rank"],
+    )
+    assert_list_close(
+        all_ref_metrices["ref_train_metrics"],
+        results["train_step_metric"],
+        constant.METRIC_ATOL,
+        "Train Step Metrics",
+        scenario_key,
+        all_config_spy["current_world_size"],
+        all_config_spy["current_rank"],
+    )
+    assert_list_close(
+        all_ref_metrices["ref_eval_metrics"],
+        results["eval_step_metric"],
+        constant.METRIC_ATOL,
+        "Eval Step Metrics",
+        scenario_key,
+        all_config_spy["current_world_size"],
+        all_config_spy["current_rank"],
+    )
+    clean_up("qaic-dumps")
+
+    if dataset_name == "alpaca_dataset":
+        clean_up(alpaca_json_path)
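NOTE (editor): Patch 03 keeps one shared driver (train_function) and splits the checks
into a functional test (spy call counts, dataloader contents, saved artifacts) and a
numeric assertion test (loss/metric baselines), so the unstable numeric comparison can
later be skipped without losing functional coverage. A condensed, self-contained
sketch of that pattern — every name below is a stand-in, not the repository's code:

    import pytest

    @pytest.fixture
    def config():
        return {"max_train_step": 20}

    def run_training(config):
        # Stand-in for train_function(): returns results plus reference values.
        losses = [0.5] * config["max_train_step"]
        return {"train_step_loss": losses}, {"ref_train_losses": losses}

    def test_functional(config):
        # Structural/behavioural checks only; stable across runs.
        results, _ = run_training(config)
        assert len(results["train_step_loss"]) == config["max_train_step"]

    @pytest.mark.skip(reason="numeric references unstable across runs")
    def test_assert(config):
        # Numeric comparison against baselines; skippable independently.
        results, refs = run_training(config)
        for ref, actual in zip(refs["ref_train_losses"], results["train_step_loss"]):
            assert abs(ref - actual) <= 1e-3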
From d01717db3b6c48d67aecbaeeed3dc01cf1bcc3fa Mon Sep 17 00:00:00 2001
From: Tanisha
Date: Mon, 24 Nov 2025 08:28:41 +0000
Subject: [PATCH 04/13] Separate the finetune test into functional and assertion cases

Signed-off-by: Tanisha
---
 tests/finetune/test_finetune.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/finetune/test_finetune.py b/tests/finetune/test_finetune.py
index ad51e779c..72488fae0 100644
--- a/tests/finetune/test_finetune.py
+++ b/tests/finetune/test_finetune.py
@@ -227,7 +227,7 @@ def train_function(
     return results, all_ref_metrices, all_config_spy
 
 
-@pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
+# @pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
 @pytest.mark.cli
 @pytest.mark.on_qaic
 @pytest.mark.finetune
@@ -311,7 +311,7 @@ def test_finetune_functional(
         clean_up(alpaca_json_path)
 
 
-@pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
+# @pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
 @pytest.mark.cli
 @pytest.mark.on_qaic
 @pytest.mark.finetune

From 844f223dd579164b71af7b26223f70d51f6d0328 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Mon, 24 Nov 2025 09:21:38 +0000
Subject: [PATCH 05/13] Skip assert test

Signed-off-by: Ann Kuruvilla
---
 tests/finetune/test_finetune.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/finetune/test_finetune.py b/tests/finetune/test_finetune.py
index 72488fae0..07261bdcf 100644
--- a/tests/finetune/test_finetune.py
+++ b/tests/finetune/test_finetune.py
@@ -227,7 +227,6 @@ def train_function(
     return results, all_ref_metrices, all_config_spy
 
 
-# @pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
 @pytest.mark.cli
 @pytest.mark.on_qaic
 @pytest.mark.finetune
@@ -311,7 +310,7 @@ def test_finetune_functional(
         clean_up(alpaca_json_path)
 
 
-# @pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
+@pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
 @pytest.mark.cli
 @pytest.mark.on_qaic
 @pytest.mark.finetune

From 4f383d291c7899f4d21b00c6ee31fffbc2f4e65e Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Mon, 24 Nov 2025 09:23:39 +0000
Subject: [PATCH 06/13] Activate preflight env for FT with extra torch whl

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 5f6c880c8..96fb06529 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -167,7 +167,8 @@ pipeline {
                     sudo docker exec ${BUILD_TAG} bash -c "
                     source /opt/torch-qaic-env/bin/activate &&
                     cd /efficient-transformers &&
-                    pip install .[test] &&
+                    . preflight_qeff/bin/activate &&
+                    pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&
                     mkdir -p $PWD/cli_qaic_finetuning &&
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/cli_qaic_finetuning &&

From a8956ebeedd0296b8da37c03ce0f446d533a017f Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Mon, 24 Nov 2025 09:27:51 +0000
Subject: [PATCH 07/13] Comment out torch-qaic-env

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 96fb06529..9c90be652 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -165,7 +165,7 @@ pipeline {
                 timeout(time: 5, unit: 'MINUTES') {
                     sh '''
                     sudo docker exec ${BUILD_TAG} bash -c "
-                    source /opt/torch-qaic-env/bin/activate &&
+                    // source /opt/torch-qaic-env/bin/activate &&
                     cd /efficient-transformers &&
                    . preflight_qeff/bin/activate &&
                    pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&

From ef30c4432d686d84c86c559d191c555d38f95b72 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Mon, 24 Nov 2025 15:05:22 +0000
Subject: [PATCH 08/13] Update Jenkins script

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 9c90be652..48fb03fc8 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -165,7 +165,6 @@ pipeline {
                 timeout(time: 5, unit: 'MINUTES') {
                     sh '''
                     sudo docker exec ${BUILD_TAG} bash -c "
-                    // source /opt/torch-qaic-env/bin/activate &&
                     cd /efficient-transformers &&
                     . preflight_qeff/bin/activate &&
                     pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&

From dd550031bf8c63c3721ee7ede69e106a028afff6 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Tue, 25 Nov 2025 06:18:28 +0000
Subject: [PATCH 09/13] Update Jenkins script

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 48fb03fc8..5bb2d4f74 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -166,8 +166,7 @@ pipeline {
                     sh '''
                     sudo docker exec ${BUILD_TAG} bash -c "
                     cd /efficient-transformers &&
-                    . preflight_qeff/bin/activate &&
-                    pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&
+                    source /opt/torch-qaic-env/bin/activate &&
                     mkdir -p $PWD/cli_qaic_finetuning &&
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/cli_qaic_finetuning &&

From 48f06b01e1d4bcca5bcfa79f83caa512e0939d23 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Thu, 27 Nov 2025 04:09:20 +0000
Subject: [PATCH 10/13] Update Jenkins script

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 5bb2d4f74..37036e21b 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -166,7 +166,8 @@ pipeline {
                     sh '''
                     sudo docker exec ${BUILD_TAG} bash -c "
                     cd /efficient-transformers &&
-                    source /opt/torch-qaic-env/bin/activate &&
+                    .preflight_qeff/bin/activate &&
+                    pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&
                     mkdir -p $PWD/cli_qaic_finetuning &&
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/cli_qaic_finetuning &&

From ad8897dc3d72bdaf69eadccef2fd186804eb91f2 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Thu, 27 Nov 2025 06:19:51 +0000
Subject: [PATCH 11/13] Update Jenkins script

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 37036e21b..48fb03fc8 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -166,7 +166,7 @@ pipeline {
                     sudo docker exec ${BUILD_TAG} bash -c "
                     cd /efficient-transformers &&
-                    .preflight_qeff/bin/activate &&
+                    . preflight_qeff/bin/activate &&
                     pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&
                     mkdir -p $PWD/cli_qaic_finetuning &&
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/cli_qaic_finetuning &&

From 0a81fa789c33ec75e5b7394f793f8029f29c7c0e Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Fri, 28 Nov 2025 08:31:55 +0000
Subject: [PATCH 12/13] Update dataset path

Signed-off-by: Ann Kuruvilla
---
 tests/finetune/test_finetune.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/finetune/test_finetune.py b/tests/finetune/test_finetune.py
index 07261bdcf..dc9acf1ca 100644
--- a/tests/finetune/test_finetune.py
+++ b/tests/finetune/test_finetune.py
@@ -21,7 +21,7 @@
 from tests.finetune import constants as constant
 from tests.finetune import reference_data as ref_data
 
-alpaca_json_path = os.path.join(os.getcwd(), "alpaca_data.json")
+alpaca_json_path = os.path.join(os.getcwd(), "./dataset/alpaca_data.json")
 
 
 def clean_up(path):
@@ -34,7 +34,8 @@ def clean_up(path):
 def download_alpaca():
     alpaca_url = "https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/refs/heads/main/alpaca_data.json"
     response = requests.get(alpaca_url)
-
+    # Create directory if it doesn't exist
+    os.makedirs(os.path.dirname(alpaca_json_path), exist_ok=True)
     with open(alpaca_json_path, "wb") as f:
         f.write(response.content)

From 0009cacbeb22ec20213feda7ac08a3e6f711ac66 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Fri, 28 Nov 2025 09:49:01 +0000
Subject: [PATCH 13/13] Update atol

Signed-off-by: Ann Kuruvilla
---
 tests/finetune/constants.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/finetune/constants.py b/tests/finetune/constants.py
index 2709b2986..23219ce2e 100644
--- a/tests/finetune/constants.py
+++ b/tests/finetune/constants.py
@@ -6,5 +6,5 @@
 # -----------------------------------------------------------------------------
 
 # Finetuning Test Constants
-LOSS_ATOL = 2e-2
-METRIC_ATOL = 3e-2
+LOSS_ATOL = 1e-3
+METRIC_ATOL = 1e-3
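NOTE (editor): Patch 13 restores the 1e-3 tolerances that patch 03 had loosened to
2e-2/3e-2. That only holds if run-to-run drift stays below a thousandth, and the
baseline refresh in patch 01 shows drifts well above that in places. A quick worked
check using one eval-loss pair taken directly from patch 01's Alpaca scenario:

    # Old vs. new reference eval loss at step 5 (values copied from patch 01).
    ref, actual = 2.8664817810058594, 2.8850066661834717
    drift = abs(ref - actual)  # ~1.85e-2

    print(drift <= 1e-3)  # False -> this much drift fails the restored LOSS_ATOL
    print(drift <= 2e-2)  # True  -> it passed under the interim 2e-2 tolerance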