From 4f4982a03428a02c1ae94d3eb8c39e2874ad938f Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Fri, 21 Nov 2025 04:50:39 +0000
Subject: [PATCH 01/13] Update reference values

Signed-off-by: Ann Kuruvilla
---
 tests/finetune/reference_data.py | 334 +++++++++++++++----------------
 tests/finetune/test_finetune.py  |   2 +-
 2 files changed, 168 insertions(+), 168 deletions(-)

diff --git a/tests/finetune/reference_data.py b/tests/finetune/reference_data.py
index a2a5438f5..c94c03b0b 100644
--- a/tests/finetune/reference_data.py
+++ b/tests/finetune/reference_data.py
@@ -13,206 +13,206 @@
     "llama_3.2_1B_config_alpaca_single_device": {
         "description": "Baseline for Llama on Alpaca single-device",
         "train_step_losses": [
-            1.5112206935882568,
-            1.2211230993270874,
-            1.9942185878753662,
-            2.093623161315918,
-            0.9168124198913574,
-            1.2125635147094727,
-            0.3648962676525116,
-            1.6231939792633057,
-            0.8259601593017578,
-            0.7741442918777466,
-            1.7359141111373901,
-            2.118462085723877,
-            2.061161994934082,
-            0.8256913423538208,
-            0.8088029623031616,
-            1.761340618133545,
-            1.6828027963638306,
-            1.3538823127746582,
-            2.0672550201416016,
-            3.1532647609710693,
+            1.5110896825790405,
+            1.2206485271453857,
+            1.9950776100158691,
+            2.091615676879883,
+            0.9182446599006653,
+            1.1993569135665894,
+            0.36413607001304626,
+            1.6241482496261597,
+            0.8270177245140076,
+            0.7749958634376526,
+            1.73696768283844,
+            2.120077610015869,
+            2.061460256576538,
+            0.8267984390258789,
+            0.8105809688568115,
+            1.7627557516098022,
+            1.6819559335708618,
+            1.3528242111206055,
+            2.0654125213623047,
+            3.156151294708252,
         ],
         "eval_step_losses": [
-            1.462059736251831,
-            0.24527676403522491,
-            1.046107292175293,
-            1.6403586864471436,
-            1.395291805267334,
-            2.8664817810058594,
-            1.035412311553955,
-            1.8670039176940918,
-            3.8079662322998047,
-            0.6516809463500977,
+            1.4607517719268799,
+            0.24302150309085846,
+            1.0471211671829224,
+            1.642044186592102,
+            1.3949533700942993,
+            2.8850066661834717,
+            1.0366586446762085,
+            1.8661959171295166,
+            3.81632924079895,
+            0.6577113270759583,
         ],
         "train_step_metrics": [
-            4.532259941101074,
-            3.390994071960449,
-            7.34645938873291,
-            8.114261627197266,
-            2.5013046264648438,
-            3.3620924949645996,
-            1.4403645992279053,
-            5.069255828857422,
-            2.2840728759765625,
-            2.1687355041503906,
-            5.674112319946289,
-            8.318334579467773,
-            7.855090141296387,
-            2.283458948135376,
-            2.2452187538146973,
-            5.820234775543213,
-            5.380615711212158,
-            3.872429847717285,
-            7.903097629547119,
-            23.412376403808594,
+            4.531666278839111,
+            3.389385223388672,
+            7.352773189544678,
+            8.09798812866211,
+            2.504889488220215,
+            3.3179824352264404,
+            1.43927001953125,
+            5.074095249176025,
+            2.286489486694336,
+            2.1705832481384277,
+            5.680093288421631,
+            8.33178424835205,
+            7.857433319091797,
+            2.2859883308410645,
+            2.2492144107818604,
+            5.828476905822754,
+            5.376060962677002,
+            3.8683345317840576,
+            7.8885498046875,
+            23.480052947998047,
         ],
         "eval_step_metrics": [  # steps 0-9
-            4.31483793258667,
-            1.2779749631881714,
-            2.8465487957000732,
-            5.157018661499023,
-            4.036152362823486,
-            17.575077056884766,
-            2.816267251968384,
-            6.468885898590088,
-            45.05870819091797,
-            1.9187631607055664,
+            4.309197902679443,
+            1.27509605884552,
+            2.8494362831115723,
+            5.1657185554504395,
+            4.034786224365234,
+            17.9036865234375,
+            2.819779396057129,
+            6.463661193847656,
+            45.437110900878906,
+            1.9303690195083618,
         ],
     },
     # Scenario 2: Single-device llama 3.2-1B training on GSM8k dataset.
"llama_3.2_1B_config_gsm8k_single_device": { "description": "Baseline for Llama on GSM8k single-device", "train_step_losses": [ - 2.250276803970337, - 2.3231687545776367, - 1.9379945993423462, - 1.5981022119522095, - 1.9867562055587769, - 1.4573354721069336, - 1.8969658613204956, - 1.2177824974060059, - 1.6489791870117188, - 1.5380687713623047, - 1.4025083780288696, - 1.5301083326339722, - 1.6858205795288086, - 1.383747935295105, - 1.7968919277191162, - 1.4075607061386108, - 1.6447738409042358, - 1.2807793617248535, - 0.8450672030448914, - 1.5795941352844238, + 2.250361204147339, + 2.3252110481262207, + 1.9360781908035278, + 1.5984115600585938, + 1.9874038696289062, + 1.4579044580459595, + 1.8975679874420166, + 1.2175723314285278, + 1.6473736763000488, + 1.537960410118103, + 1.4019465446472168, + 1.5310447216033936, + 1.6878201961517334, + 1.3849903345108032, + 1.7976438999176025, + 1.4060133695602417, + 1.646375060081482, + 1.2835280895233154, + 0.8465587496757507, + 1.5783095359802246, ], "eval_step_losses": [ - 1.7081595659255981, - 1.719305157661438, - 1.153528094291687, - 2.0051634311676025, - 1.3372926712036133, - 1.3009852170944214, - 1.2207027673721313, - 1.3452664613723755, - 1.329830288887024, - 1.307450532913208, + 1.707140326499939, + 1.7226355075836182, + 1.1531383991241455, + 2.0035903453826904, + 1.3362350463867188, + 1.3013248443603516, + 1.2195535898208618, + 1.3454742431640625, + 1.3299248218536377, + 1.3073854446411133, ], "train_step_metrics": [ - 9.490362167358398, - 10.207969665527344, - 6.944809913635254, - 4.943641662597656, - 7.291841506958008, - 4.294501304626465, - 6.6656389236450195, - 3.3796849250793457, - 5.201667308807373, - 4.655590534210205, - 4.065384864807129, - 4.618677139282227, - 5.396877765655518, - 3.989826202392578, - 6.030873775482178, - 4.0859761238098145, - 5.179838180541992, - 3.5994436740875244, - 2.328134298324585, - 4.852985858917236, + 9.49116325378418, + 10.228837966918945, + 6.93151330947876, + 4.945170879364014, + 7.296566009521484, + 4.296945571899414, + 6.66965389251709, + 3.378974676132202, + 5.193322658538818, + 4.655086040496826, + 4.063101291656494, + 4.623003959655762, + 5.407680034637451, + 3.994786262512207, + 6.0354108810424805, + 4.0796589851379395, + 5.188138961791992, + 3.60935115814209, + 2.3316092491149902, + 4.846755504608154, ], "eval_step_metrics": [ # steps 0-9 - 5.518795013427734, - 5.580649375915527, - 3.1693549156188965, - 7.42730712890625, - 3.8087174892425537, - 3.672913074493408, - 3.38956880569458, - 3.8392088413238525, - 3.7804012298583984, - 3.6967368125915527, + 5.5131731033325195, + 5.599266052246094, + 3.1681201457977295, + 7.415632247924805, + 3.8046915531158447, + 3.674160957336426, + 3.3856759071350098, + 3.8400065898895264, + 3.7807586193084717, + 3.69649600982666, ], }, # Scenario 3: Single-device google-bert/bert-base-uncased training on IMDB dataset. 
"bert_base_uncased_config_imdb_single_device": { "description": "Baseline for google-bert/bert-base-uncased on IMDB single-device", "train_step_losses": [ - 0.357421875, - 0.546875, - 0.98486328125, - 0.35302734375, - 1.23828125, - 0.60791015625, - 0.44384765625, - 0.791015625, - 0.7861328125, - 0.51318359375, - 0.50244140625, - 0.90087890625, - 0.8818359375, - 0.86279296875, - 0.6396484375, - 0.49267578125, - 0.97119140625, - 0.7451171875, - 0.798828125, - 0.7080078125, + 0.390625, + 0.51220703125, + 0.9208984375, + 0.4052734375, + 1.1640625, + 0.6533203125, + 0.5087890625, + 0.76171875, + 0.63525390625, + 0.50146484375, + 0.5439453125, + 0.947265625, + 0.89013671875, + 0.80419921875, + 0.6533203125, + 0.4580078125, + 0.92041015625, + 0.7412109375, + 0.7197265625, + 0.62158203125, ], "eval_step_losses": [ - 0.634765625, - 0.8173828125, + 0.6044921875, + 0.798828125, 0.9072265625, - 0.7177734375, - 0.59423828125, - 0.69921875, - 0.7109375, - 0.7216796875, - 0.6064453125, - 0.7041015625, + 0.70361328125, + 0.59912109375, + 0.66357421875, + 0.6962890625, + 0.75390625, + 0.61328125, + 0.6806640625, ], "train_step_metrics": [ 1.0, 1.0, 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.449951171875, - 0.4091796875, + 0.49999988079071045, + 0.49999988079071045, + 0.5, + 0.5000002384185791, + 0.5000002384185791, + 0.6250002384185791, + 0.6249998807907104, + 0.625, + 0.6000000238418579, + 0.5833332538604736, + 0.5714285373687744, + 0.5714285373687744, + 0.5714285373687744, + 0.5625, + 0.555555522441864, + 0.5055557489395142, + 0.5101010203361511, ], - "eval_step_metrics": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0], + "eval_step_metrics": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0], }, # Scenario 4: Distributed google-bert/bert-base-uncased training (world_size=2) "bert_base_uncased_config_imdb_distributed_ws2": { diff --git a/tests/finetune/test_finetune.py b/tests/finetune/test_finetune.py index 300ade704..b174d6fa0 100644 --- a/tests/finetune/test_finetune.py +++ b/tests/finetune/test_finetune.py @@ -140,7 +140,7 @@ def assert_list_close(ref_list, actual_list, atol, name, scenario_key, current_w ] -@pytest.mark.skip() # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes) +# @pytest.mark.skip() # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes) @pytest.mark.cli @pytest.mark.on_qaic @pytest.mark.finetune From 951ded4c462696c495bbfb4c56295b25e9159438 Mon Sep 17 00:00:00 2001 From: Ann Kuruvilla Date: Fri, 21 Nov 2025 10:29:23 +0000 Subject: [PATCH 02/13] jenkins env changed Signed-off-by: Ann Kuruvilla --- scripts/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile index 683ef5018..5f6c880c8 100644 --- a/scripts/Jenkinsfile +++ b/scripts/Jenkinsfile @@ -25,7 +25,6 @@ pipeline { pip install junitparser pytest-xdist && pip install librosa==0.10.2 soundfile==0.13.1 && #packages needed to load example for whisper testing pip install --extra-index-url https://download.pytorch.org/whl/cpu timm==1.0.14 torchvision==0.22.0+cpu einops==0.8.1 && #packages to load VLMs - pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl && # For finetuning tests rm -rf QEfficient" ''' } @@ -166,8 +165,9 @@ pipeline { timeout(time: 5, unit: 'MINUTES') { sh ''' sudo docker exec 
From 951ded4c462696c495bbfb4c56295b25e9159438 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Fri, 21 Nov 2025 10:29:23 +0000
Subject: [PATCH 02/13] Update Jenkins env

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 683ef5018..5f6c880c8 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -25,7 +25,6 @@ pipeline {
                 pip install junitparser pytest-xdist &&
                 pip install librosa==0.10.2 soundfile==0.13.1 && #packages needed to load example for whisper testing
                 pip install --extra-index-url https://download.pytorch.org/whl/cpu timm==1.0.14 torchvision==0.22.0+cpu einops==0.8.1 && #packages to load VLMs
-                pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl && # For finetuning tests
                 rm -rf QEfficient"
             '''
         }
@@ -166,8 +165,9 @@ pipeline {
                 timeout(time: 5, unit: 'MINUTES') {
                     sh '''
                     sudo docker exec ${BUILD_TAG} bash -c "
+                    source /opt/torch-qaic-env/bin/activate &&
                     cd /efficient-transformers &&
-                    . preflight_qeff/bin/activate &&
+                    pip install .[test] &&
                     mkdir -p $PWD/cli_qaic_finetuning &&
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/cli_qaic_finetuning &&

From 359e98bfc6e0c0429662344ec1efc1e591f112c8 Mon Sep 17 00:00:00 2001
From: Tanisha
Date: Mon, 24 Nov 2025 06:54:32 +0000
Subject: [PATCH 03/13] Separate the finetune test into functional and assertion cases

Signed-off-by: Tanisha
---
 tests/finetune/constants.py     |   4 +-
 tests/finetune/test_finetune.py | 220 ++++++++++++++++++++++----------
 2 files changed, 157 insertions(+), 67 deletions(-)

diff --git a/tests/finetune/constants.py b/tests/finetune/constants.py
index 23219ce2e..2709b2986 100644
--- a/tests/finetune/constants.py
+++ b/tests/finetune/constants.py
@@ -6,5 +6,5 @@
 # -----------------------------------------------------------------------------
 
 # Finetuning Test Constants
-LOSS_ATOL = 1e-3
-METRIC_ATOL = 1e-3
+LOSS_ATOL = 2e-2
+METRIC_ATOL = 3e-2

diff --git a/tests/finetune/test_finetune.py b/tests/finetune/test_finetune.py
index b174d6fa0..ad51e779c 100644
--- a/tests/finetune/test_finetune.py
+++ b/tests/finetune/test_finetune.py
@@ -140,15 +140,7 @@ def assert_list_close(ref_list, actual_list, atol, name, scenario_key, current_w
 ]
 
 
-# @pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
-@pytest.mark.cli
-@pytest.mark.on_qaic
-@pytest.mark.finetune
-@pytest.mark.parametrize(
-    "model_name,task_mode,max_eval_step,max_train_step,dataset_name,data_path,intermediate_step_save,context_length,run_validation,use_peft,device,scenario_key",  # This parameter will be used to look up reference data
-    configs,
-)
-def test_finetune(
+def train_function(
     model_name,
     task_mode,
     max_eval_step,
@@ -211,93 +203,191 @@ def test_finetune(
         download_alpaca()
 
     results = finetune(**kwargs)
+    all_ref_metrices = {
+        "ref_train_losses": ref_train_losses,
+        "ref_eval_losses": ref_eval_losses,
+        "ref_train_metrics": ref_train_metrics,
+        "ref_eval_metrics": ref_eval_metrics,
+    }
 
-    # Assertions for step-level values using the helper function
-    assert_list_close(
-        ref_train_losses,
-        results["train_step_loss"],
-        constant.LOSS_ATOL,
-        "Train Step Losses",
-        scenario_key,
-        current_world_size,
-        current_rank,
-    )
-    assert_list_close(
-        ref_eval_losses,
-        results["eval_step_loss"],
-        constant.LOSS_ATOL,
-        "Eval Step Losses",
-        scenario_key,
-        current_world_size,
-        current_rank,
-    )
-    assert_list_close(
-        ref_train_metrics,
-        results["train_step_metric"],
-        constant.METRIC_ATOL,
-        "Train Step Metrics",
-        scenario_key,
-        current_world_size,
-        current_rank,
-    )
-    assert_list_close(
-        ref_eval_metrics,
-        results["eval_step_metric"],
-        constant.METRIC_ATOL,
-        "Eval Step Metrics",
-        scenario_key,
-        current_world_size,
-        current_rank,
-    )
+    all_config_spy = {
+        "train_config_spy": train_config_spy,
+        "generate_dataset_config_spy": generate_dataset_config_spy,
+        "generate_peft_config_spy": generate_peft_config_spy,
+        "get_dataloader_kwargs_spy": get_dataloader_kwargs_spy,
+        "update_config_spy": update_config_spy,
+        "get_custom_data_collator_spy": get_custom_data_collator_spy,
+        "get_preprocessed_dataset_spy": get_preprocessed_dataset_spy,
+        "get_longest_seq_length_spy": get_longest_seq_length_spy,
+        "print_model_size_spy": print_model_size_spy,
+        "train_spy": train_spy,
+        "current_world_size": current_world_size,
+        "current_rank": current_rank,
+    }
+    return results, all_ref_metrices, all_config_spy
+
+
+@pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
+@pytest.mark.cli
+@pytest.mark.on_qaic
+@pytest.mark.finetune
+@pytest.mark.parametrize(
+    "model_name,task_mode,max_eval_step,max_train_step,dataset_name,data_path,intermediate_step_save,context_length,run_validation,use_peft,device,scenario_key",  # This parameter will be used to look up reference data
+    configs,
+)
+def test_finetune_functional(
+    model_name,
+    task_mode,
+    max_eval_step,
+    max_train_step,
+    dataset_name,
+    data_path,
+    intermediate_step_save,
+    context_length,
+    run_validation,
+    use_peft,
+    device,
+    scenario_key,
+    mocker,
+):
+    results, all_ref_metrices, all_config_spy = train_function(
+        model_name,
+        task_mode,
+        max_eval_step,
+        max_train_step,
+        dataset_name,
+        data_path,
+        intermediate_step_save,
+        context_length,
+        run_validation,
+        use_peft,
+        device,
+        scenario_key,
+        mocker,
+    )
 
+    # Assertions for step-level values using the helper function
     assert results["avg_epoch_time"] < 60, "Training should complete within 60 seconds."
-
-    train_config_spy.assert_called_once()
-    generate_dataset_config_spy.assert_called_once()
+    all_config_spy["train_config_spy"].assert_called_once()
+    all_config_spy["generate_dataset_config_spy"].assert_called_once()
     if task_mode == Task_Mode.GENERATION:
-        generate_peft_config_spy.assert_called_once()
-    get_longest_seq_length_spy.assert_called_once()
-    print_model_size_spy.assert_called_once()
-    train_spy.assert_called_once()
-
-    assert update_config_spy.call_count == 1
-    assert get_custom_data_collator_spy.call_count == 2
-    assert get_dataloader_kwargs_spy.call_count == 2
-    assert get_preprocessed_dataset_spy.call_count == 2
-
-    args, kwargs = train_spy.call_args
+        all_config_spy["generate_peft_config_spy"].assert_called_once()
+    all_config_spy["get_longest_seq_length_spy"].assert_called_once()
+    all_config_spy["print_model_size_spy"].assert_called_once()
+    all_config_spy["train_spy"].assert_called_once()
+    assert all_config_spy["update_config_spy"].call_count == 1
+    assert all_config_spy["get_custom_data_collator_spy"].call_count == 2
+    assert all_config_spy["get_dataloader_kwargs_spy"].call_count == 2
+    assert all_config_spy["get_preprocessed_dataset_spy"].call_count == 2
+    args, kwargs = all_config_spy["train_spy"].call_args
     train_dataloader = args[2]
     eval_dataloader = args[3]
     optimizer = args[4]
-
     batch = next(iter(train_dataloader))
     assert "labels" in batch.keys()
     assert "input_ids" in batch.keys()
     assert "attention_mask" in batch.keys()
-
     assert isinstance(optimizer, optim.AdamW)
-
     assert isinstance(train_dataloader, DataLoader)
     if run_validation:
         assert isinstance(eval_dataloader, DataLoader)
     else:
         assert eval_dataloader is None
-
-    args, kwargs = update_config_spy.call_args_list[0]
+    args, kwargs = all_config_spy["update_config_spy"].call_args_list[0]
     train_config = args[0]
     assert max_train_step >= train_config.gradient_accumulation_steps, (
         "Total training step should be more than "
         f"{train_config.gradient_accumulation_steps} which is gradient accumulation steps."
     )
-
     if use_peft:
         saved_file = os.path.join(train_config.output_dir, "complete_epoch_1/adapter_model.safetensors")
     else:
         saved_file = os.path.join(train_config.output_dir, "complete_epoch_1/model.safetensors")
     assert os.path.isfile(saved_file)
-
     clean_up(train_config.output_dir)
     clean_up("qaic-dumps")
 
     if dataset_name == "alpaca_dataset":
         clean_up(alpaca_json_path)
+
+
+@pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
+@pytest.mark.cli
+@pytest.mark.on_qaic
+@pytest.mark.finetune
+@pytest.mark.parametrize(
+    "model_name,task_mode,max_eval_step,max_train_step,dataset_name,data_path,intermediate_step_save,context_length,run_validation,use_peft,device,scenario_key",  # This parameter will be used to look up reference data
+    configs,
+)
+def test_finetune_assert(
+    model_name,
+    task_mode,
+    max_eval_step,
+    max_train_step,
+    dataset_name,
+    data_path,
+    intermediate_step_save,
+    context_length,
+    run_validation,
+    use_peft,
+    device,
+    scenario_key,
+    mocker,
+):
+    results, all_ref_metrices, all_config_spy = train_function(
+        model_name,
+        task_mode,
+        max_eval_step,
+        max_train_step,
+        dataset_name,
+        data_path,
+        intermediate_step_save,
+        context_length,
+        run_validation,
+        use_peft,
+        device,
+        scenario_key,
+        mocker,
+    )
+
+    # Assertions for step-level values using the helper function
+    assert_list_close(
+        all_ref_metrices["ref_train_losses"],
+        results["train_step_loss"],
+        constant.LOSS_ATOL,
+        "Train Step Losses",
+        scenario_key,
+        all_config_spy["current_world_size"],
+        all_config_spy["current_rank"],
+    )
+    assert_list_close(
+        all_ref_metrices["ref_eval_losses"],
+        results["eval_step_loss"],
+        constant.LOSS_ATOL,
+        "Eval Step Losses",
+        scenario_key,
+        all_config_spy["current_world_size"],
+        all_config_spy["current_rank"],
+    )
+    assert_list_close(
+        all_ref_metrices["ref_train_metrics"],
+        results["train_step_metric"],
+        constant.METRIC_ATOL,
+        "Train Step Metrics",
+        scenario_key,
+        all_config_spy["current_world_size"],
+        all_config_spy["current_rank"],
+    )
+    assert_list_close(
+        all_ref_metrices["ref_eval_metrics"],
+        results["eval_step_metric"],
+        constant.METRIC_ATOL,
+        "Eval Step Metrics",
+        scenario_key,
+        all_config_spy["current_world_size"],
+        all_config_spy["current_rank"],
+    )
+    clean_up("qaic-dumps")
+
+    if dataset_name == "alpaca_dataset":
+        clean_up(alpaca_json_path)
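NOTE (editor): Patch 03 keeps one shared driver (train_function) and splits the checks
into a functional test (spy call counts, dataloader contents, saved artifacts) and a
numeric assertion test (loss/metric baselines), so the unstable numeric comparison can
later be skipped without losing functional coverage. A condensed, self-contained
sketch of that pattern — every name below is a stand-in, not the repository's code:

    import pytest

    @pytest.fixture
    def config():
        return {"max_train_step": 20}

    def run_training(config):
        # Stand-in for train_function(): returns results plus reference values.
        losses = [0.5] * config["max_train_step"]
        return {"train_step_loss": losses}, {"ref_train_losses": losses}

    def test_functional(config):
        # Structural/behavioural checks only; stable across runs.
        results, _ = run_training(config)
        assert len(results["train_step_loss"]) == config["max_train_step"]

    @pytest.mark.skip(reason="numeric references unstable across runs")
    def test_assert(config):
        # Numeric comparison against baselines; skippable independently.
        results, refs = run_training(config)
        for ref, actual in zip(refs["ref_train_losses"], results["train_step_loss"]):
            assert abs(ref - actual) <= 1e-3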
From d01717db3b6c48d67aecbaeeed3dc01cf1bcc3fa Mon Sep 17 00:00:00 2001
From: Tanisha
Date: Mon, 24 Nov 2025 08:28:41 +0000
Subject: [PATCH 04/13] Separate the finetune test into functional and assertion cases

Signed-off-by: Tanisha
---
 tests/finetune/test_finetune.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/finetune/test_finetune.py b/tests/finetune/test_finetune.py
index ad51e779c..72488fae0 100644
--- a/tests/finetune/test_finetune.py
+++ b/tests/finetune/test_finetune.py
@@ -227,7 +227,7 @@ def train_function(
     return results, all_ref_metrices, all_config_spy
 
 
-@pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
+# @pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
 @pytest.mark.cli
 @pytest.mark.on_qaic
 @pytest.mark.finetune
@@ -311,7 +311,7 @@ def test_finetune_functional(
         clean_up(alpaca_json_path)
 
 
-@pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
+# @pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
 @pytest.mark.cli
 @pytest.mark.on_qaic
 @pytest.mark.finetune

From 844f223dd579164b71af7b26223f70d51f6d0328 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Mon, 24 Nov 2025 09:21:38 +0000
Subject: [PATCH 05/13] Skip assert test

Signed-off-by: Ann Kuruvilla
---
 tests/finetune/test_finetune.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/finetune/test_finetune.py b/tests/finetune/test_finetune.py
index 72488fae0..07261bdcf 100644
--- a/tests/finetune/test_finetune.py
+++ b/tests/finetune/test_finetune.py
@@ -227,7 +227,6 @@ def train_function(
     return results, all_ref_metrices, all_config_spy
 
 
-# @pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
 @pytest.mark.cli
 @pytest.mark.on_qaic
 @pytest.mark.finetune
@@ -311,7 +310,7 @@ def test_finetune_functional(
         clean_up(alpaca_json_path)
 
 
-# @pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
+@pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
 @pytest.mark.cli
 @pytest.mark.on_qaic
 @pytest.mark.finetune

From 4f383d291c7899f4d21b00c6ee31fffbc2f4e65e Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Mon, 24 Nov 2025 09:23:39 +0000
Subject: [PATCH 06/13] Activate preflight env for FT with extra torch whl

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 5f6c880c8..96fb06529 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -167,7 +167,8 @@ pipeline {
                     sudo docker exec ${BUILD_TAG} bash -c "
                     source /opt/torch-qaic-env/bin/activate &&
                     cd /efficient-transformers &&
-                    pip install .[test] &&
+                    . preflight_qeff/bin/activate &&
+                    pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&
                     mkdir -p $PWD/cli_qaic_finetuning &&
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/cli_qaic_finetuning &&

From a8956ebeedd0296b8da37c03ce0f446d533a017f Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Mon, 24 Nov 2025 09:27:51 +0000
Subject: [PATCH 07/13] Comment out torch-qaic-env

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 96fb06529..9c90be652 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -165,7 +165,7 @@ pipeline {
                 timeout(time: 5, unit: 'MINUTES') {
                     sh '''
                     sudo docker exec ${BUILD_TAG} bash -c "
-                    source /opt/torch-qaic-env/bin/activate &&
+                    // source /opt/torch-qaic-env/bin/activate &&
                     cd /efficient-transformers &&
                    . preflight_qeff/bin/activate &&
                    pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&

From ef30c4432d686d84c86c559d191c555d38f95b72 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Mon, 24 Nov 2025 15:05:22 +0000
Subject: [PATCH 08/13] Update Jenkins script

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 9c90be652..48fb03fc8 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -165,7 +165,6 @@ pipeline {
                 timeout(time: 5, unit: 'MINUTES') {
                     sh '''
                     sudo docker exec ${BUILD_TAG} bash -c "
-                    // source /opt/torch-qaic-env/bin/activate &&
                     cd /efficient-transformers &&
                     . preflight_qeff/bin/activate &&
                     pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&

From dd550031bf8c63c3721ee7ede69e106a028afff6 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Tue, 25 Nov 2025 06:18:28 +0000
Subject: [PATCH 09/13] Update Jenkins script

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 48fb03fc8..5bb2d4f74 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -166,8 +166,7 @@ pipeline {
                     sh '''
                     sudo docker exec ${BUILD_TAG} bash -c "
                     cd /efficient-transformers &&
-                    . preflight_qeff/bin/activate &&
-                    pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&
+                    source /opt/torch-qaic-env/bin/activate &&
                     mkdir -p $PWD/cli_qaic_finetuning &&
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/cli_qaic_finetuning &&

From 48f06b01e1d4bcca5bcfa79f83caa512e0939d23 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Thu, 27 Nov 2025 04:09:20 +0000
Subject: [PATCH 10/13] Update Jenkins script

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 5bb2d4f74..37036e21b 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -166,7 +166,8 @@ pipeline {
                     sh '''
                     sudo docker exec ${BUILD_TAG} bash -c "
                     cd /efficient-transformers &&
-                    source /opt/torch-qaic-env/bin/activate &&
+                    .preflight_qeff/bin/activate &&
+                    pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&
                     mkdir -p $PWD/cli_qaic_finetuning &&
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/cli_qaic_finetuning &&

From ad8897dc3d72bdaf69eadccef2fd186804eb91f2 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Thu, 27 Nov 2025 06:19:51 +0000
Subject: [PATCH 11/13] Update Jenkins script

Signed-off-by: Ann Kuruvilla
---
 scripts/Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 37036e21b..48fb03fc8 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -166,7 +166,7 @@ pipeline {
                     sudo docker exec ${BUILD_TAG} bash -c "
                     cd /efficient-transformers &&
-                    .preflight_qeff/bin/activate &&
+                    . preflight_qeff/bin/activate &&
                     pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl &&
                     mkdir -p $PWD/cli_qaic_finetuning &&
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/cli_qaic_finetuning &&

From 0a81fa789c33ec75e5b7394f793f8029f29c7c0e Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Fri, 28 Nov 2025 08:31:55 +0000
Subject: [PATCH 12/13] Update dataset path

Signed-off-by: Ann Kuruvilla
---
 tests/finetune/test_finetune.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/finetune/test_finetune.py b/tests/finetune/test_finetune.py
index 07261bdcf..dc9acf1ca 100644
--- a/tests/finetune/test_finetune.py
+++ b/tests/finetune/test_finetune.py
@@ -21,7 +21,7 @@
 from tests.finetune import constants as constant
 from tests.finetune import reference_data as ref_data
 
-alpaca_json_path = os.path.join(os.getcwd(), "alpaca_data.json")
+alpaca_json_path = os.path.join(os.getcwd(), "./dataset/alpaca_data.json")
 
 
 def clean_up(path):
@@ -34,7 +34,8 @@ def clean_up(path):
 def download_alpaca():
     alpaca_url = "https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/refs/heads/main/alpaca_data.json"
     response = requests.get(alpaca_url)
-
+    # Create directory if it doesn't exist
+    os.makedirs(os.path.dirname(alpaca_json_path), exist_ok=True)
     with open(alpaca_json_path, "wb") as f:
         f.write(response.content)

From 0009cacbeb22ec20213feda7ac08a3e6f711ac66 Mon Sep 17 00:00:00 2001
From: Ann Kuruvilla
Date: Fri, 28 Nov 2025 09:49:01 +0000
Subject: [PATCH 13/13] Update atol

Signed-off-by: Ann Kuruvilla
---
 tests/finetune/constants.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/finetune/constants.py b/tests/finetune/constants.py
index 2709b2986..23219ce2e 100644
--- a/tests/finetune/constants.py
+++ b/tests/finetune/constants.py
@@ -6,5 +6,5 @@
 # -----------------------------------------------------------------------------
 
 # Finetuning Test Constants
-LOSS_ATOL = 2e-2
-METRIC_ATOL = 3e-2
+LOSS_ATOL = 1e-3
+METRIC_ATOL = 1e-3
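NOTE (editor): Patch 13 restores the 1e-3 tolerances that patch 03 had loosened to
2e-2/3e-2. That only holds if run-to-run drift stays below a thousandth, and the
baseline refresh in patch 01 shows drifts well above that in places. A quick worked
check using one eval-loss pair taken directly from patch 01's Alpaca scenario:

    # Old vs. new reference eval loss at step 5 (values copied from patch 01).
    ref, actual = 2.8664817810058594, 2.8850066661834717
    drift = abs(ref - actual)  # ~1.85e-2

    print(drift <= 1e-3)  # False -> this much drift fails the restored LOSS_ATOL
    print(drift <= 2e-2)  # True  -> it passed under the interim 2e-2 tolerance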