## **Engineering with Generative AI **

Downloading a required libraries :

In [1]:

!pip install -q transformers>=4.32.0 datasets evaluate                                                # Comes from HuggingFace
!pip install -q auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/   # Use cu117 if on CUDA 11.7                                                                              # For GPTQ Optimization
!pip install -q -U bitsandbytes                                                                       # For quantization
!pip install -q -U peft                                                                               # Parameter-efficient Fine-tuning
!pip install -q -U accelerate                                                                         # Loading models across GPUs/CPU/disk
!pip install -q trl                                                                                   # For supervised fine-tuning for LLMs

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m88.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m102.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m89.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m52.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Mounting Google Drive :

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Basic imports
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

**Bitsandbytes configuration** : Uses Quantization config to bnb config. this is QLORA -> Quanitzing the pretrained wieights of model to 4-bit & keeping them fixed during FT

In [4]:
from transformers import BitsAndBytesConfig

# 1. Setup the quantization configuarion
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # Q = 4 bits
    bnb_4bit_use_double_quant=True,        # double quantization, quantizing the quantization constants for saving an additional 0.4 bits per parameter
    bnb_4bit_quant_type="nf4",             # 4-bit NormalFloat Quantization (optimal for normal weights; enforces w ∈ [-1,1])
    bnb_4bit_compute_dtype=torch.bfloat16  # Dequantize to 16-bits before computations (as in the paper)
)

**Loading the pretrained model :**

In [5]:
model_name_or_path = "microsoft/phi-2"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, quantization_config=bnb_config, device_map="auto")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

**Tokenization** :

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

In [7]:
from datasets import load_dataset

Loading Dataset : (MBPP)

In [8]:
dataset_name = "mbpp"
dataset = load_dataset(dataset_name)

README.md:   0%|          | 0.00/9.06k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/87.2k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

prompt-00000-of-00001.parquet:   0%|          | 0.00/7.88k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/374 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/500 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/90 [00:00<?, ? examples/s]

Generating prompt split:   0%|          | 0/10 [00:00<?, ? examples/s]

In [9]:
dataset['test'][0]

{'task_id': 11,
 'text': 'Write a python function to remove first and last occurrence of a given character from the string.',
 'code': 'def remove_Occ(s,ch): \r\n    for i in range(len(s)): \r\n        if (s[i] == ch): \r\n            s = s[0 : i] + s[i + 1:] \r\n            break\r\n    for i in range(len(s) - 1,-1,-1):  \r\n        if (s[i] == ch): \r\n            s = s[0 : i] + s[i + 1:] \r\n            break\r\n    return s ',
 'test_list': ['assert remove_Occ("hello","l") == "heo"',
  'assert remove_Occ("abcda","a") == "bcd"',
  'assert remove_Occ("PHP","P") == "H"'],
 'test_setup_code': '',
 'challenge_test_list': ['assert remove_Occ("hellolloll","l") == "helollol"',
  'assert remove_Occ("","l") == ""']}

**Method for Saving Dataset splits in JSON format.**

In [14]:
import json
import os

def save_model_data(dataset, model_name, datasetType):
    if datasetType == "test":
        dataset_list = list(map(lambda item: {'text': item['text'], 'code': item['code']}, dataset))
    else:
        dataset_list = [{'prompt': item} for item in dataset['prompt']]

    # Define directory & file name
    directory = "/content/drive/MyDrive/ColabResults/"
    file_name = f"{directory}{model_name}_{datasetType}_data.json"

    # ✅ Create the directory if it does not exist
    if not os.path.exists(directory):
        os.makedirs(directory)
        print(f"Created directory: {directory}")

    # ✅ Overwrite existing file
    if os.path.exists(file_name):
        print(f"File '{file_name}' already exists. Overriding its content.")

    # ✅ Save dataset as JSON
    with open(file_name, 'w') as json_file:
        json.dump(dataset_list, json_file, indent=4)

    print(f"Dataset saved to '{file_name}'")


Loading Test Split from Dataset

In [15]:
test_dataset = dataset["test"].shuffle(seed=42).select([i for i in range(130)])

In [16]:
test_dataset

Dataset({
    features: ['task_id', 'text', 'code', 'test_list', 'test_setup_code', 'challenge_test_list'],
    num_rows: 130
})

Calling save_model_data method to save test split in JSON format

In [17]:
model_name = 'Model'
datasetType = "test"
save_model_data(test_dataset,model_name,datasetType)

Created directory: /content/drive/MyDrive/ColabResults/
Dataset saved to '/content/drive/MyDrive/ColabResults/Model_test_data.json'


In [18]:
import re
def clean_text(text):
    # Remove special characters and extra spaces
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)
    text = re.sub(r'\t+', ' ', text)
    text = re.sub(r'\n+', ' ', text)
    text = re.sub(r'\r+', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

In [19]:
def create_prompt_formats(data_point):

    data_point['text'] = clean_text(data_point['text'])
    data_point['code'] = clean_text(data_point['code'])

    full_prompt = f""" Below is an instruction that describes a python code writing task. Write a response that appropriately passes the test cases.
    ### Instruction: {data_point['text'].strip()}
    ### Response:
    {data_point['code'].strip()}
    """.strip()
    return full_prompt;

In [22]:

def generate(model,instruction, maxlen=128):

    instruction = clean_text(instruction)
    full_prompt = f"""
        Below is an instruction that describes a python code writing task. Write a response that appropriately passes the test cases.
        ### Instruction: {instruction.strip()}
        ### Response:
        """
    device = model.device  # Automatically get the model's device
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)


    with torch.no_grad():
        generation_output = model.generate(
            **inputs,
            max_new_tokens=maxlen,
            )

        result = tokenizer.decode(generation_output[0], skip_special_tokens=True)
        if result:
            output = result.strip()

            # Using regular expression to extract only the response section
            response_section = re.search(r'### Response:\n(.+?)(?=# Test Cases\n|\Z)', output, re.DOTALL)


            if response_section:
                model_response = response_section.group(1).strip()
                return model_response;
            else:
                print("Response section not found.")
        else:
            print("Failed to generate the response.")

    return ""


**Test the model with Zero Shot Inferencing - Model A**

In [23]:
%%time

model_response = ''
text_input = test_dataset[2]["text"]

instruction = text_input
print("instruction ", instruction)


result = generate(model, instruction);
print("result",result)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


instruction  Write a python function to find the first natural number whose factorial is divisible by x.
result def first_factorial_divisible_by_x(x):
            n = 1
            while True:
                if math.factorial(n) % x == 0:
                    return n
                n += 1
        
        # Test cases
        assert first_factorial_divisible_by_x(2) == 1
        assert first_factorial_divisible_by_x(3) == 2
        assert first_factorial_divisible_by_x(4) == 3
        assert first_factorial_divisible_by_x(
CPU times: user 10.7 s, sys: 353 ms, total: 11.1 s
Wall time: 15 s


**Installing Codebleu Library**

In [24]:
!pip install codebleu

Collecting codebleu
  Downloading codebleu-0.7.0-py3-none-any.whl.metadata (8.1 kB)
Collecting tree-sitter<0.23.0,>=0.22.0 (from codebleu)
  Downloading tree_sitter-0.22.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Downloading codebleu-0.7.0-py3-none-any.whl (31 kB)
Downloading tree_sitter-0.22.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (544 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m544.2/544.2 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tree-sitter, codebleu
Successfully installed codebleu-0.7.0 tree-sitter-0.22.3


Method for evaluating Model Code Response and Ground Truth Code.

In [25]:
from codebleu import calc_codebleu

def codebleu_evaluation(original_output,model_response):
    ref = original_output
    pred = model_response
    res = calc_codebleu([ref], [pred], "python")
    if(res):
        return res['codebleu'];


This "final_Evaluation_results" used for storing Codebleu Accuracy Values

In [26]:
final_Evaluation_results = {}

**Method to Calculate Model's Accuracy against respective dataset :**

In [27]:
def calculate_model_accuracy(model,dataset):
   evaluation_readings = []
   model_accuracy = 0;

   with torch.no_grad():
      for data_point in dataset:
         instruction =  data_point["text"];
         model_response  = generate(model,instruction);

         if model_response  :
            evaluation_value = codebleu_evaluation(data_point["code"],model_response)
            evaluation_readings.append(evaluation_value)


   if(evaluation_readings):
      totalNoOfReadings = len(evaluation_readings);
      sum = 0;
      for reading in evaluation_readings:
         sum += reading;

      model_accuracy = sum/totalNoOfReadings;
   return {"accuracy" : model_accuracy, "evaluation_readings":evaluation_readings};


**Evaluate the Model A**

Calculated & Added ModelA accuracy into "final_Evaluation_results"

In [35]:
!pip uninstall -y tree-sitter-python tree-sitter
!pip install tree-sitter tree-sitter-languages


[0mFound existing installation: tree-sitter 0.24.0
Uninstalling tree-sitter-0.24.0:
  Successfully uninstalled tree-sitter-0.24.0
Collecting tree-sitter
  Using cached tree_sitter-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.8 kB)
Using cached tree_sitter-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (575 kB)
Installing collected packages: tree-sitter
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
codebleu 0.7.0 requires tree-sitter<0.23.0,>=0.22.0, but you have tree-sitter 0.24.0 which is incompatible.[0m[31m
[0mSuccessfully installed tree-sitter-0.24.0


In [40]:
def evaluate_simple(model_response, expected_output):
    return int(expected_output.strip() in model_response.strip())

# Example Usage
instruction = test_dataset[2]["text"]
expected_code = test_dataset[2]["code"]
generated_code = generate(model, instruction)

score = evaluate_simple(generated_code, expected_code)
print("Pass" if score else "Fail")


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Fail


**Method for Adding Model's Accuracy permanentaly into the CSV file.**

In [43]:
import os
import pandas as pd

def add_Evaluation_Result(data_object):
    file_path = '/content/drive/MyDrive/ColabResults/final_evaluation_results.csv'

    # ✅ Check if the data_object is empty
    if not data_object:
        print("Error: No evaluation results found. Ensure evaluation is performed before saving.")
        return

    modelName = list(data_object.keys())[0]  # Extract model name
    accuracy = data_object[modelName].get('accuracy', None)  # Extract accuracy safely

    if accuracy is None:
        print(f"Error: Accuracy not found for model {modelName}.")
        return

    file_exists = os.path.exists(file_path)

    if file_exists:
        try:
            existing_df = pd.read_csv(file_path)
        except pd.errors.EmptyDataError:
            existing_df = pd.DataFrame(columns=['Model', 'Accuracy'])
    else:
        existing_df = pd.DataFrame(columns=['Model', 'Accuracy'])

    new_df = pd.DataFrame({'Model': [modelName], 'Accuracy': [accuracy]})
    combined_df = pd.concat([existing_df, new_df], ignore_index=True)

    combined_df.to_csv(file_path, index=False)
    print(f"✅ Data for {modelName} with accuracy {accuracy} has been saved in final_evaluation_results.csv.")


Added ModelA accuracy into CSV file.

In [44]:
add_Evaluation_Result(final_Evaluation_results)

Error: No evaluation results found. Ensure evaluation is performed before saving.


**Preprocessing of Dataset :**

In [45]:
def convert_to_instruction_format(data_point):
    return {"prompt": create_prompt_formats(data_point)}

In [46]:
def process_dataset(data):
    dataset =  data.map(convert_to_instruction_format).remove_columns(['task_id', 'text', 'code', 'test_list', 'test_setup_code', 'challenge_test_list'])
    return dataset;


Loading Train Data :

In [47]:
train_data = process_dataset(dataset["train"].shuffle(seed=42).select([i for i in range(360)]))

Map:   0%|          | 0/360 [00:00<?, ? examples/s]

Save Train Data split into JSON file

In [48]:
model_name = 'ModelB'
datasetType = "train"
save_model_data(train_data,model_name,datasetType)

Dataset saved to '/content/drive/MyDrive/ColabResults/ModelB_train_data.json'


Loading Validation Data :

In [49]:
validation_data  = process_dataset(dataset["validation"].shuffle(seed=42).select([i for i in range(60)]))

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Save Validation Data split into JSON file.

In [50]:
model_name = 'ModelB'
datasetType = "validation"
save_model_data(validation_data,model_name,datasetType)

Dataset saved to '/content/drive/MyDrive/ColabResults/ModelB_validation_data.json'


Train Data Sample after preprocessing :

In [51]:
train_data["prompt"]

['Below is an instruction that describes a python code writing task. Write a response that appropriately passes the test cases.\n    ### Instruction: Write a function to check if the given tuple has any none value or not.\n    ### Response:\n    def check_none(test_tup): res = any(map(lambda ele: ele is None, test_tup)) return (res)',
 "Below is an instruction that describes a python code writing task. Write a response that appropriately passes the test cases.\n    ### Instruction: Write a function to convert camel case string to snake case string.\n    ### Response:\n    def camel_to_snake(text): import re str1 = re.sub('(.)([A-Z][a-z]+)', r'\\1_\\2', text) return re.sub('([a-z0-9])([A-Z])', r'\\1_\\2', str1).lower()",
 'Below is an instruction that describes a python code writing task. Write a response that appropriately passes the test cases.\n    ### Instruction: Write a function to find if there is a triplet in the array whose sum is equal to a given value.\n    ### Response:\n   

**Preparing the model for QLoRA :**

In [52]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
model = prepare_model_for_kbit_training(model)

In [53]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )



 **Set Up LoRA :**
Now, to start my fine-tuning, i have to apply some preprocessing to the model to prepare it for training. Let's set up our LoRA layers.

In [54]:

lora_config = LoraConfig(
    r=16,                                                       # The rank of the LoRA matrices A, B
    lora_alpha=64,                                              # Scales the product AB
   target_modules=[ "Wqkv",
        "fc1",
        "fc2"],                                                 # Will apply LoRA to attention matrices
    lora_dropout=0.05,                                          # Dropout to help lessen overfitting
    bias="none",                                                # Don't train the bias parameter
    task_type="CAUSAL_LM"
)

#Get model with unfrozen LoRA layers

model = get_peft_model(model, lora_config)
print_trainable_parameters(model)

trainable params: 13107200 || all params: 1534499840 || trainable%: 0.8541675703270194


Here I define the LoRA config.

r is the rank of the low-rank matrix used in the adapters, which thus controls the number of parameters trained. A higher rank will allow for more expressivity, but there is a compute tradeoff.

alpha is the scaling factor for the learned weights. The weight matrix is scaled by alpha/r, and thus a higher value for alpha assigns more weight to the LoRA activations.

The values used in the QLoRA paper were r=64 and lora_alpha=16, and these are said to generalize well, but i will use r=16 and lora_alpha=64 so that we have more emphasis on the new fine-tuned data while also reducing computational complexity.

Training Arguments

In [61]:
from transformers import TrainingArguments

project = "finetune2"
base_model_name = "llm2"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name

training_arguments = TrainingArguments(
    fp16=True,                           # Training computations in 16 bits
    # batch-related
    per_device_train_batch_size=1,       # Batch Size
    gradient_accumulation_steps=4,       # Batch Size (Mathematically)
    # optimizer-related
    optim="paged_adamw_32bit",           # Variant of AdamW designed to be more efficient on 32-bit GPUs
    learning_rate=1e-4,                  # Learning Rate
    warmup_ratio=0.05,                   # After 5% of the data, learning rate has linearly  from 0 to 1e-4
    lr_scheduler_type="linear",          # Adjust learning rate sinusoidally
    max_grad_norm=0.3,                   # Clip gradients if less than 0.3 (prevent gradient explosion)
    # epochs and saving
    num_train_epochs=2,                  # Number of Epochs
    save_strategy="epoch",               # Save after each epoch
    output_dir=output_dir,        # Where to save the model
    # validation
    evaluation_strategy="steps",         # For the next argument
    eval_steps=0.2,                      # Evaluate after 20% of training steps
    # logging-related
    logging_steps=1,                     # Number of update steps between two logs
    group_by_length=True,                # Minimize padding by grouping sentences of similar length
    seed=42,                             # For consistent results
    weight_decay=0.01
)
model.gradient_checkpointing_enable()    # Store less activations and recompute later
model.config.use_cache = False           # Disable using attention output cache. Should be enabled in inference.



**Fine tuned Model A on training dataset :**

In [73]:
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

response_template = "### Response:"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data.rename_columns({"prompt": "text"}),  # Rename "prompt" to "text"
    eval_dataset=validation_data.rename_columns({"prompt": "text"}),
    peft_config=lora_config,
    args=training_arguments,
    data_collator=collator
)





In [74]:
trainer.train()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msoureesh1211[0m ([33msoureesh1211-university-of-illinois-chicago[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss
36,0.6093,0.741288
72,0.8043,0.67813
108,0.9194,0.674724
144,0.7057,0.657462
180,0.4706,0.68032


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


TrainOutput(global_step=180, training_loss=0.753661371436384, metrics={'train_runtime': 502.547, 'train_samples_per_second': 1.433, 'train_steps_per_second': 0.358, 'total_flos': 1223677417697280.0, 'train_loss': 0.753661371436384})

In [96]:
peft_model_path = "/content/drive/MyDrive/ColabResults/fine-tuned-modelA"
trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)


('/content/drive/MyDrive/ColabResults/fine-tuned-modelA/tokenizer_config.json',
 '/content/drive/MyDrive/ColabResults/fine-tuned-modelA/special_tokens_map.json',
 '/content/drive/MyDrive/ColabResults/fine-tuned-modelA/vocab.json',
 '/content/drive/MyDrive/ColabResults/fine-tuned-modelA/merges.txt',
 '/content/drive/MyDrive/ColabResults/fine-tuned-modelA/added_tokens.json',
 '/content/drive/MyDrive/ColabResults/fine-tuned-modelA/tokenizer.json')

In [97]:
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

tuned_modelA = AutoPeftModelForCausalLM.from_pretrained(peft_model_path, low_cpu_mem_usage=True, torch_dtype=torch.float16, load_in_4bit=True)
tokenizerA = AutoTokenizer.from_pretrained(peft_model_path)


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [98]:
test_input = "Write a Python function to compute the factorial of a number."
response = generate_tuned_model(tuned_modelA, tokenizerA, test_input)
print(f"Fine-Tuned Model Response:\n{response}")


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Fine-Tuned Model Response:
def factorial(n): if (n == 1): return 1 else: return n * factorial(n - 1) n = int(input("Enter a number: ")) if (n < 0): return -1 else: return factorial(n) n = int(input("Enter a number: ")) if (n < 0): return -1 else: return factorial(n) n = int(input("Enter a number: ")) if (n < 0): return -1 else: return factorial(n) n = int(input("Enter a number: ")) if (n
