In [1]:
# import dependecies
import os
import re
import torch
import pandas as pd
import datasets
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, set_seed
from peft import LoraConfig, PeftModel
from trl import SFTConfig, SFTTrainer
from trl import DataCollatorForCompletionOnlyLM

In [2]:
# load the dataset
path_dir = '/teamspace/studios/this_studio/Fine_tuning'

dataset = datasets.load_from_disk(path_dir + '/dataset')
test_dataset = dataset['test']

In [3]:
def format_test_prompts(example):
    questions = []
    answers = []
    for text in example:
        question = re.findall(r'### Question: (.*?)\n\s*### Answer:', text, re.DOTALL)[0]
        answer = re.findall(r'### Answer: (.*)', text, re.DOTALL)[0]
        
        questions.append(question)
        answers.append(answer)
    return {'questions': questions, 'answers': answers}

In [4]:
formated_test_dataset = format_test_prompts(test_dataset['text'])
# convert the formated test dataset to a dataset object
test_dataset = datasets.Dataset.from_dict(formated_test_dataset)
test_dataset

Dataset({
    features: ['questions', 'answers'],
    num_rows: 4980
})

In [5]:
test_dataset['questions'][0], test_dataset['answers'][0]

('Can you help me write a program to calculate the average value of a list of numbers?',
 "Sure! Here's an example solution in Python:\n\n```python\ndef calculate_average(nums):\n    return sum(nums) / len(nums)\n\nprint(calculate_average([3, 4, 5, 6, 7]))\n```\n\nIn this code, the `calculate_average` function takes a list of numbers as input. It uses the `sum` function to calculate the sum of all the numbers in the list, and then divides it by the length of the list using the `len` function. Finally, it returns the average value.\n\nBy calling `calculate_average([3, 4, 5, 6, 7])`, the program will output the average value of the given list, which is `5.0` in this case.")

In [6]:
quant_4_bit = True
if quant_4_bit:
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
)
else:
    quant_config = BitsAndBytesConfig(
        load_in_8bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
)

In [7]:
# load the tokenizer
max_sequence_length = 1024 # gpt2

tokenizer = AutoTokenizer.from_pretrained(
    'openai-community/gpt2',
    trust_remote_code = True
)
## pad the sequence if it is < max_sequence_length
tokenizer.pad_token = tokenizer.eos_token

tokenizer.padding_side = 'right'

tokenizer.model_max_length = max_sequence_length

tokenizer.truncation_side = 'right'

In [8]:
fine_tuned_model_name = path_dir + '/gpt2_large_fine_tuned'


model = AutoModelForCausalLM.from_pretrained(
        'openai-community/gpt2',
        quantization_config = quant_config,
        device_map = 'auto',
)
fine_tuned_model_loaded = PeftModel.from_pretrained(model, fine_tuned_model_name)

model.generation_config.pad_token_id = tokenizer.eos_token_id
print(f"{(fine_tuned_model_loaded.get_memory_footprint() / 1e6):.2f} MB")

# 134.06 MB = using quant_4_bit
# 176.53 MB = using quant_8_bit
# 510.34 MB = without quantization


RuntimeError: Error(s) in loading state_dict for PeftModelForCausalLM:
	size mismatch for base_model.model.transformer.h.0.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.0.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.0.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.0.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).
	size mismatch for base_model.model.transformer.h.1.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.1.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.1.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.1.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).
	size mismatch for base_model.model.transformer.h.2.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.2.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.2.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.2.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).
	size mismatch for base_model.model.transformer.h.3.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.3.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.3.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.3.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).
	size mismatch for base_model.model.transformer.h.4.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.4.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.4.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.4.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).
	size mismatch for base_model.model.transformer.h.5.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.5.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.5.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.5.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).
	size mismatch for base_model.model.transformer.h.6.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.6.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.6.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.6.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).
	size mismatch for base_model.model.transformer.h.7.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.7.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.7.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.7.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).
	size mismatch for base_model.model.transformer.h.8.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.8.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.8.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.8.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).
	size mismatch for base_model.model.transformer.h.9.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.9.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.9.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.9.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).
	size mismatch for base_model.model.transformer.h.10.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.10.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.10.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.10.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).
	size mismatch for base_model.model.transformer.h.11.attn.c_attn.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.11.attn.c_attn.lora_B.default.weight: copying a param with shape torch.Size([3840, 16]) from checkpoint, the shape in current model is torch.Size([2304, 16]).
	size mismatch for base_model.model.transformer.h.11.mlp.c_fc.lora_A.default.weight: copying a param with shape torch.Size([16, 1280]) from checkpoint, the shape in current model is torch.Size([16, 768]).
	size mismatch for base_model.model.transformer.h.11.mlp.c_fc.lora_B.default.weight: copying a param with shape torch.Size([5120, 16]) from checkpoint, the shape in current model is torch.Size([3072, 16]).

In [None]:
fine_tuned_model_loaded

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPT2LMHeadModel(
      (transformer): GPT2Model(
        (wte): Embedding(50257, 1280)
        (wpe): Embedding(1024, 1280)
        (drop): Dropout(p=0.1, inplace=False)
        (h): ModuleList(
          (0-35): 36 x GPT2Block(
            (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
              (c_attn): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=1280, out_features=3840, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.2, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1280, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=3840, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (l

In [None]:
def model_predict(prompt):
    inputs = tokenizer.encode(prompt, return_tensors='pt').to('cuda')
    attention_mask = torch.ones(inputs.shape, device='cuda')
    output = fine_tuned_model_loaded.generate(inputs, attention_mask=attention_mask, max_new_tokens=150, num_return_sequences=1)
    response = tokenizer.decode(output[0])
    return response

In [None]:
for i in range(len(test_dataset['questions'])):
    prompt = test_dataset['questions'][i]
    response = model_predict(prompt + '\n\n' + 'AI Agent:')
    print(f"User: {response}")
    print('-'* 100)
    print(f"Expected Answer:")
    print(f"{test_dataset['answers'][i]}")
    print("*"* 100)
    if i == 5:
        break

User: Can you help me write a program to calculate the average value of a list of numbers?

AI Agent: Sure! Here's a solution in Python:

```python
def calculate_average(nums):
    total = 0
    for num in nums:
        total += num
    return total / len(nums)
```

In this solution, we define a function called `calculate_average` that takes a list of numbers as input. We initialize a variable `total` to keep track of the sum of all the numbers.

Then, we iterate over each number in the input list using a `for` loop. For each number, we add it to the `total` variable.

Finally,
----------------------------------------------------------------------------------------------------
Expected Answer:
Sure! Here's an example solution in Python:

```python
def calculate_average(nums):
    return sum(nums) / len(nums)

print(calculate_average([3, 4, 5, 6, 7]))
```

In this code, the `calculate_average` function takes a list of numbers as input. It uses the `sum` function to calculate the sum of 

In [None]:
prompt = test_dataset['questions'][57]
response = model_predict(prompt + '\n\n' + 'AI Agent:')
print(f"User: {response}")
print('-'* 100)
print(f"Expected Answer:")
print(f"{test_dataset['answers'][57]}")
print("*"* 100)

User: What is the type of the variable "data" in the following code snippet?

```python
data = "Hello, World!"
```

AI Agent:
    "Hello, World!"
```

In this code, the variable "data" is assigned the value "Hello, World!". The AI agent uses the `data` variable to display the value of the variable "data" on the console.

By executing this code, the AI agent will display the value of "data" on the console. The value of "data" will be displayed on the console when the AI agent is executed.

Please note that this code assumes that the variable "data" is initialized to the value "Hello, World!". If you want to change the value of "data" to "Hello, World!", you can modify the value of "data" to whatever you want
----------------------------------------------------------------------------------------------------
Expected Answer:
The type of the variable "data" in the given code is a string.
****************************************************************************************************
