In [19]:
# import dependencies
import os
import re
import torch
import pandas as pd
import datasets
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, set_seed
from peft import LoraConfig, PeftModel
from trl import SFTConfig, SFTTrainer
from trl import DataCollatorForCompletionOnlyLM

In [20]:
# load the dataset
# The project directory can be overridden through the FINE_TUNING_DIR
# environment variable; the original studio path stays the default so the
# notebook behaves as before when the variable is unset.
# `path_dir` is kept as a plain string because a later cell concatenates onto it.
path_dir = os.environ.get(
    'FINE_TUNING_DIR',
    '/teamspace/studios/this_studio/Fine_tuning',
)

# dataset saved on disk under <path_dir>/dataset; only the 'test' split is used here
dataset = datasets.load_from_disk(os.path.join(path_dir, 'dataset'))
test_dataset = dataset['test']

In [21]:
def format_test_prompts(example):
    """Split formatted prompt strings into question and answer parts.

    Parameters
    ----------
    example : iterable of str
        Texts containing a ``### Question: ...`` section followed, on a later
        line, by a ``### Answer: ...`` section.

    Returns
    -------
    dict
        ``{'questions': [...], 'answers': [...]}`` with one entry per input
        text, in input order.

    Raises
    ------
    IndexError
        If a text does not contain the expected markers.
    """
    question_pattern = r'### Question: (.*?)\n\s*### Answer:'
    answer_pattern = r'### Answer: (.*)'

    # DOTALL lets both captures span multiple lines (e.g. code in answers).
    pairs = [
        (
            re.findall(question_pattern, raw_text, re.DOTALL)[0],
            re.findall(answer_pattern, raw_text, re.DOTALL)[0],
        )
        for raw_text in example
    ]
    return {
        'questions': [question for question, _ in pairs],
        'answers': [answer for _, answer in pairs],
    }

In [22]:
# Parse from the untouched split in `dataset` rather than from `test_dataset`:
# this cell rebinds `test_dataset` to a dataset without a 'text' column, so
# reading from `test_dataset` would make the cell fail when it is re-run.
formated_test_dataset = format_test_prompts(dataset['test']['text'])
# convert the formatted test dataset to a dataset object
test_dataset = datasets.Dataset.from_dict(formated_test_dataset)
test_dataset

Dataset({
    features: ['questions', 'answers'],
    num_rows: 4980
})

In [23]:
# sanity check: show the first parsed (question, answer) pair
tuple(test_dataset[0][column] for column in ('questions', 'answers'))

('Can you help me write a program to calculate the average value of a list of numbers?',
 "Sure! Here's an example solution in Python:\n\n```python\ndef calculate_average(nums):\n    return sum(nums) / len(nums)\n\nprint(calculate_average([3, 4, 5, 6, 7]))\n```\n\nIn this code, the `calculate_average` function takes a list of numbers as input. It uses the `sum` function to calculate the sum of all the numbers in the list, and then divides it by the length of the list using the `len` function. Finally, it returns the average value.\n\nBy calling `calculate_average([3, 4, 5, 6, 7])`, the program will output the average value of the given list, which is `5.0` in this case.")

In [24]:
# Toggle between 4-bit and 8-bit bitsandbytes quantization of the base model.
quant_4_bit = True

if not quant_4_bit:
    # NOTE(review): `bnb_4bit_compute_dtype` is a 4-bit option passed next to
    # `load_in_8bit`; presumably it has no effect here - confirm before removing.
    quant_config = BitsAndBytesConfig(
        load_in_8bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
else:
    # 4-bit "nf4" weights, double quantization, bfloat16 compute dtype
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

In [25]:
# load the tokenizer
max_sequence_length = 1024 # gpt2

# `trust_remote_code` is intentionally not set: the GPT-2 tokenizer is loaded
# through the classes built into transformers, so there is no reason to opt in
# to executing code shipped with the hub repository.
tokenizer = AutoTokenizer.from_pretrained('openai-community/gpt2')

# reuse the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token

# pad and truncate on the right-hand side of the sequence
tokenizer.padding_side = 'right'
tokenizer.truncation_side = 'right'

# cap the tokenizer's maximum length at max_sequence_length
tokenizer.model_max_length = max_sequence_length

In [26]:
# directory holding the saved fine-tuned adapter
fine_tuned_model_name = path_dir + '/gpt2_fine_tuned_model'

# base GPT-2, quantized according to `quant_config`
model = AutoModelForCausalLM.from_pretrained(
    'openai-community/gpt2',
    device_map='auto',
    quantization_config=quant_config,
)

# wrap the base model with the fine-tuned adapter weights
fine_tuned_model_loaded = PeftModel.from_pretrained(model, fine_tuned_model_name)

# pad generated sequences with the EOS token
model.generation_config.pad_token_id = tokenizer.eos_token_id

# report the in-memory size of the loaded model in megabytes
print(f"{(fine_tuned_model_loaded.get_memory_footprint() / 1e6):.2f} MB")

# Previously recorded footprints:
#   134.06 MB = using quant_4_bit
#   176.53 MB = using quant_8_bit
#   510.34 MB = without quantization


139.37 MB


In [27]:
# display the wrapped model's module tree to inspect the LoRA layers
fine_tuned_model_loaded

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPT2LMHeadModel(
      (transformer): GPT2Model(
        (wte): Embedding(50257, 768)
        (wpe): Embedding(1024, 768)
        (drop): Dropout(p=0.1, inplace=False)
        (h): ModuleList(
          (0-11): 12 x GPT2Block(
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
              (c_attn): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=768, out_features=2304, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.2, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=768, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2304, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_e

In [46]:
def model_predict(prompt, max_new_tokens=150, return_full_text=True):
    """Generate text for ``prompt`` with the fine-tuned model.

    Parameters
    ----------
    prompt : str
        Text passed to the model as-is; no prompt template is applied here.
    max_new_tokens : int, optional
        Maximum number of tokens to generate (default 150, the previous
        hard-coded value).
    return_full_text : bool, optional
        If True (default, the previous behavior) the returned string is the
        prompt followed by the generated continuation. If False, only the
        newly generated tokens are decoded.

    Returns
    -------
    str
        The decoded sequence. Special tokens are not stripped.
    """
    # Send inputs to wherever the model's weights live instead of assuming 'cuda'.
    device = next(fine_tuned_model_loaded.parameters()).device

    # Calling the tokenizer returns both input_ids and a matching integer
    # attention mask, so the mask does not have to be built by hand.
    encoded = tokenizer(prompt, return_tensors='pt').to(device)
    input_ids = encoded['input_ids']

    # Keyword arguments keep the call unambiguous when forwarded through the
    # PEFT wrapper to the underlying model's generate().
    output = fine_tuned_model_loaded.generate(
        input_ids=input_ids,
        attention_mask=encoded['attention_mask'],
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
    )

    generated = output[0]
    if not return_full_text:
        # the output starts with the prompt tokens; drop them
        generated = generated[input_ids.shape[-1]:]
    response = tokenizer.decode(generated)
    return response

In [50]:
# Print the model output next to the reference answer for the first few
# test examples (qualitative check only).
num_examples = 4

for i in range(min(num_examples, len(test_dataset))):
    # Fetch a single row instead of materializing the whole 'questions' and
    # 'answers' columns on every iteration.
    row = test_dataset[i]
    prompt = row['questions']
    response = model_predict(prompt)
    print(f"Answer: {response}")
    print('-'* 1000)
    print("Expected Answer:")
    print(f"{row['answers']}")
    print("*"* 1000)

Answer: Can you help me write a program to calculate the average value of a list of numbers?

Here's an example code snippet that calculates the average value of a list of numbers:

```python
def average_value(nums):
    return sum(nums)
```

In this code, the `average_value` function takes a list of numbers as input. It then uses the `sum()` function to calculate the sum of all the elements in the list. The result is then returned as the average value of the list.

To use this function, you can simply call it and pass your list of numbers as an argument. For example, if you have a list `[1, 2, 3, 4, 5]`, you can call `average_
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------