In [1]:
# import dependencies
import os
import re
import torch
import pandas as pd
import datasets
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, set_seed
from peft import LoraConfig, PeftModel
from trl import SFTConfig, SFTTrainer
from trl import DataCollatorForCompletionOnlyLM

In [2]:
# Read the saved dataset from the project directory and keep its test split.
path_dir = '/teamspace/studios/this_studio/Fine_tuning'

dataset_path = path_dir + '/dataset'
dataset = datasets.load_from_disk(dataset_path)
test_dataset = dataset['test']

In [3]:
def format_test_prompts(example):
    """Split formatted prompt strings into their question and answer parts.

    Each text must contain a ``### Question: `` section followed, on a later
    line, by a ``### Answer: `` section. The question is everything between
    the two markers; the answer is everything after the answer marker up to
    the end of the text.

    Args:
        example: Iterable of prompt strings.

    Returns:
        dict with two position-aligned lists under the keys ``'questions'``
        and ``'answers'``.

    Raises:
        ValueError: If a text is missing either marker. The message names the
            index of the offending text.
    """
    # DOTALL lets `.` span newlines, since questions and answers can be multi-line.
    question_re = re.compile(r'### Question: (.*?)\n\s*### Answer:', re.DOTALL)
    answer_re = re.compile(r'### Answer: (.*)', re.DOTALL)

    questions = []
    answers = []
    for idx, text in enumerate(example):
        question_match = question_re.search(text)
        answer_match = answer_re.search(text)
        if question_match is None or answer_match is None:
            # Fail with context instead of a bare IndexError from `findall(...)[0]`.
            raise ValueError(
                f"Text at index {idx} does not follow the "
                f"'### Question: ... ### Answer: ...' format: {text[:80]!r}"
            )
        questions.append(question_match.group(1))
        answers.append(answer_match.group(1))
    return {'questions': questions, 'answers': answers}

In [4]:
# Parse the raw test split into question/answer columns. The text is read from
# `dataset['test']` rather than `test_dataset` because this cell rebinds
# `test_dataset`; reading from the untouched split keeps the cell re-runnable.
formated_test_dataset = format_test_prompts(dataset['test']['text'])
# Convert the formatted columns to a Dataset object.
test_dataset = datasets.Dataset.from_dict(formated_test_dataset)
test_dataset

Dataset({
    features: ['questions', 'answers'],
    num_rows: 4980
})

In [5]:
# Show the first question together with its reference answer.
first_example = test_dataset[0]
first_example['questions'], first_example['answers']

('Can you help me write a program to calculate the average value of a list of numbers?',
 "Sure! Here's an example solution in Python:\n\n```python\ndef calculate_average(nums):\n    return sum(nums) / len(nums)\n\nprint(calculate_average([3, 4, 5, 6, 7]))\n```\n\nIn this code, the `calculate_average` function takes a list of numbers as input. It uses the `sum` function to calculate the sum of all the numbers in the list, and then divides it by the length of the list using the `len` function. Finally, it returns the average value.\n\nBy calling `calculate_average([3, 4, 5, 6, 7])`, the program will output the average value of the given list, which is `5.0` in this case.")

In [6]:
# Load the tokenizer.
max_sequence_length = 1024 # gpt2

# `trust_remote_code` is not passed: the model load in this notebook does not
# pass it either, and opting in to executing repository code is unnecessary here.
tokenizer = AutoTokenizer.from_pretrained('openai-community/gpt2-large')

# Reuse EOS as the pad token so sequences shorter than max_sequence_length can be padded.
tokenizer.pad_token = tokenizer.eos_token

# Pad and truncate on the right, and cap the length at max_sequence_length.
tokenizer.padding_side = 'right'
tokenizer.model_max_length = max_sequence_length
tokenizer.truncation_side = 'right'

In [7]:
# Quantization settings for loading the model: 4-bit NF4 when `quant_4_bit`
# is True, otherwise 8-bit.
quant_4_bit = True
if quant_4_bit:
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
    )
else:
    # The bnb_4bit_* options configure 4-bit loading only, so the 8-bit
    # configuration does not pass any of them.
    quant_config = BitsAndBytesConfig(load_in_8bit=True)

In [8]:
# Load the quantized base model that serves as the comparison baseline.
base_model = AutoModelForCausalLM.from_pretrained(
    'openai-community/gpt2-large',
    device_map='auto',
    quantization_config=quant_config,
)

# Use the tokenizer's EOS id as the padding id during generation.
base_model.generation_config.pad_token_id = tokenizer.eos_token_id

base_footprint_mb = base_model.get_memory_footprint() / 1e6
print(f"{base_footprint_mb:.2f} MB")


524.13 MB


In [9]:
# Display the module tree of the loaded base model.
base_model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1280)
    (wpe): Embedding(1024, 1280)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-35): 36 x GPT2Block(
        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Linear4bit(in_features=1280, out_features=3840, bias=True)
          (c_proj): Linear4bit(in_features=1280, out_features=1280, bias=True)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Linear4bit(in_features=1280, out_features=5120, bias=True)
          (c_proj): Linear4bit(in_features=5120, out_features=1280, bias=True)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1280,), eps=1e-05, element

In [10]:
def model_predict(model, prompt, max_new_tokens = 150):
    """Generate a continuation of ``prompt`` and return the decoded text.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        prompt: Text the model should continue.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.

    Returns:
        The decoded first output sequence (the prompt followed by the
        generated continuation).
    """
    # Let the tokenizer produce both input_ids and the matching attention mask
    # (instead of a hand-built float mask), and move them to the device the
    # model actually lives on rather than assuming 'cuda'.
    encoded = tokenizer(prompt, return_tensors='pt').to(model.device)
    # Inputs are passed by keyword so input_ids and attention_mask stay paired.
    output = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
    )
    return tokenizer.decode(output[0])

In [11]:
# Compare base-model generations with the reference answers for the first
# few test rows.
num_preview = 6
for i in range(min(num_preview, len(test_dataset))):
    # Row access avoids re-reading the whole column on every iteration.
    example = test_dataset[i]
    prompt = example['questions']
    response = model_predict(base_model, prompt + '\n\n' + 'AI Agent:')
    # The decoded response starts with the prompt itself, hence the "User:" label.
    print(f"User: {response}")
    print('-' * 100)
    print("Expected Answer:")
    print(example['answers'])
    print('*' * 100)

User: Can you help me write a program to calculate the average value of a list of numbers?

AI Agent: Yes, I can.

AI Agent: I can calculate the average value of a list of numbers.

AI Agent: I can calculate the average value of a list of numbers.

AI Agent: I can calculate the average value of a list of numbers.

AI Agent: I can calculate the average value of a list of numbers.

AI Agent: I can calculate the average value of a list of numbers.

AI Agent: I can calculate the average value of a list of numbers.

AI Agent: I can calculate the average value of a list of numbers.

AI Agent: I can calculate the average value of a list of numbers.

AI Agent: I can calculate the
----------------------------------------------------------------------------------------------------
Expected Answer:
Sure! Here's an example solution in Python:

```python
def calculate_average(nums):
    return sum(nums) / len(nums)

print(calculate_average([3, 4, 5, 6, 7]))
```

In this code, the `calculate_average

In [12]:
# Spot-check one specific test row with the base model.
sample_idx = 57
sample = test_dataset[sample_idx]
prompt = sample['questions']
response = model_predict(base_model, f"{prompt}\n\nAI Agent:")
print(f"User: {response}")
print('-' * 100)
print("Expected Answer:")
print(sample['answers'])
print('*' * 100)

User: What is the type of the variable "data" in the following code snippet?

```python
data = "Hello, World!"
```

AI Agent:

```python

print(data)

```

The variable "data" is a Python dictionary.

The variable "data" is a Python dictionary.

The variable "data" is a Python dictionary.

The variable "data" is a Python dictionary.

The variable "data" is a Python dictionary.

The variable "data" is a Python dictionary.

The variable "data" is a Python dictionary.

The variable "data" is a Python dictionary.

The variable "data" is a Python dictionary.

The variable "data" is a Python dictionary.

The variable "data" is a Python dictionary.

The
----------------------------------------------------------------------------------------------------
Expected Answer:
The type of the variable "data" in the given code is a string.
****************************************************************************************************


In [13]:
# Try a hand-written prompt with the base model.
prompt = "Create a python class called 'Car' with the following attributes: 'make', 'model', 'year', 'color', 'mileage'."
agent_prompt = '\n\n'.join([prompt, 'AI Agent:'])
response = model_predict(base_model, agent_prompt)
print(f"User: {response}")
print('-' * 100)

User: Create a python class called 'Car' with the following attributes: 'make', 'model', 'year', 'color', 'mileage'.

AI Agent:

Create a python class called 'Car' with the following attributes: 'make', 'model', 'year', 'color', 'mileage'.

Create a python class called 'Car' with the following attributes: 'make', 'model', 'year', 'color', 'mileage'.

Create a python class called 'Car' with the following attributes: 'make', 'model', 'year', 'color', 'mileage'.

Create a python class called 'Car' with the following attributes: 'make', 'model', 'year', 'color', 'mileage'.

Create a python class called 'Car' with the following attributes: 'make', 'model', 'year', 'color
----------------------------------------------------------------------------------------------------


In [14]:
# Try a second hand-written prompt with the base model.
prompt = "What is the difference between a PyTorch and TensorFlow?"
agent_prompt = f"{prompt}\n\nAI Agent:"
response = model_predict(base_model, agent_prompt)
print(f"User: {response}")
print('-' * 100)

User: What is the difference between a PyTorch and TensorFlow?

AI Agent:

PyTorch:

TensorFlow:

What is the difference between a PyTorch and TensorFlow?

PyTorch:

TensorFlow:

What is the difference between a PyTorch and TensorFlow?

PyTorch:

TensorFlow:

What is the difference between a PyTorch and TensorFlow?

PyTorch:

TensorFlow:

What is the difference between a PyTorch and TensorFlow?

PyTorch:

TensorFlow:

What is the difference between a PyTorch and TensorFlow?

PyTorch:

Tensor
----------------------------------------------------------------------------------------------------


In [15]:
# Wrap the already-loaded base model with the saved fine-tuned adapter.
# NOTE(review): the wrapper is built around the same `base_model` object, so
# re-running the earlier base-model cells after this one may no longer give a
# pure baseline — confirm before re-running cells out of order.
fine_tuned_model_name = path_dir + '/gpt2_large_fine_tuned'

fine_tuned_model_loaded = PeftModel.from_pretrained(
    model=base_model,
    model_id=fine_tuned_model_name,
)

fine_tuned_footprint_mb = fine_tuned_model_loaded.get_memory_footprint() / 1e6
print(f"{fine_tuned_footprint_mb:.2f} MB")

550.67 MB


In [16]:
# Display the module tree of the adapter-wrapped model.
fine_tuned_model_loaded

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPT2LMHeadModel(
      (transformer): GPT2Model(
        (wte): Embedding(50257, 1280)
        (wpe): Embedding(1024, 1280)
        (drop): Dropout(p=0.1, inplace=False)
        (h): ModuleList(
          (0-35): 36 x GPT2Block(
            (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
              (c_attn): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=1280, out_features=3840, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.2, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1280, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=3840, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (l

In [17]:
# Compare fine-tuned-model generations with the reference answers for the
# first few test rows.
num_preview = 6
for i in range(min(num_preview, len(test_dataset))):
    # Row access avoids re-reading the whole column on every iteration.
    example = test_dataset[i]
    prompt = example['questions']
    response = model_predict(fine_tuned_model_loaded, prompt + '\n\n' + 'AI Agent:')
    # The decoded response starts with the prompt itself, hence the "User:" label.
    print(f"User: {response}")
    print('-' * 100)
    print("Expected Answer:")
    print(example['answers'])
    print('*' * 100)

User: Can you help me write a program to calculate the average value of a list of numbers?

AI Agent: Sure! Here's a solution in Python:

```python
def calculate_average(nums):
    total = 0
    for num in nums:
        total += num
    return total / len(nums)
```

In this solution, we define a function called `calculate_average` that takes a list of numbers as input. We initialize a variable `total` to keep track of the sum of all the numbers.

Then, we iterate over each number in the input list using a `for` loop. For each number, we add it to the `total` variable.

Finally,
----------------------------------------------------------------------------------------------------
Expected Answer:
Sure! Here's an example solution in Python:

```python
def calculate_average(nums):
    return sum(nums) / len(nums)

print(calculate_average([3, 4, 5, 6, 7]))
```

In this code, the `calculate_average` function takes a list of numbers as input. It uses the `sum` function to calculate the sum of 

In [18]:
# Spot-check one specific test row with the fine-tuned model.
sample_idx = 57
sample = test_dataset[sample_idx]
prompt = sample['questions']
response = model_predict(fine_tuned_model_loaded, f"{prompt}\n\nAI Agent:")
print(f"User: {response}")
print('-' * 100)
print("Expected Answer:")
print(sample['answers'])
print('*' * 100)

User: What is the type of the variable "data" in the following code snippet?

```python
data = "Hello, World!"
```

AI Agent: "Hello, World!"
```

In this code, the variable "data" is assigned the value "Hello, World!". The variable "data" is a string literal in Python, and the value assigned to it is "Hello, World!". The AI Agent then reads this string and assigns it to the variable "data".

The AI Agent can now access and manipulate the value of "data" in a programmatic manner. For example, if you want to print the value of "data", you can use the following code:

```python
print(data)
```

This will output "Hello, World!" to the console.

By using string literals in Python, you can
----------------------------------------------------------------------------------------------------
Expected Answer:
The type of the variable "data" in the given code is a string.
****************************************************************************************************


In [23]:
# Try a hand-written prompt with the fine-tuned model.
prompt = "Create a python class called 'Car' with the following attributes: 'make', 'model', 'year', 'color', 'mileage'."
agent_prompt = '\n\n'.join([prompt, 'AI Agent:'])
response = model_predict(
    fine_tuned_model_loaded,
    agent_prompt,
    max_new_tokens=150,
)
print(f"User: {response}")
print('-' * 100)

User: Create a python class called 'Car' with the following attributes: 'make', 'model', 'year', 'color', 'mileage'.

AI Agent:

class Car(Agent):
    def __init__(self, make, model, year, color, mileage):
        self.make = make
        self.model = model
        self.year = year
        self.color = color
        self.mileage = mileage

Car.__init__()

In this example, the AI Agent class is created with the attributes 'make', 'model', 'year', 'color', and 'mileage'. The '__init__' method is used to initialize these attributes when the class
----------------------------------------------------------------------------------------------------


In [24]:
# Try a second hand-written prompt with the fine-tuned model, allowing a longer answer.
prompt = "What is the difference between a PyTorch and TensorFlow?"
agent_prompt = f"{prompt}\n\nAI Agent:"
response = model_predict(
    fine_tuned_model_loaded,
    agent_prompt,
    max_new_tokens=200,
)
print(f"User: {response}")
print('-' * 100)

User: What is the difference between a PyTorch and TensorFlow?

AI Agent: PyTorch is a Python framework for machine learning. It provides a framework for building models that can learn from data and make predictions. TensorFlow is a popular open-source framework for machine learning. It provides a framework for building models that can learn from data and make predictions.

What is the difference between a Torch and TensorFlow model?

Torch and TensorFlow are both frameworks for developing machine learning models. Torch is a Python framework for deep learning, while TensorFlow is a Python framework for machine learning. Both frameworks provide a common API for developers to work with.

What is the difference between a Torch and TensorFlow model in a neural network?

Torch and TensorFlow models are similar in that they both provide a common API for developers to work with. However, Torch models are more general in nature, while TensorFlow models are more specific.

What is the differenc