In [1]:
# import dependencies
import os
import pprint
import re

import datasets
import pandas as pd
import torch
from peft import LoraConfig, PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, set_seed
from trl import SFTConfig, SFTTrainer
from trl import DataCollatorForCompletionOnlyLM

In [2]:
def format_test_prompts(example):
    """Return the prompt-only version of every text in ``example``.

    For each text, everything from the first ``### Answer`` marker onwards
    (together with the whitespace directly in front of it) is removed, and
    the remainder is stripped of leading/trailing whitespace.

    Args:
        example: iterable of strings to clean.

    Returns:
        A list of cleaned strings, in the same order as the input.
    """
    # DOTALL lets ``.*`` run across newlines so multi-line answers are dropped whole.
    answer_section = re.compile(r'\s*### Answer.*', flags=re.DOTALL)
    return [answer_section.sub('', text).strip() for text in example]


In [3]:
# load the dataset
# Project root that holds the saved dataset and the fine-tuned model.
# Set the FINE_TUNING_DIR environment variable to run the notebook on another
# machine; the original studio path is kept as the default.
path_dir = os.environ.get('FINE_TUNING_DIR', '/teamspace/studios/this_studio/Fine_tuning')

# Read the dataset saved on disk and keep only its 'test' split.
dataset = datasets.load_from_disk(os.path.join(path_dir, 'dataset'))
test_dataset = dataset['test']

In [4]:
# Remove the '### Answer' section from every test example so only the prompt
# text is left, then place the prompts in a single-column ('text') DataFrame.
# Note: test_dataset is rebound here from the loaded split to a DataFrame.
formated_test_dataset = format_test_prompts(test_dataset['text'])
test_dataset = pd.DataFrame(formated_test_dataset, columns=['text'])

In [5]:
# Convert the prompt-only DataFrame back into a datasets.Dataset.
test_dataset = datasets.Dataset.from_pandas(test_dataset)

In [6]:
# Spot-check one formatted prompt (index 6) to confirm the answer part was removed.
test_dataset['text'][6]


'### Question: Can you provide a program that converts Fahrenheit to Celsius?'

In [7]:
# Toggle between 4-bit (True) and 8-bit (False) bitsandbytes quantization
# for the base model loaded further down.
quant_4_bit = True

# NOTE(review): the 8-bit branch also passes bnb_4bit_compute_dtype; by its name
# this looks 4-bit specific — confirm whether it is needed there.
quant_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
    )
    if quant_4_bit
    else BitsAndBytesConfig(
        load_in_8bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
)

In [8]:
# load the tokenizer
max_sequence_length = 1024 # gpt2

# GPT-2's tokenizer ships with transformers, so trust_remote_code is not needed;
# leaving it off avoids executing arbitrary code fetched from the Hub.
tokenizer = AutoTokenizer.from_pretrained('openai-community/gpt2')

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Pad on the right-hand side of the sequence.
tokenizer.padding_side = 'right'

# Cap tokenized sequences at max_sequence_length tokens.
tokenizer.model_max_length = max_sequence_length

# When truncating, drop tokens from the right-hand end.
tokenizer.truncation_side = 'right'

In [9]:
# Location of the fine-tuned weights under path_dir; handed to
# PeftModel.from_pretrained below.
fine_tuned_model_name = path_dir + '/gpt2_fine_tuned_model'


# Load the base GPT-2 checkpoint using the quantization config chosen earlier;
# device placement is left to device_map='auto'.
model = AutoModelForCausalLM.from_pretrained(
        'openai-community/gpt2',
        quantization_config = quant_config,
        device_map = 'auto',
        # local_files_only = True,
)
# Wrap the base model with the PEFT weights stored at fine_tuned_model_name.
fine_tuned_model_loaded = PeftModel.from_pretrained(model, fine_tuned_model_name)

# Use the EOS token id as the padding id during generation, matching the
# tokenizer, whose pad token is also set to EOS.
model.generation_config.pad_token_id = tokenizer.eos_token_id
# Report the loaded model's memory footprint, scaled by 1e6 and labelled MB.
print(f"{(fine_tuned_model_loaded.get_memory_footprint() / 1e6):.2f} MB")

# Reference figures recorded by the author from earlier runs:
# 134.06 MB = using quant_4_bit
# 176.53 MB = using quant_8_bit
# 510.34 MB = without quantization


139.37 MB


In [10]:
# Display the module structure of the loaded PEFT model.
fine_tuned_model_loaded

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPT2LMHeadModel(
      (transformer): GPT2Model(
        (wte): Embedding(50257, 768)
        (wpe): Embedding(1024, 768)
        (drop): Dropout(p=0.1, inplace=False)
        (h): ModuleList(
          (0-11): 12 x GPT2Block(
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
              (c_attn): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=768, out_features=2304, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.2, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=768, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2304, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_e

In [11]:
def model_predict(prompt, max_new_tokens=200, seed=42):
    """Generate a completion for ``prompt`` with the fine-tuned model.

    Relies on the notebook-level ``tokenizer`` and ``fine_tuned_model_loaded``.

    Args:
        prompt: text to continue.
        max_new_tokens: maximum number of tokens to generate after the prompt
            (defaults to 200).
        seed: value passed to ``set_seed`` before generating (defaults to 42).

    Returns:
        The decoded first generated sequence as a string. The whole sequence
        is decoded, so the returned text starts with the prompt itself.
    """
    set_seed(seed)

    # Follow the device the model was actually placed on (it is loaded with
    # device_map='auto') instead of assuming 'cuda'.
    device = fine_tuned_model_loaded.device

    # Calling the tokenizer returns input_ids together with a matching integer
    # attention_mask, so the mask does not have to be built by hand.
    encoded = tokenizer(prompt, return_tensors='pt').to(device)

    # Keyword arguments only: unambiguous for the PEFT wrapper's generate().
    output = fine_tuned_model_loaded.generate(
        input_ids=encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
    )
    return tokenizer.decode(output[0])

In [14]:
# Manual check: ask the fine-tuned model for a factorial program.
# pprint is imported once in the notebook's import cell.
# NOTE(review): the formatted test prompts start with '### Question:'; this
# prompt omits that prefix — confirm whether that is intentional.
prompt = 'Write a Python program to find the factorial of a number provided by the user.'
print(prompt)
response = model_predict(prompt)
pprint.pprint(response)

Write a Python program to find the factorial of a number provided by the user.


('Write a Python program to find the factorial of a number provided by the '
 "user. Here's an example code snippet:\n"
 '\n'
 '```python\n'
 'def factorial(n):\n'
 '    if n == 0:\n'
 '        return 1\n'
 '    else:\n'
 '        return n * factorial(n-1)\n'
 '```\n'
 '\n'
 'In this code, the `factorial` function takes a parameter `n`, which '
 'represents the number you want to find the factorial of. It then checks if '
 '`n` is equal to 0. If it is, it returns 1, otherwise it returns 0.\n'
 '\n'
 'To use this function, simply call it and pass the desired number as an '
 'argument. For example, if you want to find the factorial of 5, you can do:\n'
 '\n'
 '```python\n'
 'factorial(5)\n'
 '```\n'
 '\n'
 'The function will then return the factorial of 5, which is 1.\n'
 '\n'
 'Feel free to modify the code according to your')


In [16]:
# Manual check: ask the fine-tuned model for a program that sums a list.
# pprint is imported once in the notebook's import cell.
# NOTE(review): the formatted test prompts start with '### Question:'; this
# prompt omits that prefix — confirm whether that is intentional.
prompt = 'Write a Python program to find the sum of the list of random numbers.'
print(prompt)
response = model_predict(prompt)
pprint.pprint(response)

Write a Python program to find the sum of the list of random numbers.


("Write a Python program to find the sum of the list of random numbers. Here's "
 'an example code snippet:\n'
 '\n'
 '```python\n'
 'def find_sum(nums):\n'
 '    sum = 0\n'
 '    for num in nums:\n'
 '        sum += num\n'
 '    return sum\n'
 '```\n'
 '\n'
 'In this code, the `find_sum` function takes a list of numbers as input. It '
 'initializes a variable `sum` to 0. Then, it iterates through each number in '
 'the list using a `for` loop. Inside the loop, it adds each number to the '
 '`sum` variable. Finally, it returns the `sum` as the result.\n'
 '\n'
 'To use this function, you can simply call it and pass your desired list of '
 'numbers as an argument. For example, `find_sum(5, 10)` would return the sum '
 'of 5 and 10.\n'
 '\n'
 'Feel free to modify the code to suit your specific requirements. Let me know '
 'if you have')


In [17]:
# Manual check: ask the fine-tuned model for a list of random numbers.
# pprint is imported once in the notebook's import cell.
# NOTE(review): the formatted test prompts start with '### Question:'; this
# prompt omits that prefix — confirm whether that is intentional.
prompt = 'Give me a list of random numbers between 1 and 100.'
print(prompt)
response = model_predict(prompt)
pprint.pprint(response)

Give me a list of random numbers between 1 and 100.
('Give me a list of random numbers between 1 and 100.\n'
 '\n'
 "Here's an example code snippet that demonstrates how to generate random "
 'numbers between 1 and 100:\n'
 '\n'
 '```python\n'
 'random_num = random.randint(1, 100)\n'
 'print(random_num)\n'
 '```\n'
 '\n'
 'In this code, we first define a list called `random_num` containing the '
 'numbers 1, 100, and 1. Then, we use the `random.randint()` function to '
 'generate a random integer between 1 and 100. Finally, we print the generated '
 'random number using the `print()` function.\n'
 '\n'
 'When you run this code, it will output the following:\n'
 '\n'
 '```\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n'
 '1\n')
