In [1]:
# Show the GPU attached to this runtime (model, driver/CUDA version, memory use).
! nvidia-smi

Fri Aug  4 02:51:30 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

#### Parameter-Efficient Fine-tuning (PEFT)
PEFT approaches let you reach performance comparable to full fine-tuning while training only a small number of parameters. See the [Hugging Face PEFT blog post](https://huggingface.co/blog/peft).

In [None]:
! pip install transformers datasets accelerate peft

In [3]:
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import LoraConfig, get_peft_model, TaskType
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq

In [4]:
# Load the train and test splits from the parquet files under the Drive "Chatbot" folder.
train_df, test_df = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        "/content/drive/MyDrive/Chatbot/train.parquet",
        "/content/drive/MyDrive/Chatbot/test.parquet",
    )
)

In [5]:
# Convert the pandas DataFrames into Hugging Face `Dataset` objects.
train_data, test_data = Dataset.from_pandas(train_df), Dataset.from_pandas(test_df)
train_data  # display the summary of the training split

Dataset({
    features: ['Human', 'Assistant'],
    num_rows: 15000
})

In [6]:
# Checkpoint identifier; the tokenizer and the seq2seq model are both loaded from it.
model_id="google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Model card: https://huggingface.co/google/flan-t5-large

In [7]:
# text preprocess function
def preprocess_function(sample, padding="max_length"):
    """Tokenize a batch of Human/Assistant pairs into model inputs and labels.

    Args:
        sample: Batch mapping with "Human" (prompts) and "Assistant" (targets)
            entries, as passed by `Dataset.map(..., batched=True)`.
        padding: Padding strategy forwarded to the tokenizer for both the
            prompts and the targets.

    Returns:
        The tokenizer output for the prompts with an extra "labels" entry
        holding the tokenized targets. Prompts and targets are both truncated
        to at most 256 tokens. When `padding` is "max_length", every pad token
        id in the labels is replaced by -100.
    """
    encoded = tokenizer(sample["Human"], max_length=256, padding=padding, truncation=True)
    target_ids = tokenizer(
        sample["Assistant"], max_length=256, padding=padding, truncation=True
    )["input_ids"]

    if padding == "max_length":
        # Only fixed-length padding inserts pad tokens here, so only then are they masked.
        pad_id = tokenizer.pad_token_id
        target_ids = [
            [-100 if token == pad_id else token for token in sequence]
            for sequence in target_ids
        ]

    encoded["labels"] = target_ids
    return encoded

In [8]:
def _tokenize_split(split):
    """Run `preprocess_function` over `split` in batches, dropping its original columns."""
    return split.map(preprocess_function, batched=True, remove_columns=split.column_names)


train_tokenized_dataset = _tokenize_split(train_data)
test_tokenized_dataset = _tokenize_split(test_data)
print(f"Keys of tokenized dataset: {list(train_tokenized_dataset.features)}")

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Keys of tokenized dataset: ['input_ids', 'attention_mask', 'labels']


In [10]:
# Low-Rank Adaptation (LoRA) settings; background: https://lightning.ai/pages/community/tutorial/lora-llm/
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,  # adapters are built for a sequence-to-sequence LM
    r=16,                             # rank of the low-rank update matrices
    lora_alpha=32,                    # LoRA scaling factor
    target_modules=["q", "v"],        # names of the modules that receive adapters
    lora_dropout=0.1,                 # dropout applied inside the LoRA layers
    bias="none",                      # no bias terms are trained
)

In [11]:
# Wrap the base model with the LoRA adapters described by `lora_config`, then report
# how many parameters remain trainable.
# NOTE(review): `model` is rebound to its own wrapped version, so re-running this cell
# would wrap an already-wrapped model; reload the base model before re-running.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 4,718,592 || all params: 787,868,672 || trainable%: 0.5989059049678777


In [14]:
# Label padding value; the same -100 that preprocess_function writes over padded label positions.
label_pad_token_id = -100
# Collator that assembles the tokenized examples into batches for the seq2seq trainer.
# NOTE(review): examples are already padded to 256 tokens during preprocessing, so the
# collator's own padding (to a multiple of 8) presumably has nothing left to add — confirm.
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    label_pad_token_id=label_pad_token_id,
    pad_to_multiple_of=8
)

In [12]:
from huggingface_hub import notebook_login
# Interactive Hugging Face Hub login; the training arguments in this notebook set push_to_hub=True.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [15]:
output_dir = "/content/drive/MyDrive/Chatbot/lora-flan-t5-large-chat"
training_args = Seq2SeqTrainingArguments(
    # Where checkpoints and logs are written.
    output_dir=output_dir,
    logging_dir=f"{output_dir}/logs",
    # Optimisation schedule.
    num_train_epochs=1,
    per_device_train_batch_size=4,
    learning_rate=1e-3,
    # Log and checkpoint once per epoch; report metrics to TensorBoard.
    logging_strategy="epoch",
    save_strategy="epoch",
    report_to="tensorboard",
    # Upload the results to the Hugging Face Hub.
    push_to_hub=True,
)

In [16]:
# Build the trainer from the LoRA-wrapped model, the training arguments and the collator.
# NOTE(review): no eval_dataset is passed, so test_tokenized_dataset is not used here.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_tokenized_dataset,
)
# Turn off the model's generation cache for training — presumably to avoid the cache warning; confirm.
model.config.use_cache = False

Cloning https://huggingface.co/Rasith/lora-flan-t5-large-chat into local empty directory.


In [17]:
# Run the fine-tuning loop configured by training_args (one epoch over the tokenized training split).
trainer.train()

You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
3750,2.0376


TrainOutput(global_step=3750, training_loss=2.0376092447916667, metrics={'train_runtime': 4883.463, 'train_samples_per_second': 3.072, 'train_steps_per_second': 0.768, 'total_flos': 1.739447599104e+16, 'train_loss': 2.0376092447916667, 'epoch': 1.0})

In [18]:
peft_save_model_id = "/content/drive/MyDrive/Chatbot/lora-flan-t5-large-chat"

# Save, in this order, the PEFT-wrapped model, the tokenizer and the wrapped base model,
# all into the same directory and each with push_to_hub=True.
# NOTE(review): the inference cell reloads the base weights from
# config.base_model_name_or_path, so the base_model save looks redundant — confirm.
for saveable in (trainer.model, tokenizer, trainer.model.base_model):
    saveable.save_pretrained(peft_save_model_id, push_to_hub=True)

pytorch_model.bin:   0%|          | 0.00/3.15G [00:00<?, ?B/s]

In [None]:
# Copy a locally saved model directory into the Drive "Chatbot" folder.
# NOTE(review): the training and save cells write directly to the Drive path, so
# /content/lora-flan-t5-large-chat/ may not exist, and this cell has no execution
# count — confirm it is still needed.
! cp -r /content/lora-flan-t5-large-chat/ /content/drive/MyDrive/Chatbot/

In [21]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Read the saved adapter config to find out which base checkpoint it was trained on.
peft_model_id = "/content/drive/MyDrive/Chatbot/lora-flan-t5-large-chat"
config = PeftConfig.from_pretrained(peft_model_id)
base_checkpoint = config.base_model_name_or_path

# Rebuild the base model and tokenizer, attach the saved adapter, and move everything to the GPU.
model = AutoModelForSeq2SeqLM.from_pretrained(base_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
model = PeftModel.from_pretrained(model, peft_model_id, device_map={"":0})
model = model.cuda()
model.eval()

# NOTE(review): training inputs were the raw `Human` column; confirm that column uses
# this same "Human: ... Assistant:" template.
sample = "Human: \nExplain me about the working of Artificial Intelligence. \nAssistant: "
encoded_prompt = tokenizer(sample, return_tensors="pt", truncation=True, max_length=256)
input_ids = encoded_prompt.input_ids.cuda()

# Sample with top_p=0.9; output length is capped via max_length=256.
outputs = model.generate(input_ids=input_ids, do_sample=True, top_p=0.9, max_length=256)
print(sample)

decoded_outputs = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)
print(decoded_outputs[0])

Human: 
Explain me about the working of Artificial Intelligence. 
Assistant: 
Artificial intelligence, also known as AI, is the process of developing artificial intelligence algorithms for computer programs, such as robotics or machine learning. AI uses computer vision technology to interpret, analyze, and make predictions about information, making them more accurate than humans. AI is also employed in the design, automation, and performance of robotics and automation systems. Some of the main applications of AI include automation, robotics, and security. AI plays a vital role in developing and implementing automation, automation, and security. It can help machines to do certain tasks such as adjusting lighting or changing the temperature, for example. AI can also help machines to learn from the mistakes of humans and other machines, as well as predict and avoid problems. AI can also play a key role in solving problems and improving productivity. AI can also help human beings in their 