In [1]:
print("Start")
print("Importing Libraries")
import torch
import os
import numpy as np
from tqdm import tqdm
from accelerate import Accelerator
from transformers import (T5ForConditionalGeneration, 
AdamW, T5Tokenizer, TrainingArguments, Trainer,
get_linear_schedule_with_warmup)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel
from datasets import load_dataset, Dataset, DatasetDict
from torch.utils.data import DataLoader
import pandas as pd
from torch.utils.tensorboard import SummaryWriter
print("Libraries Imported Successfully")


Start


  from .autonotebook import tqdm as notebook_tqdm


Libraries Imported Successfully


In [2]:
df_train = pd.read_csv("/home/pravin/Desktop/April/7thapril/Sentiment/train_data_sentiment.csv")
df_train.dropna(inplace = True)
df_train = df_train.head(50000)
df_train = df_train.astype(str)
df_train['context'] = df_train['context'].apply(lambda x: ' context: ' + x )
df_train['question'] = df_train['question'].apply(lambda x: 'question: ' + x)
df_train = df_train.to_dict(orient="list")
train_dataset = Dataset.from_dict(df_train)

In [3]:
accelerator = Accelerator(gradient_accumulation_steps=4)
device = accelerator.device

In [4]:
checkpoint_dir = "/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration"


In [5]:
model_id = 't5-large'
tokenizer = T5Tokenizer.from_pretrained(model_id)
model = T5ForConditionalGeneration.from_pretrained(model_id)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
# peft_config = LoraConfig(
#     inference_mode=False,
#     r=16,
#     lora_alpha=32,
#     lora_dropout=0.05,
#     target_modules=["q", "k", "v"],
#     init_lora_weights="gaussian"
# )
# model = get_peft_model(model, peft_config)


In [7]:
model = PeftModel.from_pretrained(model, "/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch0")

In [8]:
for name, param in model.named_parameters():
    if "lora" not in name:
        param.requires_grad = False
    else:
        param.requires_grad = True

In [9]:
model.train()
model.print_trainable_parameters()
model.to(device)

trainable params: 7,077,888 || all params: 744,745,984 || trainable%: 0.9503761217999398


PeftModel(
  (base_model): LoraModel(
    (model): T5ForConditionalGeneration(
      (shared): Embedding(32128, 1024)
      (encoder): T5Stack(
        (embed_tokens): Embedding(32128, 1024)
        (block): ModuleList(
          (0): T5Block(
            (layer): ModuleList(
              (0): T5LayerSelfAttention(
                (SelfAttention): T5Attention(
                  (q): lora.Linear(
                    (base_layer): Linear(in_features=1024, out_features=1024, bias=False)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=1024, out_features=16, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=16, out_features=1024, bias=False)
                    )
                    (lora_embedding_A): ParameterDict()
                  

In [10]:
NUM_EPOCHS = 3
BATCH_SIZE = 8
LEARNING_RATE = 0.0003

In [11]:
writer = SummaryWriter(f"runs/runs1/T5_large_lora_batch_{BATCH_SIZE}_lr_{LEARNING_RATE}")
optimizer = AdamW(model.parameters(), lr = LEARNING_RATE)
data_loader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle=True)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = 0, num_training_steps = len(train_dataset) * NUM_EPOCHS)
model, optimizer, training_dataloader, scheduler = accelerator.prepare(model, optimizer, data_loader, scheduler)



In [12]:
for epoch in range(NUM_EPOCHS):
    for batch_number, batch in tqdm(enumerate(training_dataloader)):
        total_loss = 0.0
        losses = []
        avg_losses = []
        with accelerator.accumulate(model):
            optimizer.zero_grad()
            input_ids = tokenizer( batch["question"], batch["context"], max_length = 512, padding='max_length', truncation='longest_first', return_tensors="pt").input_ids.to(device)
            attention_mask = tokenizer(batch["question"], batch["context"], max_length = 512, padding='max_length', truncation='longest_first', return_tensors="pt").attention_mask.to(device)
            labels = tokenizer(batch["answer"], padding=True, truncation=True, return_tensors="pt").input_ids.to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            accelerator.backward(loss)
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()
            avg_train_loss = (BATCH_SIZE * total_loss)/(batch_number+1)
            losses.append(loss)
            avg_losses.append(avg_train_loss)
            for name, param in model.named_parameters():
                if name == "base_model.model.encoder.block.15.layer.0.SelfAttention.v.lora_A.default.weight":
                    writer.add_histogram(name, param,  global_step = batch_number)
            writer.add_scalar('Training loss', loss, global_step = batch_number)
            writer.add_scalar('avg_train_loss', avg_train_loss, global_step = batch_number)
            if batch_number%1000 == 0:
                checkpoint_path = os.path.join(checkpoint_dir, f"epoch{epoch}")
                model.save_pretrained(checkpoint_path)
                print(f"Model Saved in folder{checkpoint_path}, Batch number: {batch_number}, epoch: {epoch}, Loss = {loss}")    
    print(f"Epoch {epoch + 1 } Done!")


0it [00:00, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
1it [00:01,  2.00s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch0, Batch number: 0, epoch: 0, Loss = 0.00868138112127781


1001it [15:39,  1.20s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch0, Batch number: 1000, epoch: 0, Loss = 0.033623673021793365


2001it [31:17,  1.24s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch0, Batch number: 2000, epoch: 0, Loss = 0.010446126572787762


3001it [46:57,  1.29s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch0, Batch number: 3000, epoch: 0, Loss = 0.08629095554351807


4001it [1:02:36,  1.43s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch0, Batch number: 4000, epoch: 0, Loss = 0.06064416468143463


5001it [1:18:15,  1.19s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch0, Batch number: 5000, epoch: 0, Loss = 0.003860006108880043


6001it [1:33:57,  1.65s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch0, Batch number: 6000, epoch: 0, Loss = 0.08750130981206894


6250it [1:37:51,  1.06it/s]


Epoch 1 Done!


1it [00:01,  1.61s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch1, Batch number: 0, epoch: 1, Loss = 0.060578200966119766


1001it [15:42,  1.29s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch1, Batch number: 1000, epoch: 1, Loss = 0.0806998759508133


2001it [31:21,  1.24s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch1, Batch number: 2000, epoch: 1, Loss = 0.020019032061100006


3001it [46:58,  1.33s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch1, Batch number: 3000, epoch: 1, Loss = 0.2183934450149536


4001it [1:02:36,  1.27s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch1, Batch number: 4000, epoch: 1, Loss = 0.06624727696180344


5001it [1:18:16,  1.44s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch1, Batch number: 5000, epoch: 1, Loss = 0.014107747003436089


6001it [1:33:54,  1.32s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch1, Batch number: 6000, epoch: 1, Loss = 0.01813841424882412


6250it [1:37:47,  1.07it/s]


Epoch 2 Done!


1it [00:02,  2.02s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch2, Batch number: 0, epoch: 2, Loss = 0.28804463148117065


1001it [15:39,  1.28s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch2, Batch number: 1000, epoch: 2, Loss = 0.02263427898287773


2001it [31:18,  1.39s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch2, Batch number: 2000, epoch: 2, Loss = 0.14595651626586914


3001it [46:55,  1.30s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch2, Batch number: 3000, epoch: 2, Loss = 0.08463691920042038


4001it [1:02:31,  1.52s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch2, Batch number: 4000, epoch: 2, Loss = 0.00425028195604682


5001it [1:18:08,  1.38s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch2, Batch number: 5000, epoch: 2, Loss = 0.05236300826072693


6001it [1:33:43,  1.29s/it]

Model Saved in folder/home/pravin/Desktop/April/7thapril/Sentiment/second_iteration/epoch2, Batch number: 6000, epoch: 2, Loss = 0.011198504827916622


6250it [1:37:36,  1.07it/s]

Epoch 3 Done!





### Inference

In [14]:
model_1 = T5ForConditionalGeneration.from_pretrained(model_id)

peft_config = LoraConfig(
    inference_mode=False,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q", "k", "v"],
    init_lora_weights="gaussian"
)
model_1 = get_peft_model(model_1, peft_config)
model_1.print_trainable_parameters()

checkpoint = "/home/pravin/Desktop/April/7thapril/checkpoint_large_LoRA_16qkv0.pt"

checkpoint = torch.load(checkpoint)
model_1.load_state_dict(checkpoint['model_state_dict'])

trainable params: 7,077,888 || all params: 744,745,984 || trainable%: 0.9503761217999398


<All keys matched successfully>

In [15]:
input_ids = tokenizer(" question: What is his nick name? context: Hi, guys my name is Pravin but I go by Tillya and I live in Banglore ", return_tensors='pt').input_ids
model_1.generate(input_ids = input_ids)
tokenizer.decode(model_1.generate(inputs = input_ids)[0], skip_special_tokens=True)

'Tillya'