In [1]:
import sys
import os

import argparse
import time
import json
from datetime import date
import tqdm

import torch

#Transformers
import transformers
import bitsandbytes as bnb
from transformers import AutoModelForCausalLM , AutoTokenizer
from transformers import pipeline, set_seed
from transformers import get_linear_schedule_with_warmup, AdamW
from transformers import AutoConfig
from transformers import BitsAndBytesConfig

#Dataset
from datasets import load_dataset

#PEFT
from peft import LoraConfig
from peft import PeftConfig
from peft import PeftModel
from peft import get_peft_model
from peft import prepare_model_for_kbit_training


import warnings
warnings.filterwarnings("ignore")

torch.set_float32_matmul_precision('medium')
torch.cuda.empty_cache()

2024-03-21 18:06:33.885583: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Define a function to print the number of trainable parameters in the given model
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(f"Trainable params: {trainable_params} || All params: {all_param} || Trainable %: {100 * trainable_params / all_param}")

def tokenize_input(df,tokenizer,tokenizer_chapter_max_length,tokenizer_summary_max_length):

    prompt_start = "Summarize the following : \n"
    prompt_end = "\n\nSummary:"

    prompt = [prompt_start + dialogue + prompt_end for dialogue in df["chapter"]]

    df["input_ids"] = tokenizer(prompt, max_length=tokenizer_chapter_max_length , padding="max_length" , truncation=True , return_tensors="pt").input_ids
    df["labels"] = tokenizer(df["summary_text"],max_length=tokenizer_summary_max_length , padding="max_length" , truncation=True , return_tensors="pt").input_ids

    return df

In [3]:
cache_dir = "/work/LitArt/nair/cache/" 
log_path = "/work/LitArt/nair/outdir/"

tokenizer_chapter_max_length = 1024
tokenizer_summary_max_length = 256

#base_model_name = "tiiuae/falcon-7b"
#tokenizer_name = "tiiuae/falcon-7b"

model = "tiiuae/falcon-40b-instruct"
tokenizer_name = "tiiuae/falcon-40b-instruct"

today = date.today()


log_path = log_path+model.replace("/","-")+"-" +str(today)+"-"+time.strftime("%H:%M:%S", time.localtime())
#logger = TensorBoardLogger(log_path, name="my_model")


In [4]:
log_path

'/work/LitArt/nair/outdir/tiiuae-falcon-40b-instruct-2024-03-21-18:06:41'

In [5]:

# Load the 7b llama model
#model_id = "tiiuae/falcon-7b"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Load model
model = AutoModelForCausalLM.from_pretrained(model, 
                                                  quantization_config=quantization_config, 
                                                  cache_dir=cache_dir,
                                                  device_map="auto",
                                                  trust_remote_code=True,
                                                 )






Loading checkpoint shards:   0%|          | 0/9 [00:00<?, ?it/s]

In [6]:

tokenizer = AutoTokenizer.from_pretrained(tokenizer_name,cache_dir=cache_dir)
# Set the padding token of the tokenizer to its end-of-sentence token
tokenizer.pad_token = tokenizer.eos_token

# Enable gradient checkpointing for the model. Gradient checkpointing is a technique used to reduce the memory consumption during the backward pas. Instead of storing all intermediate activations in the forward pass (which is what's typically done to compute gradients in the backward pass), gradient checkpointing stores only a subset of them
model.gradient_checkpointing_enable() 

# Prepare the model for k-bit training . Applies some preprocessing to the model to prepare it for training.
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=18, #The rank of decomposition r is << min(d,k). The default of r is 8.
    lora_alpha=32,#∆W is scaled by α/r where α is a constant. When optimizing with Adam, tuning α is similar as tuning the learning rate.
    target_modules=["query_key_value"], #Modules to Apply LoRA to target_modules. You can select specific modules to fine-tune.
    lora_dropout=0.05,#Dropout Probability for LoRA Layers #to reduce overfitting
    bias="none", #Bias Type for Lora. Bias can be ‘none’, ‘all’ or ‘lora_only’. If ‘all’ or ‘lora_only’, the corresponding biases will be updated during training. 
    task_type= "CAUSAL_LM", #Task Type
    )

model = get_peft_model(model, lora_config)

# Print the number of trainable parameters in the model
print_trainable_parameters(model)

You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.
You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.


Trainable params: 18800640 || All params: 20937777152 || Trainable %: 0.08979291289383191


In [7]:
data = load_dataset('csv', 
                    data_files={
                        'train': "/work/LitArt/data/chunked_dataset/train_dataset_with_summaries.csv",
                        'test': "/work/LitArt/data/chunked_dataset/test_dataset_with_summaries.csv",
                        'val':"/work/LitArt/data/chunked_dataset/validation_dataset_with_summaries.csv"})

In [8]:
data

DatasetDict({
    train: Dataset({
        features: ['chapter', 'human_summary', '__index_level_0__', 'summary_text'],
        num_rows: 10668
    })
    test: Dataset({
        features: ['chapter', 'human_summary', '__index_level_0__', 'summary_text'],
        num_rows: 1614
    })
    val: Dataset({
        features: ['chapter', 'human_summary', '__index_level_0__', 'summary_text'],
        num_rows: 1548
    })
})

In [9]:
tokenized_dataset = data["train"].shuffle().map(tokenize_input, batched=True, fn_kwargs={"tokenizer": tokenizer, "tokenizer_chapter_max_length": tokenizer_chapter_max_length,"tokenizer_summary_max_length":tokenizer_summary_max_length})
tokenized_dataset = tokenized_dataset.remove_columns(['chapter', 'human_summary', '__index_level_0__', 'summary_text'])

Map:   0%|          | 0/10668 [00:00<?, ? examples/s]

In [10]:
from transformers import TrainingArguments
#Training Parameters
batch_size = 2
epochs = 3

training_args = transformers.TrainingArguments(
    per_device_train_batch_size = batch_size ,     # Specifies the batch size for training on each device (GPU).
    #auto_find_batch_size=True,      # Uncommenting this would let the library automatically find an optimal batch size.
    gradient_accumulation_steps=2,   # Number of forward and backward passes to accumulate gradients before performing an optimizer step.
    # This effectively multiplies the batch size by this number without increasing memory usage.
    num_train_epochs=epochs,              # Specifies the total number of training epochs.
    learning_rate=1e-5,              # Specifies the learning rate for the optimizer.
    fp16=True,     # Enables mixed precision training (fp16) which can speed up training and reduce memory usage.
    save_total_limit=3,              # Limits the total number of model checkpoints saved. Only the last 3 checkpoints are saved.
    logging_steps=10,                 # Specifies how often to log training updates. 
    output_dir=log_path ,          # Directory where the model checkpoints and training outputs will be saved.
    max_steps = 400 ,                 # Limits the total number of training steps. Training will stop after 80 steps regardless of epochs.
    save_strategy='epoch',    # Uncommenting this would change the strategy for saving model checkpoints. 'epoch' means save after each epoch.
    optim="paged_adamw_8bit",     # Specifies the optimizer to use. it's set to a specific variant of AdamW.
    lr_scheduler_type = 'cosine',     # Specifies the learning rate scheduler type. 'cosine' means it uses cosine annealing.
    warmup_ratio = 0.05,           # Specifies the ratio of total steps for the learning rate warmup phase.
    max_grad_norm=0.3,


)

In [11]:
def save_hyperparameters(log_path, quantization_config, lora_config , training_args):
    import os
    os.makedirs(log_path, exist_ok=True)
    
    file_path = os.path.join(log_path, 'hyperparameters.txt')  
    
    with open(file_path, 'w') as file:
        file.write(str(quantization_config))
        file.write(str(lora_config))
        file.write(str(training_args))
        
    
save_hyperparameters(log_path, quantization_config , lora_config , training_args)


In [None]:
import matplotlib.pyplot as plt
from transformers import TrainerCallback

class LossLoggingCallback(TrainerCallback):
    "A custom callback that logs the loss after each epoch."
    def __init__(self):
        super().__init__()
        self.losses = []
    
    def on_epoch_end(self, args, state, control, **kwargs):
        # Log the average training loss of the last epoch
        self.losses.append(state.log_history[-1]['loss'])

# Instantiate your callback
loss_logging_callback = LossLoggingCallback()

# Add your callback to the list of callbacks in the Trainer
trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_dataset,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    callbacks=[loss_logging_callback]  # Add your custom callback here
)

model.config.use_cache = False  # Adjust as per your needs

# Assuming training is performed here
trainer.train()

# After training, plot the loss vs. epochs
plt.figure(figsize=(10, 6))
plt.plot(loss_logging_callback.losses, label='Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss vs. Epochs')
plt.legend()
plt.grid(True)

# Save the plot to the specified log_path folder
#plt.savefig(f'{log_path}/loss_vs_epochs.png')



Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss
10,2.7936
20,2.7352
30,2.6566
40,2.765
50,2.8379
60,2.584


In [None]:
#trainer.train() # [276/500 39:37 < 32:23, 0.12 it/s, Epoch 0.10/1] at 3pm

In [None]:
model.save_pretrained(log_path)



In [None]:
trainer.save_model(log_path)

In [None]:
model_dir = log_path

In [None]:
# Load the configuration for the trained model
config = PeftConfig.from_pretrained(model_dir)

In [None]:
# Load the trained model using the loaded configuration and other parameters
model = AutoModelForCausalLM.from_pretrained(
	config.base_model_name_or_path,
	return_dict=True,
	quantization_config=quantization_config,
	device_map="auto",
	trust_remote_code=True,
    cache_dir=cache_dir,
)

In [None]:
# Load the tokenizer for the model
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)



In [None]:
# Set the padding token of the tokenizer to its end-of-sentence token
tokenizer.pad_token = tokenizer.eos_token

In [None]:
model =  PeftModel.from_pretrained(model, model_dir)

In [None]:
#Inference
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id





In [None]:
def generate_response(chapter : str) -> str:    
    prompt =  f'''
    "Summarize the following : \n" {chapter}
    "\n\nSummary:" 
    '''.strip()
    DEVICE  = "cuda"
    encoding = tokenizer(prompt, return_tensors = "pt").to(DEVICE)
    #with torch.inference_mode():
    with torch.no_grad():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    

    return response.strip()




In [None]:
#prompt

chapter = '''In a village where the mountains kissed the clouds and the rivers sang to the valleys, there lived a boy named Idris. Idris had the peculiar ability to understand the language of the wind. It was a gift that had been passed down through his family for generations, but in Idris, it found its most curious student.

Each morning, Idris would climb to the highest peak, sit among the whispers of the passing breezes, and listen. The wind spoke of distant lands, of the secrets of the forest, and of the tales of the creatures that walked within it. Idris's favorite stories were those of the Guardians of the Forest, mythical beings said to protect the balance between nature and the world of men.

One day, a tempest unlike any other approached the village. The wind's voice was frantic, warning of a darkness that sought to devour the forest and everything within it. Idris understood what he had to do. He remembered the tales of the Guardians and knew that if he could find them, they could save his home.

With nothing but the clothes on his back and the courage in his heart, Idris ventured into the forest. The wind guided him, whispering paths through the twisting undergrowth and overgrown trails. After days of journeying deeper than any villager had dared, Idris found the heart of the forest. It was there, in a clearing bathed in moonlight, that he met the Guardians.

They were not what he expected. The Guardians were the forest itself — the trees, the rivers, the stones, and the wind. They spoke in a chorus of natural harmony, and Idris understood them. He pleaded for their help, telling them of the impending darkness.

The Guardians listened and then spoke in a voice like the rustling of leaves. "The darkness you speak of is born from the hearts of men," they said. "It cannot be fought with force but with understanding. Return to your people, Idris. Teach them to listen as you have listened."

Idris returned to his village as the storm broke upon the mountains. He told them of the Guardians, of the language of the wind, and of the darkness that was born from their own actions. At first, they did not understand, but Idris did not give up. He showed them how to listen, how to care for the land that had cared for them.

In time, the village changed. The people learned to live in harmony with the land, to listen to the wind, and to respect the balance of nature. The darkness receded, the forest flourished, and Idris's village prospered.

Idris grew old, but he continued to climb to the peak each morning, to listen to the wind, and to teach others to do the same. And though the story of the boy who spoke to the wind became a legend, the lesson it taught remained: to listen, to understand, and to respect the voices of the world around us.
'''

print (generate_response(chapter))




In [None]:
import pandas as pd
pd.DataFrame(trainer.state.log_history)