In [1]:
!pip install bitsandbytes
!pip install accelerate
!pip install --upgrade transformers
!pip install --upgrade peft
!pip install --upgrade datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.0
Collecting transformers
  Downloading transformers-4.47.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.47.1-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m87.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.47.0
    Uninstalling transformers-4.47.0:
      Successfully uninstalled t

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

In [16]:
from huggingface_hub import login
hf_token = "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"
login(hf_token)

In [4]:
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", padding_side="right",)
tokenizer.pad_token = tokenizer.eos_token

##Quantization Config
bnb_config = BitsAndBytesConfig(
   load_in_8bit=True,                  ##loads the model in 8 bit config thereby reducing the size
#    bnb_4bit_quant_type="nf4",
#    bnb_4bit_use_double_quant=True,
   bnb_8bit_compute_dtype=torch.bfloat16     ##specifying the data type for operations
)

##Model Initialization
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", device_map="auto", quantization_config=bnb_config)  #auto for automtically mapping the model with the cpu/gpu

Unused kwargs: ['bnb_8bit_compute_dtype']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


In [5]:
txt = """###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT:"""
tokens = tokenizer(txt, return_tensors="pt")['input_ids'].to("cuda")    ##coverts text into token ids ##pt-pytorch ##to("cuda")- sends token ids to GPU for faster processing
op = model.generate(tokens, max_new_tokens=200)        ##generates text based on input tokens
print(tokenizer.decode(op[0]))

<s> ###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT:

###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT:

###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT:

###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT:

###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT:

###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT:

###SYSTEM: Based on INPUT title generate the


In [6]:
##Preparing PEFT Model
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training

model.gradient_checkpointing_enable()                    ##saves memory
model = prepare_model_for_kbit_training(model)           ##prepares model for quantized training where weights are stores in lower memory format like 4/8 bit

peft_config = LoraConfig(inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1, peft_type=TaskType.CAUSAL_LM)    ##lora configuration ##false- showing model is being used for training and not inference ##lora-alpha-scale factor(bade ko chote mei)
model = get_peft_model(model, peft_config)               ##applying lora to the model

print(model.print_trainable_parameters())

trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.1023
None


In [11]:
## Preparing Dataset
def format_dataset(data_point):                      ##will take a single data point and convert it into a format suitable for fine tuning
    prompt = f"""###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: {data_point['act']}

###PROMPT: {data_point['prompt']}
"""
    tokens = tokenizer(prompt,
        truncation=True,    ##ensures that sequences longer than max length are truncated
        max_length=256,
        padding="max_length",)
    tokens["labels"] = tokens['input_ids'].copy()          ##check w S
    return tokens

In [8]:
from datasets import load_dataset

dataset = load_dataset("fka/awesome-chatgpt-prompts", split="train")

dataset = dataset.map(format_dataset)             ##after this all the dataset token(words) will stored as token ids

README.md:   0%|          | 0.00/339 [00:00<?, ?B/s]

prompts.csv:   0%|          | 0.00/84.1k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/170 [00:00<?, ? examples/s]

Map:   0%|          | 0/170 [00:00<?, ? examples/s]

In [9]:
print(tokenizer.decode(dataset[0]['input_ids']))   ##this will convert tokens back in human readable format
##our dataset has two columns 'act' and 'prompt'

<s> ###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: An Ethereum Developer

###PROMPT: Imagine you are an experienced Ethereum developer tasked with creating a smart contract for a blockchain messenger. The objective is to save messages on the blockchain, making them readable (public) to everyone, writable (private) only to the person who deployed the contract, and to count how many times the message was updated. Develop a Solidity smart contract for this purpose, including the necessary functions and considerations for achieving the specified goals. Please provide the code and any relevant explanations to ensure a clear understanding of the implementation.
</s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s

In [10]:
dataset = dataset.remove_columns(['act', "prompt"])                 ## we are removing the columns because our model requires only preprocessed tokenized data i.e. of tokenized ids therefore these columns are no longer needed
print(dataset)

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 170
})


In [12]:
import torch
if torch.cuda.device_count() > 1:              ##checking if system has more than 1 gpu
    model.is_parallelizable = True               ##'indicates' that the model can be split and run across multiple GPUs parallely
    model.model_parallel = True                  ##'enables' the model to do the splitting

In [13]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False) ##for making batches of data ##False-task is casual language modelling anf not masked language modelling

trainer = Trainer(
                    model = model,
                    train_dataset=dataset,
#                     eval_dataset = eval_dataset,
                    tokenizer = tokenizer,
                    data_collator = data_collator,

                    args = TrainingArguments(
                        output_dir="./training",
                        remove_unused_columns=False,
                        per_device_train_batch_size=2,
                        gradient_checkpointing=True,
                        gradient_accumulation_steps=4,
                        max_steps=400,
                        learning_rate=2.5e-5,
                        logging_steps=5,
                        fp16=True,
                        optim="paged_adamw_8bit",
                        save_strategy="steps",
                        save_steps=50,
#                         evaluation_strategy="steps",
#                         eval_steps=5,
#                         do_eval=True,
                        report_to = "none",

                ))

  trainer = Trainer(


In [14]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
5,2.9119
10,3.0484
15,2.9199
20,2.9047
25,2.7893
30,2.7744
35,2.8307
40,2.8092
45,2.6945
50,2.6541




TrainOutput(global_step=400, training_loss=1.9008051943778992, metrics={'train_runtime': 905.7149, 'train_samples_per_second': 3.533, 'train_steps_per_second': 0.442, 'total_flos': 4918571704516608.0, 'train_loss': 1.9008051943778992, 'epoch': 18.188235294117646})

In [17]:
trainer.model.push_to_hub("Prompt_Generator_Fine_Tuned", private=False)


adapter_model.safetensors:   0%|          | 0.00/4.52M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Shivank91/Prompt_Generator_Fine_Tuned/commit/4211f5f1be271863b215bd765cbad3ab02d045eb', commit_message='Upload model', commit_description='', oid='4211f5f1be271863b215bd765cbad3ab02d045eb', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Shivank91/Prompt_Generator_Fine_Tuned', endpoint='https://huggingface.co', repo_type='model', repo_id='Shivank91/Prompt_Generator_Fine_Tuned'), pr_revision=None, pr_num=None)