In [1]:
!pip install bitsandbytes
!pip install accelerate
!pip install --upgrade transformers
!pip install --upgrade peft
!pip install --upgrade datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl (122.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.44.1
Collecting transformers
  Downloading transformers-4.46.3-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.46.3-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m84.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.46.2
    Uninstalling transformers-4.46.2:
      Successfully uninstalled

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

In [3]:


# Define variables for model and dataset names
model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
dataset_name = "fka/awesome-chatgpt-prompts"

# Tokenizer setup
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="right")
tokenizer.pad_token = tokenizer.eos_token

# Quantization configuration
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_8bit_compute_dtype=torch.bfloat16
)

# Load model with quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=quantization_config
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Unused kwargs: ['bnb_8bit_compute_dtype']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

In [4]:
example_input = """###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT:"""
input_tokens = tokenizer(example_input, return_tensors="pt")['input_ids'].to("cuda")
output = model.generate(input_tokens, max_new_tokens=200)
print(tokenizer.decode(output[0]))


<s> ###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT: Generate a random number between 0 and 100

###SYSTEM: Based on INPUT title generate the prompt for generative model

###PROMPT: Generate a random number between 0 and 100

###SYSTEM: Based on INPUT title generate the prompt for generative model

###PROMPT: Generate a random number between 0 and 100

###SYSTEM: Based on INPUT title generate the prompt for generative model

###PROMPT: Generate a random number between 0 and 100

###SYSTEM: Based on INPUT title generate the prompt for generative model

###PROMPT: Generate a random number between 0 and 100

###SYSTEM: Based on INPUT title generate the prompt for generative model

##


In [5]:
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

peft_settings = LoraConfig(
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    peft_type=TaskType.CAUSAL_LM
)
model = get_peft_model(model, peft_settings)

print(model.print_trainable_parameters())

trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.1023
None


In [6]:
def format_dataset(data_point):
    formatted_prompt = f"""###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: {data_point['act']}

###PROMPT: {data_point['prompt']}
"""
    tokenized_data = tokenizer(
        formatted_prompt,
        truncation=True,
        max_length=256,
        padding="max_length"
    )
    tokenized_data["labels"] = tokenized_data['input_ids'].copy()
    return tokenized_data

# Load and process dataset
from datasets import load_dataset

dataset = load_dataset(dataset_name, split="train")
dataset = dataset.map(format_dataset)

print(tokenizer.decode(dataset[0]['input_ids']))

# Remove unnecessary columns
dataset = dataset.remove_columns(['act', "prompt"])
print(dataset)


README.md:   0%|          | 0.00/339 [00:00<?, ?B/s]

prompts.csv:   0%|          | 0.00/84.1k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/170 [00:00<?, ? examples/s]

Map:   0%|          | 0/170 [00:00<?, ? examples/s]

<s> ###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: An Ethereum Developer

###PROMPT: Imagine you are an experienced Ethereum developer tasked with creating a smart contract for a blockchain messenger. The objective is to save messages on the blockchain, making them readable (public) to everyone, writable (private) only to the person who deployed the contract, and to count how many times the message was updated. Develop a Solidity smart contract for this purpose, including the necessary functions and considerations for achieving the specified goals. Please provide the code and any relevant explanations to ensure a clear understanding of the implementation.
</s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s

In [7]:
if torch.cuda.device_count() > 1:
    model.is_parallelizable = True
    model.model_parallel = True

In [8]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="./training",
    remove_unused_columns=False,
    per_device_train_batch_size=2,
    gradient_checkpointing=True,
    gradient_accumulation_steps=4,
    max_steps=400,
    learning_rate=2.5e-5,
    logging_steps=5,
    fp16=True,
    optim="paged_adamw_8bit",
    save_strategy="steps",
    save_steps=50,
    report_to="none"
)

trainer = Trainer(
    model=model,
    train_dataset=dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    args=training_args
)

  trainer = Trainer(
max_steps is given, it will override any value given in num_train_epochs


In [9]:

trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
5,2.9132
10,3.0488
15,2.9219
20,2.9063
25,3.2231
30,2.8207
35,2.8407
40,2.7624
45,3.0962
50,2.6605




Step,Training Loss
5,2.9132
10,3.0488
15,2.9219
20,2.9063
25,3.2231
30,2.8207
35,2.8407
40,2.7624
45,3.0962
50,2.6605




TrainOutput(global_step=400, training_loss=1.9605979537963867, metrics={'train_runtime': 1255.4134, 'train_samples_per_second': 2.549, 'train_steps_per_second': 0.319, 'total_flos': 5090371751116800.0, 'train_loss': 1.9605979537963867, 'epoch': 18.823529411764707})

In [12]:
# Tokenize new input for generation
txt_new = """###SYSTEM: Generate a task-specific prompt based on the input description

###INPUT: Tips to be a good man
###PROMPT:"""

tokens_new = tokenizer(txt_new, return_tensors="pt")['input_ids'].to("cuda")

# Generate new output
with torch.no_grad():
    output_new = model.generate(
        tokens_new,
        max_new_tokens=100,  # Reduce the token count to speed up
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=1.2,
        repetition_penalty=1.2,
        num_beams=1  # No beam search for faster output
    )

# Decode and print the generated text
print(tokenizer.decode(output_new[0], skip_special_tokens=True))

###SYSTEM: Generate a task-specific prompt based on the input description

###INPUT: Tips to be a good man
###PROMPT: I want you to suggest some tips that people should always follow. Examples are don't use mobile phone in the bathroom, stop eating too much cakes and desserts when they are already ready and not wear jumpsuit around your house if you aren't feeling cold.

<h3 align="left"> 1 ) How can the given task is applicable for other peoples? Please write example problems related those applications of this tasks</h3>

