In [None]:
# Mount Google Drive at /content/drive so that later cells can reach the project folder stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Expose the Drive project folder as /content/Zalo_HT.
# -f replaces an existing link, so re-running this cell no longer fails with "File exists".
!ln -sf /content/drive/'My Drive'/Zalo_HT /content/

In [None]:
cd Zalo_HT

/content/drive/My Drive/Zalo_HT


In [None]:
!pip install -r requirements.txt

Collecting transformers==4.27.1 (from -r requirements.txt (line 1))
  Downloading transformers-4.27.1-py3-none-any.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
Collecting datasets==2.10.1 (from -r requirements.txt (line 3))
  Downloading datasets-2.10.1-py3-none-any.whl (469 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m469.0/469.0 kB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0 (from transformers==4.27.1->-r requirements.txt (line 1))
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.27.1->-r requirements.txt (line 1))
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━

**Model loading**

In [None]:
import torch
import torch.nn as nn
import os
# Make only GPU 0 visible to CUDA for this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import transformers
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

# Name of the BigScience checkpoint to fine-tune.
model_name = "bloom-1b1"

# Load the base model with 8-bit weights and fp16 as the torch dtype;
# device placement is left to device_map='auto'.
model = AutoModelForCausalLM.from_pretrained(
    f"bigscience/{model_name}",
    torch_dtype=torch.float16,
    load_in_8bit=True,
    device_map='auto',
)

# The tokenizer is fetched from the separate "bigscience/tokenizer" repository.
tokenizer = AutoTokenizer.from_pretrained("bigscience/tokenizer")

Downloading (…)lve/main/config.json:   0%|          | 0.00/693 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/2.13G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/227 [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

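**Post-processing on the model**
<br>
Post-process the 8-bit model to enable training: freeze all of its layers, cast the small 1-D parameters (e.g. layer norms) to fp32 for stability, and cast the output of the LM head to fp32.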

In [None]:
# Freeze every existing weight of the model; the adapters added later are what gets trained.
for weight in model.parameters():
    weight.requires_grad = False
    if weight.ndim == 1:
        # 1-D parameters (e.g. layernorm) are kept in fp32 for stability
        weight.data = weight.data.float()

# Gradient checkpointing reduces the number of stored activations.
model.gradient_checkpointing_enable()
# NOTE(review): presumably required so gradients can flow from the inputs while
# the base weights are frozen — confirm against the transformers documentation.
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
    """Sequential container whose forward result is converted to float32."""

    def forward(self, x):
        # Run the wrapped modules as usual, then cast the result.
        result = super().forward(x)
        return result.to(torch.float32)
# Wrap the LM head so that its output is cast to float32.
model.lm_head = CastOutputToFloat(model.lm_head)

**Apply LoRA**
<br>
Wrap the model in a `PeftModel` with `get_peft_model`, specifying that we are going to use low-rank adapters (LoRA). <br>

In [None]:
def print_trainable_parameters(model):
  """
  Prints the number of trainable parameters in the model.

  Args:
    model: object exposing ``named_parameters()`` that yields
      ``(name, parameter)`` pairs; each parameter must provide ``numel()``
      and a ``requires_grad`` flag.

  Prints one summary line with the trainable count, the total count and the
  trainable percentage. A model without any parameters is reported as 0.0%
  instead of raising ``ZeroDivisionError``.
  """
  trainable_params = 0
  all_params = 0
  for _, param in model.named_parameters():
    count = param.numel()
    all_params += count
    if param.requires_grad:
      trainable_params += count
  # Guard the ratio: a model with no parameters has all_params == 0.
  trainable_pct = 100 * trainable_params / all_params if all_params else 0.0
  print(
      f"trainable params: {trainable_params} || all params: {all_params} || trainable%: {trainable_pct}"
  )

In [None]:
from peft import (
    LoraConfig,
    get_peft_model,
)

# LoRA settings: rank-16 adapters (alpha 32, dropout 0.05) on the
# "query_key_value" modules, no bias training, causal-LM task type.
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["query_key_value"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Rebind `model` to the PEFT-wrapped model, then report how many parameters are trainable.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 2359296 || all params: 1067673600 || trainable%: 0.22097539922313336


**Preprocessing dataset**

In [None]:
from datasets import load_dataset, concatenate_datasets

# Alpaca instruction dataset, loaded by its Hub identifier.
alpaca_dataset = load_dataset('tatsu-lab/alpaca')
# Additional examples from a local CSV file (path is relative to the working directory).
my_dataset = load_dataset("csv", data_files="data_10k_processed.csv")
# Display the first record of the CSV data as a sanity check.
my_dataset["train"][0]

Downloading readme:   0%|          | 0.00/7.47k [00:00<?, ?B/s]

Downloading and preparing dataset parquet/tatsu-lab--alpaca to /root/.cache/huggingface/datasets/tatsu-lab___parquet/tatsu-lab--alpaca-2b32f0433506ef5f/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/24.2M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset parquet downloaded and prepared to /root/.cache/huggingface/datasets/tatsu-lab___parquet/tatsu-lab--alpaca-2b32f0433506ef5f/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-af9ee4148ff9acd2/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-af9ee4148ff9acd2/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

{'instruction': "Describe the worst vacation you've ever had",
 'input': None,
 'output': "The worst vacation I ever had was a skiing trip to the mountains where I had planned to spend the entire week skiing. However, the conditions were terrible and it started snowing heavily the day before my arrival. On top of that, the main ski resort was closed due to a lack of snow. I ended up spending the entire week cooped up in a hotel room while it continued to snow outside. I didn't get to experience any of the activities I had been looking forward to, and I felt like a complete waste of time and money.",
 'text': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nDescribe the worst vacation you've ever had\n\n### Response:\nThe worst vacation I ever had was a skiing trip to the mountains where I had planned to spend the entire week skiing. However, the conditions were terrible and it started snowing heavily the day

In [None]:
# Concatenate the two train splits to enlarge the training set.
dataset_cc = concatenate_datasets([alpaca_dataset["train"], my_dataset["train"]])
dataset_cc

Dataset({
    features: ['instruction', 'input', 'output', 'text'],
    num_rows: 61983
})

In [None]:
def generate_prompt(data_point):
    """Build the instruction-following training prompt for one example.

    Args:
        data_point: mapping with the keys ``"instruction"``, ``"input"`` and
            ``"output"``. A falsy ``"input"`` (``None`` or ``""``) selects the
            template without an ``### Input:`` section.

    Returns:
        The prompt as one string: the preamble, each ``###`` section header and
        each field value on its own line, separated by single newlines, with
        no leading whitespace on any template line.
    """
    # The lines are joined explicitly instead of being written as an indented
    # triple-quoted literal, which would embed the source indentation (eight
    # spaces) in front of every header and field value of the prompt.
    if data_point["input"]:
        lines = [
            "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.",
            "### Instruction:",
            f'{data_point["instruction"]}',
            "### Input:",
            f'{data_point["input"]}',
            "### Response:",
            f'{data_point["output"]}',
        ]
    else:
        lines = [
            "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
            "### Instruction:",
            f'{data_point["instruction"]}',
            "### Response:",
            f'{data_point["output"]}',
        ]
    return "\n".join(lines)

# Render each record as a prompt and tokenize it; `map` is applied per example.
dataset = dataset_cc.map(
    lambda example: tokenizer(generate_prompt(example))
)

Map:   0%|          | 0/61983 [00:00<?, ? examples/s]

In [None]:
# Show the attached GPU and its current memory usage before training.
!nvidia-smi

Thu Jul 20 13:34:19 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    52W / 400W |   2763MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

**Training**

In [None]:
# Run configuration: 2 epochs, per-device batch of 8 with 4 accumulation steps,
# fp16, 100 warm-up steps, loss logged every 500 steps.
training_args = transformers.TrainingArguments(
    output_dir="./outputs",
    num_train_epochs=2,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    learning_rate=1e-3,
    warmup_steps=100,
    fp16=True,
    logging_steps=500,
)

# Language-modeling collator with masked-LM disabled (mlm=False).
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=causal_lm_collator,
)

# NOTE(review): cache disabled for training, presumably because it conflicts
# with gradient checkpointing — confirm.
model.config.use_cache = False
trainer.train()

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,1.4602
1000,1.3626
1500,1.3377
2000,1.3235
2500,1.2675
3000,1.2709
3500,1.2532




TrainOutput(global_step=3874, training_loss=1.3178272867670489, metrics={'train_runtime': 9030.2673, 'train_samples_per_second': 13.728, 'train_steps_per_second': 0.429, 'total_flos': 9.697834352812032e+16, 'train_loss': 1.3178272867670489, 'epoch': 2.0})

**Push model to Hub**

In [None]:
# Hugging Face Hub account used as the namespace of the repository the model is pushed to.
HUGGING_FACE_USER_NAME = "namngduc"

In [None]:
# Authenticate with the Hugging Face Hub; this renders a login widget in the notebook.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Upload `model` to the Hub repository, opening a pull request instead of committing directly.
# `create_pr` is a boolean flag, so pass True rather than the integer 1.
model.push_to_hub(f"{HUGGING_FACE_USER_NAME}/GenerationText-Bloom", use_auth_token=True, create_pr=True)

CommitInfo(commit_url='https://huggingface.co/namngduc/GenerationText-Bloom/commit/049938e42cae3bd67c05fe047afe4989d33bd900', commit_message='Upload model', commit_description='', oid='049938e42cae3bd67c05fe047afe4989d33bd900', pr_url='https://huggingface.co/namngduc/GenerationText-Bloom/discussions/2', pr_revision='refs/pr/2', pr_num=2)

In [None]:
# Also save the model files locally under ./bloom_ai (relative to the working directory).
model.save_pretrained("./bloom_ai")

**References**
<br>
[Alpaca-LoRA](https://github.com/tloen/alpaca-lora/tree/main)
<br>
[Fine-tuning BLOOM](https://www.youtube.com/watch?v=Nf-b1VXD3kI)
<br>
[https://www.philschmid.de/bloom-sagemaker-peft](https://www.philschmid.de/bloom-sagemaker-peft)