In [None]:
!pip install -q -U transformers datasets accelerate peft trl bitsandbytes wandb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.9/133.9 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━

In [None]:
from google.colab import userdata

# Hugging Face access token, read from the Colab "Secrets" tab entry with this name.
HF_SECRET_NAME = 'huggingface'
hf_token = userdata.get(HF_SECRET_NAME)

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments,
    pipeline,
)
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from trl import SFTTrainer



In [None]:
from torch import cuda, bfloat16
import transformers
# Use the currently selected CUDA device when one is available, otherwise fall back to the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

In [None]:
# Dataset: the "train" split of the Hub dataset used for fine-tuning.
dataset = load_dataset("thudoann/finetuningllm2", split="train")

model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Quantization configuration so the large model loads with less GPU memory
# (requires the `bitsandbytes` library): 4-bit NF4 weights, double quantization,
# bfloat16 compute dtype.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# The Hub downloads below need an access token. It is passed as `token=`;
# `use_auth_token=` is the deprecated spelling of the same argument.
hf_auth = hf_token
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    token=hf_auth
)

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    token=hf_auth
)
# Pad with the <unk> token, on the right-hand side of each sequence.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "right"

# `trust_remote_code=True` was dropped: the Llama architecture is implemented in
# transformers itself, so there is no reason to allow code from the Hub repo to run.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    token=hf_auth
)
model.eval()
# NOTE(review): `device` comes from an earlier cell; with device_map='auto' the actual
# placement is decided by accelerate, so this message is informational only.
print(f"Model loaded on {device}")

Downloading readme:   0%|          | 0.00/96.0 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/143M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]



config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]



model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Model loaded on cuda:0


Learn more about padding [in the following article](https://medium.com/towards-data-science/padding-large-language-models-examples-with-llama-2-199fb10df8ff) written by Benjamin Marie.

In [None]:
import gc

# Quantization configuration used for training: 4-bit NF4 weights with double
# quantization and float16 compute. This rebinds `bnb_config`, replacing any
# configuration of the same name created earlier in the notebook.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# LoRA configuration: rank-16 adapters (alpha 32, dropout 0.05) on the attention
# and MLP projection layers listed in `target_modules`.
peft_config = LoraConfig(
    lora_alpha=32,
    lora_dropout=0.05,
    r=16,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules= ['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)

# Drop any model object left over from an earlier cell BEFORE loading again.
# Without this, the old copy stays on the GPU until the new load has finished,
# so two copies of the 7B model are resident at the same time.
model = None
gc.collect()
torch.cuda.empty_cache()

# Load base model onto GPU 0. The access token is passed explicitly so this cell
# does not depend on the files already being in the local cache.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
    token=hf_token,
)

# Cast the layernorm in fp32, make output embedding layer require grads, add the upcasting of the lmhead to fp32
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
import torch
# Release the cached, currently unused GPU memory held by PyTorch's allocator
# (presumably to free as much memory as possible before training — confirm).
torch.cuda.empty_cache()

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the adapter is saved under it further down.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Run length. `max_steps` takes precedence over `num_train_epochs` in TrainingArguments,
# so the previous `num_train_epochs=5` never applied (the run stopped at epoch 0.05);
# only the step budget is kept.
MAX_STEPS = 20
# Warm-up must fit inside the run. The previous value (100) exceeded MAX_STEPS, so the
# schedule never left warm-up and the configured learning rate was never reached.
WARMUP_STEPS = 2
# NOTE(review): lowered from 2e-3. With the broken warm-up the rate actually applied only
# ramped up to about 4e-4; 2e-4 is the value reported for 7B models in the QLoRA paper.
# Tune as needed.
LEARNING_RATE = 2e-4
# Number of examples held out for evaluation.
EVAL_HOLDOUT_SIZE = 200

# Evaluate on examples the model is not trained on. Previously the full training set was
# passed as `eval_dataset`, and `eval_steps=1000` meant evaluation never ran within 20 steps.
splits = dataset.train_test_split(test_size=EVAL_HOLDOUT_SIZE, seed=42)
train_split = splits["train"]
eval_split = splits["test"]

# Set training arguments
training_arguments = TrainingArguments(
    output_dir="./results",
    max_steps=MAX_STEPS,
    per_device_train_batch_size=100,
    gradient_accumulation_steps=1,
    evaluation_strategy="steps",
    eval_steps=MAX_STEPS // 2,  # evaluate mid-run and at the end
    logging_steps=1,
    optim="paged_adamw_8bit",
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="linear",
    warmup_steps=WARMUP_STEPS,
    report_to="wandb",  # prompts for a W&B API key on first use
)

# Set supervised fine-tuning parameters
# NOTE(review): only the "Instruction" column is used as training text — confirm that it
# holds the complete prompt + response and not just the prompt.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_split,
    eval_dataset=eval_split,
    peft_config=peft_config,
    dataset_text_field="Instruction",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
)

# Train model (the returned TrainOutput is displayed as the cell result)
trainer.train()

Map:   0%|          | 0/42204 [00:00<?, ? examples/s]

Map:   0%|          | 0/42204 [00:00<?, ? examples/s]

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss


TrainOutput(global_step=20, training_loss=1.6411407008767127, metrics={'train_runtime': 1145.7728, 'train_samples_per_second': 1.746, 'train_steps_per_second': 0.017, 'total_flos': 3852787005849600.0, 'train_loss': 1.6411407008767127, 'epoch': 0.05})

In [None]:
import os

# Only confirm that the Drive mount exists. Listing the whole Drive root writes the
# names of every personal file into the saved notebook output.
DRIVE_ROOT = './drive/MyDrive'
os.path.isdir(DRIVE_ROOT)

['Document sans titre.gdoc',
 'Untitled document (10).gdoc',
 'learner (1).xlsx',
 'learner (3).gsheet',
 'learner.xlsx',
 'learner (2).gsheet',
 'learner (1).gsheet',
 'Young - Multidimensional Scaling - History, theory and applications - only the introduction - 1987.pdf',
 'Young - Multidimensional Scaling - History, theory and applications - only the introduction - 1987.gdoc',
 'learner.gsheet',
 'algo.pdf',
 'algo.gdoc',
 'Doan, Nguyen.pdf',
 'Colab Notebooks',
 'Final evaluation grading scheme.pdf',
 'Stages Bachelor 2020.gsheet',
 'Homework_Chap5_6_7_19_20.pdf',
 'Final evaluation grading scheme.gdoc',
 'Homework_Chap5_6_7_19_20.gdoc',
 'Report.gdoc',
 'Test3_Chap4_5_6_19_20.pdf',
 'Test3_Chap4_5_6_19_20.gdoc',
 'PIB1_intership_19_20_PIB1.pdf',
 'PIB1_intership_19_20_PIB1.gdoc',
 'Options 20212022.xlsx',
 'New Recording 3.m4a',
 'Housing Study.xlsx',
 'Untitled document (9).gdoc',
 'Timetable B3S1 20212022 students version (1).docx',
 'Timetable B3S1 20212022 students version.doc

In [None]:
# Save the trained model held by the trainer into the mounted Google Drive folder.
drive_adapter_dir = './/drive//MyDrive//llama23htrained2'
trainer.model.save_pretrained(drive_adapter_dir)

In [None]:
!sudo apt-get install git-lfs

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git-lfs is already the newest version (3.0.2-1ubuntu0.2).
0 upgraded, 0 newly installed, 0 to remove and 15 not upgraded.


In [None]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) y
Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'stor

In [None]:
!huggingface-cli repo create "Llama-2-7b-chat-hf-Movies-FineTuned-2e"

[90mgit version 2.34.1[0m
[90mgit-lfs/3.0.2 (GitHub; linux amd64; go 1.18.1)[0m

You are about to create [1mthudoann/Llama-2-7b-chat-hf-Movies-FineTuned-2e[0m
Proceed? [Y/n] y

Your repo now lives at:
  [1mhttps://huggingface.co/thudoann/Llama-2-7b-chat-hf-Movies-FineTuned-2e[0m

You can clone it locally with the command below, and commit/push as usual.

  git clone https://huggingface.co/thudoann/Llama-2-7b-chat-hf-Movies-FineTuned-2e



In [None]:
# NOTE(review): the CLI rejects this invocation — `repo` only accepts the `create`
# sub-command (see the usage error in this cell's output). The repository had already
# been created by the preceding cell, so this looks like a leftover experiment and can
# probably be deleted.
!huggingface-cli repo "Llama-2-7b-chat-hf-Movies-FineTuned-2e"

usage: huggingface-cli <command> [<args>] repo [-h] {create} ...
huggingface-cli <command> [<args>] repo: error: argument {create}: invalid choice: 'Llama-2-7b-chat-hf-Movies-FineTuned-2e' (choose from 'create')


In [None]:
# Initialise Git LFS (installed by the apt-get cell above) before cloning the Hub repo.
!git lfs install

Git LFS initialized.


In [None]:
!git clone 'https://thudoann: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@huggingface.co/thudoann/Llama-2-7b-chat-hf-Movies-FineTuned-2e'

Cloning into 'Llama-2-7b-chat-hf-Movies-FineTuned-2e'...
remote: Enumerating objects: 3, done.[K
remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 3[K
Unpacking objects: 100% (3/3), 421 bytes | 421.00 KiB/s, done.


In [None]:
# NOTE(review): `!cd` runs in a temporary subshell, so it does NOT change the notebook's
# working directory (the later `os.listdir()` still shows the repo as a sub-folder).
# The directory change that actually takes effect is the `os.chdir(...)` cell further down.
!cd './Llama-2-7b-chat-hf-Movies-FineTuned-2e'

In [None]:
# Author e-mail recorded on the commits made from this notebook.
!git config --global user.email "thudoann45@gmail.com"

In [None]:
# Author name recorded on the commits made from this notebook.
# Tip: configuring git with the same e-mail address as your huggingface.co account links
# your commits to your profile.
!git config --global user.name "thudoann"

In [None]:
# Show the contents of the current working directory (the cloned repo folder should be listed).
os.listdir(os.curdir)

['.config',
 'drive',
 'wandb',
 'results',
 'Llama-2-7b-chat-hf-Movies-FineTuned-2e',
 'sample_data']

In [None]:
# Write the trained model and the tokenizer files into the cloned Hub repository folder.
hub_repo_dir = "Llama-2-7b-chat-hf-Movies-FineTuned-2e"
trainer.model.save_pretrained(hub_repo_dir)
trainer.tokenizer.save_pretrained(hub_repo_dir)

('Llama-2-7b-chat-hf-Movies-FineTuned-2e/tokenizer_config.json',
 'Llama-2-7b-chat-hf-Movies-FineTuned-2e/special_tokens_map.json',
 'Llama-2-7b-chat-hf-Movies-FineTuned-2e/tokenizer.json')

In [None]:
# Step into the cloned repository so the git commands in the following cells operate on it.
cloned_repo_path = './Llama-2-7b-chat-hf-Movies-FineTuned-2e'
os.chdir(cloned_repo_path)

In [None]:
# Stage everything in the current directory (the cloned repo, after the os.chdir above).
!git add .

In [None]:
# Commit the staged adapter and tokenizer files.
!git commit -m "Initial commit"

[main 0689731] Initial commit
 6 files changed, 93712 insertions(+)
 create mode 100644 README.md
 create mode 100644 adapter_config.json
 create mode 100644 adapter_model.safetensors
 create mode 100644 special_tokens_map.json
 create mode 100644 tokenizer.json
 create mode 100644 tokenizer_config.json


In [None]:
# Push the commit to the Hugging Face Hub remote the repository was cloned from.
!git push

Enumerating objects: 9, done.
Counting objects: 100% (9/9), done.
Delta compression using up to 2 threads
Compressing objects: 100% (8/8), done.
Writing objects: 100% (8/8), 479.86 KiB | 4.40 MiB/s, done.
Total 8 (delta 0), reused 0 (delta 0), pack-reused 0
To https://huggingface.co/thudoann/Llama-2-7b-chat-hf-Movies-FineTuned-2e
   0ffc270..0689731  main -> main


In [None]:
# Go back up to the parent directory, undoing the earlier os.chdir into the cloned repo.
os.chdir('./..')