## Merging a Fine-tuned LoRA Adapter with Its Parent Pretrained Model

### Install requirements

First, run the cells below to install the requirements:

In [1]:
!pip install -q bitsandbytes datasets accelerate loralib einops
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.2/486.2 kB[0m [31m39.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.5/212.5 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.3/134.3 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependen

## Importing Packages

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftConfig, PeftModel

## Loading Pre-trained Model & Tokenizer from the HF Repository

In [3]:
%%script true
BASE_MODEL_NAME = "tiiuae/falcon-7b"

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    return_dict=True,
    device_map='auto',
    trust_remote_code=True,
    load_in_4bit = True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token

In [6]:
# Hub repo id of the fine-tuned LoRA adapter that will be merged into its base model.
peft_model_id = "TariqJamil/falcon-7b-peft-qlora-finetuned-0704-instruct"

In [7]:
# Choose the half-precision dtype for the base weights. GPUs with compute
# capability 8.x and newer support bfloat16; comparing the major version with
# `== 8` sent 9.x+ devices to float16, and the capability query raised when no
# CUDA device was present.
has_bf16_gpu = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
dtype = torch.bfloat16 if has_bf16_gpu else torch.float16

# The adapter config names the base checkpoint the adapter belongs to.
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model unquantized in the chosen dtype.
# NOTE(review): 8-bit loading was previously left disabled here, presumably so
# the adapter can be merged into regular dense weights. Confirm before re-enabling.
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    device_map='auto',
    trust_remote_code=True,
    torch_dtype=dtype,
)

# Tokenizer of the same base checkpoint: left padding, EOS reused as pad token.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token

# Attach the LoRA adapter weights to the base model.
model = PeftModel.from_pretrained(model=base_model, model_id=peft_model_id)

Downloading (…)/adapter_config.json:   0%|          | 0.00/419 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/667 [00:00<?, ?B/s]

Downloading (…)/configuration_RW.py:   0%|          | 0.00/2.61k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- configuration_RW.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Downloading (…)main/modelling_RW.py:   0%|          | 0.00/47.5k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- modelling_RW.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Downloading (…)model.bin.index.json:   0%|          | 0.00/16.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

Downloading adapter_model.bin:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

## Merging the Adapter into the Base Model to Form a Standalone Model

In [8]:
# Merge the LoRA weights into the base model and drop the PEFT wrapper.
# The isinstance guard makes this cell safe to re-run: after the first merge
# `model` is no longer a PeftModel, so a second call would otherwise fail.
if isinstance(model, PeftModel):
    model = model.merge_and_unload()

In [9]:
# NOTE(review): this import sits outside the imports cell; consider moving it
# up with the other imports so all dependencies are visible in one place.
from huggingface_hub import notebook_login

# Shows the Hugging Face login widget. The push_to_hub cells further down ask
# for the stored token, so log in here before running them.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [11]:
# Destination repo for the merged model: the adapter repo id plus an "-r1" suffix.
MODEL_ID = f"{peft_model_id}-r1"
print(MODEL_ID)

TariqJamil/falcon-7b-peft-qlora-finetuned-0704-instruct-r1


In [12]:
# Upload the merged model weights and model config to the destination repo.
# `token=True` replaces the deprecated `use_auth_token=True`; it uses the token
# stored by the Hugging Face login.
model.push_to_hub(MODEL_ID, token=True)

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.89G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/TariqJamil/falcon-7b-peft-qlora-finetuned-0704-instruct-r1/commit/157debc595eb281d9722d1b4c74cb3d13b5f56a5', commit_message='Upload RWForCausalLM', commit_description='', oid='157debc595eb281d9722d1b4c74cb3d13b5f56a5', pr_url=None, pr_revision=None, pr_num=None)

In [13]:
# Upload the tokenizer files next to the merged model.
# `token=True` replaces the deprecated `use_auth_token=True`.
tokenizer.push_to_hub(MODEL_ID, token=True)

CommitInfo(commit_url='https://huggingface.co/TariqJamil/falcon-7b-peft-qlora-finetuned-0704-instruct-r1/commit/36c65591b0c116d0725628ec123027b488e218ea', commit_message='Upload tokenizer', commit_description='', oid='36c65591b0c116d0725628ec123027b488e218ea', pr_url=None, pr_revision=None, pr_num=None)

In [14]:
# `config` is the adapter's PeftConfig, so pushing it would add an adapter
# config to the merged-model repo and make the repo look like an adapter
# checkpoint. Push the merged model's own config instead.
# NOTE(review): pushing the model normally uploads config.json already, so this
# call is likely redundant — confirm and drop the cell if so.
model.config.push_to_hub(MODEL_ID, token=True)

CommitInfo(commit_url='https://huggingface.co/TariqJamil/falcon-7b-peft-qlora-finetuned-0704-instruct-r1/commit/8fdc152433ba9a89b286f2316a1670b1538e7b94', commit_message='Upload config', commit_description='', oid='8fdc152433ba9a89b286f2316a1670b1538e7b94', pr_url=None, pr_revision=None, pr_num=None)