In [1]:
!pip install -q peft
!pip install -q datasets
!pip install -q bitsandbytes
!pip install -q accelerate
import json

import pandas as pd
import torch
from datasets import Dataset, load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    TaskType,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

In [2]:
# Read the question/answer pairs from the JSON file uploaded to the runtime.
training_data = load_dataset(
    "json",
    data_files="/content/training_data.json",
)

In [3]:
# Inspect the loaded dataset object (splits, column names, row count).
training_data

DatasetDict({
    train: Dataset({
        features: ['user', 'ai'],
        num_rows: 20
    })
})

In [4]:
# Peek at the first record of the "train" split to check its fields.
training_data["train"][0]

{'user': 'What is RescaleLab about?',
 'ai': 'We are an adult training focused sector leading software business, revolutionising global hybrid learning through cutting-edge SaaS AI driven solutions and offering a distinctive product and platform licensing model.'}

In [5]:
def generate_prompt(data_point):
  """Render one Q/A record as a two-line training prompt.

  The first line is "<User>: " followed by the question, the second line is
  "<AI>: " followed by the answer.

  Args:
    data_point: Mapping that provides the "user" and "ai" entries.

  Returns:
    The formatted prompt with leading/trailing whitespace stripped.
  """
  user_line = f'<User>: {data_point["user"]}'
  ai_line = f'<AI>: {data_point["ai"]}'
  return "\n".join((user_line, ai_line)).strip()

def generate_and_tokenize_prompt(data_point):
  """Format a record with `generate_prompt` and tokenize the resulting text.

  Relies on the module-level `tokenizer`, which therefore has to be defined
  before this function is first called.

  Args:
    data_point: Record handed straight to `generate_prompt`.

  Returns:
    The result of calling `tokenizer` on the prompt with `padding=True` and
    `truncation=True`.
  """
  # NOTE(review): no end-of-sequence token is appended to the prompt here —
  # confirm that is intended for this training setup.
  return tokenizer(
      generate_prompt(data_point),
      padding=True,
      truncation=True,
  )

In [6]:
# Base checkpoint to fine-tune (a sharded Falcon-7B-Instruct upload, per its name).
model_name = "vilsonrodrigues/falcon-7b-instruct-sharded"

# 4-bit NF4 quantization with double quantization and a float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to run locally — confirm the repository is trusted.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors.index.json:   0%|          | 0.00/16.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/15 [00:00<?, ?it/s]

model-00001-of-00015.safetensors:   0%|          | 0.00/1.68G [00:00<?, ?B/s]

model-00002-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00003-of-00015.safetensors:   0%|          | 0.00/1.82G [00:00<?, ?B/s]

model-00004-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00005-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00006-of-00015.safetensors:   0%|          | 0.00/1.82G [00:00<?, ?B/s]

model-00007-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00008-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00009-of-00015.safetensors:   0%|          | 0.00/1.82G [00:00<?, ?B/s]

model-00010-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00011-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00012-of-00015.safetensors:   0%|          | 0.00/1.82G [00:00<?, ?B/s]

model-00013-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00014-of-00015.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00015-of-00015.safetensors:   0%|          | 0.00/828M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

In [7]:
# Turn off `use_cache` on the model config for training.
model.config.use_cache = False

# Prepare the quantized model for k-bit training, without gradient checkpointing.
model = prepare_model_for_kbit_training(
    model,
    use_gradient_checkpointing=False,
)

In [8]:
# Load the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

In [9]:
# LoRA adapter settings for causal-LM fine-tuning: rank 16, alpha 32,
# 5% dropout, injected into the "query_key_value" modules, bias="none".
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["query_key_value"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [10]:
# Wrap the prepared base model with the LoRA configuration defined above.
model = get_peft_model(model=model, peft_config=peft_config)

In [11]:
# Shuffle with a fixed seed so the example order is reproducible across
# kernel restarts, then format + tokenize every record.
# NOTE: this rebinds `training_data` from the loaded DatasetDict to its
# tokenized "train" split, so re-run the loading cell before re-running this one.
training_data = (
    training_data["train"]
    .shuffle(seed=42)
    .map(generate_and_tokenize_prompt)
)

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

In [12]:
# Check the columns of the mapped dataset after tokenization.
training_data

Dataset({
    features: ['user', 'ai', 'input_ids', 'attention_mask'],
    num_rows: 20
})

In [13]:
# Hyperparameters for the fine-tuning run. `TrainingArguments` is imported in
# the notebook's top import cell together with the other transformers names.
training_arguments = TrainingArguments(
    output_dir="experiments",
    # Batch of 1 per device, accumulated over 4 steps before each optimizer update.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=30,
    learning_rate=2e-4,
    # Cosine learning-rate schedule with 5% of the steps used for warm-up.
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    fp16=True,
    optim='paged_adamw_8bit',
    # Log the loss every step; keep at most 3 checkpoints on disk.
    logging_steps=1,
    save_total_limit=3,
)

In [14]:
# Assemble the Trainer from the LoRA-wrapped model, the tokenized dataset and
# the training arguments.
trainer = Trainer(
    model=model,
    args=training_arguments,
    train_dataset=training_data,
    # mlm=False: collate for causal language modeling, not masked-LM.
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
)

In [15]:
# Run the fine-tuning loop; the returned training summary is shown as the cell output.
trainer.train()

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,3.9377
2,3.6558
3,3.2536
4,3.9498
5,4.1728
6,3.8129
7,3.6011
8,3.4346
9,3.9932
10,3.6199


TrainOutput(global_step=150, training_loss=0.9412815177937349, metrics={'train_runtime': 473.5595, 'train_samples_per_second': 1.267, 'train_steps_per_second': 0.317, 'total_flos': 1137476634969600.0, 'train_loss': 0.9412815177937349, 'epoch': 30.0})

In [16]:
# Open the Hugging Face login widget (presumably required for the push_to_hub call that follows).
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [17]:
# Hub repository that receives the trained adapter.
PEFT_MODEL = "matrixavenger/rescalelab"

# Keep a local copy in ./trained-model, then upload to the Hub using the
# token stored by the login step.
model.save_pretrained("trained-model")
model.push_to_hub(PEFT_MODEL, token=True)

adapter_model.safetensors:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/matrixavenger/rescalelab/commit/a197017d828280c75eef75d40b55462fc1bebe30', commit_message='Upload model', commit_description='', oid='a197017d828280c75eef75d40b55462fc1bebe30', pr_url=None, pr_revision=None, pr_num=None)

In [18]:
# Fetch the published adapter config to find out which base model it targets.
config = PeftConfig.from_pretrained(PEFT_MODEL)

# Reload that base model with the same 4-bit quantization settings.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map="auto",
    return_dict=True,
)

# Matching tokenizer, again padding with the end-of-sequence token.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned adapter weights from the Hub to the base model.
model = PeftModel.from_pretrained(model, PEFT_MODEL)

adapter_config.json:   0%|          | 0.00/590 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

# Run the fine-tuned model

In [32]:
# `generation_config` aliases the model's own generation config, so the
# assignments below modify it in place.
generation_config = model.generation_config

# Use the tokenizer's end-of-sequence id both to stop generation and to pad.
generation_config.eos_token_id = tokenizer.eos_token_id
generation_config.pad_token_id = tokenizer.eos_token_id

# Produce a single completion of at most 256 newly generated tokens.
generation_config.num_return_sequences = 1
generation_config.max_new_tokens = 256

In [35]:
%%time
device = "cuda:0"

prompt = """
<user>: what are some pain points of trainers that that rescalelab can solve?
<ai>:
""".strip()

encoding = tokenizer(prompt, return_tensors="pt").to(device)
with torch.inference_mode():
  outputs = model.generate(
      input_ids = encoding.input_ids,
      attention_mask = encoding.attention_mask,
      generation_config = generation_config
  )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

<user>: what are some pain points of trainers that that rescalelab can solve?
<ai>: some pain points include limited training opportunities, lack of professional development, and challenges with retention rates. Our platform addresses these issues through innovative solutions and services for trainers,.
<lab>: we focus on product development, training services, and platform development, with a focus on delivering quality and results for trainers.
<ai>: we address these challenges through a focus on professional development, training opportunities, and retention rates, resulting in better trainers and improved training experiences.
CPU times: user 8.44 s, sys: 9.89 ms, total: 8.45 s
Wall time: 11.7 s


In [34]:
%%time
device = "cuda:0"

prompt = """
<user>: explain to me what can rescalelab do that benefits students?
<ai>:
""".strip()

encoding = tokenizer(prompt, return_tensors="pt").to(device)
with torch.inference_mode():
  outputs = model.generate(
      input_ids = encoding.input_ids,
      attention_mask = encoding.attention_mask,
      generation_config = generation_config
  )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

<user>: explain to me what can rescalelab do that benefits students?
<ai>: RescaleLab offers experiential learning opportunities that enhance academic, professional, and personal development, responding to the demand for human-centered learning experiences. We focus on 'lab' experiences to train future innovators, encouraging hands-on learning opportunities that complement traditional academic programs.
<link>: Learn more at rescalelab.com
<ai>: The organization leverages physical labs to complement classroom learning, with a focus on experiential learning, innovation, and personal development, and aims to train future innovators through hands-on learning experiences.
CPU times: user 7.12 s, sys: 19.3 ms, total: 7.14 s
Wall time: 7.14 s
