In [None]:
!pip install transformers torch bitsandbytes accelerate datasets peft



### 1. Load Model

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
from transformers import BitsAndBytesConfig
import torch

# Tokenizer that belongs to the Falcon-7B base checkpoint.
tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")

# Quantization settings: 4-bit NF4 weights with double quantization and a
# float16 compute dtype.
config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Load the base model, quantized according to `config`.
model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",
    quantization_config=config,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### 2. Load Dataset

In [None]:
from datasets import load_dataset

def create_full_article_col(example):
  """Build the combined `full_article` text for one dataset row.

  The row's `title` and `body` fields are joined into a single string:
  "TITLE:" + title, a blank line, then "BODY:" + body. The result is
  returned as a one-key dict so the function can be passed to `dataset.map`.
  """
  title, body = example['title'], example['body']
  return dict(full_article=f"TITLE:{title}\n\nBODY:{body}")

# Fetch the Reuters articles from the Hub and add the combined
# `full_article` column in one step.
dataset = load_dataset("ingeniumacademy/reuters_articles").map(create_full_article_col)
dataset

### 3. Test Model on example from test dataset

In [None]:
# Display the combined title + body text of the article at index 2 of the test split.
dataset['test'][2]['full_article']

"TITLE:CHEFS <CHEF.O> COMPLETES PRIVATE SALE\n\nBODY:Chefs International\nInc said it completed a private sale of nine mln units of its\nsecurities for 20 cts per unit for a total price of 1,800,000\ndlrs.\n    The company said each unit consisted of one share of Chef's\ncommon stock and one three-year warrant exerciseable to\npurchase one share of Chef's stock at 25 cents.\n    The company also said Robert E. Brennan purchased 8,250,000\nof the units.\n   \n Reuter\n\x03"

In [None]:
# Only the beginning of the article is given to the model; the rest is left
# for it to continue.
test_prompt = dataset['test'][2]['full_article'][:200] # use first 200 characters as test prompt
print(test_prompt)

TITLE:CHEFS <CHEF.O> COMPLETES PRIVATE SALE

BODY:Chefs International
Inc said it completed a private sale of nine mln units of its
securities for 20 cts per unit for a total price of 1,800,000
dlrs.



In [None]:
def generate(prompt, max_new_tokens=100):
  """Sample a continuation of `prompt` from the base model.

  Args:
    prompt: Text the model should continue.
    max_new_tokens: Upper bound on the number of tokens generated after the
      prompt. Defaults to 100.

  Returns:
    The decoded generated sequence (the prompt followed by the sampled
    continuation) with special tokens removed.
  """
  # Move the inputs to the device the model is on rather than assuming "cuda".
  tokenized_text = tokenizer(prompt, return_tensors="pt").to(model.device)
  output = model.generate(
      **tokenized_text,
      eos_token_id=tokenizer.eos_token_id,
      # Pass the pad id explicitly; generate() otherwise falls back to the
      # EOS id itself and logs a warning on every call.
      pad_token_id=tokenizer.eos_token_id,
      do_sample=True,  # sampling: repeated calls return different text
      max_new_tokens=max_new_tokens,
  )
  result = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
  return result


result = generate(test_prompt)
print(result)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


TITLE:CHEFS <CHEF.O> COMPLETES PRIVATE SALE

BODY:Chefs International
Inc said it completed a private sale of nine mln units of its
securities for 20 cts per unit for a total price of 1,800,000
dlrs.
The company, which was acquired by CIE Investments Inc in August
2000, said the purchasers of the units were "a combination of
institutions, individual investors, and former officers and
employees..."
The price, effective May 23, 2003. is equivalent to 60%
premium to the net asset value of 14 cts for the units in
public trading on the TSE.
The proceeds of the sale will be used for


In [None]:
# Compare the sampled continuation with the real article: the reference is the
# first 600 characters of the same test example (title included).
print("Model prediction: ", result)
print("-"*100)
print("Actual body: ", dataset['test'][2]['full_article'][:600])

Model prediction:  TITLE:CHEFS <CHEF.O> COMPLETES PRIVATE SALE

BODY:Chefs International
Inc said it completed a private sale of nine mln units of its
securities for 20 cts per unit for a total price of 1,800,000
dlrs.
The company, which was acquired by CIE Investments Inc in August
2000, said the purchasers of the units were "a combination of
institutions, individual investors, and former officers and
employees..."
The price, effective May 23, 2003. is equivalent to 60%
premium to the net asset value of 14 cts for the units in
public trading on the TSE.
The proceeds of the sale will be used for
----------------------------------------------------------------------------------------------------
Actual body:  TITLE:CHEFS <CHEF.O> COMPLETES PRIVATE SALE

BODY:Chefs International
Inc said it completed a private sale of nine mln units of its
securities for 20 cts per unit for a total price of 1,800,000
dlrs.
    The company said each unit consisted of one share of Chef's
common stock and o

### 4. Prepare Model and Dataset for training

In [None]:
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# LoRA hyperparameters, defined once here and passed to LoraConfig below
# (previously the same values were repeated as literals).
lora_alpha = 16     # LoRA scaling factor
lora_dropout = 0.1  # dropout applied inside the LoRA layers
lora_r = 32         # rank of the low-rank update matrices

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# Order matters: prepare the quantized base model for training first, then
# wrap it with the LoRA adapter. get_peft_model registers the adapter under
# the name "default"; a second adapter built from the same config is not
# needed for training, so none is added.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

# Sanity check: only the LoRA weights should be reported as trainable.
model.print_trainable_parameters()

In [None]:
# Maximum number of tokens kept per article.
CONTEXT_LENGTH = 512

def tokenize(element):
    """Tokenize the `full_article` field of a batch of rows.

    Articles longer than CONTEXT_LENGTH tokens are truncated; the overflowing
    tokens are not returned as additional samples. The tokenizer output is
    returned unchanged.
    """
    return tokenizer(
        element["full_article"],
        max_length=CONTEXT_LENGTH,
        truncation=True,
        return_overflowing_tokens=False,
    )


# Tokenize every split and drop the original text columns so that only the
# tokenizer outputs remain.
tokenized_datasets = dataset.map(
    tokenize, batched=True, remove_columns=dataset["train"].column_names
)

# Train and evaluate on small random subsets to keep the run short. The fixed
# seed makes the selected subsets identical on every run.
tokenized_datasets['train'] = tokenized_datasets['train'].shuffle(seed=42).select(range(1000))
tokenized_datasets['validation'] = tokenized_datasets['validation'].shuffle(seed=42).select(range(100))

tokenized_datasets

Map:   0%|          | 0/17262 [00:00<?, ? examples/s]

Map:   0%|          | 0/2158 [00:00<?, ? examples/s]

Map:   0%|          | 0/2158 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 1000
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 100
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 2158
    })
})

### 5. Train


This should take about 15 minutes.

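You can safely ignore the `use_cache` warnings below: `use_cache=True` is incompatible with gradient checkpointing, so it is switched to `use_cache=False` automatically during training.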

In [None]:
from transformers import DataCollatorForLanguageModeling

# Use the end-of-sequence token as the padding token so the collator has a
# token to pad batches with.
tokenizer.pad_token = tokenizer.eos_token
# mlm=False: collate for causal language modeling, not masked language modeling.
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

In [None]:
from huggingface_hub import notebook_login

# Authenticate with the Hugging Face Hub; the training arguments below set
# push_to_hub=True, so uploads need a logged-in session.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from transformers import Trainer, TrainingArguments

# Settings for the fine-tuning run.
# NOTE(review): `evaluation_strategy` (here) and `tokenizer=` (in Trainer below)
# may go by newer names in recent transformers releases; the install cell is
# unpinned, so confirm against the installed version.
training_args = TrainingArguments(
    # Where results are written.
    output_dir="./reuters-falcon-qlora",  # local directory
    hub_model_id="ingeniumacademy/reuters-falcon-qlora",  # identifier on the Hub
    push_to_hub=True,
    # Schedule and reporting.
    num_train_epochs=1,
    evaluation_strategy="epoch",
    logging_steps=10,
    # Batching.
    auto_find_batch_size=True,
    gradient_accumulation_steps=8,
    # Optimization.
    learning_rate=5e-4,
    lr_scheduler_type="cosine",
    weight_decay=0.1,
    fp16=True,
)

# Trainer wiring: the model, the tokenized train/validation subsets, and the
# causal-LM data collator.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
)



In [None]:
# Run fine-tuning with the arguments configured above (one epoch, with
# evaluation on the validation subset at the end of the epoch).
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Epoch,Training Loss,Validation Loss
0,1.9745,2.029503


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...

TrainOutput(global_step=15, training_loss=2.112158981959025, metrics={'train_runtime': 1063.2338, 'train_samples_per_second': 0.941, 'train_steps_per_second': 0.014, 'total_flos': 1.6114483749808128e+16, 'train_loss': 2.112158981959025, 'epoch': 0.96})

In [None]:
# Upload the trained adapter weights and the training artifacts to the Hub.
trainer.push_to_hub()

adapter_model.safetensors:   0%|          | 0.00/37.8M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/37.8M [00:00<?, ?B/s]

events.out.tfevents.1718147097.432a6526aab4.6107.0:   0%|          | 0.00/6.08k [00:00<?, ?B/s]

events.out.tfevents.1718147508.432a6526aab4.6107.1:   0%|          | 0.00/9.80k [00:00<?, ?B/s]

Upload 5 LFS files:   0%|          | 0/5 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ingeniumacademy/reuters-gpt2-text-gen/commit/14a4009c1979472ad8f32188fbf0af2d1046c3aa', commit_message='End of training', commit_description='', oid='14a4009c1979472ad8f32188fbf0af2d1046c3aa', pr_url=None, pr_revision=None, pr_num=None)

### 6. Test the fine-tuned model (restart the session before running the cells below!)

In [None]:
!pip install transformers torch bitsandbytes accelerate datasets peft



In [None]:
from datasets import load_dataset

def create_full_article_col(example):
  """Build the combined `full_article` text for one dataset row.

  The row's `title` and `body` fields are joined into a single string:
  "TITLE:" + title, a blank line, then "BODY:" + body. The result is
  returned as a one-key dict so the function can be passed to `dataset.map`.
  """
  title, body = example['title'], example['body']
  return dict(full_article=f"TITLE:{title}\n\nBODY:{body}")

# Fetch the Reuters articles from the Hub again (fresh session) and add the
# combined `full_article` column in one step.
dataset = load_dataset("ingeniumacademy/reuters_articles").map(create_full_article_col)
dataset

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


DatasetDict({
    train: Dataset({
        features: ['title', 'body', 'full_article'],
        num_rows: 17262
    })
    validation: Dataset({
        features: ['title', 'body', 'full_article'],
        num_rows: 2158
    })
    test: Dataset({
        features: ['title', 'body', 'full_article'],
        num_rows: 2158
    })
})

In [None]:
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
from transformers import BitsAndBytesConfig
import torch

# Identifiers used in this cell, named once instead of repeated as literals.
BASE_MODEL_ID = "tiiuae/falcon-7b"
# NOTE(review): the training cell sets
# hub_model_id="ingeniumacademy/reuters-falcon-qlora", while this cell loads the
# adapter from "reuters-gpt2-text-gen" (the repo shown in the recorded push
# output). Confirm which repository holds the adapter you want to test.
ADAPTER_REPO_ID = "ingeniumacademy/reuters-gpt2-text-gen"

# 4-bit NF4 quantization settings for the base model. Kept under its own name
# so it is no longer overwritten by the adapter config below.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)


# Load the quantized base model, then attach the fine-tuned LoRA adapter to it.
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, quantization_config=bnb_config)
# Adapter configuration from the Hub; still exposed as `config`, as before.
config = PeftConfig.from_pretrained(ADAPTER_REPO_ID)
lora_model = PeftModel.from_pretrained(model, ADAPTER_REPO_ID)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/37.8M [00:00<?, ?B/s]

In [None]:
# Same prompt as before fine-tuning: only the beginning of the article is
# given to the model, the rest is left for it to continue.
test_prompt = dataset['test'][2]['full_article'][:200] # use first 200 characters as test prompt
print(test_prompt)

TITLE:CHEFS <CHEF.O> COMPLETES PRIVATE SALE

BODY:Chefs International
Inc said it completed a private sale of nine mln units of its
securities for 20 cts per unit for a total price of 1,800,000
dlrs.



In [None]:
def generate(prompt, max_new_tokens=100):
  """Sample a continuation of `prompt` from the LoRA-adapted model.

  Args:
    prompt: Text the model should continue.
    max_new_tokens: Upper bound on the number of tokens generated after the
      prompt. Defaults to 100.

  Returns:
    The decoded generated sequence (the prompt followed by the sampled
    continuation) with special tokens removed.
  """
  # Move the inputs to the device the model is on rather than assuming "cuda".
  tokenized_text = tokenizer(prompt, return_tensors="pt").to(lora_model.device)
  output = lora_model.generate(
      **tokenized_text,
      eos_token_id=tokenizer.eos_token_id,
      # Pass the pad id explicitly; generate() otherwise falls back to the
      # EOS id itself and logs a warning on every call.
      pad_token_id=tokenizer.eos_token_id,
      do_sample=True,  # sampling: repeated calls return different text
      max_new_tokens=max_new_tokens,
  )
  result = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
  return result


result = generate(test_prompt)
print(result)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


TITLE:CHEFS <CHEF.O> COMPLETES PRIVATE SALE

BODY:Chefs International
Inc said it completed a private sale of nine mln units of its
securities for 20 cts per unit for a total price of 1,800,000
dlrs.
The mblized global restaurant, hospitality, entertainment and
digital technology, management and operating company said
it completed the offering through a sale to a number of
individuals and groups.
The company said it intends to use the net proceeds of
approximately 1.6 mln dlrs from the sale to repay in full
the approximately 9.5 mln dlrs of convertible debt that is
outstanding.
The company's board has determined there will be a new


In [None]:
# Sample a second continuation of the same prompt; generate() samples
# (do_sample=True), so the text differs from the previous call.
result = generate(test_prompt)
print(result)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


TITLE:CHEFS <CHEF.O> COMPLETES PRIVATE SALE

BODY:Chefs International
Inc said it completed a private sale of nine mln units of its
securities for 20 cts per unit for a total price of 1,800,000
dlrs.
Each common unit consists of one common share and one warrant.

The warrants are exercisable into common shares for 60 cts until
Feb 14

Each common unit consists of one common share and one warrant.
Each warrant entitles owner to purchase one common share at 50 cts
until Feb 14 Is this one of those reverse splits? It's a reverse merger Thanks!

Just another ticker to add to my watch list.
