In [1]:
!pip install tokenizers
!pip install transformers accelerate bitsandbytes evaluate
!pip install datasets
!pip install zstandard
!pip install loralib
!pip install peft
!pip install einops

import transformers
from datasets import load_dataset, Dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, PeftModel, PeftConfig
from peft import prepare_model_for_kbit_training
from torch.optim import AdamW
from torch.utils.data import DataLoader
import torch
import evaluate



In [2]:
model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-7b-instruct",load_in_4bit=True, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
tokenizer.pad_token = tokenizer.eos_token
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
from datasets import load_dataset
dataset = load_dataset("jawerty/html_dataset")
dataset = dataset["train"].train_test_split(test_size=0.2)
train_labels = dataset["train"][:]['label']
train_htmls = dataset["train"][:]['html']
train_dataset = [[a,b] for (a, b) in zip(train_labels, train_htmls)]
test_labels = dataset["test"][:]['label']
test_htmls = dataset["test"][:]['html']
test_dataset = [[a,b] for (a, b) in zip(test_labels, test_htmls)]

def tokenize_prompt(text):
  label = text[0]
  html = text[1]
  input_str = f"""
    <human>: {label}
    <assistant>: {html}
    """.strip()
  return tokenizer(input_str, padding = True , truncation =True)

def tokenize_prompt_test(text):
  label = text[0]
  html = text[1]
  input_str = f"""
    <human>: {label}
    <assistant>:
    """.strip()
  return tokenizer(input_str, padding = True , truncation =True)
train_dataset = [tokenize_prompt(elt) for elt in train_dataset]
test_dataset = [tokenize_prompt_test(elt) for elt in test_dataset]


In [4]:
import numpy as np
seed = 1.0
num_epochs = 2
batch_size = 3
metric = evaluate.load("bleu")
train_dataloader = DataLoader(train_dataset,shuffle=True, batch_size = 1)
test_dataloader = DataLoader(test_dataset,shuffle=False,batch_size = 1)
optimizer = AdamW(params=model.parameters(), lr=1e-6)
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode = 'min',factor = 0.5,patience = 2)
for epoch in range(num_epochs):
        model.train()
        lr_scheduler.step(epoch)
        for step, batch in enumerate(train_dataloader):
            outputs = model(torch.tensor(train_dataset[step]['input_ids']).reshape(-1,1))
            loss = outputs.loss
            loss = torch.sum(loss['logits'])
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
The current implementation of Falcon calls `torch.scaled_dot_product_attention` directly, this will be deprecated in the future in favor of the `BetterTransformer` API. Please install the latest optimum library with `pip install -U optimum` and call `model.to_bettertransformer()` to benefit from `torch.scaled_dot_product_attention` and future performance optimizations.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=F

KeyboardInterrupt: ignored

In [None]:
print(torch.tensor(train_dataset['input_ids']))

In [None]:
model.save_pretrained('config.pt')

In [5]:
#config = PeftConfig.from_pretrained("config.pt")
#model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
#tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
#model_inf = PeftModel.from_pretrained(model,"config.pt" )

# create your own prompt
prompt = f"""
    <human>: {test_labels[0]}
    <assistant>:
    """.strip()

# encode the prompt
encoding = tokenizer(prompt, return_tensors= "pt").to(model.device)

gen_config = model.generation_config
gen_config.max_new_tokens = 200
gen_config.temperature = 0.2
gen_config.top_p = 0.7
gen_config.num_return_sequences = 1
gen_config.pad_token_id = tokenizer.eos_token_id
gen_config.eos_token_id = tokenizer.eos_token_id

with torch.inference_mode():
    outputs = model.generate(input_ids = encoding.input_ids, attention_mask = encoding.attention_mask,generation_config = gen_config )
print(tokenizer.decode(outputs[0]))

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...

<human>: professional about page with a lead video
    <assistant>: a human assistant that can answer questions and provide information
User <|endoftext|>


In [6]:
prompt = f"""
    <human>: {test_labels[1]}
    <assistant>:
    """.strip()

# encode the prompt
encoding = tokenizer(prompt, return_tensors= "pt").to(model.device)

gen_config = model.generation_config
gen_config.max_new_tokens = 200
gen_config.temperature = 0.2
gen_config.top_p = 0.7
gen_config.num_return_sequences = 1
gen_config.pad_token_id = tokenizer.eos_token_id
gen_config.eos_token_id = tokenizer.eos_token_id

with torch.inference_mode():
    outputs = model.generate(input_ids = encoding.input_ids, attention_mask = encoding.attention_mask,generation_config = gen_config )
print(tokenizer.decode(outputs[0]))

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


<human>: this is a about page
    <assistant>: this is a about page
User <|endoftext|>
