## Requirements

In [26]:
# Settings for autoreloading.

%load_ext autoreload
%reload_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
!pip install -q -U bitsandbytes
!pip install -q xformers
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets
!pip install -q einops
!pip install -q wandb
!pip install -q scipy

In [5]:
!apt-get update
!apt-get install git-lfs

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git-lfs is already the newest version (3.0.2-1ubuntu0.2).
0 upgraded, 0 newly installed, 0 to remove and 2 not upgraded.


In [8]:
# Switch the kernel's working directory to the project's src/ folder
# (presumably so the project-local imports used in later cells resolve).
# The path is relative to the current directory; in the recorded run it
# resolved to /workspace/falcon-7b-sql/src.
# NOTE(review): because the path is relative, re-running this cell from inside
# src/ targets a different directory — consider an absolute path; confirm.
%cd ../falcon-7b-sql/src

/workspace/falcon-7b-sql/src


### Login

In [None]:
# Authenticate with Weights & Biases; the training run below reports to it
# (report_to="wandb") and a run is opened explicitly with wandb.init().
import wandb
wandb.login()

In [None]:
# Authenticate with the Hugging Face Hub; the training arguments below set
# push_to_hub=True and the notebook ends with trainer.push_to_hub().
from huggingface_hub import login
login()

## Model & Dataset

In [27]:
# Identifier of the base model handed to get_model_and_tokenizer().
model_id = "tiiuae/falcon-7b"
# Dataset identifier handed to get_dataset().
dataset_id = 'spider'
# Path to the schema file (tables.json) passed as the first argument of get_dataset().
# NOTE(review): absolute path — only valid when the repo lives under /workspace.
spider_schema = '/workspace/falcon-7b-sql/data/tables.json'

In [None]:
# Build the dataset via the project's own helper. Later cells index the
# result with the 'train' and 'validation' keys.
# NOTE(review): the meaning of use_fields=True is defined in
# data/dataset_handler.py and is not visible from this notebook.
from data.dataset_handler import get_dataset
dataset = get_dataset(spider_schema, dataset_id, use_fields=True)

In [29]:
import torch
from transformers import BitsAndBytesConfig
from peft import LoraConfig

# bitsandbytes quantization settings passed to get_model_and_tokenizer():
# 4-bit loading, NF4 quantization type, double quantization enabled, and
# bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# LoRA adapter settings passed to get_model_and_tokenizer(). Adapters are
# attached only to the modules named "query_key_value".
lora_config = LoraConfig(
    r=2, # trailing number (64) is presumably an alternative value the author considered — confirm
    lora_alpha=8, # trailing number (16) is presumably an alternative value the author considered — confirm
    target_modules=["query_key_value"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

In [30]:
# Build the model and tokenizer with the project's helper, using the
# quantization and LoRA configurations defined above. The recorded output
# reports 589,824 trainable parameters.
from model import get_model_and_tokenizer

model, tokenizer = get_model_and_tokenizer(model_id, bnb_config, lora_config)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 589824 || all params: 3609348288 || trainable%: 0.016341565095310637


In [31]:
from data.data_collator import DialogueDataCollator

# Instruction text handed to the collator as its system prefix.
system_prefix_text = 'Convert text into SQL statements by providing a database schema and a query, and generate the corresponding SQL statement.'

# Collator shared by training and evaluation (it is passed to the trainer both
# as data_collator and as train_collate_fn).
collate_fn = DialogueDataCollator(
    tokenizer,
    use_system_prefix=True,
    max_length=512,
    system_prefix=system_prefix_text,
)

## Training

In [32]:
batch_size = 16
gradient_accumulation_steps = 1
# Number of optimizer steps in ONE pass over the training split (floor
# division, so a trailing partial batch is not counted).
# NOTE(review): despite its name this does not account for num_train_epochs,
# which is set separately in TrainingArguments — the warmup below is therefore
# 10% of a single epoch, not of the whole run. Confirm that this is intended.
total_training_steps = len(dataset['train']) // (batch_size * gradient_accumulation_steps)

# Linear warmup over 10% of the steps computed above. Cast to int because
# `warmup_steps` is a step count; the bare `* 0.1` produced a float.
warmup_steps = int(total_training_steps * 0.1)

In [33]:
import transformers

# Hyper-parameters and bookkeeping options for the fine-tuning run.
training_args = transformers.TrainingArguments(
        full_determinism=False,
        # Batch sizes, accumulation and warmup come from the previous cell.
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        warmup_steps=warmup_steps,
        num_train_epochs=3,
        learning_rate=1e-4,
        weight_decay=0,
        # NOTE(review): fp16 mixed precision is enabled here while bnb_config
        # uses torch.bfloat16 as compute dtype — confirm the mix is intended.
        fp16=True,
        # Log every optimizer step.
        logging_steps=1,
        # Relative to the working directory (src/), i.e. outside the repository;
        # the recorded output shows this folder is a clone of the Hub repo.
        output_dir="../../falcon_qlora_sql_r2",
        optim="paged_adamw_8bit",
        seed=42,
        push_to_hub=True,
        report_to="wandb",
        # Evaluate and checkpoint on the same 100-step cadence.
        # NOTE(review): `evaluation_strategy` may be deprecated or renamed
        # (`eval_strategy`) in the transformers build installed from git — verify.
        save_strategy='steps',
        evaluation_strategy='steps',
        eval_steps=100,
        save_steps=100,
    )

In [34]:
# Open the Weights & Biases run for this experiment and attach the training
# arguments as the run configuration.
# NOTE(review): project and entity are hard-coded to one account.
import wandb
wandb.init(project='falcon_qlora_sql', entity='maidacundo', config=training_args)

0,1
eval/loss,█▂▂▂▂▂▂▂▂▂▁▁▁▁▁
eval/runtime,▁▅▅▅▆▆▅▆▅▆▇▇▇▇█
eval/samples_per_second,█▄▄▄▃▃▄▃▄▃▂▂▂▂▁
eval/steps_per_second,█▄▄▄▃▃▄▃▄▄▂▂▂▂▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/learning_rate,▄███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,▇▃▂▂▂▂▃▂▂▃▃▂▂▃▂▁▂▄▂▂▄▂▄▃▂▂▂▃▂▂▁▂▂█▂▂▂▂▁▁
train/total_flos,▁
train/train_loss,▁

0,1
eval/loss,0.17716
eval/runtime,83.884
eval/samples_per_second,12.327
eval/steps_per_second,0.775
train/epoch,3.0
train/global_step,1314.0
train/learning_rate,0.0
train/loss,0.1243
train/total_flos,1.623570697907159e+17
train/train_loss,0.3017


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669267116715975, max=1.0…

In [19]:
# First 64 examples of each split.
# NOTE(review): the trainer cell below is built from the full dataset['train']
# and dataset['validation'], so these subsets are not used there; this cell
# also carries an out-of-order execution count. Presumably a leftover for
# quick smoke tests — confirm whether it can be removed.
small_train = dataset['train'].select(range(64))
small_eval = dataset['validation'].select(range(64))

In [35]:
from training.sft_trainer import SFTTrainer, preprocess_logits_for_metrics

# Assemble the project's SFTTrainer: model and tokenizer first, then the run
# arguments, the full train/validation splits, and finally the batching hooks.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    data_collator=collate_fn,
    train_collate_fn=collate_fn,
    sampler=None,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
)

# Turn the model's cache flag off after the trainer has been built.
model.config.use_cache = False

/workspace/falcon-7b-sql/src/../../falcon_qlora_sql_r2 is already a clone of https://huggingface.co/maidacundo/falcon_qlora_sql_r2. Make sure you pull the latest changes with `repo.git_pull()`.


In [36]:
# Baseline: evaluate on the validation split before any training step, so the
# post-training evaluation further down has a reference point
# (recorded eval_loss here: ~1.26).
trainer.evaluate(dataset['validation'])

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'eval_loss': 1.2636007070541382,
 'eval_runtime': 81.9444,
 'eval_samples_per_second': 12.618,
 'eval_steps_per_second': 0.793}

In [37]:
# Run the fine-tuning. With the arguments above, evaluation and checkpointing
# happen every 100 steps; the recorded run finished at global_step=1314 after
# 3 epochs.
trainer.train()

Step,Training Loss,Validation Loss
100,0.2993,0.2863
200,0.8003,0.335807
300,0.1872,0.242406
400,0.1267,0.236244
500,0.2214,0.256431
600,0.2885,0.218684
700,0.1654,0.198765
800,0.1633,0.206228
900,0.0381,0.186784
1000,0.0633,0.176708


TrainOutput(global_step=1314, training_loss=0.24357877032151956, metrics={'train_runtime': 6948.0175, 'train_samples_per_second': 3.022, 'train_steps_per_second': 0.189, 'total_flos': 1.614661660624896e+17, 'train_loss': 0.24357877032151956, 'epoch': 3.0})

In [38]:
# Evaluate again on the validation split after training
# (recorded eval_loss: ~0.173, versus ~1.26 for the pre-training baseline).
trainer.evaluate(dataset['validation'])

{'eval_loss': 0.17348535358905792,
 'eval_runtime': 83.4912,
 'eval_samples_per_second': 12.385,
 'eval_steps_per_second': 0.779,
 'epoch': 3.0}

In [39]:
# Upload the training result to the Hugging Face Hub; the recorded output shows
# adapter_model.bin being pushed to maidacundo/falcon_qlora_sql_r2.
trainer.push_to_hub()

Upload file adapter_model.bin:   1%|1         | 32.0k/2.27M [00:00<?, ?B/s]

To https://huggingface.co/maidacundo/falcon_qlora_sql_r2
   eb69a94..bec40ca  main -> main

To https://huggingface.co/maidacundo/falcon_qlora_sql_r2
   bec40ca..b81efcd  main -> main



'https://huggingface.co/maidacundo/falcon_qlora_sql_r2/commit/bec40caccbfd62694c4d30fbbc330198e05866f3'