In [None]:
!pip install datasets
!pip install transformers -U
!pip install accelerate -U
!pip install trl
!pip install bitsandbytes

In [None]:
!pip install peft

In [None]:
import torch

# Use the CUDA device when PyTorch reports one as available; otherwise use the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

In [None]:
from datasets import load_dataset

# Hub identifier of the dataset used for fine-tuning and evaluation.
DATASET_NAME = "ChrisHayduk/Llama-2-SQL-Dataset"

# Load the dataset; later cells index the result by split name ("train", "eval").
dataset = load_dataset(path=DATASET_NAME)

In [None]:
# Draw a fixed-size random subset of the training split.
# The shuffle is seeded so that a fresh "Restart & Run All" selects the same rows.
SEED = 42
N_TRAIN_SAMPLES = 1000

full_training_dataset = dataset["train"]
shuffled = full_training_dataset.shuffle(seed=SEED)
# Clamp the subset size so the cell still works if the split has fewer rows than requested.
training_dataset = shuffled.select(range(min(N_TRAIN_SAMPLES, len(shuffled))))

In [None]:
import bitsandbytes as bnb
from transformers import BitsAndBytesConfig

# Quantization settings: 4-bit loading, "nf4" quantization type, float16 compute dtype.
_bnb_settings = {
    "load_in_4bit": True,
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_type": "nf4",
}
quantization_config = BitsAndBytesConfig(**_bnb_settings)


In [None]:
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "NousResearch/Llama-2-7b-hf"

# Tokenizer: the end-of-sequence token doubles as the padding token, and padding
# is applied on the right-hand side of a sequence.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Base model, loaded with the quantization config defined in the earlier cell;
# device placement is left to the library via device_map="auto".
_model_load_kwargs = dict(
    quantization_config=quantization_config,
    device_map="auto",
)
model = AutoModelForCausalLM.from_pretrained(model_name, **_model_load_kwargs)

# Caching is switched on here; the training cell later switches it off again.
model.config.use_cache = True

In [None]:
def construct_datapoint(x, max_length=1024):
  """Tokenize one dataset row as a single prompt + completion sequence.

  Args:
    x: A dataset row providing the string fields 'input' and 'output'.
    max_length: Upper bound on the number of tokens kept. Longer sequences are
      truncated so that a single unusually long row cannot exhaust memory
      during training.

  Returns:
    The tokenizer's encoding of the concatenated 'input' + 'output' text.
  """
  combined = x['input'] + x['output']
  # No padding is requested here: this function sees one row at a time, so
  # padding "to the longest in the batch" would be a no-op. Batch-level padding
  # is left to the data collator given to the Trainer.
  return tokenizer(combined, truncation=True, max_length=max_length)

training_dataset = training_dataset.map(construct_datapoint)

In [None]:
# Print the dataset summary after the tokenization map has been applied.
print(training_dataset)

In [None]:
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# Modules that receive LoRA adapters. The list names both the attention
# projections (q/k/v/o) and the MLP projections (gate/up/down), so adaptation
# is not limited to attention.
_lora_targets = ['q_proj', 'k_proj', 'down_proj', 'v_proj', 'gate_proj', 'o_proj', 'up_proj']

peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=_lora_targets,
)

# Prepare the quantized model for k-bit training, then wrap it with the adapters.
model = get_peft_model(prepare_model_for_kbit_training(model), peft_config)

# Sampling defaults, written onto the model's own generation config object.
generation_config = model.generation_config
_sampling_defaults = {
    "max_new_tokens": 256,
    "pad_token_id": tokenizer.eos_token_id,
    "eos_token_id": tokenizer.eos_token_id,
    "temperature": 0.7,
    "top_p": 0.9,
    "do_sample": True,
}
for _option, _value in _sampling_defaults.items():
  setattr(generation_config, _option, _value)

In [None]:
def generate(prompt, max_new_tokens=20):
  """Sample a continuation of `prompt` from the model and print it.

  Args:
    prompt: Text the model is conditioned on.
    max_new_tokens: Maximum number of tokens to generate. Defaults to 20, the
      value this helper previously hard-coded. It is passed per call instead
      of being written into the shared `generation_config`, so the configured
      default there is no longer overwritten as a side effect.

  Returns:
    None. The decoded sequence (prompt included) is printed.
  """
  # The model may still be in training mode (e.g. after trainer.train()), in
  # which case dropout would remain active while sampling; force eval mode.
  model.eval()

  # Calling the tokenizer (rather than .encode) also yields the attention mask,
  # which generate() needs to be unambiguous because pad and EOS share a token.
  # Inputs are moved to the model's own device rather than a global guess.
  inputs = tokenizer(prompt, add_special_tokens=True, return_tensors="pt").to(model.device)

  with torch.inference_mode():
    output_tokens = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        generation_config=generation_config,
        max_new_tokens=max_new_tokens,
        repetition_penalty = 1.3,
        # Request KV caching explicitly: the training cell sets
        # model.config.use_cache = False, which is not wanted for inference.
        use_cache=True,
    )

  string_decoded = tokenizer.decode(output_tokens[0], clean_up_tokenization_spaces=True)
  print(string_decoded)

In [None]:
# Sanity-check generation on a free-form prompt before trainer.train() is called.
generate('today I want to')

In [None]:
# Training hyperparameters: one epoch, per-device batch size 1 with 4 gradient
# accumulation steps, cosine learning-rate schedule with a 5% warmup ratio,
# fp16 enabled, and the "paged_adamw_8bit" optimizer.
_training_options = dict(
    output_dir="fine_tuning",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    optim="paged_adamw_8bit",
    fp16=True,
)
training_arguements = transformers.TrainingArguments(**_training_options)

# Language-modeling collator with masked-LM disabled (mlm=False).
_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_arguements,
    train_dataset=training_dataset,
    data_collator=_lm_collator,
)

# Switch the model config's cache flag off before training is started.
model.config.use_cache = False

In [None]:
# Run the fine-tuning loop using the Trainer configured in the previous cell.
trainer.train()

In [None]:
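# Weights & Biases API key: [REDACTED]. Never store credentials in the notebook; provide the key through the WANDB_API_KEY environment variable or an interactive wandb.login() prompt, and rotate the key that was previously committed here.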

In [None]:
# Shuffle the evaluation split and take its first row as the sample to inspect.
eval_dataset = dataset['eval'].shuffle()

_first_example = eval_dataset[0]
sample_sql_question = _first_example['input']
correct_answer = _first_example['output']

# Show the prompt next to its reference answer.
for _label, _text in (('Query:', sample_sql_question), ('Answer:', correct_answer)):
  print(_label, _text)

In [None]:
# Generate from the sampled evaluation prompt, for comparison with the reference answer printed above.
generate(sample_sql_question)