In [None]:
!pip install transformers datasets torch peft accelerate bitsandbytes wandb

In [None]:
!pip install peft
!pip install flash-attn --no-build-isolation

In [None]:
!pip install bitsandbytes
!pip install safetensors

In [None]:
import torch
import os
import json
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model
from torch.utils.data import Dataset
import wandb
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Download the model from the Hugging Face Hub
print("Downloading model from Hugging Face Hub...")
# Weights are loaded in bfloat16 rather than float16: the recorded run of this
# notebook with float16 weights diverged (training loss ~9.9e10 at step 10,
# then 0.0, with no finite grad norm logged). bfloat16 has the same exponent
# range as float32, and it is one of the two dtypes Flash Attention 2 accepts.
# NOTE(review): bfloat16 and Flash Attention 2 both need an Ampere-or-newer GPU
# -- confirm the Colab runtime provides one.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    attn_implementation='flash_attention_2'
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

# Wrap the model with LoRA adapters (rank 8, alpha 16, dropout 0.05) on the
# attention projections named "o_proj" and "qkv_proj"; bias terms are untouched.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["o_proj", "qkv_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# Login to Weights and Biases.
# SECURITY: never hardcode the API key in the notebook. With no `key` argument,
# wandb.login() takes the key from the WANDB_API_KEY environment variable or
# stored credentials, and otherwise prompts for it interactively.
# The key that was previously committed in this cell must be revoked.
wandb.login()

# Initialize Weights and Biases
wandb.init(project="phi3-finetuning")

# SQLDataset class for handling the Text2SQL data
class SQLDataset(Dataset):
    """Text2SQL examples read from a JSON Lines file and tokenized on access.

    Every non-blank line of the file is parsed as one JSON record. When a
    record is fetched, its ``schema``, ``query`` and ``output`` fields are
    formatted into a single prompt string and passed to the tokenizer.

    Args:
        file_path: Path to the UTF-8 encoded JSON Lines file.
        tokenizer: Callable invoked as
            ``tokenizer(text, truncation=True, max_length=..., return_tensors="pt")``
            whose result is indexed with ``'input_ids'`` and ``'attention_mask'``.
        max_length: Maximum sequence length handed to the tokenizer.

    Raises:
        OSError: If ``file_path`` cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """

    def __init__(self, file_path: str, tokenizer, max_length: int = 1024):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.data = []
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                # Blank lines (e.g. a trailing empty line at the end of the
                # file) are not records; json.loads would raise on them.
                if not line.strip():
                    continue
                self.data.append(json.loads(line))

    def __len__(self) -> int:
        """Return the number of records loaded from the file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Build and tokenize the prompt for record ``idx``.

        Returns:
            A dict with ``'input_ids'`` and ``'attention_mask'``, each with the
            leading dimension removed via ``squeeze(0)``.

        Raises:
            KeyError: If the record lacks ``schema``, ``query`` or ``output``.
        """
        record = self.data[idx]
        # Named `prompt` rather than `input` to avoid shadowing the builtin.
        prompt = (
            f"Schema: {record['schema']}\n"
            f"Instructions: {record['query']}\n"
            f"Answer: {record['output']}"
        )
        encoding = self.tokenizer(
            prompt,
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )

        # Assumes the tokenizer returns tensors with a leading batch dimension
        # of size 1 for a single string -- squeeze(0) drops it.
        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
        }

# Load the training and evaluation datasets
train_file_path = "/content/drive/MyDrive/AI Experiments/SQL/train.jsonl"
eval_file_path = "/content/drive/MyDrive/AI Experiments/SQL/test.jsonl"
train_dataset = SQLDataset(file_path=train_file_path, tokenizer=tokenizer)
eval_dataset = SQLDataset(file_path=eval_file_path, tokenizer=tokenizer)

# Set up the data collator (mlm=False selects causal-LM style batches)
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Set up the training arguments
# Effective batch size is 1 sample x 16 accumulation steps per device.
# Evaluation and checkpointing both happen every 200 steps so that
# load_best_model_at_end can pair each checkpoint with an evaluation.
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/AI Experiments/Models",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,
    num_train_epochs=3,
    learning_rate=1e-4,
    weight_decay=0.01,
    logging_steps=10,
    evaluation_strategy="steps",
    save_strategy="steps",
    eval_steps=200,
    save_steps=200,
    save_total_limit=3,
    load_best_model_at_end=True,
    report_to="wandb",
    # bf16 mixed precision: the recorded run of this notebook, which set no
    # mixed-precision flag, diverged (loss ~9.9e10 at step 10, then 0.0).
    # NOTE(review): requires an Ampere-or-newer GPU -- confirm the runtime.
    bf16=True,
)

# Set up the trainer
trainer = Trainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    args=training_args,
    data_collator=collator,
)

try:
    # Start training
    trainer.train()

    # Save the fine-tuned model
    trainer.save_model("/content/drive/MyDrive/AI Experiments/Models")
finally:
    # Finish the Weights and Biases run even if training or saving raised,
    # so the run is not left open; the exception still propagates afterwards.
    wandb.finish()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Downloading model from Hugging Face Hub...


You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/epoch,▁
train/global_step,▁
train/learning_rate,▁
train/loss,▁

0,1
train/epoch,0.00974
train/global_step,10.0
train/grad_norm,
train/learning_rate,0.0001
train/loss,98582036480.0


Step,Training Loss,Validation Loss
200,0.0,
400,0.0,
