In [None]:
!pip install -q -U transformers accelerate peft datasets bitsandbytes

In [None]:
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from peft import (
    LoraConfig,
    get_peft_model,
    TaskType,
    prepare_model_for_kbit_training
)

In [None]:
# Report whether CUDA is usable and, if so, which device and how much memory is free
has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")
if has_cuda:
    device_name = torch.cuda.get_device_name(0)
    print(f"GPU: {device_name}")
    # mem_get_info() yields a pair; the first entry is the free byte count
    free_bytes = torch.cuda.mem_get_info()[0]
    print(f"Free memory: {free_bytes/1024**3:.2f} GB")


In [None]:

# Hub identifier of the base checkpoint; this cell only records the name.
# The tokenizer and the 4-bit model are loaded from it in the following cell.
model_name = "mistralai/Mistral-7B-v0.1"

In [None]:
# Configure 4-bit quantization
# Weights are stored as NF4 with double quantization enabled; the compute dtype
# for the quantized layers is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse EOS as the padding token so fixed-length padding can be applied later.
tokenizer.pad_token = tokenizer.eos_token

# Mistral is implemented inside transformers itself, so no custom modelling code
# has to be fetched from the Hub. trust_remote_code is therefore left at its
# default (False), which prevents Python shipped with a repository from being
# executed on load.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

In [None]:
# Prepare model for training
# Passes the quantized model through PEFT's k-bit preparation helper and rebinds
# `model` to the returned object; it runs before the LoRA adapters are attached
# in the next cell.
# NOTE(review): the helper's exact steps (freezing base weights, dtype casts,
# gradient checkpointing) depend on the installed peft version — confirm in the PEFT docs.
model = prepare_model_for_kbit_training(model)

In [None]:
# Apply LoRA configuration
# target_modules has to name submodules that exist in the base model. Mistral's
# decoder layers use q_proj/k_proj/v_proj/o_proj for attention and
# gate_proj/up_proj/down_proj for the MLP. GPT-J style names such as
# out_proj, fc_in, fc_out or wte do not occur in this architecture and would
# match nothing, leaving only q/k/v adapted.
# The embedding table is intentionally not targeted: adapters are limited to the
# linear projections.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,               # rank of the low-rank update matrices
    lora_alpha=16,     # scaling numerator (effective scale = lora_alpha / r)
    lora_dropout=0.1,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    bias="none"
)
# Wrap the prepared base model with the adapters and report how many parameters train.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()


In [None]:
# Load & tokenize dataset with efficient processing
dataset = load_dataset("Abirate/english_quotes", split="train[:100]")  # Reduced dataset size

def preprocess(example):
    """Tokenize a batch of quotes and build causal-LM labels.

    Args:
        example: a batch dict as passed by ``Dataset.map(batched=True)``;
            ``example["quote"]`` is a list of strings.

    Returns:
        The tokenizer output (``input_ids``, ``attention_mask``) extended with
        ``labels``: a copy of ``input_ids`` in which every padded position is
        replaced by -100 so the loss ignores it.
    """
    # Plain Python lists are returned (no return_tensors): datasets stores the
    # columns in Arrow either way, and the Trainer's collator builds the tensors.
    tokenized_inputs = tokenizer(
        example["quote"],
        padding="max_length",
        truncation=True,
        max_length=64,  # Reduced max length
    )
    # The pad token is the EOS token, so padding cannot be recognised by token id.
    # The attention mask is the reliable marker: 0 means "padding".
    tokenized_inputs["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(
            tokenized_inputs["input_ids"], tokenized_inputs["attention_mask"]
        )
    ]
    return tokenized_inputs

tokenized_dataset = dataset.map(
    preprocess,
    batched=True,
    batch_size=8,
    remove_columns=dataset.column_names
)
# Fixed seed so the train/test partition is the same on every Restart & Run All.
split_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42)


In [None]:
# Configure training
# The Trainer cell below reads `training_args`; this cell defines it. (The dataset
# is already loaded, tokenized and split by the preceding cell, so nothing is
# repeated here.)
# The values are conservative starting points for a ~100-example run on a single
# GPU — tune them for real experiments.
training_args = TrainingArguments(
    output_dir="./mistral-7b-lora-quotes",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,           # matches the float16 compute dtype of the 4-bit config
    logging_steps=5,
    save_strategy="no",  # demo run: do not write intermediate checkpoints
    report_to="none",    # keep the run free of external experiment trackers
    seed=42,
)


In [None]:
# Build the Trainer from the train/test partitions and start fine-tuning.
# `training_args` has to exist in the kernel before this cell is executed.
train_split = split_dataset["train"]
eval_split = split_dataset["test"]
trainer = Trainer(model=model, args=training_args, train_dataset=train_split, eval_dataset=eval_split)
trainer.train()

In [None]:
# Test inference
input_text = "The secret of life is"

# Training leaves the model in train mode; switch to eval so LoRA dropout is
# disabled while generating.
model.eval()

# Place the prompt on the model's own device instead of assuming "cuda"
# (the model was loaded with device_map="auto").
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

# No gradients are needed for generation.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        pad_token_id=tokenizer.pad_token_id,  # explicit, since pad == EOS
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))