In [None]:
!pip install datasets transformers torch peft --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.3/527.3 kB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.4/296.4 kB[0m [31m23.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.9/39.9 MB[0m [31m41.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 17.0.0 which is i

In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer
from datasets import DatasetDict, load_dataset
from peft import get_peft_model, LoraConfig, TaskType

# Probe for the optional bitsandbytes dependency; use_4bit records whether
# 4-bit quantized loading can be used further down.
try:
    import bitsandbytes as bnb
    from transformers import BitsAndBytesConfig
except ImportError:
    # Either import failing means quantized loading is unavailable
    use_4bit = False
    print("bitsandbytes not found. Falling back to 16-bit training.")
else:
    use_4bit = True
    print("Using 4-bit quantization with bitsandbytes")

# Checkpoint name shared by the tokenizer and the classifier below;
# you can change this to any other suitable model
model_name = "bert-base-uncased"

# Fetch the IMDB dataset
dataset = load_dataset("imdb")

# Tokenizer that matches the chosen checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Batch tokenizer used with dataset.map
def tokenize_function(examples):
    """Tokenize the "text" field of a batch of examples.

    Every sequence is padded to, and truncated at, a fixed length of 512
    tokens. Relies on the module-level ``tokenizer``.
    """
    encoding_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 512,
    }
    return tokenizer(examples["text"], **encoding_options)

# Tokenize only the labeled splits. The "unsupervised" split (50,000 unlabeled
# reviews) is never handed to the Trainer, so mapping over it would only add
# tokenization time and cache size.
labeled_dataset = DatasetDict({split: dataset[split] for split in ("train", "test")})
tokenized_dataset = labeled_dataset.map(tokenize_function, batched=True)

# Load the model
# Seed torch's RNG first: the classification head is not part of the
# bert-base-uncased checkpoint and is randomly initialized on load (as are the
# LoRA adapter weights added afterwards), so without a seed the pre-training
# baseline changes from run to run. 42 mirrors the default `seed` of
# TrainingArguments.
torch.manual_seed(42)

if use_4bit:
    # NF4 4-bit weights via bitsandbytes (only reachable when the import probe succeeded)
    bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4")
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2, quantization_config=bnb_config)
else:
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# LoRA hyperparameters for the sequence-classification adapter
lora_settings = dict(
    task_type=TaskType.SEQ_CLS,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)
peft_config = LoraConfig(**lora_settings)

# Wrap the base model with the adapters and report how many parameters will train
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# Define training arguments
# transformers renamed `evaluation_strategy` to `eval_strategy` and newer
# releases no longer accept the old keyword. The install cell does not pin a
# version, so pick whichever name the installed TrainingArguments declares.
eval_strategy_kwarg = (
    "eval_strategy" if hasattr(TrainingArguments, "eval_strategy") else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    eval_steps=500,
    save_steps=1000,
    learning_rate=2e-5,
    # Mixed precision needs a GPU; requesting it unconditionally makes
    # TrainingArguments fail at construction on a CPU-only runtime.
    fp16=torch.cuda.is_available(),
    # Evaluate every `eval_steps` optimizer steps
    **{eval_strategy_kwarg: "steps"},
)

# Metric callback handed to the Trainer
def compute_metrics(eval_pred):
    """Return classification accuracy for one evaluation pass.

    ``eval_pred`` unpacks into ``(predictions, labels)``. The predicted class
    is the argmax over the last axis of the predictions, and accuracy is the
    mean of the element-wise matches with the labels.
    """
    scores, gold = eval_pred
    predicted_classes = scores.argmax(axis=-1)
    return {"accuracy": (predicted_classes == gold).mean()}

# Build the Trainer: train on the tokenized "train" split, evaluate on "test"
train_split = tokenized_dataset["train"]
eval_split = tokenized_dataset["test"]

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
    compute_metrics=compute_metrics,
)

# Baseline: evaluate the model on the Trainer's eval_dataset before any
# fine-tuning, so the final accuracy has something to be compared against.
print("Evaluating before training...")
pre_train_metrics = trainer.evaluate()
print(f"Pre-training metrics: {pre_train_metrics}")

# Fine-tune the model using the settings in training_args
print("Starting training...")
trainer.train()

# Run the same evaluation again on the fine-tuned model; the result is
# compared with pre_train_metrics when the accuracy improvement is printed.
print("Evaluating after training...")
post_train_metrics = trainer.evaluate()
print(f"Post-training metrics: {post_train_metrics}")

# Persist the fine-tuned model and its tokenizer side by side in one directory
save_dir = "./fine_tuned_model"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

# Report how much accuracy changed relative to the pre-training baseline
accuracy_after = post_train_metrics['eval_accuracy']
accuracy_before = pre_train_metrics['eval_accuracy']
print(f"Accuracy improvement: {accuracy_after - accuracy_before}")

bitsandbytes not found. Falling back to 16-bit training.


Downloading readme:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 591,362 || all params: 110,075,140 || trainable%: 0.5372
Evaluating before training...


Pre-training metrics: {'eval_loss': 0.7862803339958191, 'eval_accuracy': 0.49996, 'eval_runtime': 57.7526, 'eval_samples_per_second': 432.881, 'eval_steps_per_second': 27.064}
Starting training...


Step,Training Loss,Validation Loss,Accuracy
500,0.6829,0.622682,0.66836
1000,0.3711,0.32251,0.8708
1500,0.2657,0.30646,0.88196
2000,0.2971,0.291829,0.8896
2500,0.3011,0.316427,0.88352
3000,0.2859,0.279742,0.89888
3500,0.2771,0.283108,0.8976
4000,0.2043,0.270664,0.90588
4500,0.3914,0.284527,0.90632
5000,0.3869,0.260939,0.90812


Evaluating after training...


Post-training metrics: {'eval_loss': 0.2581176459789276, 'eval_accuracy': 0.91296, 'eval_runtime': 56.1386, 'eval_samples_per_second': 445.326, 'eval_steps_per_second': 27.842, 'epoch': 3.0}
Accuracy improvement: 0.413
