In [1]:
pip install numpy==1.23.1

Collecting numpy==1.23.1
  Downloading numpy-1.23.1-cp39-cp39-win_amd64.whl.metadata (2.2 kB)
Downloading numpy-1.23.1-cp39-cp39-win_amd64.whl (14.7 MB)
   ---------------------------------------- 0.0/14.7 MB ? eta -:--:--
   - -------------------------------------- 0.5/14.7 MB 3.3 MB/s eta 0:00:05
   ----- ---------------------------------- 2.1/14.7 MB 5.9 MB/s eta 0:00:03
   ---------- ----------------------------- 3.9/14.7 MB 7.1 MB/s eta 0:00:02
   ------------ --------------------------- 4.7/14.7 MB 6.5 MB/s eta 0:00:02
   --------------- ------------------------ 5.5/14.7 MB 5.9 MB/s eta 0:00:02
   ------------------- -------------------- 7.1/14.7 MB 5.8 MB/s eta 0:00:02
   ---------------------- ----------------- 8.4/14.7 MB 5.8 MB/s eta 0:00:02
   ---------------------------- ----------- 10.5/14.7 MB 6.3 MB/s eta 0:00:01
   -------------------------------- ------- 12.1/14.7 MB 6.4 MB/s eta 0:00:01
   ------------------------------------- -- 13.6/14.7 MB 6.5 MB/s eta 0:00:01
   -

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.10.0 requires libclang>=13.0.0, which is not installed.
tensorflow 2.10.0 requires tensorflow-io-gcs-filesystem>=0.23.1, which is not installed.
tensorboard 2.10.0 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.3 which is incompatible.
tensorflow 2.10.0 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.3 which is incompatible.


In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from datasets import load_dataset
from transformers import ViTForImageClassification, TrainingArguments, Trainer
from transformers import ViTImageProcessor
from peft import get_peft_model, LoraConfig
import evaluate
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Paths and Constants
DATASET_DIR = "dataset"
MODEL_NAME = "google/vit-base-patch16-224-in21k"
OUTPUT_DIR = "./vit-weed-pest-model-lora"

# 1. Load dataset
dataset = load_dataset("imagefolder", data_dir=DATASET_DIR)
split_dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
train_ds = split_dataset["train"]
test_ds = split_dataset["test"]

# 2. Label mappings
labels = train_ds.features["label"].names
id2label = {i: label for i, label in enumerate(labels)}
label2id = {label: i for i, label in id2label.items()}
num_labels = len(labels)

# 3. Image Processor & Transforms
image_processor = ViTImageProcessor.from_pretrained(MODEL_NAME)

train_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.3, contrast=0.3),
    transforms.GaussianBlur(kernel_size=3),
    transforms.ToTensor(),
    transforms.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
])

test_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
])

# Custom Dataset wrapper to apply transforms
class HFDataset(Dataset):
    def __init__(self, hf_dataset, transform=None):
        self.dataset = hf_dataset
        self.transform = transform

    def __getitem__(self, idx):
        item = self.dataset[idx]
        image = item["image"]  # PIL Image
        label = item["label"]
        if self.transform:
            image = self.transform(image)
        return {"pixel_values": image, "label": label}

    def __len__(self):
        return len(self.dataset)

# Wrap datasets with transforms
train_dataset = HFDataset(train_ds, transform=train_transforms)
test_dataset = HFDataset(test_ds, transform=test_transforms)


# 5. Load base model
base_model = ViTForImageClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id
)

# 6. LoRA config
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["query", "value"],
    lora_dropout=0.1,
    bias="none"
)

model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()

# 7. Accuracy metric
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    accuracy = metric.compute(predictions=preds, references=labels)["accuracy"]
    return {"accuracy": accuracy}

# 8. Data collator
def collate_fn(batch):
    pixel_values = torch.stack([item["pixel_values"] for item in batch])
    labels = torch.tensor([item["label"] for item in batch])
    return {"pixel_values": pixel_values, "labels": labels}

# 9. Training Arguments
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    do_train=True,
    do_eval=True,
    num_train_epochs=2,
    learning_rate=2e-4,
    weight_decay=0.01,
    warmup_ratio=0.1,
    fp16=True,
    logging_dir="./logs",
    save_strategy="epoch",
    logging_steps=10,
    evaluation_strategy="epoch"
)

# 10. Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
)

# 11. Train
trainer.train()

# 12. Predict manually with transformed test dataset
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.eval()
model.to(device)

dataloader = DataLoader(test_dataset, batch_size=32)
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in dataloader:
        inputs = batch["pixel_values"].to(device)
        labels = batch["label"]
        outputs = model(pixel_values=inputs)
        preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels)

y_pred = np.array(all_preds)
y_true = np.array(all_labels)

# 13. Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[id2label[i] for i in range(num_labels)])
fig, ax = plt.subplots(figsize=(10, 8))
disp.plot(ax=ax, cmap='Blues', xticks_rotation=45)
plt.title("Confusion Matrix on Test Dataset")
plt.tight_layout()
plt.show()

# 14. Save model
model_path = os.path.join(OUTPUT_DIR, "final")
trainer.save_model(model_path)
print(f"✅ LoRA Model saved to: {model_path}")

Resolving data files:   0%|          | 0/20112 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/5014 [00:00<?, ?it/s]

TypeError: HFDataset() takes no arguments

In [29]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from datasets import load_dataset
from transformers import ViTForImageClassification, TrainingArguments, Trainer
from transformers import ViTImageProcessor
from peft import get_peft_model, LoraConfig
import evaluate
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


# Paths and Constants
DATASET_DIR = "dataset"
MODEL_NAME = "google/vit-base-patch16-224-in21k"
OUTPUT_DIR = "./vit-weed-pest-model-lora"

# 1. Load dataset
dataset = load_dataset("imagefolder", data_dir=DATASET_DIR)
split_dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
train_ds = split_dataset["train"]
test_ds = split_dataset["test"]

# 2. Label mappings
labels = train_ds.features["label"].names
id2label = {i: label for i, label in enumerate(labels)}
label2id = {label: i for i, label in id2label.items()}
num_labels = len(labels)

# 3. Image Processor & Transforms
image_processor = ViTImageProcessor.from_pretrained(MODEL_NAME)

train_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.3, contrast=0.3),
    transforms.GaussianBlur(kernel_size=3),
    transforms.ToTensor(),
    transforms.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
])

test_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
])

# 4. Dataset Wrapper
class HFDataset(Dataset):
    def __init__(self, hf_dataset, transform=None):
        self.dataset = hf_dataset
        self.transform = transform

    def __getitem__(self, idx):
        item = self.dataset[idx]
        image = item["image"]
        label = item["label"]
        if self.transform:
            image = self.transform(image)
        return {"pixel_values": image, "label": label}

    def __len__(self):
        return len(self.dataset)

train_dataset = HFDataset(train_ds, transform=train_transforms)
test_dataset = HFDataset(test_ds, transform=test_transforms)

# 5. Load base model
base_model = ViTForImageClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id
)

# 6. LoRA config
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["query", "value"],
    lora_dropout=0.1,
    bias="none"
)

model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()


# Load accuracy metric
metric = evaluate.load("accuracy")

# Metric computation
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return accuracy.compute(predictions=preds, references=labels)

# 8. Data collator
def collate_fn(batch):
    pixel_values = torch.stack([item["pixel_values"] for item in batch])
    labels = torch.tensor([item["label"] for item in batch])
    return {"pixel_values": pixel_values, "labels": labels}


#9.Training Arguments
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    eval_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=2,
    learning_rate=2e-4,
    weight_decay=0.01,
    warmup_ratio=0.1,
    fp16=True,
    logging_dir="./logs",
    logging_steps=10,
    logging_first_step=True,
    log_level="info",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    save_total_limit=2,
    
    
)

    


# 10. Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
    tokenizer=image_processor,
    
)

# 11. Train
trainer.train()



# Final evaluation
metrics = trainer.evaluate()
print(f"📊 Final Accuracy: {metrics['eval_accuracy']*100:.2f}%")


# 13. Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[id2label[i] for i in range(num_labels)])
fig, ax = plt.subplots(figsize=(10, 8))
disp.plot(ax=ax, cmap='Blues', xticks_rotation=45)
plt.title("Confusion Matrix on Test Dataset")
plt.tight_layout()
plt.show()

# 14. Save model
model_path = os.path.join(OUTPUT_DIR, "final")
trainer.save_model(model_path)
print(f"✅ LoRA Model saved to: {model_path}")

Resolving data files:   0%|          | 0/20112 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/5014 [00:00<?, ?it/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 294,912 || all params: 86,110,486 || trainable%: 0.3425


  trainer = Trainer(
Using auto half precision backend
No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
***** Running training *****
  Num examples = 16,089
  Num Epochs = 2
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1,006
  Number of trainable parameters = 294,912


Epoch,Training Loss,Validation Loss
1,1.9499,No log



***** Running Evaluation *****
  Num examples = 4023
  Batch size = 32


KeyError: "The `metric_for_best_model` training argument is set to 'eval_accuracy', which is not found in the evaluation metrics. The available evaluation metrics are: []. Consider changing the `metric_for_best_model` via the TrainingArguments."

In [6]:
# 11.5 Evaluate after training and print results
evaluation_results = trainer.evaluate()
print(f"\n📊 Evaluation Results:")
print(f"Validation Loss: {eval_results['eval_loss']:.4f}")
print(f"Validation Accuracy: {eval_results['eval_accuracy']:.4f}")


NameError: name 'trainer' is not defined

In [12]:
predictions = trainer.predict(test_dataset)
print(predictions.metrics)  # ✅ Should include "test_accuracy"


{'test_runtime': 59.2231, 'test_samples_per_second': 67.93, 'test_steps_per_second': 2.128}
