In [None]:
# Import Libraries
import torch
import torch.nn as nn
from transformers import ViTForImageClassification
from minlora import add_lora, get_lora_params

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained Vision Transformer. It is deliberately left on its
# loading device here and moved to `device` only after LoRA has been attached.
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    attn_implementation="sdpa",
    torch_dtype=torch.float32,
)

# Replace the classification head with a fresh 100-class linear layer
model.classifier = nn.Linear(model.classifier.in_features, 100)

# Add LoRA layers to the model
add_lora(model)

# Move everything to the target device in a single step, *after* add_lora.
# NOTE(review): the LoRA tensors created by add_lora are presumably allocated
# on the CPU regardless of where the wrapped weights live, so attaching LoRA to
# a model that already sits on CUDA risks a device mismatch. Moving the model
# afterwards guarantees base weights and LoRA weights share one device.
model.to(device)

# Freeze all parameters except LoRA
for param in model.parameters():
    param.requires_grad = False

# Enable only LoRA parameters for training
for param in get_lora_params(model):
    param.requires_grad = True

# NOTE(review): the replacement classifier above is randomly initialised and
# its base weights are frozen by the loop above. Confirm that training only
# the LoRA parameters (and not the new head itself) is intended.

# Save model setup for next notebook. Only tensors are stored; requires_grad
# flags are not part of a state_dict and must be re-applied after loading.
torch.save(model.state_dict(), "vit_lora_initialized.pth")
print("Model setup complete and saved.")


In [4]:
# Import Libraries
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder

# Dataset setup: resize the shorter side to 224, take the central 224x224
# crop, convert to a tensor and normalise every channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

dataset_path = '/mnt/c/Users/kdtar/Kasun_stuff/My_datasets/mini_imgenet'
dataset = ImageFolder(root=dataset_path, transform=transform)

# Fixed seed so that both the 10% sample and the train/validation split are
# identical on every run; without it each execution picks different images.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

# Split dataset into training and validation subsets
total_size = len(dataset)
subset_size = int(0.1 * total_size)  # Use 10% of the total dataset
train_size = int(0.8 * subset_size)  # 80% of the subset for training
val_size = subset_size - train_size  # remainder for validation

# First draw the working subset (the rest is discarded), then split it.
subset_dataset, _ = random_split(
    dataset,
    [subset_size, total_size - subset_size],
    generator=split_generator,
)
train_dataset, val_dataset = random_split(
    subset_dataset,
    [train_size, val_size],
    generator=split_generator,
)

# Dataloaders: only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=24, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=24, shuffle=False, num_workers=4)

# Save Dataloaders. torch.save pickles the DataLoader objects themselves (not
# tensors), so the file must be read back with full unpickling enabled.
torch.save((train_loader, val_loader), "dataloaders.pth")
print("Dataloaders prepared and saved.")


Layer (type:depth-idx)                                                           Output Shape              Param #
ViTForImageClassification                                                        [1, 100]                  --
├─ViTModel: 1-1                                                                  [1, 197, 768]             --
│    └─ViTEmbeddings: 2-1                                                        [1, 197, 768]             152,064
│    │    └─ViTPatchEmbeddings: 3-1                                              [1, 196, 768]             (590,592)
│    │    └─Dropout: 3-2                                                         [1, 197, 768]             --
│    └─ViTEncoder: 2-2                                                           [1, 197, 768]             --
│    │    └─ModuleList: 3-3                                                      --                        85,718,016
│    └─LayerNorm: 2-3                                                            [1, 197, 768] 

In [None]:
# Import Libraries
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from minlora import get_lora_params

# Load model and data
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from transformers import ViTForImageClassification
from minlora import add_lora

# Rebuild exactly the architecture that produced "vit_lora_initialized.pth":
# a 100-class head plus LoRA layers. The stock checkpoint has a different
# classifier shape and no LoRA tensors, so loading the saved state_dict into
# it directly fails on mismatched keys and shapes.
model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224")
model.classifier = nn.Linear(model.classifier.in_features, 100)
add_lora(model)
model.load_state_dict(torch.load("vit_lora_initialized.pth", map_location="cpu"))
model.to(device)

# requires_grad flags are not stored in a state_dict, so re-apply the
# "train LoRA only" policy on the freshly built model.
for param in model.parameters():
    param.requires_grad = False
for param in get_lora_params(model):
    param.requires_grad = True

# "dataloaders.pth" holds pickled DataLoader objects rather than plain tensors,
# so full unpickling must be requested explicitly. Unpickling can execute
# arbitrary code: only load a file that you created yourself.
train_loader, val_loader = torch.load("dataloaders.pth", weights_only=False)

# Loss and optimizer setup: only the LoRA parameters are optimised
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(get_lora_params(model), lr=1e-3)

# Training function
def train_epoch(model, dataloader, criterion, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Module whose forward output exposes a ``.logits`` tensor.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(logits, labels)`` returning a scalar
            loss tensor. The running total re-weights each batch loss by the
            batch size, which assumes the criterion averages over the batch.
        optimizer: Optimizer stepping the trainable parameters of ``model``.

    Returns:
        tuple: ``(mean_loss, accuracy_percent)`` over all samples seen.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()

    # Follow the device the model actually lives on instead of relying on a
    # notebook-level ``device`` global being defined and in sync.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    running_loss, correct, total = 0.0, 0, 0
    for inputs, labels in dataloader:
        if target_device is not None:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
        optimizer.zero_grad()

        outputs = model(inputs).logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Undo the per-batch averaging so the epoch mean is sample-weighted.
        running_loss += loss.item() * inputs.size(0)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("train_epoch received an empty dataloader; no samples to train on.")

    return running_loss / total, 100 * correct / total

# Validation function
def validate(model, dataloader, criterion):
    """Evaluate ``model`` on ``dataloader`` without updating any weights.

    Args:
        model: Module whose forward output exposes a ``.logits`` tensor.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(logits, labels)`` returning a scalar
            loss tensor. The running total re-weights each batch loss by the
            batch size, which assumes the criterion averages over the batch.

    Returns:
        tuple: ``(mean_loss, accuracy_percent)`` over all samples seen.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()

    # Follow the device the model actually lives on instead of relying on a
    # notebook-level ``device`` global being defined and in sync.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    running_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, labels in dataloader:
            if target_device is not None:
                inputs, labels = inputs.to(target_device), labels.to(target_device)
            outputs = model(inputs).logits
            loss = criterion(outputs, labels)

            # Undo the per-batch averaging so the mean is sample-weighted.
            running_loss += loss.item() * inputs.size(0)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("validate received an empty dataloader; no samples to evaluate.")

    return running_loss / total, 100 * correct / total


In [None]:
# Import Libraries
from notebook3 import train_epoch, validate
import torch
from tqdm import tqdm

# Names used below that this cell's import section does not provide; without
# them the cell only runs when they happen to be left over in the kernel.
import torch.nn as nn
import torch.optim as optim
from minlora import add_lora, get_lora_params
from transformers import ViTForImageClassification

# Load Model & Data
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the architecture that produced "vit_lora_initialized.pth" (100-class
# head plus LoRA layers) so the saved keys and shapes line up, then load it.
model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224")
model.classifier = nn.Linear(model.classifier.in_features, 100)
add_lora(model)
model.load_state_dict(torch.load("vit_lora_initialized.pth", map_location="cpu"))
model.to(device)

# requires_grad flags are not stored in a state_dict: freeze everything and
# re-enable only the LoRA parameters.
for param in model.parameters():
    param.requires_grad = False
for param in get_lora_params(model):
    param.requires_grad = True

# "dataloaders.pth" holds pickled DataLoader objects, so full unpickling must
# be requested explicitly. Unpickling can execute arbitrary code: only load a
# file that you created yourself.
train_loader, val_loader = torch.load("dataloaders.pth", weights_only=False)

# Optimizer, Loss: only the LoRA parameters are optimised
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(get_lora_params(model), lr=1e-3)

# Training Loop: one training pass followed by one validation pass per epoch
num_epochs = 10
for epoch in range(num_epochs):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
    val_loss, val_acc = validate(model, val_loader, criterion)

    print(f"Epoch [{epoch+1}/{num_epochs}]")
    print(f"Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc:.2f}%")
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%")

print("Training complete.")
