In [1]:
import timm
import torch
import torch.nn as nn
import sys
import os
from torch.optim import Adam
from torch.utils.data import DataLoader
from tqdm import tqdm
from torchvision import datasets, transforms
sys.path.append(os.path.abspath(".."))
from data.ImageDataset import ImageDataset
from segmentation_models_pytorch import DeepLabV3Plus

In [2]:
class ViTBinaryClassifier(nn.Module):
    """Vision Transformer from ``timm`` with its head replaced by Linear + Sigmoid.

    The backbone is created through ``timm.create_model`` and its ``head``
    attribute is swapped for a small ``nn.Sequential`` so that the network
    emits sigmoid-activated scores instead of the original class logits.

    Args:
        model_name: Name of the timm architecture to instantiate.
        pretrained: Forwarded to ``timm.create_model``; whether to load
            pretrained weights.
        num_classes: Output width of the new linear layer (1 by default,
            i.e. a single score per input).
    """

    def __init__(self, model_name="vit_base_patch16_224", pretrained=True, num_classes=1):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)

        # The replacement head must accept whatever width the original head consumed.
        head_width = backbone.head.in_features
        backbone.head = nn.Sequential(
            nn.Linear(head_width, num_classes),
            nn.Sigmoid(),  # squashes each output into the 0..1 range
        )

        self.vit = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its sigmoid-activated output."""
        scores = self.vit(x)
        return scores


In [3]:
# Choose the device and move the model onto it *before* building the optimizer:
# the PyTorch optimizer docs ask for the model to live on its final device when
# the optimizer is constructed, so the optimizer only ever sees the on-device
# parameters.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

model = ViTBinaryClassifier().to(device)

# BCELoss expects probabilities; the classifier's head ends in a Sigmoid.
criterion = nn.BCELoss()

# Only parameters that still require gradients are handed to the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = Adam(trainable_params, lr=1e-4)

# Multiply the learning rate by 0.1 after every 7 calls to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

cuda


In [5]:
# Single place to repoint the notebook at a different copy of the data.
DATA_ROOT = "/home/ec2-user/CS230Project/data"
BATCH_SIZE = 64
NUM_WORKERS = 7

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # mean=0.5 / std=0.5 rescales each channel from [0, 1] to [-1, 1].
    # These are NOT the ImageNet channel statistics (0.485/0.456/0.406 etc.).
    # NOTE(review): presumably chosen to match the pretrained checkpoint's
    # preprocessing — confirm against timm's data config for the model.
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])


def _build_dataset(split):
    """Create the ImageDataset for ``split`` ("train" or "val") under DATA_ROOT."""
    return ImageDataset(
        annotations_path=f"{DATA_ROOT}/annotations/{split}.json",
        images_dir=f"{DATA_ROOT}/{split}",
        transform=transform,
    )


train_dataset = _build_dataset("train")
val_dataset = _build_dataset("val")

# Shuffle only the training data; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)

In [None]:
num_epochs = 10

CHECKPOINT_DIR = "/home/ec2-user/CS230Project/code/models/saved-weights"
# Create the output directory up front so a missing folder fails (or is fixed)
# immediately instead of after a full epoch of training.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)


def _run_epoch(model, loader, criterion, device, optimizer=None, desc=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    Args:
        model: Network producing one probability per sample.
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss applied to ``(outputs, labels)``.
        device: Device the batches are moved to.
        optimizer: If given, the pass trains the model (backward + step);
            if ``None``, the pass only evaluates, with gradients disabled.
        desc: If given, wrap the loader in a tqdm progress bar with this label.

    Returns:
        Tuple of the per-sample mean loss and the accuracy in percent, using
        a 0.5 decision threshold on the model outputs.

    Raises:
        ValueError: If the loader yields no samples.
    """
    is_train = optimizer is not None
    model.train(is_train)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    correct = 0
    total = 0

    batches = tqdm(loader, desc=desc) if desc is not None else loader
    with torch.set_grad_enabled(is_train):
        for images, labels in batches:
            images = images.to(device)
            # Labels become a float column so they line up with the model output.
            labels = labels.to(device).float().view(-1, 1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            # Weight each batch's mean loss by its size so a smaller final
            # batch does not skew the epoch average.
            loss_sum += loss.item() * batch_size
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += batch_size

    if total == 0:
        raise ValueError("loader produced no samples")

    return loss_sum / total, 100. * correct / total


for epoch in range(num_epochs):
    train_loss, train_accuracy = _run_epoch(
        model, train_loader, criterion, device,
        optimizer=optimizer,
        desc=f"Training Epoch {epoch+1}/{num_epochs}",
    )
    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.2f}%")

    val_loss, val_accuracy = _run_epoch(model, val_loader, criterion, device)
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.2f}%")

    # One scheduler step per epoch.
    scheduler.step()

    # NOTE(review): the "deeplabv3plus_" prefix does not match the model being
    # saved (a ViTBinaryClassifier); the name is kept unchanged so anything
    # that already loads these files keeps working. Consider renaming.
    checkpoint_path = f"{CHECKPOINT_DIR}/deeplabv3plus_{epoch+1}.pth"
    torch.save(model.state_dict(), checkpoint_path)
    print(f"Model saved to {checkpoint_path}")


Training Epoch 1/10: 100%|██████████| 690/690 [06:57<00:00,  1.65it/s]

Epoch 1, Train Loss: 0.5997, Accuracy: 66.60%





Validation Loss: 0.5125, Accuracy: 76.38%
Model saved to /home/ec2-user/CS230Project/code/models/saved-weights/deeplabv3plus_1.pth


Training Epoch 2/10: 100%|██████████| 690/690 [06:57<00:00,  1.65it/s]

Epoch 2, Train Loss: 0.4895, Accuracy: 75.86%





Validation Loss: 0.4485, Accuracy: 80.16%
Model saved to /home/ec2-user/CS230Project/code/models/saved-weights/deeplabv3plus_2.pth


Training Epoch 3/10: 100%|██████████| 690/690 [06:57<00:00,  1.65it/s]

Epoch 3, Train Loss: 0.3881, Accuracy: 82.67%





Validation Loss: 0.4166, Accuracy: 81.17%
Model saved to /home/ec2-user/CS230Project/code/models/saved-weights/deeplabv3plus_3.pth


Training Epoch 4/10: 100%|██████████| 690/690 [06:57<00:00,  1.65it/s]

Epoch 4, Train Loss: 0.3102, Accuracy: 87.14%





Validation Loss: 0.4704, Accuracy: 79.93%
Model saved to /home/ec2-user/CS230Project/code/models/saved-weights/deeplabv3plus_4.pth


Training Epoch 5/10: 100%|██████████| 690/690 [06:57<00:00,  1.65it/s]

Epoch 5, Train Loss: 0.2712, Accuracy: 89.03%





Validation Loss: 0.7424, Accuracy: 79.34%
Model saved to /home/ec2-user/CS230Project/code/models/saved-weights/deeplabv3plus_5.pth


Training Epoch 6/10: 100%|██████████| 690/690 [06:57<00:00,  1.65it/s]

Epoch 6, Train Loss: 0.2141, Accuracy: 91.75%





Validation Loss: 0.6442, Accuracy: 80.82%
Model saved to /home/ec2-user/CS230Project/code/models/saved-weights/deeplabv3plus_6.pth


Training Epoch 7/10:  53%|█████▎    | 365/690 [03:42<03:16,  1.66it/s]