Colab Link: https://colab.research.google.com/drive/1tU4Leou1F6XDVlBaLfBphWTL43-BPV_d

In [None]:
# Attach Google Drive to the Colab VM's filesystem (forcing a fresh mount).
import sys

from google.colab import drive

drive.mount('/content/drive', force_remount=True)

# TODO: Set this to the folder in your Drive that holds the unzipped
# project, e.g. 'cs231n/assignments/assignment3/'
FOLDERNAME = "cs231n-project"
assert FOLDERNAME is not None, "[!] Enter the foldername."

# With the Drive mounted, put that folder on the interpreter's import path
# so Python files stored in it can be imported from this notebook.
sys.path.append(f'/content/drive/My Drive/{FOLDERNAME}')

# Load Dataset and create Train/Test Split

In [None]:
# Folder on the mounted Drive that holds the "Taskent" images.
dataset_path = "/content/drive/My Drive/cs231n-project/dataset/Taskent"

# Indices 0..dataset_size-1 are split into a held-out test list and a training
# list made of every remaining index (these are later passed as image_indices).
dataset_size = 21
test_indices = [1, 2]
train_indices = [idx for idx in range(dataset_size) if idx not in test_indices]
# train_indices = [0,3,4]

In [None]:
# NOTE(review): the wildcard import is kept as-is because later cells may rely
# on names it brings in besides PetroSubImageDataset -- confirm before narrowing.
from image_dataset import *

# One dataset per split, both reading from the same folder and differing only
# in which image indices they select.
train_dataset = PetroSubImageDataset(
    dataset_path,
    image_indices=train_indices,
)
test_dataset = PetroSubImageDataset(
    dataset_path,
    image_indices=test_indices,
)

# Set device to use GPU if available

In [None]:
import torch

# Backend preference order: CUDA GPU, then Apple MPS, then plain CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using device: {device}")

In [None]:
!nvidia-smi

# Load DINO Model

In [None]:
# Load the smallest DINO model, ViT-S/8 (~22M parameters, 8x8 patches), through
# torch.hub.
dino_model = torch.hub.load('facebookresearch/dino:main', 'dino_vits8')

# The backbone is only used to extract features (the optimizer below is built
# from the classifier's parameters only), so put it in eval mode, move it to
# the selected device and freeze its weights.
# Assigning the result, rather than leaving the call as the cell's last
# expression, keeps the notebook from printing the entire module repr.
dino_model = dino_model.eval().to(device)
for param in dino_model.parameters():
    param.requires_grad_(False)

# Loop through DINO for all training data

In [None]:
from torch.utils.data import DataLoader
from clip_dino import DINOSegmentation, compute_iou
from dino_model import DINOImageClassifier
from tqdm import tqdm

In [None]:
from torchvision import transforms as T

# Per-channel mean / std used to standardise the input images
# (presumably the usual ImageNet statistics -- confirm against the DINO repo).
_NORMALIZE_MEAN = (0.485, 0.456, 0.406)
_NORMALIZE_STD = (0.229, 0.224, 0.225)

transform = T.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD)

# transform = T.Compose([
#     # T.Resize((480, 480)),
#     T.ToTensor(),
#     T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
# ])


In [None]:

# def get_dino_tokens(sample_image):
#     img_tensor = transform(sample_image)
#     # (3,480,480)
#     w, h = img_tensor.shape[1:]
#     img_tensor = img_tensor[None].to(device)
#     # (1,3,480,480)

#     with torch.no_grad():
#         attn = dino_model.get_last_selfattention(img_tensor)[0, :, 0, 1:]
#         # (6,3600)
#         nh, tokens = attn.shape
#         w_feat, h_feat = w // 8, h // 8
#         attn = attn.reshape(nh, w_feat, h_feat)
#         attn = torch.nn.functional.interpolate(attn.unsqueeze(0), scale_factor=8, mode="nearest")[0].cpu().numpy()
#         all_tokens = dino_model.get_intermediate_layers(img_tensor, n=1)[0]  # (1, 1+N, D)
#     # pprint(all_tokens)
#     return all_tokens.cpu()

def get_dino_tokens_batch(X_batch):
    """Run a batch of images through the DINO backbone and return its tokens.

    The previous version also called ``get_last_selfattention`` and upsampled
    the attention maps, but never used the result; that extra forward pass has
    been removed. The returned tokens are unchanged.

    Args:
        X_batch: batch of images with pixel values on a 0-255 scale.
            Assumed to be channels-first, (N, 3, H, W), because callers slice
            it out of a channel-stacked batch and no permute is applied here
            -- TODO confirm against the dataset class.

    Returns:
        CPU tensor holding the output of ``get_intermediate_layers(..., n=1)``,
        i.e. (N, 1 + num_patches, D) per the notes in the calling cells, with
        the CLS token at index 0 of dim 1.
    """
    # Scale to [0, 1] first; `transform` then applies the per-channel
    # normalisation one image at a time.
    X_scaled = X_batch.float() / 255.0
    X_transform = torch.stack([transform(x) for x in X_scaled]).to(device)

    # Feature extraction only: no autograd graph is needed for the backbone.
    with torch.no_grad():
        all_tokens = dino_model.get_intermediate_layers(X_transform, n=1)[0]  # (N, 1+patches, D)
    return all_tokens.cpu()

In [None]:
# How to convert from 480x480x1 to 1
def get_Y_labels(Y_batch):
    """Collapse each label mask in a batch to its most frequent class id.

    Args:
        Y_batch: non-negative integer tensor of per-pixel class ids, one mask
            per sample along dim 0, e.g. (N, 480, 480).

    Returns:
        int64 tensor of shape (N,) holding, for every sample, the class id
        that occurs most often in its mask.
    """
    dominant_classes = []
    for mask in Y_batch:
        # Histogram of class ids over all pixels of this sample; the index of
        # the largest bin is the dominant class.
        class_counts = torch.bincount(mask.flatten())
        dominant_classes.append(class_counts.argmax())

    return torch.stack(dominant_classes)

## test get_Y_labels

# train_indices = [i for i in range(10)] # 10 images
# train_dataset = PetroSubImageDataset_v2(dataset_path, image_indices=train_indices)

# train_dataloader = DataLoader(
#     train_dataset,
#     batch_size=7
# )
# batch = next(iter(train_dataloader))
# print(batch.shape)
# X_batch = batch[:,:-1]
# Y_batch = batch[:,-1]
# print(X_batch.shape,Y_batch.shape)
# print(get_Y_labels(Y_batch))

In [None]:
num_classes = 18
batch_size = 32
num_iters = 50
hidden_dim = 768  # NOTE(review): not read anywhere in this cell; kept in case other code uses it

# Training samples are reshuffled every epoch so consecutive batches are not
# always drawn in the same dataset order. Shuffling is not permitted for
# iterable-style datasets, hence the guard.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=not isinstance(train_dataset, torch.utils.data.IterableDataset),
)

# Evaluation order does not matter, so the test loader is left unshuffled.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
)

# NOTE(review): the classifier is built with its default arguments; confirm
# that its number of outputs matches `num_classes`.
model = DINOImageClassifier().to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss()

# Per-epoch history, consumed by the plotting cell.
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
train_ious = []
val_ious = []

# `epoch` (rather than `iter`) so the builtin `iter` is not shadowed.
for epoch in range(num_iters):

    ### TRAINING ###

    model.train()
    total_train_loss = 0.0
    correct_train = 0.0
    total_train = 0.0
    total_train_iou = 0.0
    pbar = tqdm(train_dataloader, desc=f"Train Epoch {epoch+1}/{num_iters}")
    for batch in pbar:

        # Along dim 1 the batch stacks the image channels followed by the
        # label mask: everything but the last slice is the image.
        X_batch = batch[:, :-1]
        Y_batch = batch[:, -1]

        # One class label per sample (most common pixel class in its mask).
        Y_labels = get_Y_labels(Y_batch)

        with torch.no_grad():
            X_tokens = get_dino_tokens_batch(X_batch)  # [N, 3601, D=384]
            X_tokens = X_tokens[:, 1:, :]  # drop the CLS token -> (N, 3600, D)

        X = X_tokens.to(device)
        Y = Y_labels.to(device)

        optimizer.zero_grad()
        logits = model(X)
        loss = loss_fn(logits, Y)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        total_train_loss += batch_loss

        preds = torch.argmax(logits, dim=1)
        correct_train += (preds == Y).sum().item()
        total_train += Y.size(0)

        # IoU here is computed on the per-sample predicted / target labels of
        # this batch.
        batch_iou = compute_iou(preds.cpu().numpy(), Y.cpu().numpy(), num_classes)
        total_train_iou += batch_iou

        pbar.set_postfix(loss=f"{batch_loss:.2f}", iou=f"{batch_iou:.2f}")

    # Loss and IoU are averaged over batches; accuracy over samples.
    avg_train_loss = total_train_loss / len(train_dataloader)
    avg_train_accuracy = correct_train / total_train
    avg_train_iou = total_train_iou / len(train_dataloader)

    train_losses.append(avg_train_loss)
    train_accuracies.append(avg_train_accuracy)
    train_ious.append(avg_train_iou)

    ### VALIDATION ###

    model.eval()
    total_val_loss = 0.0
    correct_val = 0.0
    total_val = 0.0
    total_val_iou = 0.0
    with torch.no_grad():

        pbar = tqdm(test_dataloader, desc=f"Val Epoch {epoch+1}/{num_iters}")

        for batch in pbar:
            X_batch = batch[:, :-1]
            Y_batch = batch[:, -1]
            Y_labels = get_Y_labels(Y_batch)

            X_tokens = get_dino_tokens_batch(X_batch)  # [N, 3601, D=384]
            X_tokens = X_tokens[:, 1:, :]  # drop the CLS token -> (N, 3600, D)
            X = X_tokens.to(device)
            Y = Y_labels.to(device)

            logits = model(X)
            loss = loss_fn(logits, Y)
            val_loss = loss.item()
            total_val_loss += val_loss

            preds = torch.argmax(logits, dim=1)
            correct_val += (preds == Y).sum().item()
            total_val += Y.size(0)

            batch_iou = compute_iou(preds.cpu().numpy(), Y.cpu().numpy(), num_classes)
            total_val_iou += batch_iou

            pbar.set_postfix(loss=f"{val_loss:.2f}", iou=f"{batch_iou:.2f}")

    avg_val_loss = total_val_loss / len(test_dataloader)
    avg_val_accuracy = correct_val / total_val
    avg_val_iou = total_val_iou / len(test_dataloader)

    val_losses.append(avg_val_loss)
    val_accuracies.append(avg_val_accuracy)
    val_ious.append(avg_val_iou)

    print()

    print(f"Epoch {epoch+1}: Train Loss={avg_train_loss:.2f}, Accuracy={avg_train_accuracy:.2f} // Val Loss={avg_val_loss:.2f}, Accuracy={avg_val_accuracy:.2f}")



In [None]:
import matplotlib.pyplot as plt

# The x-axis is derived from the recorded history instead of `num_iters`, so
# the plot still works if training was interrupted or `num_iters` was changed
# after the run.
epochs = range(1, len(train_losses) + 1)

fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# (metric name, training history, validation history) -- one panel each.
curves = [
    ("Loss", train_losses, val_losses),
    ("Accuracy", train_accuracies, val_accuracies),
    ("IoU", train_ious, val_ious),
]

for ax, (metric, train_values, val_values) in zip(axes, curves):
    # Each series gets its own x-range in case the two histories differ in
    # length (e.g. a run stopped between the train and validation passes).
    ax.plot(range(1, len(train_values) + 1), train_values, label=f"Train {metric}")
    ax.plot(range(1, len(val_values) + 1), val_values, label=f"Val {metric}")
    ax.set_xlabel("Epoch")
    ax.set_ylabel(metric)
    ax.set_title(f"{metric} Curve")
    ax.legend()

fig.tight_layout()
plt.show()

# Train for BIOTIC OR NOT

In [None]:
# Lets use Biotic/Abiotic only

# Abiotic
# -  1 - Micrite
# -  2 - Cement
# - 11 - Ooids
# - 17 - Other Abiotic

# Biotic
# -  3 - Peloid/Pellet
# -  4 - Protist
# -  5 - Microbial
# -  6 - Mollusk
# -  7 - Echinoderm
# -  9 - Coral
# - 10 - Algae
# - 12 - Biotic
# - 13 - Ostracod
# - 14 - Trilobite
# - 15 - Bryozoan
# - 16 - Sponge


# Scale Bar
# - 8 - Scale Bar


def get_Y_biotic_ornot(Y_batch):
    """Label each mask in a batch as containing biotic material (1) or not (0).

    A sample is labelled 1 when at least one of its pixels carries a biotic
    class id (3, 4, 5, 6, 7, 9, 10, 12, 13, 14, 15, 16). Abiotic ids
    (1, 2, 11, 17), the scale bar (8) and any other id do not count.

    Args:
        Y_batch: per-pixel class ids of shape (N, H, W), given as a torch
            tensor or anything ``np.asarray`` accepts.

    Returns:
        int64 tensor of shape (N,) with values in {0, 1}. int64 is the dtype
        documented for class-index targets of ``CrossEntropyLoss`` and matches
        what ``get_Y_labels`` returns (the labels were previously uint8).
    """
    # Imported locally so the function does not depend on `np` happening to be
    # in the notebook namespace (the notebook never imports numpy explicitly).
    import numpy as np

    biotic_ids = (3, 4, 5, 6, 7, 9, 10, 12, 13, 14, 15, 16)

    # Bring the masks to a host-side array; this also accepts tensors that
    # live on an accelerator.
    if isinstance(Y_batch, torch.Tensor):
        labels = Y_batch.detach().cpu().numpy()
    else:
        labels = np.asarray(Y_batch)

    # Only biotic membership decides the label, so test for it directly
    # instead of first building a 4-valued intermediate mask.
    any_biotic = np.isin(labels, biotic_ids).any(axis=(1, 2))

    return torch.from_numpy(any_biotic.astype(np.int64))

## test get_Y_labels

# train_indices = [i for i in range(20)] # N=20 images
# train_dataset = PetroSubImageDataset(dataset_path, image_indices=train_indices)

# train_dataloader = DataLoader(
#     train_dataset,
#     batch_size=20
# )
# for batch in train_dataloader:
#     print(batch.shape)
#     X_batch = batch[:,:-1]
#     Y_batch = batch[:,-1]
#     print(X_batch.shape,Y_batch.shape)
#     biotic_ornot = get_Y_biotic_ornot(Y_batch)
#     print(biotic_ornot.shape)
#     print(biotic_ornot)

In [None]:
num_classes = 2 # 18
batch_size = 32
num_iters = 50
hidden_dim = 768  # NOTE(review): not read anywhere in this cell; kept in case other code uses it

# Training samples are reshuffled every epoch so consecutive batches are not
# always drawn in the same dataset order. Shuffling is not permitted for
# iterable-style datasets, hence the guard.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=not isinstance(train_dataset, torch.utils.data.IterableDataset),
)

# Evaluation order does not matter, so the test loader is left unshuffled.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
)

# NOTE(review): the classifier is built with its default arguments; confirm
# that its number of outputs matches the 2-class (biotic / not) setup here.
model = DINOImageClassifier().to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss()

# Per-epoch history, consumed by the plotting cell.
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
train_ious = []
val_ious = []

# `epoch` (rather than `iter`) so the builtin `iter` is not shadowed.
for epoch in range(num_iters):

    ### TRAINING ###

    model.train()
    total_train_loss = 0.0
    correct_train = 0.0
    total_train = 0.0
    total_train_iou = 0.0
    pbar = tqdm(train_dataloader, desc=f"Train Epoch {epoch+1}/{num_iters}")
    for batch in pbar:

        # Along dim 1 the batch stacks the image channels followed by the
        # label mask: everything but the last slice is the image.
        X_batch = batch[:, :-1]
        Y_batch = batch[:, -1]

        # One binary label per sample: does the mask contain any biotic pixel?
        Y_labels = get_Y_biotic_ornot(Y_batch)

        with torch.no_grad():
            X_tokens = get_dino_tokens_batch(X_batch)  # [N, 3601, D=384]
            X_tokens = X_tokens[:, 1:, :]  # drop the CLS token -> (N, 3600, D)

        X = X_tokens.to(device)
        # Class-index targets for CrossEntropyLoss are documented as int64, so
        # cast regardless of the integer dtype the label helper returns.
        Y = Y_labels.long().to(device)

        optimizer.zero_grad()
        logits = model(X)
        loss = loss_fn(logits, Y)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        total_train_loss += batch_loss

        preds = torch.argmax(logits, dim=1)
        correct_train += (preds == Y).sum().item()
        total_train += Y.size(0)

        # IoU here is computed on the per-sample predicted / target labels of
        # this batch.
        batch_iou = compute_iou(preds.cpu().numpy(), Y.cpu().numpy(), num_classes)
        total_train_iou += batch_iou

        pbar.set_postfix(loss=f"{batch_loss:.2f}", iou=f"{batch_iou:.2f}")

    # Loss and IoU are averaged over batches; accuracy over samples.
    avg_train_loss = total_train_loss / len(train_dataloader)
    avg_train_accuracy = correct_train / total_train
    avg_train_iou = total_train_iou / len(train_dataloader)

    train_losses.append(avg_train_loss)
    train_accuracies.append(avg_train_accuracy)
    train_ious.append(avg_train_iou)

    ### VALIDATION ###

    model.eval()
    total_val_loss = 0.0
    correct_val = 0.0
    total_val = 0.0
    total_val_iou = 0.0
    with torch.no_grad():

        pbar = tqdm(test_dataloader, desc=f"Val Epoch {epoch+1}/{num_iters}")

        for batch in pbar:
            X_batch = batch[:, :-1]
            Y_batch = batch[:, -1]
            Y_labels = get_Y_biotic_ornot(Y_batch)

            X_tokens = get_dino_tokens_batch(X_batch)  # [N, 3601, D=384]
            X_tokens = X_tokens[:, 1:, :]  # drop the CLS token -> (N, 3600, D)
            X = X_tokens.to(device)
            Y = Y_labels.long().to(device)

            logits = model(X)
            loss = loss_fn(logits, Y)
            val_loss = loss.item()
            total_val_loss += val_loss

            preds = torch.argmax(logits, dim=1)
            correct_val += (preds == Y).sum().item()
            total_val += Y.size(0)

            batch_iou = compute_iou(preds.cpu().numpy(), Y.cpu().numpy(), num_classes)
            total_val_iou += batch_iou

            pbar.set_postfix(loss=f"{val_loss:.2f}", iou=f"{batch_iou:.2f}")

    avg_val_loss = total_val_loss / len(test_dataloader)
    avg_val_accuracy = correct_val / total_val
    avg_val_iou = total_val_iou / len(test_dataloader)

    val_losses.append(avg_val_loss)
    val_accuracies.append(avg_val_accuracy)
    val_ious.append(avg_val_iou)

    print()

    print(f"Epoch {epoch+1}: Train Loss={avg_train_loss:.2f}, Accuracy={avg_train_accuracy:.2f} // Val Loss={avg_val_loss:.2f}, Accuracy={avg_val_accuracy:.2f}")



In [None]:
import matplotlib.pyplot as plt

# The x-axis is derived from the recorded history instead of `num_iters`, so
# the plot still works if training was interrupted or `num_iters` was changed
# after the run.
epochs = range(1, len(train_losses) + 1)

fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# (metric name, training history, validation history) -- one panel each.
curves = [
    ("Loss", train_losses, val_losses),
    ("Accuracy", train_accuracies, val_accuracies),
    ("IoU", train_ious, val_ious),
]

for ax, (metric, train_values, val_values) in zip(axes, curves):
    # Each series gets its own x-range in case the two histories differ in
    # length (e.g. a run stopped between the train and validation passes).
    ax.plot(range(1, len(train_values) + 1), train_values, label=f"Train {metric}")
    ax.plot(range(1, len(val_values) + 1), val_values, label=f"Val {metric}")
    ax.set_xlabel("Epoch")
    ax.set_ylabel(metric)
    ax.set_title(f"{metric} Curve")
    ax.legend()

fig.tight_layout()
plt.show()