In [1]:
import torch
import torch.nn as nn

from pathlib import Path


In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Fix PyTorch's global RNG seed so repeated runs start from the same state.
torch.manual_seed(42)
# Seed the CUDA generator explicitly as well (used by ops that run on the GPU).
torch.cuda.manual_seed(42)


cuda


In [3]:
# Training hyperparameters, kept together so they are easy to find and tune.

# Data loading: samples per batch and DataLoader worker processes.
BATCH_SIZE = 128
NUM_WORKERS = 8

# Optimisation: number of epochs handed to engine.train and the Adam step size.
NUM_EPOCHS = 10
LEARNING_RATE = 1e-3


In [4]:
# Both splits live under one data root, relative to the notebook's working directory.
DATA_DIR = Path("./data")
train_dir = DATA_DIR / "train"
test_dir = DATA_DIR / "test"

# Display the resolved paths as the cell output.
train_dir, test_dir


(WindowsPath('data/train'), WindowsPath('data/test'))

In [5]:
from torchvision.transforms import v2

# Per-channel statistics handed to v2.Normalize.
# NOTE(review): these look like the usual ImageNet mean/std — confirm that is intended.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing applied to every image, in this order.
preprocessing_steps = [
    v2.ToImage(),  # convert the loaded sample to a tensor image
    v2.Resize((224, 224)),  # fixed 224x224 spatial size
    v2.ToDtype(torch.float32, scale=True),  # cast to float32 with value scaling enabled
    v2.Normalize(mean=channel_mean, std=channel_std),
]
manual_transforms = v2.Compose(preprocessing_steps)


In [6]:
from torchvision import datasets

# Build one ImageFolder dataset per split; both use the same preprocessing pipeline.
train_data = datasets.ImageFolder(str(train_dir), transform=manual_transforms)
test_data = datasets.ImageFolder(str(test_dir), transform=manual_transforms)

# ImageFolder derives integer labels from the sub-directory names of each root.
# If the two splits do not contain exactly the same class folders, the same
# integer would mean different classes in train and test, so fail loudly here
# instead of producing misleading test metrics later.
if train_data.class_to_idx != test_data.class_to_idx:
    raise ValueError(
        "Train and test splits disagree on classes: "
        f"train={train_data.classes}, test={test_data.classes}"
    )

# The training split defines the label space the model is fitted on.
class_names = train_data.classes

# Display both datasets and the class list as the cell output.
train_data, test_data, class_names

(Dataset ImageFolder
     Number of datapoints: 1250
     Root location: data\train
     StandardTransform
 Transform: Compose(
                  ToImage()
                  Resize(size=[224, 224], interpolation=InterpolationMode.BILINEAR, antialias=True)
                  ToDtype(scale=True)
                  Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], inplace=False)
            ),
 Dataset ImageFolder
     Number of datapoints: 500
     Root location: data\test
     StandardTransform
 Transform: Compose(
                  ToImage()
                  Resize(size=[224, 224], interpolation=InterpolationMode.BILINEAR, antialias=True)
                  ToDtype(scale=True)
                  Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], inplace=False)
            ),
 ['downdog', 'goddess', 'plank', 'tree', 'warrior2'])

In [7]:
from torch.utils.data import DataLoader

# Settings shared by the train and test loaders.
loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

# Only the training split is shuffled; evaluation order does not matter.
train_dataloader = DataLoader(train_data, shuffle=True, **loader_kwargs)
test_dataloader = DataLoader(test_data, shuffle=False, **loader_kwargs)

# Display both loaders as the cell output.
train_dataloader, test_dataloader

(<torch.utils.data.dataloader.DataLoader at 0x20206122bc0>,
 <torch.utils.data.dataloader.DataLoader at 0x20206122da0>)

In [8]:
# NOTE: vig_ti_224_gelu is imported here but not used by this cell.
from model_vig import vig_ti_224_gelu
from model_mobile_vig import mobilevig_ti

# Size the classifier from the dataset: one output per entry in class_names.
num_classes = len(class_names)
model = mobilevig_ti(num_classes=num_classes)

In [9]:
# target = torch.ones(1, dtype=torch.long)
# target


In [10]:
# from model_vig import vig_ti_224_gelu
# from model_mobile_vig import mobilevig_ti

# mobile_model = mobilevig_ti(num_classes=len(class_names))
# vig_model = vig_ti_224_gelu(num_classes=len(class_names))

# img = torch.randn(1, 3, 224, 224)
# mobile_model.eval()
# vig_model.eval()
# with torch.inference_mode():
#     mobile_pred = mobile_model(img)
#     vig_pred = vig_model(img)

# print(f"mobile {mobile_pred.shape}")
# print(f"vig {vig_pred.shape}")

In [11]:
# loss_fn = torch.nn.CrossEntropyLoss()
# vig_loss = loss_fn(vig_pred, target)  # Assuming `target` is your ground truth labels
# mobile_loss = loss_fn(
#     mobile_pred, target
# )  # Assuming `target` is your ground truth labels

# print(f"vig loss: {vig_loss.item()}")
# print(f"mobile loss: {mobile_loss.item()}")


In [12]:
from torchinfo import summary

# Input shape reported in the table: (batch, channels, height, width).
summary_input_size = (BATCH_SIZE, 3, 224, 224)

# Columns to show per layer; use ["input_size"] alone for a narrower table.
summary_columns = ["input_size", "output_size", "num_params", "trainable"]

# Print a per-layer summary of the model using torchinfo.
summary(
    model=model,
    input_size=summary_input_size,  # the keyword is "input_size", not "input_shape"
    col_names=summary_columns,
    col_width=20,
    row_settings=["var_names"],
)


Layer (type (var_name))                            Input Shape          Output Shape         Param #              Trainable
MobileViG (MobileViG)                              [128, 3, 224, 224]   [128, 5]             --                   True
├─Stem (stem)                                      [128, 3, 224, 224]   [128, 42, 56, 56]    --                   True
│    └─Sequential (stem)                           [128, 3, 224, 224]   [128, 42, 56, 56]    --                   True
│    │    └─Conv2d (0)                             [128, 3, 224, 224]   [128, 21, 112, 112]  588                  True
│    │    └─BatchNorm2d (1)                        [128, 21, 112, 112]  [128, 21, 112, 112]  42                   True
│    │    └─GELU (2)                               [128, 21, 112, 112]  [128, 21, 112, 112]  --                   --
│    │    └─Conv2d (3)                             [128, 21, 112, 112]  [128, 42, 56, 56]    7,980                True
│    │    └─BatchNorm2d (4)                  

In [13]:
# Move the model to the training device *before* building the optimizer, so the
# optimizer is constructed over parameters that already live where training runs.
# nn.Module.to moves the parameters in place and returns the module itself, so a
# later model.to(device) on the same device is a no-op.
model.to(device)

# Define loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)


In [14]:
import engine
import torch.nn as nn
from timeit import default_timer as timer

# Echo the device string chosen earlier so the log shows where training runs.
print(device)

# Re-seed right before training so the run starts from a known RNG state.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Wall-clock timing brackets the whole engine.train call, including the
# model.to(...) transfer evaluated as part of its arguments.
start_time = timer()

# Build the torch.device once and use it for both the model and the engine.
target_device = torch.device(device)

# Run training; whatever engine.train returns is kept in `results`.
results = engine.train(
    model=model.to(target_device),
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=NUM_EPOCHS,
    device=target_device,
    use_progress_bar=True,
)

# Stop the clock and report the elapsed time.
end_time = timer()
print(f"[INFO] Total training time: {end_time-start_time:.3f} seconds")


cuda


  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | Train loss: 1.85696 | Test loss: 5.85790 | Train acc: 24.28% | Test acc: 21.73%
Epoch: 2 | Train loss: 1.53297 | Test loss: 1.76362 | Train acc: 35.09% | Test acc: 36.22%
Epoch: 3 | Train loss: 1.31567 | Test loss: 1.38042 | Train acc: 48.41% | Test acc: 45.36%
Epoch: 4 | Train loss: 1.08317 | Test loss: 1.35707 | Train acc: 57.45% | Test acc: 42.82%
Epoch: 5 | Train loss: 0.83059 | Test loss: 1.08740 | Train acc: 69.19% | Test acc: 57.85%
Epoch: 6 | Train loss: 0.54105 | Test loss: 0.98681 | Train acc: 82.54% | Test acc: 65.36%
Epoch: 7 | Train loss: 0.38272 | Test loss: 0.96972 | Train acc: 87.40% | Test acc: 68.28%
Epoch: 8 | Train loss: 0.25722 | Test loss: 1.24421 | Train acc: 91.61% | Test acc: 62.39%
Epoch: 9 | Train loss: 0.18873 | Test loss: 1.45020 | Train acc: 93.71% | Test acc: 61.61%
Epoch: 10 | Train loss: 0.21613 | Test loss: 1.43805 | Train acc: 93.65% | Test acc: 63.62%
[INFO] Total training time: 1556.623 seconds
