## Code for testing the functionality of the NN

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Use the %pip magic (not !pip) so the package is installed into the environment
# of the running kernel, and pin the version so a fresh run resolves the same
# build that the later pinned install cell in this notebook uses.
%pip install -q torchtext==0.18.0



In [3]:
# need to upload the .py files before this
import Layers, loaders

In [4]:
# Smoke test: push one random batch through a freshly built BCVNN and check
# that the output has one row per image and 101 columns.

device = "cuda" if torch.cuda.is_available() else "cpu"
model = Layers.BCVNN(image_channels=3, filter_dimension=3).to(device)

# Dummy RGB input: batch of 8 images, 3×256×256, allocated directly on the
# target device to avoid an extra host-to-device copy.
dummy = torch.randn(8, 3, 256, 256, device=device)

# Only the output shape matters here, so skip autograd bookkeeping.
with torch.no_grad():
    out = model(dummy)

print("Output shape:", out.shape)
# Expected: [8, 101] -- fail loudly instead of relying on a reader to compare
# the printed shape against a comment.
assert out.shape == (8, 101), f"Unexpected output shape {tuple(out.shape)}"

Output shape: torch.Size([8, 101])


## Validation with a synthetic, random dataset

In [5]:
# validation with synthetic dataset
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import Layers, loaders
from tqdm import tqdm

In [6]:
# Build a synthetic dataset for sanity-checking the training loop.
num_samples = 1000
num_classes = 101
image_size = 256
batch_size = 64

# Dedicated, seeded generator so the fake data is identical on every run
# without touching the global RNG state.
data_rng = torch.Generator().manual_seed(0)

# Fake RGB images (float32 in [0, 1)). torch.rand samples uniformly from that
# range; torch.randn would instead give unbounded standard-normal values,
# which contradicts the documented [0, 1] intent.
X = torch.rand(num_samples, 3, image_size, image_size, generator=data_rng)
# Random integer labels between 0 and num_classes-1
y = torch.randint(0, num_classes, (num_samples,), generator=data_rng)

# Wrap as TensorDataset for easy DataLoader batching
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [7]:
# Pick the device, then build the model, loss, and optimizer for the synthetic run
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Construct first, then move to the chosen device (Module.to returns the module).
model = Layers.BCVNN(image_channels=3, filter_dimension=3)
model = model.to(device)

optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [8]:
# Training loop (sanity check): two short epochs over the synthetic loader,
# asserting the output shape on every batch and reporting the running loss.
model.train()
num_batches = len(loader)
for epoch in range(2):  # a couple of epochs for sanity check
    total_loss = 0.0
    # enumerate() hides the loader's length from tqdm, so pass total= explicitly;
    # otherwise only a running counter is shown instead of a progress bar.
    for batch_idx, (images, labels) in tqdm(enumerate(loader), total=num_batches,
                                           desc="training batches"):
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)

        # Verify output shape
        assert outputs.shape == (images.size(0), num_classes), \
            f"Unexpected output shape {outputs.shape}"

        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        if (batch_idx + 1) % 5 == 0:
            # tqdm.write emits the message without breaking up the active bar,
            # which a bare print() does.
            tqdm.write(f"Epoch {epoch+1}, Batch {batch_idx+1}, Loss: {loss.item():.4f}")

    avg_loss = total_loss / num_batches
    print(f"Epoch {epoch+1} complete. Avg Loss: {avg_loss:.4f}")

training batches: 5it [00:03,  1.41it/s]

Epoch 1, Batch 5, Loss: 4.6144


training batches: 10it [00:06,  1.69it/s]

Epoch 1, Batch 10, Loss: 4.6091


training batches: 15it [00:09,  1.72it/s]

Epoch 1, Batch 15, Loss: 4.6101


training batches: 16it [00:09,  1.65it/s]


Epoch 1 complete. Avg Loss: 4.6156


training batches: 5it [00:02,  1.74it/s]

Epoch 2, Batch 5, Loss: 4.6128


training batches: 10it [00:05,  1.71it/s]

Epoch 2, Batch 10, Loss: 4.6147


training batches: 15it [00:08,  1.70it/s]

Epoch 2, Batch 15, Loss: 4.6062


training batches: 16it [00:09,  1.75it/s]

Epoch 2 complete. Avg Loss: 4.6135





In [9]:
# Evaluation sanity check: run a single batch in eval mode without gradients
# and show the first few predictions next to the true labels.
model.eval()
with torch.no_grad():
    images, labels = next(iter(loader))
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    # Predicted class = index of the largest logit in each row.
    preds = torch.argmax(outputs, dim=1)

# Reporting does not need the no_grad context, so it lives outside it.
print("\nValidation batch:")
print("Output shape:", outputs.shape)
print("Predictions:", preds[:10])
print("True labels:", labels[:10])


Validation batch:
Output shape: torch.Size([64, 101])
Predictions: tensor([15, 15, 15, 15, 15, 15, 15, 15, 15, 15], device='cuda:0')
True labels: tensor([ 9, 50, 46, 73,  5, 11, 82, 71, 55, 94], device='cuda:0')


## Now trying with Food-101 data

In [1]:
import torch
# Show which PyTorch build is installed in this kernel.
# NOTE(review): presumably used to choose matching pins for the install cell below -- confirm.
print(torch.__version__)

2.3.1+cu121


In [8]:
# Pinned CUDA 12.1 builds of torch / torchvision / torchtext from the PyTorch wheel index.
# %pip (rather than !pip) guarantees the install goes into the running kernel's environment.
%pip install torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchtext==0.18.0 --index-url https://download.pytorch.org/whl/cu121



Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting torch==2.3.1+cu121
  Downloading https://download.pytorch.org/whl/cu121/torch-2.3.1%2Bcu121-cp312-cp312-linux_x86_64.whl (780.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m780.9/780.9 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision==0.18.1+cu121
  Downloading https://download.pytorch.org/whl/cu121/torchvision-0.18.1%2Bcu121-cp312-cp312-linux_x86_64.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m128.3 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.3.1+cu121)
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m114.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.3.1+cu121)
 

In [2]:
# now trying with food 101 data
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import Layers, loaders
from tqdm import tqdm
import wandb



In [3]:
# Run configuration: number of epochs, learning rate, and compute device
epochs, lr = 4, 1e-4

# Default to CPU and upgrade to CUDA only when a GPU is actually available.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

In [4]:
# Build the Food-101 train and test DataLoaders via the project's `loaders` module.
# NOTE(review): the recorded output shows the archive being downloaded and extracted
# under ./data -- confirm in loaders.py whether later runs reuse that copy.
train_loader, test_loader = loaders.get_food101_dataloaders()

Downloading https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz to ./data/food-101.tar.gz


100%|██████████| 4996278331/4996278331 [03:52<00:00, 21478743.31it/s]


Extracting ./data/food-101.tar.gz to ./data


In [5]:
# Fresh BCVNN on the selected device, with cross-entropy loss and Adam at learning rate `lr`
model = Layers.BCVNN(image_channels=3, filter_dimension=3)
model = model.to(device)  # Module.to returns the module itself

optimizer = optim.Adam(params=model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [6]:
# set up wandb: start the run and record the hyperparameters used for it
wandb.init(project="hpml-final", name="food101-nn-validate")
wandb.config.update({
    "model_name": "custom-BCVNN",
    # Log the `lr` variable the optimizer was built with instead of repeating
    # the literal, so the recorded value cannot drift from the real one.
    # NOTE(review): batch_size / num_workers are not set anywhere in this notebook;
    # presumably they mirror the defaults inside loaders.get_food101_dataloaders -- confirm.
    "batch_size": 64, "lr": lr,
    "optimizer": "Adam", "num_workers": 4,
    "kernel_size": 3,
    "epochs": epochs, "compile_mode": False,
    "device": str(device)
})

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mlz2837[0m ([33mlz2837-columbia-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
# Training loop: for each epoch, run one optimisation pass over train_loader and
# one gradient-free evaluation pass over test_loader, then log the metrics to wandb.
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    train_correct = 0
    train_total = 0

    # training pass (tqdm reads the total from len(train_loader))
    for images, labels in tqdm(train_loader, desc="training batches"):
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accumulate train stats
        # criterion returns the mean over the batch, so weight it by the batch
        # size; otherwise a smaller final batch counts as much as a full one.
        n_batch = labels.size(0)
        total_loss += loss.item() * n_batch
        preds = outputs.argmax(dim=1)
        train_correct += (preds == labels).sum().item()
        train_total += n_batch

    # compute train averages (per sample; guarded against an empty loader)
    avg_train_loss = total_loss / train_total if train_total > 0 else 0.0
    train_acc = train_correct / train_total if train_total > 0 else 0.0

    # validation pass
    model.eval()
    test_loss_sum = 0.0
    test_correct = 0
    test_total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Same sample weighting as in the training pass.
            n_batch = labels.size(0)
            test_loss_sum += loss.item() * n_batch
            preds = outputs.argmax(dim=1)
            test_correct += (preds == labels).sum().item()
            test_total += n_batch

    avg_test_loss = test_loss_sum / test_total if test_total > 0 else 0.0
    test_acc = test_correct / test_total if test_total > 0 else 0.0

    # log to wandb (epochs are reported 1-based, and the same number is used as the step)
    wandb.log({
      "train/loss": avg_train_loss,
      "train/accuracy": train_acc,
      "test/loss": avg_test_loss,
      "test/accuracy": test_acc,
      "epoch": epoch + 1,
      "device": str(device)
    }, step=epoch + 1)




training batches: 100%|██████████| 1184/1184 [01:58<00:00, 10.02it/s]
training batches: 100%|██████████| 1184/1184 [02:00<00:00,  9.80it/s]
training batches: 100%|██████████| 1184/1184 [02:00<00:00,  9.83it/s]
training batches: 100%|██████████| 1184/1184 [02:01<00:00,  9.78it/s]


The cell below runs one extra epoch for testing only; it can be ignored.

In [8]:

# Extra test run: one more training + evaluation epoch on the already-trained
# model, printed locally and logged to wandb as the epoch after the main run.
# Derive the epoch/step number from `epochs` rather than hard-coding 5, so the
# log entry still lands after the main run if `epochs` is changed.
extra_epoch = epochs + 1

for epoch in range(1):
    model.train()
    total_loss = 0.0
    train_correct = 0
    train_total = 0

    # training pass (tqdm reads the total from len(train_loader))
    for images, labels in tqdm(train_loader, desc="training batches"):
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accumulate train stats
        # criterion returns the mean over the batch, so weight it by the batch
        # size; otherwise a smaller final batch counts as much as a full one.
        n_batch = labels.size(0)
        total_loss += loss.item() * n_batch
        preds = outputs.argmax(dim=1)
        train_correct += (preds == labels).sum().item()
        train_total += n_batch

    # compute train averages (per sample; guarded against an empty loader)
    avg_train_loss = total_loss / train_total if train_total > 0 else 0.0
    train_acc = train_correct / train_total if train_total > 0 else 0.0

    # validation pass
    model.eval()
    test_loss_sum = 0.0
    test_correct = 0
    test_total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Same sample weighting as in the training pass.
            n_batch = labels.size(0)
            test_loss_sum += loss.item() * n_batch
            preds = outputs.argmax(dim=1)
            test_correct += (preds == labels).sum().item()
            test_total += n_batch

    avg_test_loss = test_loss_sum / test_total if test_total > 0 else 0.0
    test_acc = test_correct / test_total if test_total > 0 else 0.0

    print(f"train/loss:{avg_train_loss} \n train/accuracy:{train_acc} \n test/loss:{avg_test_loss} \n test/accuracy:{test_acc}")

    # log to wandb
    wandb.log({
      "train/loss": avg_train_loss,
      "train/accuracy": train_acc,
      "test/loss": avg_test_loss,
      "test/accuracy": test_acc,
      "epoch": extra_epoch,
      "device": str(device)
    }, step=extra_epoch)

training batches: 100%|██████████| 1184/1184 [01:57<00:00, 10.05it/s]


train/loss:4.6151869280918225 
 train/accuracy:0.00965016501650165 
 test/loss:4.615145899374274 
 test/accuracy:0.009900990099009901


In [9]:
# Mark the wandb run as finished; the recorded output shows it printing the
# run history and summary tables.
wandb.finish()

0,1
epoch,▁▃▆█
test/accuracy,▁█▁▁
test/loss,█▄▄▁
train/accuracy,▃▁▇█
train/loss,█▃▂▁

0,1
device,cuda
epoch,4
test/accuracy,0.0099
test/loss,4.61515
train/accuracy,0.00965
train/loss,4.61519
