In [1]:
# James Bebarski
# Computer Vision
# Boat MNIST

In [2]:
import argparse
import copy
import json
import os
import random
from matplotlib.image import imread
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset

In [3]:
class Boats(Dataset):
    """Image dataset backed by a directory of files and a JSON label file.

    Every entry returned by ``os.listdir(root_dir)`` is treated as one sample
    (sorted by file name), and its label is looked up in the JSON mapping using
    the file name as the key.

    Args:
        root_dir: Directory containing the image files.
        transform: Optional callable applied to each loaded image.
        gt_json_path: Path to a JSON file mapping image file name -> label.
    """

    def __init__(self, root_dir, transform=None, gt_json_path=''):
        self.root_dir = root_dir
        self.transform = transform
        self.gt_json_path = gt_json_path
        # Use a context manager so the label file handle is closed right away
        # instead of being left for the garbage collector.
        with open(gt_json_path, 'r') as label_file:
            self.labels = json.load(label_file)
        self.image_list = sorted(os.listdir(root_dir))
        # Maps the integer sample index to the image file name.
        self.image_ids = dict(enumerate(self.image_list, start=0))

    def __len__(self):
        """Return the number of entries found in ``root_dir``."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        Raises:
            KeyError: If ``idx`` is not a valid sample index, or the file name
                has no entry in the label mapping.
        """
        img = self.load_image(idx)
        img_name = self.image_ids[idx]
        label = self.labels[img_name]
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def load_image(self, image_index):
        """Read the image for ``image_index`` from disk with ``imread``."""
        image_name = self.image_ids[image_index]
        path = os.path.join(self.root_dir, image_name)
        return imread(path)


class Net(nn.Module):
    """Four-stage CNN producing one sigmoid probability per input image.

    Each stage is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU -> 2x2 max-pool,
    with channel widths 3 -> 64 -> 128 -> 256 -> 512. The flattened features go
    through a 512-unit hidden layer with dropout and a single-unit output.

    ``fc1`` expects 512 * 6 * 12 input features, which is what four 2x2
    poolings leave from a 108x192 image (108 -> 54 -> 27 -> 13 -> 6 and
    192 -> 96 -> 48 -> 24 -> 12).
    """

    def __init__(self):
        super().__init__()
        widths = (3, 64, 128, 256, 512)
        # Register conv1/bn1 ... conv4/bn4 in the same order and under the same
        # attribute names, so state_dict keys are unchanged.
        for stage, (in_ch, out_ch) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"conv{stage}",
                    nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1))
            setattr(self, f"bn{stage}", nn.BatchNorm2d(out_ch))
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(512 * 6 * 12, 512)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(512, 1)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of sigmoid outputs for image batch ``x``."""
        for stage in range(1, 5):
            conv = getattr(self, f"conv{stage}")
            norm = getattr(self, f"bn{stage}")
            x = self.pool(F.relu(norm(conv(x))))
        features = torch.flatten(x, start_dim=1)
        hidden = self.dropout(F.relu(self.fc1(features)))
        return torch.sigmoid(self.fc2(hidden))
        

def train(log_interval, model, device, train_loader, optimizer, criterion, epoch, dry_run):
    """Run one training epoch over ``train_loader``.

    Args:
        log_interval: Print a progress line every ``log_interval`` batches.
        model: Module to optimise; switched to training mode first.
        device: Device the batches are moved to.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(output, target[:, None])``.
        epoch: Epoch number, used only in the progress line.
        dry_run: If true, stop right after the first progress line is printed.
    """
    model.train()
    for step, batch in enumerate(train_loader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device).float()

        optimizer.zero_grad()
        predictions = model(inputs)
        # Targets get a trailing dimension to line up with the model output.
        batch_loss = criterion(predictions, labels.unsqueeze(1))
        batch_loss.backward()
        optimizer.step()

        if step % log_interval != 0:
            continue

        samples_seen = step * len(inputs)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, len(train_loader.dataset),
            percent_done, batch_loss.item()))
        if dry_run:
            break


def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0

    misclassified_images = []
    misclassified_labels = []
    
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device).float()
            output = model(data)
            test_loss += criterion(output, torch.unsqueeze(target, 1)).item()
            pred = torch.round(output)
            correct += pred.eq(target.view_as(pred)).sum().item()

            incorrect_indices = (pred != target.view_as(pred)).nonzero(as_tuple=True)[0]
            misclassified_images.extend(data[incorrect_indices].cpu())
            misclassified_labels.extend(target[incorrect_indices].cpu())

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)
    
def count_parameters(model):
    """Return the total element count of ``model``'s trainable parameters."""
    trainable_total = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad=False) are excluded from the count.
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

def save_my_model(model, path):
    """Save ``model.state_dict()`` to ``path``, creating its directory if needed.

    Args:
        model: Module whose ``state_dict()`` is written.
        path: Destination file path. Missing parent directories are created.
    """
    # Create the directory the file actually goes into (not a fixed 'models'
    # folder), so any destination path works. exist_ok avoids the
    # check-then-create race.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    torch.save(model.state_dict(), path)
    

In [None]:
def main():
    """Train the boat classifier, keep the best validation weights, export ONNX.

    Steps: build the train/val datasets and loaders, train for ``epochs``
    epochs while evaluating after each one, checkpoint whenever validation
    accuracy improves, reload the best weights, and export them to
    ``models/ship_example.onnx``.

    The dataset location defaults to the original course path and can be
    overridden with the ``BOAT_MNIST_DIR`` environment variable.
    """
    # Author's note: the reported run used "28" (presumably 28 epochs rather
    # than the 50 configured below -- TODO confirm) and took about 13 minutes.

    batch_size = 64
    test_batch_size = 500
    epochs = 50
    learning_rate = .001
    no_cuda = False      # set True to force CPU even when a GPU is available
    dry_run = False
    seed = 42            # fixed so a run can be reproduced
    log_interval = 10
    save_model = False

    random.seed(seed)
    torch.manual_seed(seed)
    print(f"Seed: {seed}")

    # Only use CUDA when it is both allowed and actually present.
    use_cuda = (not no_cuda) and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Shuffle the training data on every device; validation order does not
    # affect the metrics, so it stays deterministic.
    train_kwargs = {'batch_size': batch_size, 'shuffle': True}
    val_kwargs = {'batch_size': test_batch_size, 'shuffle': False}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1,
                       'pin_memory': True}
        train_kwargs.update(cuda_kwargs)
        val_kwargs.update(cuda_kwargs)

    # Normalize takes per-channel (mean, std); these values are presumably
    # statistics computed from the training images -- TODO confirm.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.2404, 0.2967, 0.3563], [0.0547, 0.0527, 0.0477])
        ])

    # This was the path I used for my dataset; set BOAT_MNIST_DIR to point at
    # your own copy instead of editing the code.
    path_to_dataset = os.environ.get("BOAT_MNIST_DIR",
                                     "/courses/CS5330.202450/data/Boat-MNIST")
    labels_path = path_to_dataset + "/boat_mnist_labels_trainval.json"
    train_set = Boats(root_dir=path_to_dataset + "/train", transform=transform,
                      gt_json_path=labels_path)
    val_set = Boats(root_dir=path_to_dataset + "/val", transform=transform,
                    gt_json_path=labels_path)

    train_loader = torch.utils.data.DataLoader(train_set, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(val_set, **val_kwargs)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.BCELoss()

    total_params = count_parameters(model)
    print(f"Total Parameters: {total_params}")

    start_time = time.time()

    # Train and validate, remembering the weights with the best val accuracy.
    best_acc = 0
    best_model_wts = copy.deepcopy(model.state_dict())
    for epoch in range(1, epochs + 1):
        train(log_interval, model, device, train_loader, optimizer, criterion, epoch, dry_run)
        acc = test(model, device, test_loader, criterion)
        if acc > best_acc:
            best_acc = acc
            best_model_wts = copy.deepcopy(model.state_dict())

            save_my_model(model, "models/best_model.pth")
            print(f"New best model saved with accuracy: {best_acc}")

    end_time = time.time()
    total_time = end_time - start_time
    print(f"Total training and evaluation time: {total_time:.2f} seconds")

    model.load_state_dict(best_model_wts)
    print(f"Best accuracy (val): {best_acc}")

    # Querying the GPU name raises on CPU-only machines, so only do it when
    # CUDA is in use.
    if use_cuda:
        print(f"GPU Name: {torch.cuda.get_device_name(0)}")
        print(f"CUDA Version: {torch.version.cuda}")
    else:
        print("Running on CPU (CUDA disabled or unavailable)")

    if save_model:
        torch.save(model.state_dict(), "model.pth")

    # The export target directory may not exist if no checkpoint was written.
    os.makedirs("models", exist_ok=True)
    # Export in inference mode so dropout and batch-norm behave deterministically.
    model.eval()
    # 108x192 is the input size that matches the network's first linear layer.
    dummy_input = torch.randn(1, 3, 108, 192, device=device)
    input_names = ["img_1"]
    output_names = ["output1"]
    torch.onnx.export(model, dummy_input, "models/ship_example.onnx", input_names=input_names, output_names=output_names)

if __name__ == '__main__':
    main()

Total Parameters: 20428289

Test set: Average loss: 0.0083, Accuracy: 1408/1506 (93%)

New best model saved with accuracy: 93.49269588313413

Test set: Average loss: 0.0408, Accuracy: 1329/1506 (88%)


Test set: Average loss: 0.0028, Accuracy: 1458/1506 (97%)

New best model saved with accuracy: 96.81274900398407

Test set: Average loss: 0.0035, Accuracy: 1453/1506 (96%)


Test set: Average loss: 0.0010, Accuracy: 1485/1506 (99%)

New best model saved with accuracy: 98.60557768924303

Test set: Average loss: 0.0010, Accuracy: 1482/1506 (98%)

