In [None]:
import argparse
import copy
import json
import os
import random
from matplotlib.image import imread

import torch#use: pip install torch, then restart the kernel
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms#pip install torchvision
from torch.utils.data import Dataset




In [None]:
class Boats(Dataset):
    """Map-style dataset pairing the files in an image folder with JSON labels.

    Samples are indexed by the position of their file name in the sorted
    directory listing of ``root_dir``; the label for a sample is looked up
    in the JSON mapping by that file name.

    Args:
        root_dir: Directory whose entries are treated as image files.
        transform: Optional callable applied to each loaded image.
        gt_json_path: Path to a JSON file mapping file name -> label.

    Raises:
        OSError: If ``root_dir`` or ``gt_json_path`` cannot be read.
        KeyError: From ``__getitem__`` when the index is unknown or the
            file name has no entry in the label mapping.
    """

    def __init__(self, root_dir, transform=None, gt_json_path=''):
        self.root_dir = root_dir
        self.transform = transform
        self.gt_json_path = gt_json_path
        # Use a context manager so the label file is closed deterministically
        # instead of leaking the handle until garbage collection.
        with open(gt_json_path, 'r') as label_file:
            self.labels = json.load(label_file)
        # Sorting makes the index -> file mapping independent of the
        # (unspecified) order returned by os.listdir.
        self.image_list = sorted(os.listdir(root_dir))
        self.image_ids = dict(enumerate(self.image_list, start=0))

    def __len__(self):
        """Return the number of entries found in ``root_dir``."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for index ``idx``, applying ``transform`` if set."""
        img = self.load_image(idx)
        img_name = self.image_ids[idx]
        label = self.labels[img_name]
        if self.transform:
            img = self.transform(img)
        sample = (img, label)
        return sample

    def load_image(self, image_index):
        """Read the image file at ``image_index`` from ``root_dir`` via ``imread``."""
        image_name = self.image_ids[image_index]
        path = os.path.join(self.root_dir, image_name)
        img = imread(path)
        return img

"""
Explanation of my NN architecture:
1. Added Batch Normalization (bn1, bn2) to stabilize and speed up training.
2. Increased filters in conv1 (32) and conv2 (64) for better feature extraction.
3. Used MaxPooling to downsample spatial dimensions and extract key features.
4. Introduced Dropout (0.5) before the final layer to prevent overfitting.
5. Adjusted fc1 input size to 64 * 27 * 48, matching the flattened tensor shape.
6. Applied Sigmoid activation in fc2 for binary classification output.
These changes enhance learning, reduce overfitting, and ensure compatibility with binary classification.
"""
class Net(nn.Module):  # TODO 9) custom NN architecture
    """Small CNN producing one sigmoid score per input image.

    Two conv -> batch-norm -> ReLU -> 2x2 max-pool stages are followed by a
    256-unit hidden layer, dropout, and a single sigmoid output. Both
    convolutions are 3x3 with padding 1, so only the pooling halves the
    spatial size; ``fc1`` therefore expects a 64 x 27 x 48 feature map,
    which corresponds to a 3 x 108 x 192 input.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: channel widths 3 -> 32 -> 64.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        # One pooling module is shared by both stages (it has no parameters).
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Classifier head. TODO Question 1) fc1 maps the flattened feature
        # map (64 * 27 * 48 values) to a 256-dimensional hidden representation.
        flat_features = 64 * 27 * 48
        self.fc1 = nn.Linear(flat_features, 256)
        # fc2 reduces the 256 hidden features to a single output unit.
        self.fc2 = nn.Linear(256, 1)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of sigmoid scores for image batch ``x``."""
        # Each stage: convolution, batch norm, ReLU, then 2x2 max pooling.
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            x = self.pool(F.relu(norm(conv(x))))

        # TODO Question 2) Collapse (batch, channels, height, width) into
        # (batch, features) so the linear layers can consume it.
        flat = torch.flatten(x, start_dim=1)
        # TODO Question 3) Hidden layer, then ReLU and dropout.
        hidden = self.dropout(F.relu(self.fc1(flat)))
        # Sigmoid squashes the single output unit into (0, 1).
        return torch.sigmoid(self.fc2(hidden))


def train(log_interval, model, device, train_loader, optimizer, criterion, epoch,dry_run):
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Example code: https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html

    Args:
        log_interval: Print a progress line every ``log_interval`` batches.
        model: Network to optimise; switched to training mode.
        device: Device the batches are moved to.
        train_loader: Iterable of ``(data, target)`` batches with a
            ``dataset`` attribute (used for the progress line).
        optimizer: Optimizer stepping the model parameters.
        criterion: Loss called as ``criterion(output, target[:, None])``.
        epoch: Epoch number, used only in the progress line.
        dry_run: If true, stop right after the first logged batch.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device).float()

        # TODO Question 4) Reset gradients so they do not accumulate across steps.
        optimizer.zero_grad()
        predictions = model(inputs)
        # TODO Question 5) Targets get a trailing dimension so their shape
        # matches the model output before the loss is computed.
        batch_loss = criterion(predictions, labels.unsqueeze(1))
        batch_loss.backward()
        optimizer.step()

        if step % log_interval != 0:
            continue

        samples_seen = step * len(inputs)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, len(train_loader.dataset),
            percent_done, batch_loss.item()))
        if dry_run:
            break


def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device).float()
            output = model(data)
            test_loss += criterion(output, torch.unsqueeze(target, 1)).item()  # sum up batch loss
            pred = torch.round(output)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)




In [None]:
def main():
    """Train the boat classifier, restore the best validation weights, export ONNX.

    Builds the train/val datasets and loaders, trains ``Net`` with SGD and
    MSE loss for ``epochs`` epochs, keeps the weights with the highest
    validation accuracy, optionally saves them to ``model.pth``, and writes
    ``ship_example.onnx``.

    The GPU is used only when requested *and* available, so the function
    also runs on CPU-only machines. Only the training loader is shuffled,
    on every device.
    """
    # Training settings #you can mess around with change these values!
    batch_size = 32
    test_batch_size = 1000
    epochs = 20
    learning_rate = 0.001
    use_gpu = True  # Request GPU training; falls back to CPU when CUDA is unavailable
    dry_run = False
    seed = random.randint(1,1000)#random seed. Set to constant if you want to train on the same data
    log_interval = 10#how many batches to wait before logging training status
    save_model = False 
    
    
    """
    #This is used if you want to run it as a script file.
    parser = argparse.ArgumentParser(description='PyTorch Ship Detection')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=14, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr', type=float, default=0.1, metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    """
    #torch.manual_seed(args.seed)
    torch.manual_seed(seed)
    # The seed is drawn at random, so report it to make a run repeatable.
    print(f"Random seed: {seed}")
    #use_cuda = not args.no_cuda and torch.cuda.is_available()
    # Selecting "cuda" without checking availability crashes on CPU-only hosts.
    use_cuda = use_gpu and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    #train_kwargs = {'batch_size': args.batch_size}
    #val_kwargs = {'batch_size': args.test_batch_size}
    # Shuffling is a property of training, not of the device: shuffle the
    # training data on CPU and GPU alike, and keep validation order fixed.
    train_kwargs = {'batch_size': batch_size, 'shuffle': True}
    val_kwargs = {'batch_size': test_batch_size, 'shuffle': False}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1,
                       'pin_memory': True}
        train_kwargs.update(cuda_kwargs)
        val_kwargs.update(cuda_kwargs)

    # Create transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        # This normalization is used on the test server
        transforms.Normalize([0.2404, 0.2967, 0.3563], [0.0547, 0.0527, 0.0477])
        ])

    # Create train and test set
    path_to_dataset = "/courses/CS5330.202510/data/Boat-MNIST"#global path to the data on Discovery
    train_set = Boats(root_dir=path_to_dataset + "/train", transform=transform,
                      gt_json_path=path_to_dataset + "/boat_mnist_labels_trainval.json")
    val_set = Boats(root_dir=path_to_dataset + "/val", transform=transform,
                    gt_json_path=path_to_dataset +"/boat_mnist_labels_trainval.json")

    # Create data loaders
    train_loader = torch.utils.data.DataLoader(train_set, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(val_set, **val_kwargs)

    # Create network, optimizer and loss
    model = Net().to(device)#TODO Question 6) # This initializes the Net model and moves it to the specified device (CPU or GPU).
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)#TODO Question 7) # This initializes the stochastic gradient descent (SGD) optimizer, specifying which parameters to update and the learning rate.
    criterion = nn.MSELoss()#TODO Question 8) # This sets the mean squared error (MSE) loss function, which is used to evaluate the difference between the predicted and true values.

    # Train and validate
    best_acc = 0
    best_model_wts = copy.deepcopy(model.state_dict())
    for epoch in range(1, epochs + 1):
        train(log_interval, model, device, train_loader, optimizer, criterion, epoch, dry_run)
        acc = test(model, device, test_loader, criterion)
        if acc > best_acc:
            best_acc = acc
            # Deep copy: state_dict() returns references to the live tensors.
            best_model_wts = copy.deepcopy(model.state_dict())

    # Load best model weights
    model.load_state_dict(best_model_wts)
    print(f"Best accuracy (val): {best_acc}")

    #if args.save_model:
    #    torch.save(model.state_dict(), "model.pth")
    if save_model:
        torch.save(model.state_dict(), "model.pth")

    # Export in inference mode (dropout off, batch-norm running statistics)
    # regardless of how many epochs ran.
    model.eval()

    # --- Do not touch -----
    # Save model as onnx file
    dummy_input = torch.randn(1, 3, 108, 192, device=device)
    input_names = ["img_1"]
    output_names = ["output1"]
    torch.onnx.export(model, dummy_input, "ship_example.onnx", input_names=input_names, output_names=output_names)
    # ----------------------


# Notebook entry point: executing this cell runs the full train/validate/export pipeline.
# When using this file as a script, guard the call with:
#if __name__ == '__main__':
main()